Skip to content

Failover routing between two ISP using Linux router

Имеем следующую сетевую конфигурацию: два разных провайдера, которые подаются на роутер отдельными вланами (VLAN 20 и VLAN 21).

Нужно, чтобы при падении одного из провайдеров роутинг автоматически переключался на другого провайдера.

timba@pbd06-cr01:~$ ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:11:2f:88:9e:2d brd ff:ff:ff:ff:ff:ff
    inet 10.1.0.1/28 brd 10.1.0.15 scope global eth0
    inet6 fe80::211:2fff:fe88:9e2d/64 scope link 
       valid_lft forever preferred_lft forever
3: eth0.20@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP 
    link/ether 00:11:2f:88:9e:2d brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.10/24 brd 192.168.1.255 scope global eth0.20
    inet6 fe80::211:2fff:fe88:9e2d/64 scope link 
       valid_lft forever preferred_lft forever
4: eth0.21@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP 
    link/ether 00:11:2f:88:9e:2d brd ff:ff:ff:ff:ff:ff
    inet 192.168.2.10/25 brd 192.168.2.127 scope global eth0.21
    inet6 fe80::211:2fff:fe88:9e2d/64 scope link 
       valid_lft forever preferred_lft forever

В интернетах нарыл следующий скрипт (немного подредактировал его под свои нужды):

#!/bin/bash
#Copyright Angsuman Chakraborty, Taragana. Permission is granted for personal, non-commercial use.
#The script may not be re-distributed in any form without written permission from Angsuman Chakraborty ( [email protected] ).
#The script may be modified for personal use.
#THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHOR ACCEPTS NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER.

# Status convention used by every flag below: 0 = success / up, 1 = failure / down.

# ---------------------------------------------------------------------------
# General probe settings
# ---------------------------------------------------------------------------

# Pause between two probe rounds, in seconds.
SLEEPTIME="10"

# Target that is pinged through each uplink. It has to answer ping and be
# permanently available, otherwise a healthy uplink is reported as down.
TESTIP="8.8.8.8"

# How long a single ping waits for a reply, in seconds.
TIMEOUT="2"

# Number of consecutive successful / failed probes required before the
# status of a connection is changed.
SUCCESSREPEATCOUNT="4"
FAILUREREPEATCOUNT="1"

# ---------------------------------------------------------------------------
# First provider
# ---------------------------------------------------------------------------
NAME1="ISP1"         # Provider name used in messages; pick your own.
EXTIF1="eth0.20"     # External interface.
IP1="192.168.1.10"   # Address of the external interface (NOT the gateway).
GW1="192.168.1.1"    # First-hop gateway, e.g. the provider's router.
W1="1"               # Relative route weight; keep it a low integer.

# ---------------------------------------------------------------------------
# Second provider
# ---------------------------------------------------------------------------
NAME2="ISP2"         # Provider name used in messages; pick your own.
EXTIF2="eth0.21"     # External interface.
IP2="192.168.2.10"   # Address of the external interface (NOT the gateway).
GW2="192.168.2.1"    # First-hop gateway, e.g. the provider's router.
W2="4"               # Relative route weight; keep it a low integer.

# Do not change anything below this line

# ---------------------------------------------------------------------------
# Internal state, first provider. Leave the initial values alone.
# ---------------------------------------------------------------------------
LLS1="1"     # Last link status: the overall verdict for the link. Starts as
             # "down" so that a routing change is forced right after start-up.
LPS1="1"     # Last ping status (result of the previous round).
CPS1="1"     # Current ping status (result of this round).
CLS1="1"     # Change link status: 0 when the link status has to be changed.
COUNT1="0"   # How many times in a row the new up/down status was observed.

# ---------------------------------------------------------------------------
# Internal state, second provider. Leave the initial values alone.
# ---------------------------------------------------------------------------
LLS2="1"     # Last link status; starts as "down", see LLS1.
LPS2="1"     # Last ping status.
CPS2="1"     # Current ping status.
CLS2="1"     # Change link status: 0 when the link status has to be changed.
COUNT2="0"   # How many times in a row the new up/down status was observed.

# Mail a one-line report to the local "adm" recipient.
# Arguments: $* - the words of the message (joined with single spaces).
send_report() {
  printf '%s\n' "$*" | mail -s "Failover watchdog report" adm
}

# Watchdog main loop. Every SLEEPTIME seconds it:
#   1. pings TESTIP once from each uplink address (IP1, IP2);
#   2. counts how many rounds in a row the ping result has disagreed with
#      the recorded link status (LLS1 / LLS2);
#   3. flips the link status once the count reaches SUCCESSREPEATCOUNT
#      (link currently down) or FAILUREREPEATCOUNT (link currently up);
#   4. if either link status flipped, rewrites the default route in the
#      main table to match the set of links that are up.
# All status flags use 0 = up/success, 1 = down/failure.
while : ; do
  # ----- Uplink 1: probe -----
  if ping -W "$TIMEOUT" -I "$IP1" -c 1 "$TESTIP" > /dev/null 2>&1; then
    CPS1=0
  else
    echo "$NAME1 Down"
    CPS1=1
  fi

  # ----- Uplink 1: streak counting -----
  if (( LPS1 != CPS1 )); then
    # The ping result changed: start a new streak.
    send_report "Ping status changed for $NAME1 from $LPS1 to $CPS1"
    COUNT1=1
  elif (( LPS1 != LLS1 )); then
    # Same result as last round and it still contradicts the link status.
    COUNT1=$((COUNT1 + 1))
  fi

  # ----- Uplink 1: link status transition -----
  # The required streak length depends on the direction of the change, so a
  # link that is up is only ever judged against FAILUREREPEATCOUNT.
  if (( LLS1 == 1 )); then
    threshold=$SUCCESSREPEATCOUNT
  else
    threshold=$FAILUREREPEATCOUNT
  fi

  if (( COUNT1 >= threshold )); then
    send_report "Uptime status will be changed for $NAME1 from $LLS1"
    CLS1=0
    COUNT1=0
    LLS1=$((1 - LLS1))   # toggle 0 <-> 1
  else
    CLS1=1
  fi

  LPS1=$CPS1

  # ----- Uplink 2: probe -----
  if ping -W "$TIMEOUT" -I "$IP2" -c 1 "$TESTIP" > /dev/null 2>&1; then
    CPS2=0
  else
    echo "$NAME2 Down"
    CPS2=1
  fi

  # ----- Uplink 2: streak counting -----
  if (( LPS2 != CPS2 )); then
    send_report "Ping status changed for $NAME2 from $LPS2 to $CPS2"
    COUNT2=1
  elif (( LPS2 != LLS2 )); then
    COUNT2=$((COUNT2 + 1))
  fi

  # ----- Uplink 2: link status transition -----
  if (( LLS2 == 1 )); then
    threshold=$SUCCESSREPEATCOUNT
  else
    threshold=$FAILUREREPEATCOUNT
  fi

  if (( COUNT2 >= threshold )); then
    send_report "Uptime status will be changed for $NAME2 from $LLS2"
    CLS2=0
    COUNT2=0
    LLS2=$((1 - LLS2))   # toggle 0 <-> 1
  else
    CLS2=1
  fi

  LPS2=$CPS2

  # ----- Routing update, only when at least one link status flipped -----
  # When both links are down the default route is deliberately left as is.
  if (( CLS1 == 0 || CLS2 == 0 )); then
    if (( LLS1 == 1 && LLS2 == 0 )); then
      send_report "Switching to $NAME2"
      ip route replace default scope global via "$GW2" dev "$EXTIF2"
    elif (( LLS1 == 0 && LLS2 == 1 )); then
      send_report "Switching to $NAME1"
      ip route replace default scope global via "$GW1" dev "$EXTIF1"
    elif (( LLS1 == 0 && LLS2 == 0 )); then
      send_report "Restoring default load balancing"
      # Multipath default route: traffic is shared according to W1 / W2.
      ip route replace default scope global \
        nexthop via "$GW1" dev "$EXTIF1" weight "$W1" \
        nexthop via "$GW2" dev "$EXTIF2" weight "$W2"
    fi
  fi

  sleep "$SLEEPTIME"
done

Скрипт надо запихнуть в inittab следующим образом:

echo "zz:2345:respawn:/usr/local/sbin/gwping.sh" >> /etc/inittab

Интерфейсы настроены так:

# This file describes the network interfaces available on your system
# and how to activate them. For more information, see interfaces(5).

# The loopback network interface
auto lo
iface lo inet loopback

# The primary network interface
allow-hotplug eth0
iface eth0 inet static
	address 10.1.0.1
	netmask 255.255.255.240

# Clients vlan
#auto eth0.102
#iface eth0.102 inet static
#        address 10.102.0.2
#        netmask 255.255.255.0
#        vlan-raw-device eth0

# Uplink to ISP1: VLAN interface eth0.20 on top of eth0, static 192.168.1.10/24.
# The post-up hooks populate the separate routing table "isp1" (a host route
# to the gateway 192.168.1.1 plus a default route through it) and add a policy
# rule that sends packets sourced from 192.168.1.10 to that table. The
# failover script pings with "-I 192.168.1.10", so its probe for this provider
# is matched by that rule instead of following the main default route.
# post-down removes the policy rule again.
auto eth0.20
iface eth0.20 inet static
        address 192.168.1.10
        netmask 255.255.255.0
        vlan-raw-device eth0
        post-up ip route add 192.168.1.1/32 dev eth0.20 src 192.168.1.10 table isp1
        post-up ip route add default via 192.168.1.1 table isp1
        post-up ip rule add from 192.168.1.10 table isp1
        post-down ip rule del from 192.168.1.10 table isp1

# Uplink to ISP2: VLAN interface eth0.21 on top of eth0, static 192.168.2.10/25.
# The post-up hooks populate the separate routing table "isp2" (a host route
# to the gateway 192.168.2.1 plus a default route through it) and add a policy
# rule that sends packets sourced from 192.168.2.10 to that table. The
# failover script pings with "-I 192.168.2.10", so its probe for this provider
# is matched by that rule instead of following the main default route.
# post-down removes the policy rule again.
auto eth0.21
iface eth0.21 inet static
        address 192.168.2.10
        netmask 255.255.255.128
        vlan-raw-device eth0
        post-up ip route add 192.168.2.1/32 dev eth0.21 src 192.168.2.10 table isp2
        post-up ip route add default via 192.168.2.1 table isp2
        post-up ip rule add from 192.168.2.10 table isp2
        post-down ip rule del from 192.168.2.10 table isp2

Идентификаторы таблиц роутинга имеют следующий вид:

#
# reserved values
#
255     local
254     main
253     default
0       unspec
#
# local
#
#1      inr.ruhep
200     isp1
201     isp2

С учётом того, что в некоторых случаях используются PPPoE-линки, которые не всегда корректно падают и поднимаются, скрипт надо допиливать под использование ifup/ifdown-скриптов.

Но с чистыми ethernet подключениями он работает наотличненько.

Leave a Reply

Your email address will not be published. Required fields are marked *

π