xref: /linux/net/ipv4/fib_semantics.c (revision 81f7bf6cbaca02c034b0393c51fc22b29cba20f7)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		IPv4 Forwarding Information Base: semantics.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
131da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
141da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
151da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
161da177e4SLinus Torvalds  */
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds #include <asm/uaccess.h>
191da177e4SLinus Torvalds #include <asm/system.h>
201da177e4SLinus Torvalds #include <linux/bitops.h>
211da177e4SLinus Torvalds #include <linux/types.h>
221da177e4SLinus Torvalds #include <linux/kernel.h>
231da177e4SLinus Torvalds #include <linux/jiffies.h>
241da177e4SLinus Torvalds #include <linux/mm.h>
251da177e4SLinus Torvalds #include <linux/string.h>
261da177e4SLinus Torvalds #include <linux/socket.h>
271da177e4SLinus Torvalds #include <linux/sockios.h>
281da177e4SLinus Torvalds #include <linux/errno.h>
291da177e4SLinus Torvalds #include <linux/in.h>
301da177e4SLinus Torvalds #include <linux/inet.h>
3114c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h>
321da177e4SLinus Torvalds #include <linux/netdevice.h>
331da177e4SLinus Torvalds #include <linux/if_arp.h>
341da177e4SLinus Torvalds #include <linux/proc_fs.h>
351da177e4SLinus Torvalds #include <linux/skbuff.h>
361da177e4SLinus Torvalds #include <linux/init.h>
371da177e4SLinus Torvalds 
3814c85021SArnaldo Carvalho de Melo #include <net/arp.h>
391da177e4SLinus Torvalds #include <net/ip.h>
401da177e4SLinus Torvalds #include <net/protocol.h>
411da177e4SLinus Torvalds #include <net/route.h>
421da177e4SLinus Torvalds #include <net/tcp.h>
431da177e4SLinus Torvalds #include <net/sock.h>
441da177e4SLinus Torvalds #include <net/ip_fib.h>
451da177e4SLinus Torvalds #include <net/ip_mp_alg.h>
46f21c7bc5SThomas Graf #include <net/netlink.h>
474e902c57SThomas Graf #include <net/nexthop.h>
481da177e4SLinus Torvalds 
491da177e4SLinus Torvalds #include "fib_lookup.h"
501da177e4SLinus Torvalds 
/* Debug trace hook: expands to nothing, so FSprintk(...) calls compile away. */
511da177e4SLinus Torvalds #define FSprintk(a...)
521da177e4SLinus Torvalds 
/*
 * fib_info_lock is taken by the functions in this file around every walk
 * or update of the hash tables declared below.
 */
53832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info records, bucket chosen by fib_info_hashfn(); linked via fib_hash. */
541da177e4SLinus Torvalds static struct hlist_head *fib_info_hash;
/* fib_info records, bucket chosen by fib_laddr_hashfn(fib_prefsrc); linked via fib_lhash. */
551da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash;
/* Bucket count of both tables above; the hash functions mask with (size - 1). */
561da177e4SLinus Torvalds static unsigned int fib_hash_size;
/* Counter decremented by free_fib_info() each time a record is freed. */
571da177e4SLinus Torvalds static unsigned int fib_info_cnt;
581da177e4SLinus Torvalds 
/* Fixed-size table of nexthops (linked via nh_hash), bucket chosen by
 * fib_devindex_hashfn() of the device ifindex. */
591da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8
601da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
611da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
621da177e4SLinus Torvalds 
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * two locals, `nhsel` (index) and `nh` (current nexthop: const in
 * for_nexthops, writable in change_nexthops), followed by a for-loop header.
 * The caller supplies the loop body and must close the scope with
 * endfor_nexthops(fi), which expands to the matching '}'.
 *
 * With CONFIG_IP_ROUTE_MULTIPATH the loop visits fi->fib_nhs entries of
 * fi->fib_nh; without it the loop runs exactly once on fi->fib_nh.
 */
631da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock);
661da177e4SLinus Torvalds 
671da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
681da177e4SLinus Torvalds for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
711da177e4SLinus Torvalds for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
721da177e4SLinus Torvalds 
731da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */
741da177e4SLinus Torvalds 
751da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */
761da177e4SLinus Torvalds 
771da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
781da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++)
791da177e4SLinus Torvalds 
801da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
811da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++)
821da177e4SLinus Torvalds 
831da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */
841da177e4SLinus Torvalds 
/* Closes the scope opened by for_nexthops()/change_nexthops(). */
851da177e4SLinus Torvalds #define endfor_nexthops(fi) }
861da177e4SLinus Torvalds 
871da177e4SLinus Torvalds 
889b5b5cffSArjan van de Ven static const struct
891da177e4SLinus Torvalds {
901da177e4SLinus Torvalds 	int	error;
911da177e4SLinus Torvalds 	u8	scope;
921da177e4SLinus Torvalds } fib_props[RTA_MAX + 1] = {
931da177e4SLinus Torvalds         {
941da177e4SLinus Torvalds 		.error	= 0,
951da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
961da177e4SLinus Torvalds 	},	/* RTN_UNSPEC */
971da177e4SLinus Torvalds 	{
981da177e4SLinus Torvalds 		.error	= 0,
991da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1001da177e4SLinus Torvalds 	},	/* RTN_UNICAST */
1011da177e4SLinus Torvalds 	{
1021da177e4SLinus Torvalds 		.error	= 0,
1031da177e4SLinus Torvalds 		.scope	= RT_SCOPE_HOST,
1041da177e4SLinus Torvalds 	},	/* RTN_LOCAL */
1051da177e4SLinus Torvalds 	{
1061da177e4SLinus Torvalds 		.error	= 0,
1071da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1081da177e4SLinus Torvalds 	},	/* RTN_BROADCAST */
1091da177e4SLinus Torvalds 	{
1101da177e4SLinus Torvalds 		.error	= 0,
1111da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1121da177e4SLinus Torvalds 	},	/* RTN_ANYCAST */
1131da177e4SLinus Torvalds 	{
1141da177e4SLinus Torvalds 		.error	= 0,
1151da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1161da177e4SLinus Torvalds 	},	/* RTN_MULTICAST */
1171da177e4SLinus Torvalds 	{
1181da177e4SLinus Torvalds 		.error	= -EINVAL,
1191da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1201da177e4SLinus Torvalds 	},	/* RTN_BLACKHOLE */
1211da177e4SLinus Torvalds 	{
1221da177e4SLinus Torvalds 		.error	= -EHOSTUNREACH,
1231da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1241da177e4SLinus Torvalds 	},	/* RTN_UNREACHABLE */
1251da177e4SLinus Torvalds 	{
1261da177e4SLinus Torvalds 		.error	= -EACCES,
1271da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1281da177e4SLinus Torvalds 	},	/* RTN_PROHIBIT */
1291da177e4SLinus Torvalds 	{
1301da177e4SLinus Torvalds 		.error	= -EAGAIN,
1311da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1321da177e4SLinus Torvalds 	},	/* RTN_THROW */
1331da177e4SLinus Torvalds 	{
1341da177e4SLinus Torvalds 		.error	= -EINVAL,
1351da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1361da177e4SLinus Torvalds 	},	/* RTN_NAT */
1371da177e4SLinus Torvalds 	{
1381da177e4SLinus Torvalds 		.error	= -EINVAL,
1391da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1401da177e4SLinus Torvalds 	},	/* RTN_XRESOLVE */
1411da177e4SLinus Torvalds };
1421da177e4SLinus Torvalds 
1431da177e4SLinus Torvalds 
1441da177e4SLinus Torvalds /* Release a nexthop info record */
1451da177e4SLinus Torvalds 
1461da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi)
1471da177e4SLinus Torvalds {
1481da177e4SLinus Torvalds 	if (fi->fib_dead == 0) {
1491da177e4SLinus Torvalds 		printk("Freeing alive fib_info %p\n", fi);
1501da177e4SLinus Torvalds 		return;
1511da177e4SLinus Torvalds 	}
1521da177e4SLinus Torvalds 	change_nexthops(fi) {
1531da177e4SLinus Torvalds 		if (nh->nh_dev)
1541da177e4SLinus Torvalds 			dev_put(nh->nh_dev);
1551da177e4SLinus Torvalds 		nh->nh_dev = NULL;
1561da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1571da177e4SLinus Torvalds 	fib_info_cnt--;
1581da177e4SLinus Torvalds 	kfree(fi);
1591da177e4SLinus Torvalds }
1601da177e4SLinus Torvalds 
1611da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi)
1621da177e4SLinus Torvalds {
163832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
1641da177e4SLinus Torvalds 	if (fi && --fi->fib_treeref == 0) {
1651da177e4SLinus Torvalds 		hlist_del(&fi->fib_hash);
1661da177e4SLinus Torvalds 		if (fi->fib_prefsrc)
1671da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
1681da177e4SLinus Torvalds 		change_nexthops(fi) {
1691da177e4SLinus Torvalds 			if (!nh->nh_dev)
1701da177e4SLinus Torvalds 				continue;
1711da177e4SLinus Torvalds 			hlist_del(&nh->nh_hash);
1721da177e4SLinus Torvalds 		} endfor_nexthops(fi)
1731da177e4SLinus Torvalds 		fi->fib_dead = 1;
1741da177e4SLinus Torvalds 		fib_info_put(fi);
1751da177e4SLinus Torvalds 	}
176832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
1771da177e4SLinus Torvalds }
1781da177e4SLinus Torvalds 
1791da177e4SLinus Torvalds static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
1801da177e4SLinus Torvalds {
1811da177e4SLinus Torvalds 	const struct fib_nh *onh = ofi->fib_nh;
1821da177e4SLinus Torvalds 
1831da177e4SLinus Torvalds 	for_nexthops(fi) {
1841da177e4SLinus Torvalds 		if (nh->nh_oif != onh->nh_oif ||
1851da177e4SLinus Torvalds 		    nh->nh_gw  != onh->nh_gw ||
1861da177e4SLinus Torvalds 		    nh->nh_scope != onh->nh_scope ||
1871da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
1881da177e4SLinus Torvalds 		    nh->nh_weight != onh->nh_weight ||
1891da177e4SLinus Torvalds #endif
1901da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
1911da177e4SLinus Torvalds 		    nh->nh_tclassid != onh->nh_tclassid ||
1921da177e4SLinus Torvalds #endif
1931da177e4SLinus Torvalds 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
1941da177e4SLinus Torvalds 			return -1;
1951da177e4SLinus Torvalds 		onh++;
1961da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1971da177e4SLinus Torvalds 	return 0;
1981da177e4SLinus Torvalds }
1991da177e4SLinus Torvalds 
2001da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
2011da177e4SLinus Torvalds {
2021da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
2031da177e4SLinus Torvalds 	unsigned int val = fi->fib_nhs;
2041da177e4SLinus Torvalds 
2051da177e4SLinus Torvalds 	val ^= fi->fib_protocol;
206*81f7bf6cSAl Viro 	val ^= (__force u32)fi->fib_prefsrc;
2071da177e4SLinus Torvalds 	val ^= fi->fib_priority;
2081da177e4SLinus Torvalds 
2091da177e4SLinus Torvalds 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds 
2121da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi)
2131da177e4SLinus Torvalds {
2141da177e4SLinus Torvalds 	struct hlist_head *head;
2151da177e4SLinus Torvalds 	struct hlist_node *node;
2161da177e4SLinus Torvalds 	struct fib_info *fi;
2171da177e4SLinus Torvalds 	unsigned int hash;
2181da177e4SLinus Torvalds 
2191da177e4SLinus Torvalds 	hash = fib_info_hashfn(nfi);
2201da177e4SLinus Torvalds 	head = &fib_info_hash[hash];
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds 	hlist_for_each_entry(fi, node, head, fib_hash) {
2231da177e4SLinus Torvalds 		if (fi->fib_nhs != nfi->fib_nhs)
2241da177e4SLinus Torvalds 			continue;
2251da177e4SLinus Torvalds 		if (nfi->fib_protocol == fi->fib_protocol &&
2261da177e4SLinus Torvalds 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
2271da177e4SLinus Torvalds 		    nfi->fib_priority == fi->fib_priority &&
2281da177e4SLinus Torvalds 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
2291da177e4SLinus Torvalds 			   sizeof(fi->fib_metrics)) == 0 &&
2301da177e4SLinus Torvalds 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
2311da177e4SLinus Torvalds 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
2321da177e4SLinus Torvalds 			return fi;
2331da177e4SLinus Torvalds 	}
2341da177e4SLinus Torvalds 
2351da177e4SLinus Torvalds 	return NULL;
2361da177e4SLinus Torvalds }
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds static inline unsigned int fib_devindex_hashfn(unsigned int val)
2391da177e4SLinus Torvalds {
2401da177e4SLinus Torvalds 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds 	return (val ^
2431da177e4SLinus Torvalds 		(val >> DEVINDEX_HASHBITS) ^
2441da177e4SLinus Torvalds 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
2451da177e4SLinus Torvalds }
2461da177e4SLinus Torvalds 
2471da177e4SLinus Torvalds /* Check, that the gateway is already configured.
2481da177e4SLinus Torvalds    Used only by redirect accept routine.
2491da177e4SLinus Torvalds  */
2501da177e4SLinus Torvalds 
251d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev)
2521da177e4SLinus Torvalds {
2531da177e4SLinus Torvalds 	struct hlist_head *head;
2541da177e4SLinus Torvalds 	struct hlist_node *node;
2551da177e4SLinus Torvalds 	struct fib_nh *nh;
2561da177e4SLinus Torvalds 	unsigned int hash;
2571da177e4SLinus Torvalds 
258832b4c5eSStephen Hemminger 	spin_lock(&fib_info_lock);
2591da177e4SLinus Torvalds 
2601da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
2611da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
2621da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
2631da177e4SLinus Torvalds 		if (nh->nh_dev == dev &&
2641da177e4SLinus Torvalds 		    nh->nh_gw == gw &&
2651da177e4SLinus Torvalds 		    !(nh->nh_flags&RTNH_F_DEAD)) {
266832b4c5eSStephen Hemminger 			spin_unlock(&fib_info_lock);
2671da177e4SLinus Torvalds 			return 0;
2681da177e4SLinus Torvalds 		}
2691da177e4SLinus Torvalds 	}
2701da177e4SLinus Torvalds 
271832b4c5eSStephen Hemminger 	spin_unlock(&fib_info_lock);
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds 	return -1;
2741da177e4SLinus Torvalds }
2751da177e4SLinus Torvalds 
276*81f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
2774e902c57SThomas Graf 	       int dst_len, u32 tb_id, struct nl_info *info)
2781da177e4SLinus Torvalds {
2791da177e4SLinus Torvalds 	struct sk_buff *skb;
280f21c7bc5SThomas Graf 	int payload = sizeof(struct rtmsg) + 256;
2814e902c57SThomas Graf 	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
282f21c7bc5SThomas Graf 	int err = -ENOBUFS;
2831da177e4SLinus Torvalds 
284f21c7bc5SThomas Graf 	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
285f21c7bc5SThomas Graf 	if (skb == NULL)
286f21c7bc5SThomas Graf 		goto errout;
2871da177e4SLinus Torvalds 
2884e902c57SThomas Graf 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
289be403ea1SThomas Graf 			    fa->fa_type, fa->fa_scope, key, dst_len,
2904e902c57SThomas Graf 			    fa->fa_tos, fa->fa_info, 0);
291f21c7bc5SThomas Graf 	if (err < 0) {
2921da177e4SLinus Torvalds 		kfree_skb(skb);
293f21c7bc5SThomas Graf 		goto errout;
2941da177e4SLinus Torvalds 	}
295f21c7bc5SThomas Graf 
2964e902c57SThomas Graf 	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
2974e902c57SThomas Graf 			  info->nlh, GFP_KERNEL);
298f21c7bc5SThomas Graf errout:
299f21c7bc5SThomas Graf 	if (err < 0)
300f21c7bc5SThomas Graf 		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
3011da177e4SLinus Torvalds }
3021da177e4SLinus Torvalds 
3031da177e4SLinus Torvalds /* Return the first fib alias matching TOS with
3041da177e4SLinus Torvalds  * priority less than or equal to PRIO.
3051da177e4SLinus Torvalds  */
3061da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
3071da177e4SLinus Torvalds {
3081da177e4SLinus Torvalds 	if (fah) {
3091da177e4SLinus Torvalds 		struct fib_alias *fa;
3101da177e4SLinus Torvalds 		list_for_each_entry(fa, fah, fa_list) {
3111da177e4SLinus Torvalds 			if (fa->fa_tos > tos)
3121da177e4SLinus Torvalds 				continue;
3131da177e4SLinus Torvalds 			if (fa->fa_info->fib_priority >= prio ||
3141da177e4SLinus Torvalds 			    fa->fa_tos < tos)
3151da177e4SLinus Torvalds 				return fa;
3161da177e4SLinus Torvalds 		}
3171da177e4SLinus Torvalds 	}
3181da177e4SLinus Torvalds 	return NULL;
3191da177e4SLinus Torvalds }
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order,
3221da177e4SLinus Torvalds 		     struct fib_info **last_resort, int *last_idx, int *dflt)
3231da177e4SLinus Torvalds {
3241da177e4SLinus Torvalds 	struct neighbour *n;
3251da177e4SLinus Torvalds 	int state = NUD_NONE;
3261da177e4SLinus Torvalds 
3271da177e4SLinus Torvalds 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
3281da177e4SLinus Torvalds 	if (n) {
3291da177e4SLinus Torvalds 		state = n->nud_state;
3301da177e4SLinus Torvalds 		neigh_release(n);
3311da177e4SLinus Torvalds 	}
3321da177e4SLinus Torvalds 	if (state==NUD_REACHABLE)
3331da177e4SLinus Torvalds 		return 0;
3341da177e4SLinus Torvalds 	if ((state&NUD_VALID) && order != *dflt)
3351da177e4SLinus Torvalds 		return 0;
3361da177e4SLinus Torvalds 	if ((state&NUD_VALID) ||
3371da177e4SLinus Torvalds 	    (*last_idx<0 && order > *dflt)) {
3381da177e4SLinus Torvalds 		*last_resort = fi;
3391da177e4SLinus Torvalds 		*last_idx = order;
3401da177e4SLinus Torvalds 	}
3411da177e4SLinus Torvalds 	return 1;
3421da177e4SLinus Torvalds }
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
3451da177e4SLinus Torvalds 
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting at
 * @rtnh.  Returns 0 if bytes are left over after the last well-formed
 * entry, since that indicates an invalid nexthop configuration.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	if (remaining > 0)
		return 0;

	return count;
}
3581da177e4SLinus Torvalds 
3594e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
3604e902c57SThomas Graf 		       int remaining, struct fib_config *cfg)
3614e902c57SThomas Graf {
3621da177e4SLinus Torvalds 	change_nexthops(fi) {
3634e902c57SThomas Graf 		int attrlen;
3644e902c57SThomas Graf 
3654e902c57SThomas Graf 		if (!rtnh_ok(rtnh, remaining))
3661da177e4SLinus Torvalds 			return -EINVAL;
3674e902c57SThomas Graf 
3684e902c57SThomas Graf 		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
3694e902c57SThomas Graf 		nh->nh_oif = rtnh->rtnh_ifindex;
3704e902c57SThomas Graf 		nh->nh_weight = rtnh->rtnh_hops + 1;
3714e902c57SThomas Graf 
3724e902c57SThomas Graf 		attrlen = rtnh_attrlen(rtnh);
3734e902c57SThomas Graf 		if (attrlen > 0) {
3744e902c57SThomas Graf 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3754e902c57SThomas Graf 
3764e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
37717fb2c64SAl Viro 			nh->nh_gw = nla ? nla_get_be32(nla) : 0;
3781da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
3794e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_FLOW);
3804e902c57SThomas Graf 			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
3811da177e4SLinus Torvalds #endif
3821da177e4SLinus Torvalds 		}
3834e902c57SThomas Graf 
3844e902c57SThomas Graf 		rtnh = rtnh_next(rtnh, &remaining);
3851da177e4SLinus Torvalds 	} endfor_nexthops(fi);
3864e902c57SThomas Graf 
3871da177e4SLinus Torvalds 	return 0;
3881da177e4SLinus Torvalds }
3891da177e4SLinus Torvalds 
3901da177e4SLinus Torvalds #endif
3911da177e4SLinus Torvalds 
3924e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
3931da177e4SLinus Torvalds {
3941da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
3954e902c57SThomas Graf 	struct rtnexthop *rtnh;
3964e902c57SThomas Graf 	int remaining;
3971da177e4SLinus Torvalds #endif
3981da177e4SLinus Torvalds 
3994e902c57SThomas Graf 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
4001da177e4SLinus Torvalds 		return 1;
4011da177e4SLinus Torvalds 
4024e902c57SThomas Graf 	if (cfg->fc_oif || cfg->fc_gw) {
4034e902c57SThomas Graf 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
4044e902c57SThomas Graf 		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
4051da177e4SLinus Torvalds 			return 0;
4061da177e4SLinus Torvalds 		return 1;
4071da177e4SLinus Torvalds 	}
4081da177e4SLinus Torvalds 
4091da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
4104e902c57SThomas Graf 	if (cfg->fc_mp == NULL)
4111da177e4SLinus Torvalds 		return 0;
4124e902c57SThomas Graf 
4134e902c57SThomas Graf 	rtnh = cfg->fc_mp;
4144e902c57SThomas Graf 	remaining = cfg->fc_mp_len;
4151da177e4SLinus Torvalds 
4161da177e4SLinus Torvalds 	for_nexthops(fi) {
4174e902c57SThomas Graf 		int attrlen;
4181da177e4SLinus Torvalds 
4194e902c57SThomas Graf 		if (!rtnh_ok(rtnh, remaining))
4201da177e4SLinus Torvalds 			return -EINVAL;
4214e902c57SThomas Graf 
4224e902c57SThomas Graf 		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
4231da177e4SLinus Torvalds 			return 1;
4244e902c57SThomas Graf 
4254e902c57SThomas Graf 		attrlen = rtnh_attrlen(rtnh);
4264e902c57SThomas Graf 		if (attrlen < 0) {
4274e902c57SThomas Graf 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4284e902c57SThomas Graf 
4294e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
43017fb2c64SAl Viro 			if (nla && nla_get_be32(nla) != nh->nh_gw)
4311da177e4SLinus Torvalds 				return 1;
4321da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
4334e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_FLOW);
4344e902c57SThomas Graf 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
4351da177e4SLinus Torvalds 				return 1;
4361da177e4SLinus Torvalds #endif
4371da177e4SLinus Torvalds 		}
4384e902c57SThomas Graf 
4394e902c57SThomas Graf 		rtnh = rtnh_next(rtnh, &remaining);
4401da177e4SLinus Torvalds 	} endfor_nexthops(fi);
4411da177e4SLinus Torvalds #endif
4421da177e4SLinus Torvalds 	return 0;
4431da177e4SLinus Torvalds }
4441da177e4SLinus Torvalds 
4451da177e4SLinus Torvalds 
4461da177e4SLinus Torvalds /*
4471da177e4SLinus Torvalds    Picture
4481da177e4SLinus Torvalds    -------
4491da177e4SLinus Torvalds 
4501da177e4SLinus Torvalds    Semantics of nexthop are very messy for historical reasons.
4511da177e4SLinus Torvalds    We have to take into account, that:
4521da177e4SLinus Torvalds    a) gateway can be actually local interface address,
4531da177e4SLinus Torvalds       so that gatewayed route is direct.
4541da177e4SLinus Torvalds    b) gateway must be on-link address, possibly
4551da177e4SLinus Torvalds       described not by an ifaddr, but also by a direct route.
4561da177e4SLinus Torvalds    c) If both gateway and interface are specified, they should not
4571da177e4SLinus Torvalds       contradict.
4581da177e4SLinus Torvalds    d) If we use tunnel routes, gateway could be not on-link.
4591da177e4SLinus Torvalds 
4601da177e4SLinus Torvalds    Attempt to reconcile all of these (alas, self-contradictory) conditions
4611da177e4SLinus Torvalds    results in pretty ugly and hairy code with obscure logic.
4621da177e4SLinus Torvalds 
4631da177e4SLinus Torvalds    I chose to generalize it instead, so that the size
4641da177e4SLinus Torvalds    of the code practically does not increase, but it becomes
4651da177e4SLinus Torvalds    much more general.
4661da177e4SLinus Torvalds    Every prefix is assigned a "scope" value: "host" is local address,
4671da177e4SLinus Torvalds    "link" is direct route,
4681da177e4SLinus Torvalds    [ ... "site" ... "interior" ... ]
4691da177e4SLinus Torvalds    and "universe" is true gateway route with global meaning.
4701da177e4SLinus Torvalds 
4711da177e4SLinus Torvalds    Every prefix refers to a set of "nexthop"s (gw, oif),
4721da177e4SLinus Torvalds    where gw must have narrower scope. This recursion stops
4731da177e4SLinus Torvalds    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
4741da177e4SLinus Torvalds    which means that gw is forced to be on link.
4751da177e4SLinus Torvalds 
4761da177e4SLinus Torvalds    Code is still hairy, but now it is apparently logically
4771da177e4SLinus Torvalds    consistent and very flexible. E.g., as a by-product, it allows
4781da177e4SLinus Torvalds    independent exterior and interior routing processes
4791da177e4SLinus Torvalds    to co-exist in peace.
4801da177e4SLinus Torvalds 
4811da177e4SLinus Torvalds    Normally it looks as following.
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds    {universe prefix}  -> (gw, oif) [scope link]
4841da177e4SLinus Torvalds                           |
4851da177e4SLinus Torvalds 			  |-> {link prefix} -> (gw, oif) [scope local]
4861da177e4SLinus Torvalds 			                        |
4871da177e4SLinus Torvalds 						|-> {local prefix} (terminal node)
4881da177e4SLinus Torvalds  */
4891da177e4SLinus Torvalds 
4904e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
4914e902c57SThomas Graf 			struct fib_nh *nh)
4921da177e4SLinus Torvalds {
4931da177e4SLinus Torvalds 	int err;
4941da177e4SLinus Torvalds 
4951da177e4SLinus Torvalds 	if (nh->nh_gw) {
4961da177e4SLinus Torvalds 		struct fib_result res;
4971da177e4SLinus Torvalds 
4981da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_PERVASIVE
4991da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_PERVASIVE)
5001da177e4SLinus Torvalds 			return 0;
5011da177e4SLinus Torvalds #endif
5021da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_ONLINK) {
5031da177e4SLinus Torvalds 			struct net_device *dev;
5041da177e4SLinus Torvalds 
5054e902c57SThomas Graf 			if (cfg->fc_scope >= RT_SCOPE_LINK)
5061da177e4SLinus Torvalds 				return -EINVAL;
5071da177e4SLinus Torvalds 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
5081da177e4SLinus Torvalds 				return -EINVAL;
5091da177e4SLinus Torvalds 			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
5101da177e4SLinus Torvalds 				return -ENODEV;
5111da177e4SLinus Torvalds 			if (!(dev->flags&IFF_UP))
5121da177e4SLinus Torvalds 				return -ENETDOWN;
5131da177e4SLinus Torvalds 			nh->nh_dev = dev;
5141da177e4SLinus Torvalds 			dev_hold(dev);
5151da177e4SLinus Torvalds 			nh->nh_scope = RT_SCOPE_LINK;
5161da177e4SLinus Torvalds 			return 0;
5171da177e4SLinus Torvalds 		}
5181da177e4SLinus Torvalds 		{
5194e902c57SThomas Graf 			struct flowi fl = {
5204e902c57SThomas Graf 				.nl_u = {
5214e902c57SThomas Graf 					.ip4_u = {
5224e902c57SThomas Graf 						.daddr = nh->nh_gw,
5234e902c57SThomas Graf 						.scope = cfg->fc_scope + 1,
5244e902c57SThomas Graf 					},
5254e902c57SThomas Graf 				},
5264e902c57SThomas Graf 				.oif = nh->nh_oif,
5274e902c57SThomas Graf 			};
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 			/* It is not necessary, but requires a bit of thinking */
5301da177e4SLinus Torvalds 			if (fl.fl4_scope < RT_SCOPE_LINK)
5311da177e4SLinus Torvalds 				fl.fl4_scope = RT_SCOPE_LINK;
5321da177e4SLinus Torvalds 			if ((err = fib_lookup(&fl, &res)) != 0)
5331da177e4SLinus Torvalds 				return err;
5341da177e4SLinus Torvalds 		}
5351da177e4SLinus Torvalds 		err = -EINVAL;
5361da177e4SLinus Torvalds 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
5371da177e4SLinus Torvalds 			goto out;
5381da177e4SLinus Torvalds 		nh->nh_scope = res.scope;
5391da177e4SLinus Torvalds 		nh->nh_oif = FIB_RES_OIF(res);
5401da177e4SLinus Torvalds 		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
5411da177e4SLinus Torvalds 			goto out;
5421da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5431da177e4SLinus Torvalds 		err = -ENETDOWN;
5441da177e4SLinus Torvalds 		if (!(nh->nh_dev->flags & IFF_UP))
5451da177e4SLinus Torvalds 			goto out;
5461da177e4SLinus Torvalds 		err = 0;
5471da177e4SLinus Torvalds out:
5481da177e4SLinus Torvalds 		fib_res_put(&res);
5491da177e4SLinus Torvalds 		return err;
5501da177e4SLinus Torvalds 	} else {
5511da177e4SLinus Torvalds 		struct in_device *in_dev;
5521da177e4SLinus Torvalds 
5531da177e4SLinus Torvalds 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
5541da177e4SLinus Torvalds 			return -EINVAL;
5551da177e4SLinus Torvalds 
5561da177e4SLinus Torvalds 		in_dev = inetdev_by_index(nh->nh_oif);
5571da177e4SLinus Torvalds 		if (in_dev == NULL)
5581da177e4SLinus Torvalds 			return -ENODEV;
5591da177e4SLinus Torvalds 		if (!(in_dev->dev->flags&IFF_UP)) {
5601da177e4SLinus Torvalds 			in_dev_put(in_dev);
5611da177e4SLinus Torvalds 			return -ENETDOWN;
5621da177e4SLinus Torvalds 		}
5631da177e4SLinus Torvalds 		nh->nh_dev = in_dev->dev;
5641da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5651da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_HOST;
5661da177e4SLinus Torvalds 		in_dev_put(in_dev);
5671da177e4SLinus Torvalds 	}
5681da177e4SLinus Torvalds 	return 0;
5691da177e4SLinus Torvalds }
5701da177e4SLinus Torvalds 
571*81f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val)
5721da177e4SLinus Torvalds {
5731da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
5741da177e4SLinus Torvalds 
575*81f7bf6cSAl Viro 	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
5761da177e4SLinus Torvalds }
5771da177e4SLinus Torvalds 
5781da177e4SLinus Torvalds static struct hlist_head *fib_hash_alloc(int bytes)
5791da177e4SLinus Torvalds {
5801da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
5811da177e4SLinus Torvalds 		return kmalloc(bytes, GFP_KERNEL);
5821da177e4SLinus Torvalds 	else
5831da177e4SLinus Torvalds 		return (struct hlist_head *)
5841da177e4SLinus Torvalds 			__get_free_pages(GFP_KERNEL, get_order(bytes));
5851da177e4SLinus Torvalds }
5861da177e4SLinus Torvalds 
5871da177e4SLinus Torvalds static void fib_hash_free(struct hlist_head *hash, int bytes)
5881da177e4SLinus Torvalds {
5891da177e4SLinus Torvalds 	if (!hash)
5901da177e4SLinus Torvalds 		return;
5911da177e4SLinus Torvalds 
5921da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
5931da177e4SLinus Torvalds 		kfree(hash);
5941da177e4SLinus Torvalds 	else
5951da177e4SLinus Torvalds 		free_pages((unsigned long) hash, get_order(bytes));
5961da177e4SLinus Torvalds }
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds static void fib_hash_move(struct hlist_head *new_info_hash,
5991da177e4SLinus Torvalds 			  struct hlist_head *new_laddrhash,
6001da177e4SLinus Torvalds 			  unsigned int new_size)
6011da177e4SLinus Torvalds {
602b7656e7fSDavid S. Miller 	struct hlist_head *old_info_hash, *old_laddrhash;
6031da177e4SLinus Torvalds 	unsigned int old_size = fib_hash_size;
604b7656e7fSDavid S. Miller 	unsigned int i, bytes;
6051da177e4SLinus Torvalds 
606832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
607b7656e7fSDavid S. Miller 	old_info_hash = fib_info_hash;
608b7656e7fSDavid S. Miller 	old_laddrhash = fib_info_laddrhash;
6091da177e4SLinus Torvalds 	fib_hash_size = new_size;
6101da177e4SLinus Torvalds 
6111da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6121da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_hash[i];
6131da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6141da177e4SLinus Torvalds 		struct fib_info *fi;
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
6171da177e4SLinus Torvalds 			struct hlist_head *dest;
6181da177e4SLinus Torvalds 			unsigned int new_hash;
6191da177e4SLinus Torvalds 
6201da177e4SLinus Torvalds 			hlist_del(&fi->fib_hash);
6211da177e4SLinus Torvalds 
6221da177e4SLinus Torvalds 			new_hash = fib_info_hashfn(fi);
6231da177e4SLinus Torvalds 			dest = &new_info_hash[new_hash];
6241da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_hash, dest);
6251da177e4SLinus Torvalds 		}
6261da177e4SLinus Torvalds 	}
6271da177e4SLinus Torvalds 	fib_info_hash = new_info_hash;
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6301da177e4SLinus Torvalds 		struct hlist_head *lhead = &fib_info_laddrhash[i];
6311da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6321da177e4SLinus Torvalds 		struct fib_info *fi;
6331da177e4SLinus Torvalds 
6341da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
6351da177e4SLinus Torvalds 			struct hlist_head *ldest;
6361da177e4SLinus Torvalds 			unsigned int new_hash;
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
6391da177e4SLinus Torvalds 
6401da177e4SLinus Torvalds 			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
6411da177e4SLinus Torvalds 			ldest = &new_laddrhash[new_hash];
6421da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_lhash, ldest);
6431da177e4SLinus Torvalds 		}
6441da177e4SLinus Torvalds 	}
6451da177e4SLinus Torvalds 	fib_info_laddrhash = new_laddrhash;
6461da177e4SLinus Torvalds 
647832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
648b7656e7fSDavid S. Miller 
649b7656e7fSDavid S. Miller 	bytes = old_size * sizeof(struct hlist_head *);
650b7656e7fSDavid S. Miller 	fib_hash_free(old_info_hash, bytes);
651b7656e7fSDavid S. Miller 	fib_hash_free(old_laddrhash, bytes);
6521da177e4SLinus Torvalds }
6531da177e4SLinus Torvalds 
6544e902c57SThomas Graf struct fib_info *fib_create_info(struct fib_config *cfg)
6551da177e4SLinus Torvalds {
6561da177e4SLinus Torvalds 	int err;
6571da177e4SLinus Torvalds 	struct fib_info *fi = NULL;
6581da177e4SLinus Torvalds 	struct fib_info *ofi;
6591da177e4SLinus Torvalds 	int nhs = 1;
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 	/* Fast check to catch the most weird cases */
6624e902c57SThomas Graf 	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
6631da177e4SLinus Torvalds 		goto err_inval;
6641da177e4SLinus Torvalds 
6651da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
6664e902c57SThomas Graf 	if (cfg->fc_mp) {
6674e902c57SThomas Graf 		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
6681da177e4SLinus Torvalds 		if (nhs == 0)
6691da177e4SLinus Torvalds 			goto err_inval;
6701da177e4SLinus Torvalds 	}
6711da177e4SLinus Torvalds #endif
6721da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
6734e902c57SThomas Graf 	if (cfg->fc_mp_alg) {
6744e902c57SThomas Graf 		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
6754e902c57SThomas Graf 		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
6761da177e4SLinus Torvalds 			goto err_inval;
6771da177e4SLinus Torvalds 	}
6781da177e4SLinus Torvalds #endif
6791da177e4SLinus Torvalds 
6801da177e4SLinus Torvalds 	err = -ENOBUFS;
6811da177e4SLinus Torvalds 	if (fib_info_cnt >= fib_hash_size) {
6821da177e4SLinus Torvalds 		unsigned int new_size = fib_hash_size << 1;
6831da177e4SLinus Torvalds 		struct hlist_head *new_info_hash;
6841da177e4SLinus Torvalds 		struct hlist_head *new_laddrhash;
6851da177e4SLinus Torvalds 		unsigned int bytes;
6861da177e4SLinus Torvalds 
6871da177e4SLinus Torvalds 		if (!new_size)
6881da177e4SLinus Torvalds 			new_size = 1;
6891da177e4SLinus Torvalds 		bytes = new_size * sizeof(struct hlist_head *);
6901da177e4SLinus Torvalds 		new_info_hash = fib_hash_alloc(bytes);
6911da177e4SLinus Torvalds 		new_laddrhash = fib_hash_alloc(bytes);
6921da177e4SLinus Torvalds 		if (!new_info_hash || !new_laddrhash) {
6931da177e4SLinus Torvalds 			fib_hash_free(new_info_hash, bytes);
6941da177e4SLinus Torvalds 			fib_hash_free(new_laddrhash, bytes);
6951da177e4SLinus Torvalds 		} else {
6961da177e4SLinus Torvalds 			memset(new_info_hash, 0, bytes);
6971da177e4SLinus Torvalds 			memset(new_laddrhash, 0, bytes);
6981da177e4SLinus Torvalds 
6991da177e4SLinus Torvalds 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
7001da177e4SLinus Torvalds 		}
7011da177e4SLinus Torvalds 
7021da177e4SLinus Torvalds 		if (!fib_hash_size)
7031da177e4SLinus Torvalds 			goto failure;
7041da177e4SLinus Torvalds 	}
7051da177e4SLinus Torvalds 
7060da974f4SPanagiotis Issaris 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
7071da177e4SLinus Torvalds 	if (fi == NULL)
7081da177e4SLinus Torvalds 		goto failure;
7091da177e4SLinus Torvalds 	fib_info_cnt++;
7101da177e4SLinus Torvalds 
7114e902c57SThomas Graf 	fi->fib_protocol = cfg->fc_protocol;
7124e902c57SThomas Graf 	fi->fib_flags = cfg->fc_flags;
7134e902c57SThomas Graf 	fi->fib_priority = cfg->fc_priority;
7144e902c57SThomas Graf 	fi->fib_prefsrc = cfg->fc_prefsrc;
7151da177e4SLinus Torvalds 
7161da177e4SLinus Torvalds 	fi->fib_nhs = nhs;
7171da177e4SLinus Torvalds 	change_nexthops(fi) {
7181da177e4SLinus Torvalds 		nh->nh_parent = fi;
7191da177e4SLinus Torvalds 	} endfor_nexthops(fi)
7201da177e4SLinus Torvalds 
7214e902c57SThomas Graf 	if (cfg->fc_mx) {
7224e902c57SThomas Graf 		struct nlattr *nla;
7234e902c57SThomas Graf 		int remaining;
7241da177e4SLinus Torvalds 
7254e902c57SThomas Graf 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
7264e902c57SThomas Graf 			int type = nla->nla_type;
7274e902c57SThomas Graf 
7284e902c57SThomas Graf 			if (type) {
7294e902c57SThomas Graf 				if (type > RTAX_MAX)
7301da177e4SLinus Torvalds 					goto err_inval;
7314e902c57SThomas Graf 				fi->fib_metrics[type - 1] = nla_get_u32(nla);
7321da177e4SLinus Torvalds 			}
7331da177e4SLinus Torvalds 		}
7344e902c57SThomas Graf 	}
7351da177e4SLinus Torvalds 
7364e902c57SThomas Graf 	if (cfg->fc_mp) {
7371da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7384e902c57SThomas Graf 		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
7394e902c57SThomas Graf 		if (err != 0)
7401da177e4SLinus Torvalds 			goto failure;
7414e902c57SThomas Graf 		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
7421da177e4SLinus Torvalds 			goto err_inval;
7434e902c57SThomas Graf 		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
7441da177e4SLinus Torvalds 			goto err_inval;
7451da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7464e902c57SThomas Graf 		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
7471da177e4SLinus Torvalds 			goto err_inval;
7481da177e4SLinus Torvalds #endif
7491da177e4SLinus Torvalds #else
7501da177e4SLinus Torvalds 		goto err_inval;
7511da177e4SLinus Torvalds #endif
7521da177e4SLinus Torvalds 	} else {
7531da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
7544e902c57SThomas Graf 
7554e902c57SThomas Graf 		nh->nh_oif = cfg->fc_oif;
7564e902c57SThomas Graf 		nh->nh_gw = cfg->fc_gw;
7574e902c57SThomas Graf 		nh->nh_flags = cfg->fc_flags;
7581da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7594e902c57SThomas Graf 		nh->nh_tclassid = cfg->fc_flow;
7601da177e4SLinus Torvalds #endif
7611da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7621da177e4SLinus Torvalds 		nh->nh_weight = 1;
7631da177e4SLinus Torvalds #endif
7641da177e4SLinus Torvalds 	}
7651da177e4SLinus Torvalds 
7661da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
7674e902c57SThomas Graf 	fi->fib_mp_alg = cfg->fc_mp_alg;
7681da177e4SLinus Torvalds #endif
7691da177e4SLinus Torvalds 
7704e902c57SThomas Graf 	if (fib_props[cfg->fc_type].error) {
7714e902c57SThomas Graf 		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
7721da177e4SLinus Torvalds 			goto err_inval;
7731da177e4SLinus Torvalds 		goto link_it;
7741da177e4SLinus Torvalds 	}
7751da177e4SLinus Torvalds 
7764e902c57SThomas Graf 	if (cfg->fc_scope > RT_SCOPE_HOST)
7771da177e4SLinus Torvalds 		goto err_inval;
7781da177e4SLinus Torvalds 
7794e902c57SThomas Graf 	if (cfg->fc_scope == RT_SCOPE_HOST) {
7801da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
7811da177e4SLinus Torvalds 
7821da177e4SLinus Torvalds 		/* Local address is added. */
7831da177e4SLinus Torvalds 		if (nhs != 1 || nh->nh_gw)
7841da177e4SLinus Torvalds 			goto err_inval;
7851da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_NOWHERE;
7861da177e4SLinus Torvalds 		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
7871da177e4SLinus Torvalds 		err = -ENODEV;
7881da177e4SLinus Torvalds 		if (nh->nh_dev == NULL)
7891da177e4SLinus Torvalds 			goto failure;
7901da177e4SLinus Torvalds 	} else {
7911da177e4SLinus Torvalds 		change_nexthops(fi) {
7924e902c57SThomas Graf 			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
7931da177e4SLinus Torvalds 				goto failure;
7941da177e4SLinus Torvalds 		} endfor_nexthops(fi)
7951da177e4SLinus Torvalds 	}
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
7984e902c57SThomas Graf 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
7994e902c57SThomas Graf 		    fi->fib_prefsrc != cfg->fc_dst)
8001da177e4SLinus Torvalds 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
8011da177e4SLinus Torvalds 				goto err_inval;
8021da177e4SLinus Torvalds 	}
8031da177e4SLinus Torvalds 
8041da177e4SLinus Torvalds link_it:
8051da177e4SLinus Torvalds 	if ((ofi = fib_find_info(fi)) != NULL) {
8061da177e4SLinus Torvalds 		fi->fib_dead = 1;
8071da177e4SLinus Torvalds 		free_fib_info(fi);
8081da177e4SLinus Torvalds 		ofi->fib_treeref++;
8091da177e4SLinus Torvalds 		return ofi;
8101da177e4SLinus Torvalds 	}
8111da177e4SLinus Torvalds 
8121da177e4SLinus Torvalds 	fi->fib_treeref++;
8131da177e4SLinus Torvalds 	atomic_inc(&fi->fib_clntref);
814832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
8151da177e4SLinus Torvalds 	hlist_add_head(&fi->fib_hash,
8161da177e4SLinus Torvalds 		       &fib_info_hash[fib_info_hashfn(fi)]);
8171da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
8181da177e4SLinus Torvalds 		struct hlist_head *head;
8191da177e4SLinus Torvalds 
8201da177e4SLinus Torvalds 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
8211da177e4SLinus Torvalds 		hlist_add_head(&fi->fib_lhash, head);
8221da177e4SLinus Torvalds 	}
8231da177e4SLinus Torvalds 	change_nexthops(fi) {
8241da177e4SLinus Torvalds 		struct hlist_head *head;
8251da177e4SLinus Torvalds 		unsigned int hash;
8261da177e4SLinus Torvalds 
8271da177e4SLinus Torvalds 		if (!nh->nh_dev)
8281da177e4SLinus Torvalds 			continue;
8291da177e4SLinus Torvalds 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
8301da177e4SLinus Torvalds 		head = &fib_info_devhash[hash];
8311da177e4SLinus Torvalds 		hlist_add_head(&nh->nh_hash, head);
8321da177e4SLinus Torvalds 	} endfor_nexthops(fi)
833832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
8341da177e4SLinus Torvalds 	return fi;
8351da177e4SLinus Torvalds 
8361da177e4SLinus Torvalds err_inval:
8371da177e4SLinus Torvalds 	err = -EINVAL;
8381da177e4SLinus Torvalds 
8391da177e4SLinus Torvalds failure:
8401da177e4SLinus Torvalds         if (fi) {
8411da177e4SLinus Torvalds 		fi->fib_dead = 1;
8421da177e4SLinus Torvalds 		free_fib_info(fi);
8431da177e4SLinus Torvalds 	}
8444e902c57SThomas Graf 
8454e902c57SThomas Graf 	return ERR_PTR(err);
8461da177e4SLinus Torvalds }
8471da177e4SLinus Torvalds 
848e5b43760SRobert Olsson /* Note! fib_semantic_match intentionally uses  RCU list functions. */
8491da177e4SLinus Torvalds int fib_semantic_match(struct list_head *head, const struct flowi *flp,
8501ef1b8c8SAl Viro 		       struct fib_result *res, __be32 zone, __be32 mask,
8511da177e4SLinus Torvalds 			int prefixlen)
8521da177e4SLinus Torvalds {
8531da177e4SLinus Torvalds 	struct fib_alias *fa;
8541da177e4SLinus Torvalds 	int nh_sel = 0;
8551da177e4SLinus Torvalds 
856e5b43760SRobert Olsson 	list_for_each_entry_rcu(fa, head, fa_list) {
8571da177e4SLinus Torvalds 		int err;
8581da177e4SLinus Torvalds 
8591da177e4SLinus Torvalds 		if (fa->fa_tos &&
8601da177e4SLinus Torvalds 		    fa->fa_tos != flp->fl4_tos)
8611da177e4SLinus Torvalds 			continue;
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds 		if (fa->fa_scope < flp->fl4_scope)
8641da177e4SLinus Torvalds 			continue;
8651da177e4SLinus Torvalds 
8661da177e4SLinus Torvalds 		fa->fa_state |= FA_S_ACCESSED;
8671da177e4SLinus Torvalds 
8681da177e4SLinus Torvalds 		err = fib_props[fa->fa_type].error;
8691da177e4SLinus Torvalds 		if (err == 0) {
8701da177e4SLinus Torvalds 			struct fib_info *fi = fa->fa_info;
8711da177e4SLinus Torvalds 
8721da177e4SLinus Torvalds 			if (fi->fib_flags & RTNH_F_DEAD)
8731da177e4SLinus Torvalds 				continue;
8741da177e4SLinus Torvalds 
8751da177e4SLinus Torvalds 			switch (fa->fa_type) {
8761da177e4SLinus Torvalds 			case RTN_UNICAST:
8771da177e4SLinus Torvalds 			case RTN_LOCAL:
8781da177e4SLinus Torvalds 			case RTN_BROADCAST:
8791da177e4SLinus Torvalds 			case RTN_ANYCAST:
8801da177e4SLinus Torvalds 			case RTN_MULTICAST:
8811da177e4SLinus Torvalds 				for_nexthops(fi) {
8821da177e4SLinus Torvalds 					if (nh->nh_flags&RTNH_F_DEAD)
8831da177e4SLinus Torvalds 						continue;
8841da177e4SLinus Torvalds 					if (!flp->oif || flp->oif == nh->nh_oif)
8851da177e4SLinus Torvalds 						break;
8861da177e4SLinus Torvalds 				}
8871da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
8881da177e4SLinus Torvalds 				if (nhsel < fi->fib_nhs) {
8891da177e4SLinus Torvalds 					nh_sel = nhsel;
8901da177e4SLinus Torvalds 					goto out_fill_res;
8911da177e4SLinus Torvalds 				}
8921da177e4SLinus Torvalds #else
8931da177e4SLinus Torvalds 				if (nhsel < 1) {
8941da177e4SLinus Torvalds 					goto out_fill_res;
8951da177e4SLinus Torvalds 				}
8961da177e4SLinus Torvalds #endif
8971da177e4SLinus Torvalds 				endfor_nexthops(fi);
8981da177e4SLinus Torvalds 				continue;
8991da177e4SLinus Torvalds 
9001da177e4SLinus Torvalds 			default:
9011da177e4SLinus Torvalds 				printk(KERN_DEBUG "impossible 102\n");
9021da177e4SLinus Torvalds 				return -EINVAL;
9031da177e4SLinus Torvalds 			};
9041da177e4SLinus Torvalds 		}
9051da177e4SLinus Torvalds 		return err;
9061da177e4SLinus Torvalds 	}
9071da177e4SLinus Torvalds 	return 1;
9081da177e4SLinus Torvalds 
9091da177e4SLinus Torvalds out_fill_res:
9101da177e4SLinus Torvalds 	res->prefixlen = prefixlen;
9111da177e4SLinus Torvalds 	res->nh_sel = nh_sel;
9121da177e4SLinus Torvalds 	res->type = fa->fa_type;
9131da177e4SLinus Torvalds 	res->scope = fa->fa_scope;
9141da177e4SLinus Torvalds 	res->fi = fa->fa_info;
9151da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
9161da177e4SLinus Torvalds 	res->netmask = mask;
9171e8aa6f1SAl Viro 	res->network = zone & inet_make_mask(prefixlen);
9181da177e4SLinus Torvalds #endif
9191da177e4SLinus Torvalds 	atomic_inc(&res->fi->fib_clntref);
9201da177e4SLinus Torvalds 	return 0;
9211da177e4SLinus Torvalds }
9221da177e4SLinus Torvalds 
9231da177e4SLinus Torvalds /* Find appropriate source address to this destination */
9241da177e4SLinus Torvalds 
925b83738aeSAl Viro __be32 __fib_res_prefsrc(struct fib_result *res)
9261da177e4SLinus Torvalds {
9271da177e4SLinus Torvalds 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
9281da177e4SLinus Torvalds }
9291da177e4SLinus Torvalds 
930be403ea1SThomas Graf int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
931*81f7bf6cSAl Viro 		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
932b6544c0bSJamal Hadi Salim 		  struct fib_info *fi, unsigned int flags)
9331da177e4SLinus Torvalds {
9341da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
935be403ea1SThomas Graf 	struct rtmsg *rtm;
9361da177e4SLinus Torvalds 
937be403ea1SThomas Graf 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
938be403ea1SThomas Graf 	if (nlh == NULL)
939be403ea1SThomas Graf 		return -ENOBUFS;
940be403ea1SThomas Graf 
941be403ea1SThomas Graf 	rtm = nlmsg_data(nlh);
9421da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET;
9431da177e4SLinus Torvalds 	rtm->rtm_dst_len = dst_len;
9441da177e4SLinus Torvalds 	rtm->rtm_src_len = 0;
9451da177e4SLinus Torvalds 	rtm->rtm_tos = tos;
9461da177e4SLinus Torvalds 	rtm->rtm_table = tb_id;
947be403ea1SThomas Graf 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
9481da177e4SLinus Torvalds 	rtm->rtm_type = type;
9491da177e4SLinus Torvalds 	rtm->rtm_flags = fi->fib_flags;
9501da177e4SLinus Torvalds 	rtm->rtm_scope = scope;
9511da177e4SLinus Torvalds 	rtm->rtm_protocol = fi->fib_protocol;
952be403ea1SThomas Graf 
953be403ea1SThomas Graf 	if (rtm->rtm_dst_len)
95417fb2c64SAl Viro 		NLA_PUT_BE32(skb, RTA_DST, dst);
955be403ea1SThomas Graf 
9561da177e4SLinus Torvalds 	if (fi->fib_priority)
957be403ea1SThomas Graf 		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
958be403ea1SThomas Graf 
9591da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
960be403ea1SThomas Graf 		goto nla_put_failure;
961be403ea1SThomas Graf 
9621da177e4SLinus Torvalds 	if (fi->fib_prefsrc)
96317fb2c64SAl Viro 		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
964be403ea1SThomas Graf 
9651da177e4SLinus Torvalds 	if (fi->fib_nhs == 1) {
9661da177e4SLinus Torvalds 		if (fi->fib_nh->nh_gw)
96717fb2c64SAl Viro 			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
968be403ea1SThomas Graf 
9691da177e4SLinus Torvalds 		if (fi->fib_nh->nh_oif)
970be403ea1SThomas Graf 			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
9718265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE
9728265abc0SPatrick McHardy 		if (fi->fib_nh[0].nh_tclassid)
973be403ea1SThomas Graf 			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
9748265abc0SPatrick McHardy #endif
9751da177e4SLinus Torvalds 	}
9761da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
9771da177e4SLinus Torvalds 	if (fi->fib_nhs > 1) {
978be403ea1SThomas Graf 		struct rtnexthop *rtnh;
979be403ea1SThomas Graf 		struct nlattr *mp;
980be403ea1SThomas Graf 
981be403ea1SThomas Graf 		mp = nla_nest_start(skb, RTA_MULTIPATH);
982be403ea1SThomas Graf 		if (mp == NULL)
983be403ea1SThomas Graf 			goto nla_put_failure;
9841da177e4SLinus Torvalds 
9851da177e4SLinus Torvalds 		for_nexthops(fi) {
986be403ea1SThomas Graf 			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
987be403ea1SThomas Graf 			if (rtnh == NULL)
988be403ea1SThomas Graf 				goto nla_put_failure;
989be403ea1SThomas Graf 
990be403ea1SThomas Graf 			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
991be403ea1SThomas Graf 			rtnh->rtnh_hops = nh->nh_weight - 1;
992be403ea1SThomas Graf 			rtnh->rtnh_ifindex = nh->nh_oif;
993be403ea1SThomas Graf 
9941da177e4SLinus Torvalds 			if (nh->nh_gw)
99517fb2c64SAl Viro 				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
9968265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE
9978265abc0SPatrick McHardy 			if (nh->nh_tclassid)
998be403ea1SThomas Graf 				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
9998265abc0SPatrick McHardy #endif
1000be403ea1SThomas Graf 			/* length of rtnetlink header + attributes */
1001be403ea1SThomas Graf 			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
10021da177e4SLinus Torvalds 		} endfor_nexthops(fi);
1003be403ea1SThomas Graf 
1004be403ea1SThomas Graf 		nla_nest_end(skb, mp);
10051da177e4SLinus Torvalds 	}
10061da177e4SLinus Torvalds #endif
1007be403ea1SThomas Graf 	return nlmsg_end(skb, nlh);
10081da177e4SLinus Torvalds 
1009be403ea1SThomas Graf nla_put_failure:
1010be403ea1SThomas Graf 	return nlmsg_cancel(skb, nlh);
10111da177e4SLinus Torvalds }
10121da177e4SLinus Torvalds 
10131da177e4SLinus Torvalds /*
10141da177e4SLinus Torvalds    Update FIB if:
10151da177e4SLinus Torvalds    - local address disappeared -> we must delete all the entries
10161da177e4SLinus Torvalds      referring to it.
10171da177e4SLinus Torvalds    - device went down -> we must shutdown all nexthops going via it.
10181da177e4SLinus Torvalds  */
10191da177e4SLinus Torvalds 
1020*81f7bf6cSAl Viro int fib_sync_down(__be32 local, struct net_device *dev, int force)
10211da177e4SLinus Torvalds {
10221da177e4SLinus Torvalds 	int ret = 0;
10231da177e4SLinus Torvalds 	int scope = RT_SCOPE_NOWHERE;
10241da177e4SLinus Torvalds 
10251da177e4SLinus Torvalds 	if (force)
10261da177e4SLinus Torvalds 		scope = -1;
10271da177e4SLinus Torvalds 
10281da177e4SLinus Torvalds 	if (local && fib_info_laddrhash) {
10291da177e4SLinus Torvalds 		unsigned int hash = fib_laddr_hashfn(local);
10301da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_laddrhash[hash];
10311da177e4SLinus Torvalds 		struct hlist_node *node;
10321da177e4SLinus Torvalds 		struct fib_info *fi;
10331da177e4SLinus Torvalds 
10341da177e4SLinus Torvalds 		hlist_for_each_entry(fi, node, head, fib_lhash) {
10351da177e4SLinus Torvalds 			if (fi->fib_prefsrc == local) {
10361da177e4SLinus Torvalds 				fi->fib_flags |= RTNH_F_DEAD;
10371da177e4SLinus Torvalds 				ret++;
10381da177e4SLinus Torvalds 			}
10391da177e4SLinus Torvalds 		}
10401da177e4SLinus Torvalds 	}
10411da177e4SLinus Torvalds 
10421da177e4SLinus Torvalds 	if (dev) {
10431da177e4SLinus Torvalds 		struct fib_info *prev_fi = NULL;
10441da177e4SLinus Torvalds 		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
10451da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_devhash[hash];
10461da177e4SLinus Torvalds 		struct hlist_node *node;
10471da177e4SLinus Torvalds 		struct fib_nh *nh;
10481da177e4SLinus Torvalds 
10491da177e4SLinus Torvalds 		hlist_for_each_entry(nh, node, head, nh_hash) {
10501da177e4SLinus Torvalds 			struct fib_info *fi = nh->nh_parent;
10511da177e4SLinus Torvalds 			int dead;
10521da177e4SLinus Torvalds 
10531da177e4SLinus Torvalds 			BUG_ON(!fi->fib_nhs);
10541da177e4SLinus Torvalds 			if (nh->nh_dev != dev || fi == prev_fi)
10551da177e4SLinus Torvalds 				continue;
10561da177e4SLinus Torvalds 			prev_fi = fi;
10571da177e4SLinus Torvalds 			dead = 0;
10581da177e4SLinus Torvalds 			change_nexthops(fi) {
10591da177e4SLinus Torvalds 				if (nh->nh_flags&RTNH_F_DEAD)
10601da177e4SLinus Torvalds 					dead++;
10611da177e4SLinus Torvalds 				else if (nh->nh_dev == dev &&
10621da177e4SLinus Torvalds 					 nh->nh_scope != scope) {
10631da177e4SLinus Torvalds 					nh->nh_flags |= RTNH_F_DEAD;
10641da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
10651da177e4SLinus Torvalds 					spin_lock_bh(&fib_multipath_lock);
10661da177e4SLinus Torvalds 					fi->fib_power -= nh->nh_power;
10671da177e4SLinus Torvalds 					nh->nh_power = 0;
10681da177e4SLinus Torvalds 					spin_unlock_bh(&fib_multipath_lock);
10691da177e4SLinus Torvalds #endif
10701da177e4SLinus Torvalds 					dead++;
10711da177e4SLinus Torvalds 				}
10721da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
10731da177e4SLinus Torvalds 				if (force > 1 && nh->nh_dev == dev) {
10741da177e4SLinus Torvalds 					dead = fi->fib_nhs;
10751da177e4SLinus Torvalds 					break;
10761da177e4SLinus Torvalds 				}
10771da177e4SLinus Torvalds #endif
10781da177e4SLinus Torvalds 			} endfor_nexthops(fi)
10791da177e4SLinus Torvalds 			if (dead == fi->fib_nhs) {
10801da177e4SLinus Torvalds 				fi->fib_flags |= RTNH_F_DEAD;
10811da177e4SLinus Torvalds 				ret++;
10821da177e4SLinus Torvalds 			}
10831da177e4SLinus Torvalds 		}
10841da177e4SLinus Torvalds 	}
10851da177e4SLinus Torvalds 
10861da177e4SLinus Torvalds 	return ret;
10871da177e4SLinus Torvalds }
10881da177e4SLinus Torvalds 
10891da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
10901da177e4SLinus Torvalds 
10911da177e4SLinus Torvalds /*
10921da177e4SLinus Torvalds    Dead device goes up. We wake up dead nexthops.
10931da177e4SLinus Torvalds    It takes sense only on multipath routes.
10941da177e4SLinus Torvalds  */
10951da177e4SLinus Torvalds 
10961da177e4SLinus Torvalds int fib_sync_up(struct net_device *dev)
10971da177e4SLinus Torvalds {
10981da177e4SLinus Torvalds 	struct fib_info *prev_fi;
10991da177e4SLinus Torvalds 	unsigned int hash;
11001da177e4SLinus Torvalds 	struct hlist_head *head;
11011da177e4SLinus Torvalds 	struct hlist_node *node;
11021da177e4SLinus Torvalds 	struct fib_nh *nh;
11031da177e4SLinus Torvalds 	int ret;
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds 	if (!(dev->flags&IFF_UP))
11061da177e4SLinus Torvalds 		return 0;
11071da177e4SLinus Torvalds 
11081da177e4SLinus Torvalds 	prev_fi = NULL;
11091da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
11101da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
11111da177e4SLinus Torvalds 	ret = 0;
11121da177e4SLinus Torvalds 
11131da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
11141da177e4SLinus Torvalds 		struct fib_info *fi = nh->nh_parent;
11151da177e4SLinus Torvalds 		int alive;
11161da177e4SLinus Torvalds 
11171da177e4SLinus Torvalds 		BUG_ON(!fi->fib_nhs);
11181da177e4SLinus Torvalds 		if (nh->nh_dev != dev || fi == prev_fi)
11191da177e4SLinus Torvalds 			continue;
11201da177e4SLinus Torvalds 
11211da177e4SLinus Torvalds 		prev_fi = fi;
11221da177e4SLinus Torvalds 		alive = 0;
11231da177e4SLinus Torvalds 		change_nexthops(fi) {
11241da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
11251da177e4SLinus Torvalds 				alive++;
11261da177e4SLinus Torvalds 				continue;
11271da177e4SLinus Torvalds 			}
11281da177e4SLinus Torvalds 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
11291da177e4SLinus Torvalds 				continue;
1130e5ed6399SHerbert Xu 			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
11311da177e4SLinus Torvalds 				continue;
11321da177e4SLinus Torvalds 			alive++;
11331da177e4SLinus Torvalds 			spin_lock_bh(&fib_multipath_lock);
11341da177e4SLinus Torvalds 			nh->nh_power = 0;
11351da177e4SLinus Torvalds 			nh->nh_flags &= ~RTNH_F_DEAD;
11361da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
11371da177e4SLinus Torvalds 		} endfor_nexthops(fi)
11381da177e4SLinus Torvalds 
11391da177e4SLinus Torvalds 		if (alive > 0) {
11401da177e4SLinus Torvalds 			fi->fib_flags &= ~RTNH_F_DEAD;
11411da177e4SLinus Torvalds 			ret++;
11421da177e4SLinus Torvalds 		}
11431da177e4SLinus Torvalds 	}
11441da177e4SLinus Torvalds 
11451da177e4SLinus Torvalds 	return ret;
11461da177e4SLinus Torvalds }
11471da177e4SLinus Torvalds 
11481da177e4SLinus Torvalds /*
11491da177e4SLinus Torvalds    The algorithm is suboptimal, but it provides really
11501da177e4SLinus Torvalds    fair weighted route distribution.
11511da177e4SLinus Torvalds  */
11521da177e4SLinus Torvalds 
11531da177e4SLinus Torvalds void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
11541da177e4SLinus Torvalds {
11551da177e4SLinus Torvalds 	struct fib_info *fi = res->fi;
11561da177e4SLinus Torvalds 	int w;
11571da177e4SLinus Torvalds 
11581da177e4SLinus Torvalds 	spin_lock_bh(&fib_multipath_lock);
11591da177e4SLinus Torvalds 	if (fi->fib_power <= 0) {
11601da177e4SLinus Torvalds 		int power = 0;
11611da177e4SLinus Torvalds 		change_nexthops(fi) {
11621da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
11631da177e4SLinus Torvalds 				power += nh->nh_weight;
11641da177e4SLinus Torvalds 				nh->nh_power = nh->nh_weight;
11651da177e4SLinus Torvalds 			}
11661da177e4SLinus Torvalds 		} endfor_nexthops(fi);
11671da177e4SLinus Torvalds 		fi->fib_power = power;
11681da177e4SLinus Torvalds 		if (power <= 0) {
11691da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
11701da177e4SLinus Torvalds 			/* Race condition: route has just become dead. */
11711da177e4SLinus Torvalds 			res->nh_sel = 0;
11721da177e4SLinus Torvalds 			return;
11731da177e4SLinus Torvalds 		}
11741da177e4SLinus Torvalds 	}
11751da177e4SLinus Torvalds 
11761da177e4SLinus Torvalds 
11771da177e4SLinus Torvalds 	/* w should be random number [0..fi->fib_power-1],
11781da177e4SLinus Torvalds 	   it is pretty bad approximation.
11791da177e4SLinus Torvalds 	 */
11801da177e4SLinus Torvalds 
11811da177e4SLinus Torvalds 	w = jiffies % fi->fib_power;
11821da177e4SLinus Torvalds 
11831da177e4SLinus Torvalds 	change_nexthops(fi) {
11841da177e4SLinus Torvalds 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
11851da177e4SLinus Torvalds 			if ((w -= nh->nh_power) <= 0) {
11861da177e4SLinus Torvalds 				nh->nh_power--;
11871da177e4SLinus Torvalds 				fi->fib_power--;
11881da177e4SLinus Torvalds 				res->nh_sel = nhsel;
11891da177e4SLinus Torvalds 				spin_unlock_bh(&fib_multipath_lock);
11901da177e4SLinus Torvalds 				return;
11911da177e4SLinus Torvalds 			}
11921da177e4SLinus Torvalds 		}
11931da177e4SLinus Torvalds 	} endfor_nexthops(fi);
11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds 	/* Race condition: route has just become dead. */
11961da177e4SLinus Torvalds 	res->nh_sel = 0;
11971da177e4SLinus Torvalds 	spin_unlock_bh(&fib_multipath_lock);
11981da177e4SLinus Torvalds }
11991da177e4SLinus Torvalds #endif
1200