xref: /linux/net/ipv4/fib_semantics.c (revision 4814bdbd590e835ecec2d5e505165ec1c19796b2)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		IPv4 Forwarding Information Base: semantics.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
131da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
141da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
151da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
161da177e4SLinus Torvalds  */
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds #include <asm/uaccess.h>
191da177e4SLinus Torvalds #include <asm/system.h>
201da177e4SLinus Torvalds #include <linux/bitops.h>
211da177e4SLinus Torvalds #include <linux/types.h>
221da177e4SLinus Torvalds #include <linux/kernel.h>
231da177e4SLinus Torvalds #include <linux/jiffies.h>
241da177e4SLinus Torvalds #include <linux/mm.h>
251da177e4SLinus Torvalds #include <linux/string.h>
261da177e4SLinus Torvalds #include <linux/socket.h>
271da177e4SLinus Torvalds #include <linux/sockios.h>
281da177e4SLinus Torvalds #include <linux/errno.h>
291da177e4SLinus Torvalds #include <linux/in.h>
301da177e4SLinus Torvalds #include <linux/inet.h>
3114c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h>
321da177e4SLinus Torvalds #include <linux/netdevice.h>
331da177e4SLinus Torvalds #include <linux/if_arp.h>
341da177e4SLinus Torvalds #include <linux/proc_fs.h>
351da177e4SLinus Torvalds #include <linux/skbuff.h>
361da177e4SLinus Torvalds #include <linux/init.h>
371da177e4SLinus Torvalds 
3814c85021SArnaldo Carvalho de Melo #include <net/arp.h>
391da177e4SLinus Torvalds #include <net/ip.h>
401da177e4SLinus Torvalds #include <net/protocol.h>
411da177e4SLinus Torvalds #include <net/route.h>
421da177e4SLinus Torvalds #include <net/tcp.h>
431da177e4SLinus Torvalds #include <net/sock.h>
441da177e4SLinus Torvalds #include <net/ip_fib.h>
45f21c7bc5SThomas Graf #include <net/netlink.h>
464e902c57SThomas Graf #include <net/nexthop.h>
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds #include "fib_lookup.h"
491da177e4SLinus Torvalds 
50832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock);
511da177e4SLinus Torvalds static struct hlist_head *fib_info_hash;
521da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash;
531da177e4SLinus Torvalds static unsigned int fib_hash_size;
541da177e4SLinus Torvalds static unsigned int fib_info_cnt;
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8
571da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
581da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
591da177e4SLinus Torvalds 
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * for_nexthops() and change_nexthops() open a brace scope declaring
 * 'nhsel' (the nexthop index) and 'nh' (the current nexthop), followed
 * by a loop header walking the fib_nhs entries of (fi)->fib_nh.  The
 * caller supplies the loop body and must close the scope again with
 * endfor_nexthops().  for_nexthops() hands out a const nexthop pointer,
 * change_nexthops() a writable one.
 */
#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nh;						\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited.  The loop is
 * kept (with exactly one iteration) so the body has the same shape as
 * in the multipath case; the compiler is expected to optimize the
 * dummy loop away.
 */
#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds 
859b5b5cffSArjan van de Ven static const struct
861da177e4SLinus Torvalds {
871da177e4SLinus Torvalds 	int	error;
881da177e4SLinus Torvalds 	u8	scope;
89a0ee18b9SThomas Graf } fib_props[RTN_MAX + 1] = {
901da177e4SLinus Torvalds 	{
911da177e4SLinus Torvalds 		.error	= 0,
921da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
931da177e4SLinus Torvalds 	},	/* RTN_UNSPEC */
941da177e4SLinus Torvalds 	{
951da177e4SLinus Torvalds 		.error	= 0,
961da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
971da177e4SLinus Torvalds 	},	/* RTN_UNICAST */
981da177e4SLinus Torvalds 	{
991da177e4SLinus Torvalds 		.error	= 0,
1001da177e4SLinus Torvalds 		.scope	= RT_SCOPE_HOST,
1011da177e4SLinus Torvalds 	},	/* RTN_LOCAL */
1021da177e4SLinus Torvalds 	{
1031da177e4SLinus Torvalds 		.error	= 0,
1041da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1051da177e4SLinus Torvalds 	},	/* RTN_BROADCAST */
1061da177e4SLinus Torvalds 	{
1071da177e4SLinus Torvalds 		.error	= 0,
1081da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1091da177e4SLinus Torvalds 	},	/* RTN_ANYCAST */
1101da177e4SLinus Torvalds 	{
1111da177e4SLinus Torvalds 		.error	= 0,
1121da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1131da177e4SLinus Torvalds 	},	/* RTN_MULTICAST */
1141da177e4SLinus Torvalds 	{
1151da177e4SLinus Torvalds 		.error	= -EINVAL,
1161da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1171da177e4SLinus Torvalds 	},	/* RTN_BLACKHOLE */
1181da177e4SLinus Torvalds 	{
1191da177e4SLinus Torvalds 		.error	= -EHOSTUNREACH,
1201da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1211da177e4SLinus Torvalds 	},	/* RTN_UNREACHABLE */
1221da177e4SLinus Torvalds 	{
1231da177e4SLinus Torvalds 		.error	= -EACCES,
1241da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1251da177e4SLinus Torvalds 	},	/* RTN_PROHIBIT */
1261da177e4SLinus Torvalds 	{
1271da177e4SLinus Torvalds 		.error	= -EAGAIN,
1281da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1291da177e4SLinus Torvalds 	},	/* RTN_THROW */
1301da177e4SLinus Torvalds 	{
1311da177e4SLinus Torvalds 		.error	= -EINVAL,
1321da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1331da177e4SLinus Torvalds 	},	/* RTN_NAT */
1341da177e4SLinus Torvalds 	{
1351da177e4SLinus Torvalds 		.error	= -EINVAL,
1361da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1371da177e4SLinus Torvalds 	},	/* RTN_XRESOLVE */
1381da177e4SLinus Torvalds };
1391da177e4SLinus Torvalds 
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds /* Release a nexthop info record */
1421da177e4SLinus Torvalds 
1431da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi)
1441da177e4SLinus Torvalds {
1451da177e4SLinus Torvalds 	if (fi->fib_dead == 0) {
146a6db9010SStephen Hemminger 		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
1471da177e4SLinus Torvalds 		return;
1481da177e4SLinus Torvalds 	}
1491da177e4SLinus Torvalds 	change_nexthops(fi) {
1501da177e4SLinus Torvalds 		if (nh->nh_dev)
1511da177e4SLinus Torvalds 			dev_put(nh->nh_dev);
1521da177e4SLinus Torvalds 		nh->nh_dev = NULL;
1531da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1541da177e4SLinus Torvalds 	fib_info_cnt--;
1551da177e4SLinus Torvalds 	kfree(fi);
1561da177e4SLinus Torvalds }
1571da177e4SLinus Torvalds 
1581da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi)
1591da177e4SLinus Torvalds {
160832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
1611da177e4SLinus Torvalds 	if (fi && --fi->fib_treeref == 0) {
1621da177e4SLinus Torvalds 		hlist_del(&fi->fib_hash);
1631da177e4SLinus Torvalds 		if (fi->fib_prefsrc)
1641da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
1651da177e4SLinus Torvalds 		change_nexthops(fi) {
1661da177e4SLinus Torvalds 			if (!nh->nh_dev)
1671da177e4SLinus Torvalds 				continue;
1681da177e4SLinus Torvalds 			hlist_del(&nh->nh_hash);
1691da177e4SLinus Torvalds 		} endfor_nexthops(fi)
1701da177e4SLinus Torvalds 		fi->fib_dead = 1;
1711da177e4SLinus Torvalds 		fib_info_put(fi);
1721da177e4SLinus Torvalds 	}
173832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
1741da177e4SLinus Torvalds }
1751da177e4SLinus Torvalds 
1761da177e4SLinus Torvalds static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
1771da177e4SLinus Torvalds {
1781da177e4SLinus Torvalds 	const struct fib_nh *onh = ofi->fib_nh;
1791da177e4SLinus Torvalds 
1801da177e4SLinus Torvalds 	for_nexthops(fi) {
1811da177e4SLinus Torvalds 		if (nh->nh_oif != onh->nh_oif ||
1821da177e4SLinus Torvalds 		    nh->nh_gw  != onh->nh_gw ||
1831da177e4SLinus Torvalds 		    nh->nh_scope != onh->nh_scope ||
1841da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
1851da177e4SLinus Torvalds 		    nh->nh_weight != onh->nh_weight ||
1861da177e4SLinus Torvalds #endif
1871da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
1881da177e4SLinus Torvalds 		    nh->nh_tclassid != onh->nh_tclassid ||
1891da177e4SLinus Torvalds #endif
1901da177e4SLinus Torvalds 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
1911da177e4SLinus Torvalds 			return -1;
1921da177e4SLinus Torvalds 		onh++;
1931da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1941da177e4SLinus Torvalds 	return 0;
1951da177e4SLinus Torvalds }
1961da177e4SLinus Torvalds 
19788ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val)
19888ebc72fSDavid S. Miller {
19988ebc72fSDavid S. Miller 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
20088ebc72fSDavid S. Miller 
20188ebc72fSDavid S. Miller 	return (val ^
20288ebc72fSDavid S. Miller 		(val >> DEVINDEX_HASHBITS) ^
20388ebc72fSDavid S. Miller 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
20488ebc72fSDavid S. Miller }
20588ebc72fSDavid S. Miller 
2061da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
2071da177e4SLinus Torvalds {
2081da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
2091da177e4SLinus Torvalds 	unsigned int val = fi->fib_nhs;
2101da177e4SLinus Torvalds 
2111da177e4SLinus Torvalds 	val ^= fi->fib_protocol;
21281f7bf6cSAl Viro 	val ^= (__force u32)fi->fib_prefsrc;
2131da177e4SLinus Torvalds 	val ^= fi->fib_priority;
21488ebc72fSDavid S. Miller 	for_nexthops(fi) {
21588ebc72fSDavid S. Miller 		val ^= fib_devindex_hashfn(nh->nh_oif);
21688ebc72fSDavid S. Miller 	} endfor_nexthops(fi)
2171da177e4SLinus Torvalds 
2181da177e4SLinus Torvalds 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
2191da177e4SLinus Torvalds }
2201da177e4SLinus Torvalds 
2211da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi)
2221da177e4SLinus Torvalds {
2231da177e4SLinus Torvalds 	struct hlist_head *head;
2241da177e4SLinus Torvalds 	struct hlist_node *node;
2251da177e4SLinus Torvalds 	struct fib_info *fi;
2261da177e4SLinus Torvalds 	unsigned int hash;
2271da177e4SLinus Torvalds 
2281da177e4SLinus Torvalds 	hash = fib_info_hashfn(nfi);
2291da177e4SLinus Torvalds 	head = &fib_info_hash[hash];
2301da177e4SLinus Torvalds 
2311da177e4SLinus Torvalds 	hlist_for_each_entry(fi, node, head, fib_hash) {
232*4814bdbdSDenis V. Lunev 		if (fi->fib_net != nfi->fib_net)
233*4814bdbdSDenis V. Lunev 			continue;
2341da177e4SLinus Torvalds 		if (fi->fib_nhs != nfi->fib_nhs)
2351da177e4SLinus Torvalds 			continue;
2361da177e4SLinus Torvalds 		if (nfi->fib_protocol == fi->fib_protocol &&
2371da177e4SLinus Torvalds 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
2381da177e4SLinus Torvalds 		    nfi->fib_priority == fi->fib_priority &&
2391da177e4SLinus Torvalds 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
2401da177e4SLinus Torvalds 			   sizeof(fi->fib_metrics)) == 0 &&
2411da177e4SLinus Torvalds 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
2421da177e4SLinus Torvalds 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
2431da177e4SLinus Torvalds 			return fi;
2441da177e4SLinus Torvalds 	}
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 	return NULL;
2471da177e4SLinus Torvalds }
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds /* Check, that the gateway is already configured.
2501da177e4SLinus Torvalds    Used only by redirect accept routine.
2511da177e4SLinus Torvalds  */
2521da177e4SLinus Torvalds 
253d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev)
2541da177e4SLinus Torvalds {
2551da177e4SLinus Torvalds 	struct hlist_head *head;
2561da177e4SLinus Torvalds 	struct hlist_node *node;
2571da177e4SLinus Torvalds 	struct fib_nh *nh;
2581da177e4SLinus Torvalds 	unsigned int hash;
2591da177e4SLinus Torvalds 
260832b4c5eSStephen Hemminger 	spin_lock(&fib_info_lock);
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
2631da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
2641da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
2651da177e4SLinus Torvalds 		if (nh->nh_dev == dev &&
2661da177e4SLinus Torvalds 		    nh->nh_gw == gw &&
2671da177e4SLinus Torvalds 		    !(nh->nh_flags&RTNH_F_DEAD)) {
268832b4c5eSStephen Hemminger 			spin_unlock(&fib_info_lock);
2691da177e4SLinus Torvalds 			return 0;
2701da177e4SLinus Torvalds 		}
2711da177e4SLinus Torvalds 	}
2721da177e4SLinus Torvalds 
273832b4c5eSStephen Hemminger 	spin_unlock(&fib_info_lock);
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds 	return -1;
2761da177e4SLinus Torvalds }
2771da177e4SLinus Torvalds 
278339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi)
279339bf98fSThomas Graf {
280339bf98fSThomas Graf 	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
281339bf98fSThomas Graf 			 + nla_total_size(4) /* RTA_TABLE */
282339bf98fSThomas Graf 			 + nla_total_size(4) /* RTA_DST */
283339bf98fSThomas Graf 			 + nla_total_size(4) /* RTA_PRIORITY */
284339bf98fSThomas Graf 			 + nla_total_size(4); /* RTA_PREFSRC */
285339bf98fSThomas Graf 
286339bf98fSThomas Graf 	/* space for nested metrics */
287339bf98fSThomas Graf 	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
288339bf98fSThomas Graf 
289339bf98fSThomas Graf 	if (fi->fib_nhs) {
290339bf98fSThomas Graf 		/* Also handles the special case fib_nhs == 1 */
291339bf98fSThomas Graf 
292339bf98fSThomas Graf 		/* each nexthop is packed in an attribute */
293339bf98fSThomas Graf 		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
294339bf98fSThomas Graf 
295339bf98fSThomas Graf 		/* may contain flow and gateway attribute */
296339bf98fSThomas Graf 		nhsize += 2 * nla_total_size(4);
297339bf98fSThomas Graf 
298339bf98fSThomas Graf 		/* all nexthops are packed in a nested attribute */
299339bf98fSThomas Graf 		payload += nla_total_size(fi->fib_nhs * nhsize);
300339bf98fSThomas Graf 	}
301339bf98fSThomas Graf 
302339bf98fSThomas Graf 	return payload;
303339bf98fSThomas Graf }
304339bf98fSThomas Graf 
30581f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
306b8f55831SMilan Kocian 	       int dst_len, u32 tb_id, struct nl_info *info,
307b8f55831SMilan Kocian 	       unsigned int nlm_flags)
3081da177e4SLinus Torvalds {
3091da177e4SLinus Torvalds 	struct sk_buff *skb;
3104e902c57SThomas Graf 	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
311f21c7bc5SThomas Graf 	int err = -ENOBUFS;
3121da177e4SLinus Torvalds 
313339bf98fSThomas Graf 	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
314f21c7bc5SThomas Graf 	if (skb == NULL)
315f21c7bc5SThomas Graf 		goto errout;
3161da177e4SLinus Torvalds 
3174e902c57SThomas Graf 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
318be403ea1SThomas Graf 			    fa->fa_type, fa->fa_scope, key, dst_len,
319b8f55831SMilan Kocian 			    fa->fa_tos, fa->fa_info, nlm_flags);
32026932566SPatrick McHardy 	if (err < 0) {
32126932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
32226932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
32326932566SPatrick McHardy 		kfree_skb(skb);
32426932566SPatrick McHardy 		goto errout;
32526932566SPatrick McHardy 	}
3264d1169c1SDenis V. Lunev 	err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
3274e902c57SThomas Graf 			  info->nlh, GFP_KERNEL);
328f21c7bc5SThomas Graf errout:
329f21c7bc5SThomas Graf 	if (err < 0)
3304d1169c1SDenis V. Lunev 		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
3311da177e4SLinus Torvalds }
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds /* Return the first fib alias matching TOS with
3341da177e4SLinus Torvalds  * priority less than or equal to PRIO.
3351da177e4SLinus Torvalds  */
3361da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
3371da177e4SLinus Torvalds {
3381da177e4SLinus Torvalds 	if (fah) {
3391da177e4SLinus Torvalds 		struct fib_alias *fa;
3401da177e4SLinus Torvalds 		list_for_each_entry(fa, fah, fa_list) {
3411da177e4SLinus Torvalds 			if (fa->fa_tos > tos)
3421da177e4SLinus Torvalds 				continue;
3431da177e4SLinus Torvalds 			if (fa->fa_info->fib_priority >= prio ||
3441da177e4SLinus Torvalds 			    fa->fa_tos < tos)
3451da177e4SLinus Torvalds 				return fa;
3461da177e4SLinus Torvalds 		}
3471da177e4SLinus Torvalds 	}
3481da177e4SLinus Torvalds 	return NULL;
3491da177e4SLinus Torvalds }
3501da177e4SLinus Torvalds 
3511da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order,
352c17860a0SDenis V. Lunev 		     struct fib_info **last_resort, int *last_idx, int dflt)
3531da177e4SLinus Torvalds {
3541da177e4SLinus Torvalds 	struct neighbour *n;
3551da177e4SLinus Torvalds 	int state = NUD_NONE;
3561da177e4SLinus Torvalds 
3571da177e4SLinus Torvalds 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
3581da177e4SLinus Torvalds 	if (n) {
3591da177e4SLinus Torvalds 		state = n->nud_state;
3601da177e4SLinus Torvalds 		neigh_release(n);
3611da177e4SLinus Torvalds 	}
3621da177e4SLinus Torvalds 	if (state==NUD_REACHABLE)
3631da177e4SLinus Torvalds 		return 0;
364c17860a0SDenis V. Lunev 	if ((state&NUD_VALID) && order != dflt)
3651da177e4SLinus Torvalds 		return 0;
3661da177e4SLinus Torvalds 	if ((state&NUD_VALID) ||
367c17860a0SDenis V. Lunev 	    (*last_idx<0 && order > dflt)) {
3681da177e4SLinus Torvalds 		*last_resort = fi;
3691da177e4SLinus Torvalds 		*last_idx = order;
3701da177e4SLinus Torvalds 	}
3711da177e4SLinus Torvalds 	return 1;
3721da177e4SLinus Torvalds }
3731da177e4SLinus Torvalds 
3741da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
3751da177e4SLinus Torvalds 
/*
 * Count the rtnexthop records in a buffer of @remaining bytes starting
 * at @rtnh.
 *
 * Returns the number of records, or 0 if bytes are left over after the
 * last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs;

	for (nhs = 0; rtnh_ok(rtnh, remaining); nhs++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return nhs;
}
3881da177e4SLinus Torvalds 
3894e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
3904e902c57SThomas Graf 		       int remaining, struct fib_config *cfg)
3914e902c57SThomas Graf {
3921da177e4SLinus Torvalds 	change_nexthops(fi) {
3934e902c57SThomas Graf 		int attrlen;
3944e902c57SThomas Graf 
3954e902c57SThomas Graf 		if (!rtnh_ok(rtnh, remaining))
3961da177e4SLinus Torvalds 			return -EINVAL;
3974e902c57SThomas Graf 
3984e902c57SThomas Graf 		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
3994e902c57SThomas Graf 		nh->nh_oif = rtnh->rtnh_ifindex;
4004e902c57SThomas Graf 		nh->nh_weight = rtnh->rtnh_hops + 1;
4014e902c57SThomas Graf 
4024e902c57SThomas Graf 		attrlen = rtnh_attrlen(rtnh);
4034e902c57SThomas Graf 		if (attrlen > 0) {
4044e902c57SThomas Graf 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4054e902c57SThomas Graf 
4064e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
40717fb2c64SAl Viro 			nh->nh_gw = nla ? nla_get_be32(nla) : 0;
4081da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
4094e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_FLOW);
4104e902c57SThomas Graf 			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
4111da177e4SLinus Torvalds #endif
4121da177e4SLinus Torvalds 		}
4134e902c57SThomas Graf 
4144e902c57SThomas Graf 		rtnh = rtnh_next(rtnh, &remaining);
4151da177e4SLinus Torvalds 	} endfor_nexthops(fi);
4164e902c57SThomas Graf 
4171da177e4SLinus Torvalds 	return 0;
4181da177e4SLinus Torvalds }
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds #endif
4211da177e4SLinus Torvalds 
4224e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
4231da177e4SLinus Torvalds {
4241da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
4254e902c57SThomas Graf 	struct rtnexthop *rtnh;
4264e902c57SThomas Graf 	int remaining;
4271da177e4SLinus Torvalds #endif
4281da177e4SLinus Torvalds 
4294e902c57SThomas Graf 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
4301da177e4SLinus Torvalds 		return 1;
4311da177e4SLinus Torvalds 
4324e902c57SThomas Graf 	if (cfg->fc_oif || cfg->fc_gw) {
4334e902c57SThomas Graf 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
4344e902c57SThomas Graf 		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
4351da177e4SLinus Torvalds 			return 0;
4361da177e4SLinus Torvalds 		return 1;
4371da177e4SLinus Torvalds 	}
4381da177e4SLinus Torvalds 
4391da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
4404e902c57SThomas Graf 	if (cfg->fc_mp == NULL)
4411da177e4SLinus Torvalds 		return 0;
4424e902c57SThomas Graf 
4434e902c57SThomas Graf 	rtnh = cfg->fc_mp;
4444e902c57SThomas Graf 	remaining = cfg->fc_mp_len;
4451da177e4SLinus Torvalds 
4461da177e4SLinus Torvalds 	for_nexthops(fi) {
4474e902c57SThomas Graf 		int attrlen;
4481da177e4SLinus Torvalds 
4494e902c57SThomas Graf 		if (!rtnh_ok(rtnh, remaining))
4501da177e4SLinus Torvalds 			return -EINVAL;
4514e902c57SThomas Graf 
4524e902c57SThomas Graf 		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
4531da177e4SLinus Torvalds 			return 1;
4544e902c57SThomas Graf 
4554e902c57SThomas Graf 		attrlen = rtnh_attrlen(rtnh);
4564e902c57SThomas Graf 		if (attrlen < 0) {
4574e902c57SThomas Graf 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4584e902c57SThomas Graf 
4594e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
46017fb2c64SAl Viro 			if (nla && nla_get_be32(nla) != nh->nh_gw)
4611da177e4SLinus Torvalds 				return 1;
4621da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
4634e902c57SThomas Graf 			nla = nla_find(attrs, attrlen, RTA_FLOW);
4644e902c57SThomas Graf 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
4651da177e4SLinus Torvalds 				return 1;
4661da177e4SLinus Torvalds #endif
4671da177e4SLinus Torvalds 		}
4684e902c57SThomas Graf 
4694e902c57SThomas Graf 		rtnh = rtnh_next(rtnh, &remaining);
4701da177e4SLinus Torvalds 	} endfor_nexthops(fi);
4711da177e4SLinus Torvalds #endif
4721da177e4SLinus Torvalds 	return 0;
4731da177e4SLinus Torvalds }
4741da177e4SLinus Torvalds 
4751da177e4SLinus Torvalds 
/*
   Picture
   -------

   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) if both gateway and interface are specified, they should not
      contradict each other.
   d) if we use tunnel routes, the gateway might not be on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */
5191da177e4SLinus Torvalds 
5204e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
5214e902c57SThomas Graf 			struct fib_nh *nh)
5221da177e4SLinus Torvalds {
5231da177e4SLinus Torvalds 	int err;
52486167a37SDenis V. Lunev 	struct net *net;
5251da177e4SLinus Torvalds 
52686167a37SDenis V. Lunev 	net = cfg->fc_nlinfo.nl_net;
5271da177e4SLinus Torvalds 	if (nh->nh_gw) {
5281da177e4SLinus Torvalds 		struct fib_result res;
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_PERVASIVE
5311da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_PERVASIVE)
5321da177e4SLinus Torvalds 			return 0;
5331da177e4SLinus Torvalds #endif
5341da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_ONLINK) {
5351da177e4SLinus Torvalds 			struct net_device *dev;
5361da177e4SLinus Torvalds 
5374e902c57SThomas Graf 			if (cfg->fc_scope >= RT_SCOPE_LINK)
5381da177e4SLinus Torvalds 				return -EINVAL;
53986167a37SDenis V. Lunev 			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
5401da177e4SLinus Torvalds 				return -EINVAL;
54186167a37SDenis V. Lunev 			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
5421da177e4SLinus Torvalds 				return -ENODEV;
5431da177e4SLinus Torvalds 			if (!(dev->flags&IFF_UP))
5441da177e4SLinus Torvalds 				return -ENETDOWN;
5451da177e4SLinus Torvalds 			nh->nh_dev = dev;
5461da177e4SLinus Torvalds 			dev_hold(dev);
5471da177e4SLinus Torvalds 			nh->nh_scope = RT_SCOPE_LINK;
5481da177e4SLinus Torvalds 			return 0;
5491da177e4SLinus Torvalds 		}
5501da177e4SLinus Torvalds 		{
5514e902c57SThomas Graf 			struct flowi fl = {
5524e902c57SThomas Graf 				.nl_u = {
5534e902c57SThomas Graf 					.ip4_u = {
5544e902c57SThomas Graf 						.daddr = nh->nh_gw,
5554e902c57SThomas Graf 						.scope = cfg->fc_scope + 1,
5564e902c57SThomas Graf 					},
5574e902c57SThomas Graf 				},
5584e902c57SThomas Graf 				.oif = nh->nh_oif,
5594e902c57SThomas Graf 			};
5601da177e4SLinus Torvalds 
5611da177e4SLinus Torvalds 			/* It is not necessary, but requires a bit of thinking */
5621da177e4SLinus Torvalds 			if (fl.fl4_scope < RT_SCOPE_LINK)
5631da177e4SLinus Torvalds 				fl.fl4_scope = RT_SCOPE_LINK;
56486167a37SDenis V. Lunev 			if ((err = fib_lookup(net, &fl, &res)) != 0)
5651da177e4SLinus Torvalds 				return err;
5661da177e4SLinus Torvalds 		}
5671da177e4SLinus Torvalds 		err = -EINVAL;
5681da177e4SLinus Torvalds 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
5691da177e4SLinus Torvalds 			goto out;
5701da177e4SLinus Torvalds 		nh->nh_scope = res.scope;
5711da177e4SLinus Torvalds 		nh->nh_oif = FIB_RES_OIF(res);
5721da177e4SLinus Torvalds 		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
5731da177e4SLinus Torvalds 			goto out;
5741da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5751da177e4SLinus Torvalds 		err = -ENETDOWN;
5761da177e4SLinus Torvalds 		if (!(nh->nh_dev->flags & IFF_UP))
5771da177e4SLinus Torvalds 			goto out;
5781da177e4SLinus Torvalds 		err = 0;
5791da177e4SLinus Torvalds out:
5801da177e4SLinus Torvalds 		fib_res_put(&res);
5811da177e4SLinus Torvalds 		return err;
5821da177e4SLinus Torvalds 	} else {
5831da177e4SLinus Torvalds 		struct in_device *in_dev;
5841da177e4SLinus Torvalds 
5851da177e4SLinus Torvalds 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
5861da177e4SLinus Torvalds 			return -EINVAL;
5871da177e4SLinus Torvalds 
58886167a37SDenis V. Lunev 		in_dev = inetdev_by_index(net, nh->nh_oif);
5891da177e4SLinus Torvalds 		if (in_dev == NULL)
5901da177e4SLinus Torvalds 			return -ENODEV;
5911da177e4SLinus Torvalds 		if (!(in_dev->dev->flags&IFF_UP)) {
5921da177e4SLinus Torvalds 			in_dev_put(in_dev);
5931da177e4SLinus Torvalds 			return -ENETDOWN;
5941da177e4SLinus Torvalds 		}
5951da177e4SLinus Torvalds 		nh->nh_dev = in_dev->dev;
5961da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5971da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_HOST;
5981da177e4SLinus Torvalds 		in_dev_put(in_dev);
5991da177e4SLinus Torvalds 	}
6001da177e4SLinus Torvalds 	return 0;
6011da177e4SLinus Torvalds }
6021da177e4SLinus Torvalds 
60381f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val)
6041da177e4SLinus Torvalds {
6051da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
6061da177e4SLinus Torvalds 
60781f7bf6cSAl Viro 	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
6081da177e4SLinus Torvalds }
6091da177e4SLinus Torvalds 
6101da177e4SLinus Torvalds static struct hlist_head *fib_hash_alloc(int bytes)
6111da177e4SLinus Torvalds {
6121da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
61388f83491SJoonwoo Park 		return kzalloc(bytes, GFP_KERNEL);
6141da177e4SLinus Torvalds 	else
6151da177e4SLinus Torvalds 		return (struct hlist_head *)
61688f83491SJoonwoo Park 			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
6171da177e4SLinus Torvalds }
6181da177e4SLinus Torvalds 
6191da177e4SLinus Torvalds static void fib_hash_free(struct hlist_head *hash, int bytes)
6201da177e4SLinus Torvalds {
6211da177e4SLinus Torvalds 	if (!hash)
6221da177e4SLinus Torvalds 		return;
6231da177e4SLinus Torvalds 
6241da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
6251da177e4SLinus Torvalds 		kfree(hash);
6261da177e4SLinus Torvalds 	else
6271da177e4SLinus Torvalds 		free_pages((unsigned long) hash, get_order(bytes));
6281da177e4SLinus Torvalds }
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds static void fib_hash_move(struct hlist_head *new_info_hash,
6311da177e4SLinus Torvalds 			  struct hlist_head *new_laddrhash,
6321da177e4SLinus Torvalds 			  unsigned int new_size)
6331da177e4SLinus Torvalds {
634b7656e7fSDavid S. Miller 	struct hlist_head *old_info_hash, *old_laddrhash;
6351da177e4SLinus Torvalds 	unsigned int old_size = fib_hash_size;
636b7656e7fSDavid S. Miller 	unsigned int i, bytes;
6371da177e4SLinus Torvalds 
638832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
639b7656e7fSDavid S. Miller 	old_info_hash = fib_info_hash;
640b7656e7fSDavid S. Miller 	old_laddrhash = fib_info_laddrhash;
6411da177e4SLinus Torvalds 	fib_hash_size = new_size;
6421da177e4SLinus Torvalds 
6431da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6441da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_hash[i];
6451da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6461da177e4SLinus Torvalds 		struct fib_info *fi;
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
6491da177e4SLinus Torvalds 			struct hlist_head *dest;
6501da177e4SLinus Torvalds 			unsigned int new_hash;
6511da177e4SLinus Torvalds 
6521da177e4SLinus Torvalds 			hlist_del(&fi->fib_hash);
6531da177e4SLinus Torvalds 
6541da177e4SLinus Torvalds 			new_hash = fib_info_hashfn(fi);
6551da177e4SLinus Torvalds 			dest = &new_info_hash[new_hash];
6561da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_hash, dest);
6571da177e4SLinus Torvalds 		}
6581da177e4SLinus Torvalds 	}
6591da177e4SLinus Torvalds 	fib_info_hash = new_info_hash;
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6621da177e4SLinus Torvalds 		struct hlist_head *lhead = &fib_info_laddrhash[i];
6631da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6641da177e4SLinus Torvalds 		struct fib_info *fi;
6651da177e4SLinus Torvalds 
6661da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
6671da177e4SLinus Torvalds 			struct hlist_head *ldest;
6681da177e4SLinus Torvalds 			unsigned int new_hash;
6691da177e4SLinus Torvalds 
6701da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
6711da177e4SLinus Torvalds 
6721da177e4SLinus Torvalds 			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
6731da177e4SLinus Torvalds 			ldest = &new_laddrhash[new_hash];
6741da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_lhash, ldest);
6751da177e4SLinus Torvalds 		}
6761da177e4SLinus Torvalds 	}
6771da177e4SLinus Torvalds 	fib_info_laddrhash = new_laddrhash;
6781da177e4SLinus Torvalds 
679832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
680b7656e7fSDavid S. Miller 
681b7656e7fSDavid S. Miller 	bytes = old_size * sizeof(struct hlist_head *);
682b7656e7fSDavid S. Miller 	fib_hash_free(old_info_hash, bytes);
683b7656e7fSDavid S. Miller 	fib_hash_free(old_laddrhash, bytes);
6841da177e4SLinus Torvalds }
6851da177e4SLinus Torvalds 
6864e902c57SThomas Graf struct fib_info *fib_create_info(struct fib_config *cfg)
6871da177e4SLinus Torvalds {
6881da177e4SLinus Torvalds 	int err;
6891da177e4SLinus Torvalds 	struct fib_info *fi = NULL;
6901da177e4SLinus Torvalds 	struct fib_info *ofi;
6911da177e4SLinus Torvalds 	int nhs = 1;
6927462bd74SDenis V. Lunev 	struct net *net = cfg->fc_nlinfo.nl_net;
6931da177e4SLinus Torvalds 
6941da177e4SLinus Torvalds 	/* Fast check to catch the most weird cases */
6954e902c57SThomas Graf 	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
6961da177e4SLinus Torvalds 		goto err_inval;
6971da177e4SLinus Torvalds 
6981da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
6994e902c57SThomas Graf 	if (cfg->fc_mp) {
7004e902c57SThomas Graf 		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
7011da177e4SLinus Torvalds 		if (nhs == 0)
7021da177e4SLinus Torvalds 			goto err_inval;
7031da177e4SLinus Torvalds 	}
7041da177e4SLinus Torvalds #endif
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds 	err = -ENOBUFS;
7071da177e4SLinus Torvalds 	if (fib_info_cnt >= fib_hash_size) {
7081da177e4SLinus Torvalds 		unsigned int new_size = fib_hash_size << 1;
7091da177e4SLinus Torvalds 		struct hlist_head *new_info_hash;
7101da177e4SLinus Torvalds 		struct hlist_head *new_laddrhash;
7111da177e4SLinus Torvalds 		unsigned int bytes;
7121da177e4SLinus Torvalds 
7131da177e4SLinus Torvalds 		if (!new_size)
7141da177e4SLinus Torvalds 			new_size = 1;
7151da177e4SLinus Torvalds 		bytes = new_size * sizeof(struct hlist_head *);
7161da177e4SLinus Torvalds 		new_info_hash = fib_hash_alloc(bytes);
7171da177e4SLinus Torvalds 		new_laddrhash = fib_hash_alloc(bytes);
7181da177e4SLinus Torvalds 		if (!new_info_hash || !new_laddrhash) {
7191da177e4SLinus Torvalds 			fib_hash_free(new_info_hash, bytes);
7201da177e4SLinus Torvalds 			fib_hash_free(new_laddrhash, bytes);
72188f83491SJoonwoo Park 		} else
7221da177e4SLinus Torvalds 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds 		if (!fib_hash_size)
7251da177e4SLinus Torvalds 			goto failure;
7261da177e4SLinus Torvalds 	}
7271da177e4SLinus Torvalds 
7280da974f4SPanagiotis Issaris 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
7291da177e4SLinus Torvalds 	if (fi == NULL)
7301da177e4SLinus Torvalds 		goto failure;
7311da177e4SLinus Torvalds 	fib_info_cnt++;
7321da177e4SLinus Torvalds 
7337462bd74SDenis V. Lunev 	fi->fib_net = net;
7344e902c57SThomas Graf 	fi->fib_protocol = cfg->fc_protocol;
7354e902c57SThomas Graf 	fi->fib_flags = cfg->fc_flags;
7364e902c57SThomas Graf 	fi->fib_priority = cfg->fc_priority;
7374e902c57SThomas Graf 	fi->fib_prefsrc = cfg->fc_prefsrc;
7381da177e4SLinus Torvalds 
7391da177e4SLinus Torvalds 	fi->fib_nhs = nhs;
7401da177e4SLinus Torvalds 	change_nexthops(fi) {
7411da177e4SLinus Torvalds 		nh->nh_parent = fi;
7421da177e4SLinus Torvalds 	} endfor_nexthops(fi)
7431da177e4SLinus Torvalds 
7444e902c57SThomas Graf 	if (cfg->fc_mx) {
7454e902c57SThomas Graf 		struct nlattr *nla;
7464e902c57SThomas Graf 		int remaining;
7471da177e4SLinus Torvalds 
7484e902c57SThomas Graf 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
7498f4c1f9bSThomas Graf 			int type = nla_type(nla);
7504e902c57SThomas Graf 
7514e902c57SThomas Graf 			if (type) {
7524e902c57SThomas Graf 				if (type > RTAX_MAX)
7531da177e4SLinus Torvalds 					goto err_inval;
7544e902c57SThomas Graf 				fi->fib_metrics[type - 1] = nla_get_u32(nla);
7551da177e4SLinus Torvalds 			}
7561da177e4SLinus Torvalds 		}
7574e902c57SThomas Graf 	}
7581da177e4SLinus Torvalds 
7594e902c57SThomas Graf 	if (cfg->fc_mp) {
7601da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7614e902c57SThomas Graf 		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
7624e902c57SThomas Graf 		if (err != 0)
7631da177e4SLinus Torvalds 			goto failure;
7644e902c57SThomas Graf 		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
7651da177e4SLinus Torvalds 			goto err_inval;
7664e902c57SThomas Graf 		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
7671da177e4SLinus Torvalds 			goto err_inval;
7681da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7694e902c57SThomas Graf 		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
7701da177e4SLinus Torvalds 			goto err_inval;
7711da177e4SLinus Torvalds #endif
7721da177e4SLinus Torvalds #else
7731da177e4SLinus Torvalds 		goto err_inval;
7741da177e4SLinus Torvalds #endif
7751da177e4SLinus Torvalds 	} else {
7761da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
7774e902c57SThomas Graf 
7784e902c57SThomas Graf 		nh->nh_oif = cfg->fc_oif;
7794e902c57SThomas Graf 		nh->nh_gw = cfg->fc_gw;
7804e902c57SThomas Graf 		nh->nh_flags = cfg->fc_flags;
7811da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7824e902c57SThomas Graf 		nh->nh_tclassid = cfg->fc_flow;
7831da177e4SLinus Torvalds #endif
7841da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7851da177e4SLinus Torvalds 		nh->nh_weight = 1;
7861da177e4SLinus Torvalds #endif
7871da177e4SLinus Torvalds 	}
7881da177e4SLinus Torvalds 
7894e902c57SThomas Graf 	if (fib_props[cfg->fc_type].error) {
7904e902c57SThomas Graf 		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
7911da177e4SLinus Torvalds 			goto err_inval;
7921da177e4SLinus Torvalds 		goto link_it;
7931da177e4SLinus Torvalds 	}
7941da177e4SLinus Torvalds 
7954e902c57SThomas Graf 	if (cfg->fc_scope > RT_SCOPE_HOST)
7961da177e4SLinus Torvalds 		goto err_inval;
7971da177e4SLinus Torvalds 
7984e902c57SThomas Graf 	if (cfg->fc_scope == RT_SCOPE_HOST) {
7991da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
8001da177e4SLinus Torvalds 
8011da177e4SLinus Torvalds 		/* Local address is added. */
8021da177e4SLinus Torvalds 		if (nhs != 1 || nh->nh_gw)
8031da177e4SLinus Torvalds 			goto err_inval;
8041da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_NOWHERE;
8057462bd74SDenis V. Lunev 		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
8061da177e4SLinus Torvalds 		err = -ENODEV;
8071da177e4SLinus Torvalds 		if (nh->nh_dev == NULL)
8081da177e4SLinus Torvalds 			goto failure;
8091da177e4SLinus Torvalds 	} else {
8101da177e4SLinus Torvalds 		change_nexthops(fi) {
8114e902c57SThomas Graf 			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
8121da177e4SLinus Torvalds 				goto failure;
8131da177e4SLinus Torvalds 		} endfor_nexthops(fi)
8141da177e4SLinus Torvalds 	}
8151da177e4SLinus Torvalds 
8161da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
8174e902c57SThomas Graf 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
8184e902c57SThomas Graf 		    fi->fib_prefsrc != cfg->fc_dst)
8197462bd74SDenis V. Lunev 			if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
8201da177e4SLinus Torvalds 				goto err_inval;
8211da177e4SLinus Torvalds 	}
8221da177e4SLinus Torvalds 
8231da177e4SLinus Torvalds link_it:
8241da177e4SLinus Torvalds 	if ((ofi = fib_find_info(fi)) != NULL) {
8251da177e4SLinus Torvalds 		fi->fib_dead = 1;
8261da177e4SLinus Torvalds 		free_fib_info(fi);
8271da177e4SLinus Torvalds 		ofi->fib_treeref++;
8281da177e4SLinus Torvalds 		return ofi;
8291da177e4SLinus Torvalds 	}
8301da177e4SLinus Torvalds 
8311da177e4SLinus Torvalds 	fi->fib_treeref++;
8321da177e4SLinus Torvalds 	atomic_inc(&fi->fib_clntref);
833832b4c5eSStephen Hemminger 	spin_lock_bh(&fib_info_lock);
8341da177e4SLinus Torvalds 	hlist_add_head(&fi->fib_hash,
8351da177e4SLinus Torvalds 		       &fib_info_hash[fib_info_hashfn(fi)]);
8361da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
8371da177e4SLinus Torvalds 		struct hlist_head *head;
8381da177e4SLinus Torvalds 
8391da177e4SLinus Torvalds 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
8401da177e4SLinus Torvalds 		hlist_add_head(&fi->fib_lhash, head);
8411da177e4SLinus Torvalds 	}
8421da177e4SLinus Torvalds 	change_nexthops(fi) {
8431da177e4SLinus Torvalds 		struct hlist_head *head;
8441da177e4SLinus Torvalds 		unsigned int hash;
8451da177e4SLinus Torvalds 
8461da177e4SLinus Torvalds 		if (!nh->nh_dev)
8471da177e4SLinus Torvalds 			continue;
8481da177e4SLinus Torvalds 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
8491da177e4SLinus Torvalds 		head = &fib_info_devhash[hash];
8501da177e4SLinus Torvalds 		hlist_add_head(&nh->nh_hash, head);
8511da177e4SLinus Torvalds 	} endfor_nexthops(fi)
852832b4c5eSStephen Hemminger 	spin_unlock_bh(&fib_info_lock);
8531da177e4SLinus Torvalds 	return fi;
8541da177e4SLinus Torvalds 
8551da177e4SLinus Torvalds err_inval:
8561da177e4SLinus Torvalds 	err = -EINVAL;
8571da177e4SLinus Torvalds 
8581da177e4SLinus Torvalds failure:
8591da177e4SLinus Torvalds 	if (fi) {
8601da177e4SLinus Torvalds 		fi->fib_dead = 1;
8611da177e4SLinus Torvalds 		free_fib_info(fi);
8621da177e4SLinus Torvalds 	}
8634e902c57SThomas Graf 
8644e902c57SThomas Graf 	return ERR_PTR(err);
8651da177e4SLinus Torvalds }
8661da177e4SLinus Torvalds 
867e5b43760SRobert Olsson /* Note! fib_semantic_match intentionally uses  RCU list functions. */
8681da177e4SLinus Torvalds int fib_semantic_match(struct list_head *head, const struct flowi *flp,
8691ef1b8c8SAl Viro 		       struct fib_result *res, __be32 zone, __be32 mask,
8701da177e4SLinus Torvalds 			int prefixlen)
8711da177e4SLinus Torvalds {
8721da177e4SLinus Torvalds 	struct fib_alias *fa;
8731da177e4SLinus Torvalds 	int nh_sel = 0;
8741da177e4SLinus Torvalds 
875e5b43760SRobert Olsson 	list_for_each_entry_rcu(fa, head, fa_list) {
8761da177e4SLinus Torvalds 		int err;
8771da177e4SLinus Torvalds 
8781da177e4SLinus Torvalds 		if (fa->fa_tos &&
8791da177e4SLinus Torvalds 		    fa->fa_tos != flp->fl4_tos)
8801da177e4SLinus Torvalds 			continue;
8811da177e4SLinus Torvalds 
8821da177e4SLinus Torvalds 		if (fa->fa_scope < flp->fl4_scope)
8831da177e4SLinus Torvalds 			continue;
8841da177e4SLinus Torvalds 
8851da177e4SLinus Torvalds 		fa->fa_state |= FA_S_ACCESSED;
8861da177e4SLinus Torvalds 
8871da177e4SLinus Torvalds 		err = fib_props[fa->fa_type].error;
8881da177e4SLinus Torvalds 		if (err == 0) {
8891da177e4SLinus Torvalds 			struct fib_info *fi = fa->fa_info;
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds 			if (fi->fib_flags & RTNH_F_DEAD)
8921da177e4SLinus Torvalds 				continue;
8931da177e4SLinus Torvalds 
8941da177e4SLinus Torvalds 			switch (fa->fa_type) {
8951da177e4SLinus Torvalds 			case RTN_UNICAST:
8961da177e4SLinus Torvalds 			case RTN_LOCAL:
8971da177e4SLinus Torvalds 			case RTN_BROADCAST:
8981da177e4SLinus Torvalds 			case RTN_ANYCAST:
8991da177e4SLinus Torvalds 			case RTN_MULTICAST:
9001da177e4SLinus Torvalds 				for_nexthops(fi) {
9011da177e4SLinus Torvalds 					if (nh->nh_flags&RTNH_F_DEAD)
9021da177e4SLinus Torvalds 						continue;
9031da177e4SLinus Torvalds 					if (!flp->oif || flp->oif == nh->nh_oif)
9041da177e4SLinus Torvalds 						break;
9051da177e4SLinus Torvalds 				}
9061da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
9071da177e4SLinus Torvalds 				if (nhsel < fi->fib_nhs) {
9081da177e4SLinus Torvalds 					nh_sel = nhsel;
9091da177e4SLinus Torvalds 					goto out_fill_res;
9101da177e4SLinus Torvalds 				}
9111da177e4SLinus Torvalds #else
9121da177e4SLinus Torvalds 				if (nhsel < 1) {
9131da177e4SLinus Torvalds 					goto out_fill_res;
9141da177e4SLinus Torvalds 				}
9151da177e4SLinus Torvalds #endif
9161da177e4SLinus Torvalds 				endfor_nexthops(fi);
9171da177e4SLinus Torvalds 				continue;
9181da177e4SLinus Torvalds 
9191da177e4SLinus Torvalds 			default:
920a6db9010SStephen Hemminger 				printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
921a6db9010SStephen Hemminger 					fa->fa_type);
9221da177e4SLinus Torvalds 				return -EINVAL;
9233ff50b79SStephen Hemminger 			}
9241da177e4SLinus Torvalds 		}
9251da177e4SLinus Torvalds 		return err;
9261da177e4SLinus Torvalds 	}
9271da177e4SLinus Torvalds 	return 1;
9281da177e4SLinus Torvalds 
9291da177e4SLinus Torvalds out_fill_res:
9301da177e4SLinus Torvalds 	res->prefixlen = prefixlen;
9311da177e4SLinus Torvalds 	res->nh_sel = nh_sel;
9321da177e4SLinus Torvalds 	res->type = fa->fa_type;
9331da177e4SLinus Torvalds 	res->scope = fa->fa_scope;
9341da177e4SLinus Torvalds 	res->fi = fa->fa_info;
9351da177e4SLinus Torvalds 	atomic_inc(&res->fi->fib_clntref);
9361da177e4SLinus Torvalds 	return 0;
9371da177e4SLinus Torvalds }
9381da177e4SLinus Torvalds 
9391da177e4SLinus Torvalds /* Find appropriate source address to this destination */
9401da177e4SLinus Torvalds 
941b83738aeSAl Viro __be32 __fib_res_prefsrc(struct fib_result *res)
9421da177e4SLinus Torvalds {
9431da177e4SLinus Torvalds 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
9441da177e4SLinus Torvalds }
9451da177e4SLinus Torvalds 
946be403ea1SThomas Graf int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
94781f7bf6cSAl Viro 		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
948b6544c0bSJamal Hadi Salim 		  struct fib_info *fi, unsigned int flags)
9491da177e4SLinus Torvalds {
9501da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
951be403ea1SThomas Graf 	struct rtmsg *rtm;
9521da177e4SLinus Torvalds 
953be403ea1SThomas Graf 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
954be403ea1SThomas Graf 	if (nlh == NULL)
95526932566SPatrick McHardy 		return -EMSGSIZE;
956be403ea1SThomas Graf 
957be403ea1SThomas Graf 	rtm = nlmsg_data(nlh);
9581da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET;
9591da177e4SLinus Torvalds 	rtm->rtm_dst_len = dst_len;
9601da177e4SLinus Torvalds 	rtm->rtm_src_len = 0;
9611da177e4SLinus Torvalds 	rtm->rtm_tos = tos;
9621da177e4SLinus Torvalds 	rtm->rtm_table = tb_id;
963be403ea1SThomas Graf 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
9641da177e4SLinus Torvalds 	rtm->rtm_type = type;
9651da177e4SLinus Torvalds 	rtm->rtm_flags = fi->fib_flags;
9661da177e4SLinus Torvalds 	rtm->rtm_scope = scope;
9671da177e4SLinus Torvalds 	rtm->rtm_protocol = fi->fib_protocol;
968be403ea1SThomas Graf 
969be403ea1SThomas Graf 	if (rtm->rtm_dst_len)
97017fb2c64SAl Viro 		NLA_PUT_BE32(skb, RTA_DST, dst);
971be403ea1SThomas Graf 
9721da177e4SLinus Torvalds 	if (fi->fib_priority)
973be403ea1SThomas Graf 		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
974be403ea1SThomas Graf 
9751da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
976be403ea1SThomas Graf 		goto nla_put_failure;
977be403ea1SThomas Graf 
9781da177e4SLinus Torvalds 	if (fi->fib_prefsrc)
97917fb2c64SAl Viro 		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
980be403ea1SThomas Graf 
9811da177e4SLinus Torvalds 	if (fi->fib_nhs == 1) {
9821da177e4SLinus Torvalds 		if (fi->fib_nh->nh_gw)
98317fb2c64SAl Viro 			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
984be403ea1SThomas Graf 
9851da177e4SLinus Torvalds 		if (fi->fib_nh->nh_oif)
986be403ea1SThomas Graf 			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
9878265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE
9888265abc0SPatrick McHardy 		if (fi->fib_nh[0].nh_tclassid)
989be403ea1SThomas Graf 			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
9908265abc0SPatrick McHardy #endif
9911da177e4SLinus Torvalds 	}
9921da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
9931da177e4SLinus Torvalds 	if (fi->fib_nhs > 1) {
994be403ea1SThomas Graf 		struct rtnexthop *rtnh;
995be403ea1SThomas Graf 		struct nlattr *mp;
996be403ea1SThomas Graf 
997be403ea1SThomas Graf 		mp = nla_nest_start(skb, RTA_MULTIPATH);
998be403ea1SThomas Graf 		if (mp == NULL)
999be403ea1SThomas Graf 			goto nla_put_failure;
10001da177e4SLinus Torvalds 
10011da177e4SLinus Torvalds 		for_nexthops(fi) {
1002be403ea1SThomas Graf 			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1003be403ea1SThomas Graf 			if (rtnh == NULL)
1004be403ea1SThomas Graf 				goto nla_put_failure;
1005be403ea1SThomas Graf 
1006be403ea1SThomas Graf 			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1007be403ea1SThomas Graf 			rtnh->rtnh_hops = nh->nh_weight - 1;
1008be403ea1SThomas Graf 			rtnh->rtnh_ifindex = nh->nh_oif;
1009be403ea1SThomas Graf 
10101da177e4SLinus Torvalds 			if (nh->nh_gw)
101117fb2c64SAl Viro 				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
10128265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE
10138265abc0SPatrick McHardy 			if (nh->nh_tclassid)
1014be403ea1SThomas Graf 				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
10158265abc0SPatrick McHardy #endif
1016be403ea1SThomas Graf 			/* length of rtnetlink header + attributes */
1017be403ea1SThomas Graf 			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
10181da177e4SLinus Torvalds 		} endfor_nexthops(fi);
1019be403ea1SThomas Graf 
1020be403ea1SThomas Graf 		nla_nest_end(skb, mp);
10211da177e4SLinus Torvalds 	}
10221da177e4SLinus Torvalds #endif
1023be403ea1SThomas Graf 	return nlmsg_end(skb, nlh);
10241da177e4SLinus Torvalds 
1025be403ea1SThomas Graf nla_put_failure:
102626932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
102726932566SPatrick McHardy 	return -EMSGSIZE;
10281da177e4SLinus Torvalds }
10291da177e4SLinus Torvalds 
10301da177e4SLinus Torvalds /*
10311da177e4SLinus Torvalds    Update FIB if:
10321da177e4SLinus Torvalds    - local address disappeared -> we must delete all the entries
10331da177e4SLinus Torvalds      referring to it.
10341da177e4SLinus Torvalds    - device went down -> we must shutdown all nexthops going via it.
10351da177e4SLinus Torvalds  */
1036*4814bdbdSDenis V. Lunev int fib_sync_down_addr(struct net *net, __be32 local)
10371da177e4SLinus Torvalds {
10381da177e4SLinus Torvalds 	int ret = 0;
10391da177e4SLinus Torvalds 	unsigned int hash = fib_laddr_hashfn(local);
10401da177e4SLinus Torvalds 	struct hlist_head *head = &fib_info_laddrhash[hash];
10411da177e4SLinus Torvalds 	struct hlist_node *node;
10421da177e4SLinus Torvalds 	struct fib_info *fi;
10431da177e4SLinus Torvalds 
104485326fa5SDenis V. Lunev 	if (fib_info_laddrhash == NULL || local == 0)
104585326fa5SDenis V. Lunev 		return 0;
104685326fa5SDenis V. Lunev 
10471da177e4SLinus Torvalds 	hlist_for_each_entry(fi, node, head, fib_lhash) {
1048*4814bdbdSDenis V. Lunev 		if (fi->fib_net != net)
1049*4814bdbdSDenis V. Lunev 			continue;
10501da177e4SLinus Torvalds 		if (fi->fib_prefsrc == local) {
10511da177e4SLinus Torvalds 			fi->fib_flags |= RTNH_F_DEAD;
10521da177e4SLinus Torvalds 			ret++;
10531da177e4SLinus Torvalds 		}
10541da177e4SLinus Torvalds 	}
105585326fa5SDenis V. Lunev 	return ret;
10561da177e4SLinus Torvalds }
10571da177e4SLinus Torvalds 
105885326fa5SDenis V. Lunev int fib_sync_down_dev(struct net_device *dev, int force)
105985326fa5SDenis V. Lunev {
106085326fa5SDenis V. Lunev 	int ret = 0;
106185326fa5SDenis V. Lunev 	int scope = RT_SCOPE_NOWHERE;
10621da177e4SLinus Torvalds 	struct fib_info *prev_fi = NULL;
10631da177e4SLinus Torvalds 	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
10641da177e4SLinus Torvalds 	struct hlist_head *head = &fib_info_devhash[hash];
10651da177e4SLinus Torvalds 	struct hlist_node *node;
10661da177e4SLinus Torvalds 	struct fib_nh *nh;
10671da177e4SLinus Torvalds 
106885326fa5SDenis V. Lunev 	if (force)
106985326fa5SDenis V. Lunev 		scope = -1;
107085326fa5SDenis V. Lunev 
10711da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
10721da177e4SLinus Torvalds 		struct fib_info *fi = nh->nh_parent;
10731da177e4SLinus Torvalds 		int dead;
10741da177e4SLinus Torvalds 
10751da177e4SLinus Torvalds 		BUG_ON(!fi->fib_nhs);
10761da177e4SLinus Torvalds 		if (nh->nh_dev != dev || fi == prev_fi)
10771da177e4SLinus Torvalds 			continue;
10781da177e4SLinus Torvalds 		prev_fi = fi;
10791da177e4SLinus Torvalds 		dead = 0;
10801da177e4SLinus Torvalds 		change_nexthops(fi) {
10811da177e4SLinus Torvalds 			if (nh->nh_flags&RTNH_F_DEAD)
10821da177e4SLinus Torvalds 				dead++;
10831da177e4SLinus Torvalds 			else if (nh->nh_dev == dev &&
10841da177e4SLinus Torvalds 					nh->nh_scope != scope) {
10851da177e4SLinus Torvalds 				nh->nh_flags |= RTNH_F_DEAD;
10861da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
10871da177e4SLinus Torvalds 				spin_lock_bh(&fib_multipath_lock);
10881da177e4SLinus Torvalds 				fi->fib_power -= nh->nh_power;
10891da177e4SLinus Torvalds 				nh->nh_power = 0;
10901da177e4SLinus Torvalds 				spin_unlock_bh(&fib_multipath_lock);
10911da177e4SLinus Torvalds #endif
10921da177e4SLinus Torvalds 				dead++;
10931da177e4SLinus Torvalds 			}
10941da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
10951da177e4SLinus Torvalds 			if (force > 1 && nh->nh_dev == dev) {
10961da177e4SLinus Torvalds 				dead = fi->fib_nhs;
10971da177e4SLinus Torvalds 				break;
10981da177e4SLinus Torvalds 			}
10991da177e4SLinus Torvalds #endif
11001da177e4SLinus Torvalds 		} endfor_nexthops(fi)
11011da177e4SLinus Torvalds 		if (dead == fi->fib_nhs) {
11021da177e4SLinus Torvalds 			fi->fib_flags |= RTNH_F_DEAD;
11031da177e4SLinus Torvalds 			ret++;
11041da177e4SLinus Torvalds 		}
11051da177e4SLinus Torvalds 	}
11061da177e4SLinus Torvalds 
11071da177e4SLinus Torvalds 	return ret;
11081da177e4SLinus Torvalds }
11091da177e4SLinus Torvalds 
11101da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
11111da177e4SLinus Torvalds 
11121da177e4SLinus Torvalds /*
11131da177e4SLinus Torvalds    Dead device goes up. We wake up dead nexthops.
11141da177e4SLinus Torvalds    It takes sense only on multipath routes.
11151da177e4SLinus Torvalds  */
11161da177e4SLinus Torvalds 
11171da177e4SLinus Torvalds int fib_sync_up(struct net_device *dev)
11181da177e4SLinus Torvalds {
11191da177e4SLinus Torvalds 	struct fib_info *prev_fi;
11201da177e4SLinus Torvalds 	unsigned int hash;
11211da177e4SLinus Torvalds 	struct hlist_head *head;
11221da177e4SLinus Torvalds 	struct hlist_node *node;
11231da177e4SLinus Torvalds 	struct fib_nh *nh;
11241da177e4SLinus Torvalds 	int ret;
11251da177e4SLinus Torvalds 
11261da177e4SLinus Torvalds 	if (!(dev->flags&IFF_UP))
11271da177e4SLinus Torvalds 		return 0;
11281da177e4SLinus Torvalds 
11291da177e4SLinus Torvalds 	prev_fi = NULL;
11301da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
11311da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
11321da177e4SLinus Torvalds 	ret = 0;
11331da177e4SLinus Torvalds 
11341da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
11351da177e4SLinus Torvalds 		struct fib_info *fi = nh->nh_parent;
11361da177e4SLinus Torvalds 		int alive;
11371da177e4SLinus Torvalds 
11381da177e4SLinus Torvalds 		BUG_ON(!fi->fib_nhs);
11391da177e4SLinus Torvalds 		if (nh->nh_dev != dev || fi == prev_fi)
11401da177e4SLinus Torvalds 			continue;
11411da177e4SLinus Torvalds 
11421da177e4SLinus Torvalds 		prev_fi = fi;
11431da177e4SLinus Torvalds 		alive = 0;
11441da177e4SLinus Torvalds 		change_nexthops(fi) {
11451da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
11461da177e4SLinus Torvalds 				alive++;
11471da177e4SLinus Torvalds 				continue;
11481da177e4SLinus Torvalds 			}
11491da177e4SLinus Torvalds 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
11501da177e4SLinus Torvalds 				continue;
1151e5ed6399SHerbert Xu 			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
11521da177e4SLinus Torvalds 				continue;
11531da177e4SLinus Torvalds 			alive++;
11541da177e4SLinus Torvalds 			spin_lock_bh(&fib_multipath_lock);
11551da177e4SLinus Torvalds 			nh->nh_power = 0;
11561da177e4SLinus Torvalds 			nh->nh_flags &= ~RTNH_F_DEAD;
11571da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
11581da177e4SLinus Torvalds 		} endfor_nexthops(fi)
11591da177e4SLinus Torvalds 
11601da177e4SLinus Torvalds 		if (alive > 0) {
11611da177e4SLinus Torvalds 			fi->fib_flags &= ~RTNH_F_DEAD;
11621da177e4SLinus Torvalds 			ret++;
11631da177e4SLinus Torvalds 		}
11641da177e4SLinus Torvalds 	}
11651da177e4SLinus Torvalds 
11661da177e4SLinus Torvalds 	return ret;
11671da177e4SLinus Torvalds }
11681da177e4SLinus Torvalds 
11691da177e4SLinus Torvalds /*
11701da177e4SLinus Torvalds    The algorithm is suboptimal, but it provides really
11711da177e4SLinus Torvalds    fair weighted route distribution.
11721da177e4SLinus Torvalds  */
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
11751da177e4SLinus Torvalds {
11761da177e4SLinus Torvalds 	struct fib_info *fi = res->fi;
11771da177e4SLinus Torvalds 	int w;
11781da177e4SLinus Torvalds 
11791da177e4SLinus Torvalds 	spin_lock_bh(&fib_multipath_lock);
11801da177e4SLinus Torvalds 	if (fi->fib_power <= 0) {
11811da177e4SLinus Torvalds 		int power = 0;
11821da177e4SLinus Torvalds 		change_nexthops(fi) {
11831da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
11841da177e4SLinus Torvalds 				power += nh->nh_weight;
11851da177e4SLinus Torvalds 				nh->nh_power = nh->nh_weight;
11861da177e4SLinus Torvalds 			}
11871da177e4SLinus Torvalds 		} endfor_nexthops(fi);
11881da177e4SLinus Torvalds 		fi->fib_power = power;
11891da177e4SLinus Torvalds 		if (power <= 0) {
11901da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
11911da177e4SLinus Torvalds 			/* Race condition: route has just become dead. */
11921da177e4SLinus Torvalds 			res->nh_sel = 0;
11931da177e4SLinus Torvalds 			return;
11941da177e4SLinus Torvalds 		}
11951da177e4SLinus Torvalds 	}
11961da177e4SLinus Torvalds 
11971da177e4SLinus Torvalds 
11981da177e4SLinus Torvalds 	/* w should be random number [0..fi->fib_power-1],
11991da177e4SLinus Torvalds 	   it is pretty bad approximation.
12001da177e4SLinus Torvalds 	 */
12011da177e4SLinus Torvalds 
12021da177e4SLinus Torvalds 	w = jiffies % fi->fib_power;
12031da177e4SLinus Torvalds 
12041da177e4SLinus Torvalds 	change_nexthops(fi) {
12051da177e4SLinus Torvalds 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
12061da177e4SLinus Torvalds 			if ((w -= nh->nh_power) <= 0) {
12071da177e4SLinus Torvalds 				nh->nh_power--;
12081da177e4SLinus Torvalds 				fi->fib_power--;
12091da177e4SLinus Torvalds 				res->nh_sel = nhsel;
12101da177e4SLinus Torvalds 				spin_unlock_bh(&fib_multipath_lock);
12111da177e4SLinus Torvalds 				return;
12121da177e4SLinus Torvalds 			}
12131da177e4SLinus Torvalds 		}
12141da177e4SLinus Torvalds 	} endfor_nexthops(fi);
12151da177e4SLinus Torvalds 
12161da177e4SLinus Torvalds 	/* Race condition: route has just become dead. */
12171da177e4SLinus Torvalds 	res->nh_sel = 0;
12181da177e4SLinus Torvalds 	spin_unlock_bh(&fib_multipath_lock);
12191da177e4SLinus Torvalds }
12201da177e4SLinus Torvalds #endif
1221