xref: /linux/net/ipv4/fib_semantics.c (revision e5b4376074e02b783e56a8f7c42d544e18112c4e)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		IPv4 Forwarding Information Base: semantics.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
131da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
141da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
151da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
161da177e4SLinus Torvalds  */
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds #include <linux/config.h>
191da177e4SLinus Torvalds #include <asm/uaccess.h>
201da177e4SLinus Torvalds #include <asm/system.h>
211da177e4SLinus Torvalds #include <linux/bitops.h>
221da177e4SLinus Torvalds #include <linux/types.h>
231da177e4SLinus Torvalds #include <linux/kernel.h>
241da177e4SLinus Torvalds #include <linux/jiffies.h>
251da177e4SLinus Torvalds #include <linux/mm.h>
261da177e4SLinus Torvalds #include <linux/string.h>
271da177e4SLinus Torvalds #include <linux/socket.h>
281da177e4SLinus Torvalds #include <linux/sockios.h>
291da177e4SLinus Torvalds #include <linux/errno.h>
301da177e4SLinus Torvalds #include <linux/in.h>
311da177e4SLinus Torvalds #include <linux/inet.h>
321da177e4SLinus Torvalds #include <linux/netdevice.h>
331da177e4SLinus Torvalds #include <linux/if_arp.h>
341da177e4SLinus Torvalds #include <linux/proc_fs.h>
351da177e4SLinus Torvalds #include <linux/skbuff.h>
361da177e4SLinus Torvalds #include <linux/netlink.h>
371da177e4SLinus Torvalds #include <linux/init.h>
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds #include <net/ip.h>
401da177e4SLinus Torvalds #include <net/protocol.h>
411da177e4SLinus Torvalds #include <net/route.h>
421da177e4SLinus Torvalds #include <net/tcp.h>
431da177e4SLinus Torvalds #include <net/sock.h>
441da177e4SLinus Torvalds #include <net/ip_fib.h>
451da177e4SLinus Torvalds #include <net/ip_mp_alg.h>
461da177e4SLinus Torvalds 
471da177e4SLinus Torvalds #include "fib_lookup.h"
481da177e4SLinus Torvalds 
491da177e4SLinus Torvalds #define FSprintk(a...)
501da177e4SLinus Torvalds 
511da177e4SLinus Torvalds static DEFINE_RWLOCK(fib_info_lock);
521da177e4SLinus Torvalds static struct hlist_head *fib_info_hash;
531da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash;
541da177e4SLinus Torvalds static unsigned int fib_hash_size;
551da177e4SLinus Torvalds static unsigned int fib_info_cnt;
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8
581da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
591da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
601da177e4SLinus Torvalds 
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) and change_nexthops(fi) open a brace scope, declare
 * "nhsel" (loop index) and "nh" (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and start a for loop.
 * The statement that follows is the loop body, and it must be followed
 * by endfor_nexthops(fi), which closes the scope again.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh *nh; \
	for (nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh *nh; \
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited; the loop runs
 * exactly once.  Hopefully gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh; \
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh); \
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
841da177e4SLinus Torvalds 
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds static struct
871da177e4SLinus Torvalds {
881da177e4SLinus Torvalds 	int	error;
891da177e4SLinus Torvalds 	u8	scope;
901da177e4SLinus Torvalds } fib_props[RTA_MAX + 1] = {
911da177e4SLinus Torvalds         {
921da177e4SLinus Torvalds 		.error	= 0,
931da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
941da177e4SLinus Torvalds 	},	/* RTN_UNSPEC */
951da177e4SLinus Torvalds 	{
961da177e4SLinus Torvalds 		.error	= 0,
971da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
981da177e4SLinus Torvalds 	},	/* RTN_UNICAST */
991da177e4SLinus Torvalds 	{
1001da177e4SLinus Torvalds 		.error	= 0,
1011da177e4SLinus Torvalds 		.scope	= RT_SCOPE_HOST,
1021da177e4SLinus Torvalds 	},	/* RTN_LOCAL */
1031da177e4SLinus Torvalds 	{
1041da177e4SLinus Torvalds 		.error	= 0,
1051da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1061da177e4SLinus Torvalds 	},	/* RTN_BROADCAST */
1071da177e4SLinus Torvalds 	{
1081da177e4SLinus Torvalds 		.error	= 0,
1091da177e4SLinus Torvalds 		.scope	= RT_SCOPE_LINK,
1101da177e4SLinus Torvalds 	},	/* RTN_ANYCAST */
1111da177e4SLinus Torvalds 	{
1121da177e4SLinus Torvalds 		.error	= 0,
1131da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1141da177e4SLinus Torvalds 	},	/* RTN_MULTICAST */
1151da177e4SLinus Torvalds 	{
1161da177e4SLinus Torvalds 		.error	= -EINVAL,
1171da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1181da177e4SLinus Torvalds 	},	/* RTN_BLACKHOLE */
1191da177e4SLinus Torvalds 	{
1201da177e4SLinus Torvalds 		.error	= -EHOSTUNREACH,
1211da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1221da177e4SLinus Torvalds 	},	/* RTN_UNREACHABLE */
1231da177e4SLinus Torvalds 	{
1241da177e4SLinus Torvalds 		.error	= -EACCES,
1251da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1261da177e4SLinus Torvalds 	},	/* RTN_PROHIBIT */
1271da177e4SLinus Torvalds 	{
1281da177e4SLinus Torvalds 		.error	= -EAGAIN,
1291da177e4SLinus Torvalds 		.scope	= RT_SCOPE_UNIVERSE,
1301da177e4SLinus Torvalds 	},	/* RTN_THROW */
1311da177e4SLinus Torvalds 	{
1321da177e4SLinus Torvalds 		.error	= -EINVAL,
1331da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1341da177e4SLinus Torvalds 	},	/* RTN_NAT */
1351da177e4SLinus Torvalds 	{
1361da177e4SLinus Torvalds 		.error	= -EINVAL,
1371da177e4SLinus Torvalds 		.scope	= RT_SCOPE_NOWHERE,
1381da177e4SLinus Torvalds 	},	/* RTN_XRESOLVE */
1391da177e4SLinus Torvalds };
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds 
1421da177e4SLinus Torvalds /* Release a nexthop info record */
1431da177e4SLinus Torvalds 
1441da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi)
1451da177e4SLinus Torvalds {
1461da177e4SLinus Torvalds 	if (fi->fib_dead == 0) {
1471da177e4SLinus Torvalds 		printk("Freeing alive fib_info %p\n", fi);
1481da177e4SLinus Torvalds 		return;
1491da177e4SLinus Torvalds 	}
1501da177e4SLinus Torvalds 	change_nexthops(fi) {
1511da177e4SLinus Torvalds 		if (nh->nh_dev)
1521da177e4SLinus Torvalds 			dev_put(nh->nh_dev);
1531da177e4SLinus Torvalds 		nh->nh_dev = NULL;
1541da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1551da177e4SLinus Torvalds 	fib_info_cnt--;
1561da177e4SLinus Torvalds 	kfree(fi);
1571da177e4SLinus Torvalds }
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi)
1601da177e4SLinus Torvalds {
1611da177e4SLinus Torvalds 	write_lock(&fib_info_lock);
1621da177e4SLinus Torvalds 	if (fi && --fi->fib_treeref == 0) {
1631da177e4SLinus Torvalds 		hlist_del(&fi->fib_hash);
1641da177e4SLinus Torvalds 		if (fi->fib_prefsrc)
1651da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
1661da177e4SLinus Torvalds 		change_nexthops(fi) {
1671da177e4SLinus Torvalds 			if (!nh->nh_dev)
1681da177e4SLinus Torvalds 				continue;
1691da177e4SLinus Torvalds 			hlist_del(&nh->nh_hash);
1701da177e4SLinus Torvalds 		} endfor_nexthops(fi)
1711da177e4SLinus Torvalds 		fi->fib_dead = 1;
1721da177e4SLinus Torvalds 		fib_info_put(fi);
1731da177e4SLinus Torvalds 	}
1741da177e4SLinus Torvalds 	write_unlock(&fib_info_lock);
1751da177e4SLinus Torvalds }
1761da177e4SLinus Torvalds 
1771da177e4SLinus Torvalds static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
1781da177e4SLinus Torvalds {
1791da177e4SLinus Torvalds 	const struct fib_nh *onh = ofi->fib_nh;
1801da177e4SLinus Torvalds 
1811da177e4SLinus Torvalds 	for_nexthops(fi) {
1821da177e4SLinus Torvalds 		if (nh->nh_oif != onh->nh_oif ||
1831da177e4SLinus Torvalds 		    nh->nh_gw  != onh->nh_gw ||
1841da177e4SLinus Torvalds 		    nh->nh_scope != onh->nh_scope ||
1851da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
1861da177e4SLinus Torvalds 		    nh->nh_weight != onh->nh_weight ||
1871da177e4SLinus Torvalds #endif
1881da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
1891da177e4SLinus Torvalds 		    nh->nh_tclassid != onh->nh_tclassid ||
1901da177e4SLinus Torvalds #endif
1911da177e4SLinus Torvalds 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
1921da177e4SLinus Torvalds 			return -1;
1931da177e4SLinus Torvalds 		onh++;
1941da177e4SLinus Torvalds 	} endfor_nexthops(fi);
1951da177e4SLinus Torvalds 	return 0;
1961da177e4SLinus Torvalds }
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
1991da177e4SLinus Torvalds {
2001da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
2011da177e4SLinus Torvalds 	unsigned int val = fi->fib_nhs;
2021da177e4SLinus Torvalds 
2031da177e4SLinus Torvalds 	val ^= fi->fib_protocol;
2041da177e4SLinus Torvalds 	val ^= fi->fib_prefsrc;
2051da177e4SLinus Torvalds 	val ^= fi->fib_priority;
2061da177e4SLinus Torvalds 
2071da177e4SLinus Torvalds 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
2081da177e4SLinus Torvalds }
2091da177e4SLinus Torvalds 
2101da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi)
2111da177e4SLinus Torvalds {
2121da177e4SLinus Torvalds 	struct hlist_head *head;
2131da177e4SLinus Torvalds 	struct hlist_node *node;
2141da177e4SLinus Torvalds 	struct fib_info *fi;
2151da177e4SLinus Torvalds 	unsigned int hash;
2161da177e4SLinus Torvalds 
2171da177e4SLinus Torvalds 	hash = fib_info_hashfn(nfi);
2181da177e4SLinus Torvalds 	head = &fib_info_hash[hash];
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds 	hlist_for_each_entry(fi, node, head, fib_hash) {
2211da177e4SLinus Torvalds 		if (fi->fib_nhs != nfi->fib_nhs)
2221da177e4SLinus Torvalds 			continue;
2231da177e4SLinus Torvalds 		if (nfi->fib_protocol == fi->fib_protocol &&
2241da177e4SLinus Torvalds 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
2251da177e4SLinus Torvalds 		    nfi->fib_priority == fi->fib_priority &&
2261da177e4SLinus Torvalds 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
2271da177e4SLinus Torvalds 			   sizeof(fi->fib_metrics)) == 0 &&
2281da177e4SLinus Torvalds 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
2291da177e4SLinus Torvalds 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
2301da177e4SLinus Torvalds 			return fi;
2311da177e4SLinus Torvalds 	}
2321da177e4SLinus Torvalds 
2331da177e4SLinus Torvalds 	return NULL;
2341da177e4SLinus Torvalds }
2351da177e4SLinus Torvalds 
2361da177e4SLinus Torvalds static inline unsigned int fib_devindex_hashfn(unsigned int val)
2371da177e4SLinus Torvalds {
2381da177e4SLinus Torvalds 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 	return (val ^
2411da177e4SLinus Torvalds 		(val >> DEVINDEX_HASHBITS) ^
2421da177e4SLinus Torvalds 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
2431da177e4SLinus Torvalds }
2441da177e4SLinus Torvalds 
2451da177e4SLinus Torvalds /* Check, that the gateway is already configured.
2461da177e4SLinus Torvalds    Used only by redirect accept routine.
2471da177e4SLinus Torvalds  */
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds int ip_fib_check_default(u32 gw, struct net_device *dev)
2501da177e4SLinus Torvalds {
2511da177e4SLinus Torvalds 	struct hlist_head *head;
2521da177e4SLinus Torvalds 	struct hlist_node *node;
2531da177e4SLinus Torvalds 	struct fib_nh *nh;
2541da177e4SLinus Torvalds 	unsigned int hash;
2551da177e4SLinus Torvalds 
2561da177e4SLinus Torvalds 	read_lock(&fib_info_lock);
2571da177e4SLinus Torvalds 
2581da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
2591da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
2601da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
2611da177e4SLinus Torvalds 		if (nh->nh_dev == dev &&
2621da177e4SLinus Torvalds 		    nh->nh_gw == gw &&
2631da177e4SLinus Torvalds 		    !(nh->nh_flags&RTNH_F_DEAD)) {
2641da177e4SLinus Torvalds 			read_unlock(&fib_info_lock);
2651da177e4SLinus Torvalds 			return 0;
2661da177e4SLinus Torvalds 		}
2671da177e4SLinus Torvalds 	}
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds 	read_unlock(&fib_info_lock);
2701da177e4SLinus Torvalds 
2711da177e4SLinus Torvalds 	return -1;
2721da177e4SLinus Torvalds }
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
2751da177e4SLinus Torvalds 	       int z, int tb_id,
2761da177e4SLinus Torvalds 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
2771da177e4SLinus Torvalds {
2781da177e4SLinus Torvalds 	struct sk_buff *skb;
2799ed19f33SJamal Hadi Salim 	u32 pid = req ? req->pid : n->nlmsg_pid;
2801da177e4SLinus Torvalds 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds 	skb = alloc_skb(size, GFP_KERNEL);
2831da177e4SLinus Torvalds 	if (!skb)
2841da177e4SLinus Torvalds 		return;
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
2871da177e4SLinus Torvalds 			  fa->fa_type, fa->fa_scope, &key, z,
2881da177e4SLinus Torvalds 			  fa->fa_tos,
289b6544c0bSJamal Hadi Salim 			  fa->fa_info, 0) < 0) {
2901da177e4SLinus Torvalds 		kfree_skb(skb);
2911da177e4SLinus Torvalds 		return;
2921da177e4SLinus Torvalds 	}
293ac6d439dSPatrick McHardy 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
2941da177e4SLinus Torvalds 	if (n->nlmsg_flags&NLM_F_ECHO)
2951da177e4SLinus Torvalds 		atomic_inc(&skb->users);
296ac6d439dSPatrick McHardy 	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
2971da177e4SLinus Torvalds 	if (n->nlmsg_flags&NLM_F_ECHO)
2981da177e4SLinus Torvalds 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
2991da177e4SLinus Torvalds }
3001da177e4SLinus Torvalds 
3011da177e4SLinus Torvalds /* Return the first fib alias matching TOS with
3021da177e4SLinus Torvalds  * priority less than or equal to PRIO.
3031da177e4SLinus Torvalds  */
3041da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
3051da177e4SLinus Torvalds {
3061da177e4SLinus Torvalds 	if (fah) {
3071da177e4SLinus Torvalds 		struct fib_alias *fa;
3081da177e4SLinus Torvalds 		list_for_each_entry(fa, fah, fa_list) {
3091da177e4SLinus Torvalds 			if (fa->fa_tos > tos)
3101da177e4SLinus Torvalds 				continue;
3111da177e4SLinus Torvalds 			if (fa->fa_info->fib_priority >= prio ||
3121da177e4SLinus Torvalds 			    fa->fa_tos < tos)
3131da177e4SLinus Torvalds 				return fa;
3141da177e4SLinus Torvalds 		}
3151da177e4SLinus Torvalds 	}
3161da177e4SLinus Torvalds 	return NULL;
3171da177e4SLinus Torvalds }
3181da177e4SLinus Torvalds 
3191da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order,
3201da177e4SLinus Torvalds 		     struct fib_info **last_resort, int *last_idx, int *dflt)
3211da177e4SLinus Torvalds {
3221da177e4SLinus Torvalds 	struct neighbour *n;
3231da177e4SLinus Torvalds 	int state = NUD_NONE;
3241da177e4SLinus Torvalds 
3251da177e4SLinus Torvalds 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
3261da177e4SLinus Torvalds 	if (n) {
3271da177e4SLinus Torvalds 		state = n->nud_state;
3281da177e4SLinus Torvalds 		neigh_release(n);
3291da177e4SLinus Torvalds 	}
3301da177e4SLinus Torvalds 	if (state==NUD_REACHABLE)
3311da177e4SLinus Torvalds 		return 0;
3321da177e4SLinus Torvalds 	if ((state&NUD_VALID) && order != *dflt)
3331da177e4SLinus Torvalds 		return 0;
3341da177e4SLinus Torvalds 	if ((state&NUD_VALID) ||
3351da177e4SLinus Torvalds 	    (*last_idx<0 && order > *dflt)) {
3361da177e4SLinus Torvalds 		*last_resort = fi;
3371da177e4SLinus Torvalds 		*last_idx = order;
3381da177e4SLinus Torvalds 	}
3391da177e4SLinus Torvalds 	return 1;
3401da177e4SLinus Torvalds }
3411da177e4SLinus Torvalds 
3421da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
3451da177e4SLinus Torvalds {
3461da177e4SLinus Torvalds 	while (RTA_OK(attr,attrlen)) {
3471da177e4SLinus Torvalds 		if (attr->rta_type == type)
3481da177e4SLinus Torvalds 			return *(u32*)RTA_DATA(attr);
3491da177e4SLinus Torvalds 		attr = RTA_NEXT(attr, attrlen);
3501da177e4SLinus Torvalds 	}
3511da177e4SLinus Torvalds 	return 0;
3521da177e4SLinus Torvalds }
3531da177e4SLinus Torvalds 
3541da177e4SLinus Torvalds static int
3551da177e4SLinus Torvalds fib_count_nexthops(struct rtattr *rta)
3561da177e4SLinus Torvalds {
3571da177e4SLinus Torvalds 	int nhs = 0;
3581da177e4SLinus Torvalds 	struct rtnexthop *nhp = RTA_DATA(rta);
3591da177e4SLinus Torvalds 	int nhlen = RTA_PAYLOAD(rta);
3601da177e4SLinus Torvalds 
3611da177e4SLinus Torvalds 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
3621da177e4SLinus Torvalds 		if ((nhlen -= nhp->rtnh_len) < 0)
3631da177e4SLinus Torvalds 			return 0;
3641da177e4SLinus Torvalds 		nhs++;
3651da177e4SLinus Torvalds 		nhp = RTNH_NEXT(nhp);
3661da177e4SLinus Torvalds 	};
3671da177e4SLinus Torvalds 	return nhs;
3681da177e4SLinus Torvalds }
3691da177e4SLinus Torvalds 
3701da177e4SLinus Torvalds static int
3711da177e4SLinus Torvalds fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
3721da177e4SLinus Torvalds {
3731da177e4SLinus Torvalds 	struct rtnexthop *nhp = RTA_DATA(rta);
3741da177e4SLinus Torvalds 	int nhlen = RTA_PAYLOAD(rta);
3751da177e4SLinus Torvalds 
3761da177e4SLinus Torvalds 	change_nexthops(fi) {
3771da177e4SLinus Torvalds 		int attrlen = nhlen - sizeof(struct rtnexthop);
3781da177e4SLinus Torvalds 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
3791da177e4SLinus Torvalds 			return -EINVAL;
3801da177e4SLinus Torvalds 		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
3811da177e4SLinus Torvalds 		nh->nh_oif = nhp->rtnh_ifindex;
3821da177e4SLinus Torvalds 		nh->nh_weight = nhp->rtnh_hops + 1;
3831da177e4SLinus Torvalds 		if (attrlen) {
3841da177e4SLinus Torvalds 			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
3851da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
3861da177e4SLinus Torvalds 			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
3871da177e4SLinus Torvalds #endif
3881da177e4SLinus Torvalds 		}
3891da177e4SLinus Torvalds 		nhp = RTNH_NEXT(nhp);
3901da177e4SLinus Torvalds 	} endfor_nexthops(fi);
3911da177e4SLinus Torvalds 	return 0;
3921da177e4SLinus Torvalds }
3931da177e4SLinus Torvalds 
3941da177e4SLinus Torvalds #endif
3951da177e4SLinus Torvalds 
3961da177e4SLinus Torvalds int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
3971da177e4SLinus Torvalds 		 struct fib_info *fi)
3981da177e4SLinus Torvalds {
3991da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
4001da177e4SLinus Torvalds 	struct rtnexthop *nhp;
4011da177e4SLinus Torvalds 	int nhlen;
4021da177e4SLinus Torvalds #endif
4031da177e4SLinus Torvalds 
4041da177e4SLinus Torvalds 	if (rta->rta_priority &&
4051da177e4SLinus Torvalds 	    *rta->rta_priority != fi->fib_priority)
4061da177e4SLinus Torvalds 		return 1;
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds 	if (rta->rta_oif || rta->rta_gw) {
4091da177e4SLinus Torvalds 		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
4101da177e4SLinus Torvalds 		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
4111da177e4SLinus Torvalds 			return 0;
4121da177e4SLinus Torvalds 		return 1;
4131da177e4SLinus Torvalds 	}
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
4161da177e4SLinus Torvalds 	if (rta->rta_mp == NULL)
4171da177e4SLinus Torvalds 		return 0;
4181da177e4SLinus Torvalds 	nhp = RTA_DATA(rta->rta_mp);
4191da177e4SLinus Torvalds 	nhlen = RTA_PAYLOAD(rta->rta_mp);
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds 	for_nexthops(fi) {
4221da177e4SLinus Torvalds 		int attrlen = nhlen - sizeof(struct rtnexthop);
4231da177e4SLinus Torvalds 		u32 gw;
4241da177e4SLinus Torvalds 
4251da177e4SLinus Torvalds 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
4261da177e4SLinus Torvalds 			return -EINVAL;
4271da177e4SLinus Torvalds 		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
4281da177e4SLinus Torvalds 			return 1;
4291da177e4SLinus Torvalds 		if (attrlen) {
4301da177e4SLinus Torvalds 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
4311da177e4SLinus Torvalds 			if (gw && gw != nh->nh_gw)
4321da177e4SLinus Torvalds 				return 1;
4331da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
4341da177e4SLinus Torvalds 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
4351da177e4SLinus Torvalds 			if (gw && gw != nh->nh_tclassid)
4361da177e4SLinus Torvalds 				return 1;
4371da177e4SLinus Torvalds #endif
4381da177e4SLinus Torvalds 		}
4391da177e4SLinus Torvalds 		nhp = RTNH_NEXT(nhp);
4401da177e4SLinus Torvalds 	} endfor_nexthops(fi);
4411da177e4SLinus Torvalds #endif
4421da177e4SLinus Torvalds 	return 0;
4431da177e4SLinus Torvalds }
4441da177e4SLinus Torvalds 
4451da177e4SLinus Torvalds 
/*
   Picture
   -------

   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr, but by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict each other.
   d) If we use tunnel routes, the gateway may not be on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes to
   coexist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
			  |-> {link prefix} -> (gw, oif) [scope local]
			                        |
						|-> {local prefix} (terminal node)
 */
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
4911da177e4SLinus Torvalds {
4921da177e4SLinus Torvalds 	int err;
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 	if (nh->nh_gw) {
4951da177e4SLinus Torvalds 		struct fib_result res;
4961da177e4SLinus Torvalds 
4971da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_PERVASIVE
4981da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_PERVASIVE)
4991da177e4SLinus Torvalds 			return 0;
5001da177e4SLinus Torvalds #endif
5011da177e4SLinus Torvalds 		if (nh->nh_flags&RTNH_F_ONLINK) {
5021da177e4SLinus Torvalds 			struct net_device *dev;
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 			if (r->rtm_scope >= RT_SCOPE_LINK)
5051da177e4SLinus Torvalds 				return -EINVAL;
5061da177e4SLinus Torvalds 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
5071da177e4SLinus Torvalds 				return -EINVAL;
5081da177e4SLinus Torvalds 			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
5091da177e4SLinus Torvalds 				return -ENODEV;
5101da177e4SLinus Torvalds 			if (!(dev->flags&IFF_UP))
5111da177e4SLinus Torvalds 				return -ENETDOWN;
5121da177e4SLinus Torvalds 			nh->nh_dev = dev;
5131da177e4SLinus Torvalds 			dev_hold(dev);
5141da177e4SLinus Torvalds 			nh->nh_scope = RT_SCOPE_LINK;
5151da177e4SLinus Torvalds 			return 0;
5161da177e4SLinus Torvalds 		}
5171da177e4SLinus Torvalds 		{
5181da177e4SLinus Torvalds 			struct flowi fl = { .nl_u = { .ip4_u =
5191da177e4SLinus Torvalds 						      { .daddr = nh->nh_gw,
5201da177e4SLinus Torvalds 							.scope = r->rtm_scope + 1 } },
5211da177e4SLinus Torvalds 					    .oif = nh->nh_oif };
5221da177e4SLinus Torvalds 
5231da177e4SLinus Torvalds 			/* It is not necessary, but requires a bit of thinking */
5241da177e4SLinus Torvalds 			if (fl.fl4_scope < RT_SCOPE_LINK)
5251da177e4SLinus Torvalds 				fl.fl4_scope = RT_SCOPE_LINK;
5261da177e4SLinus Torvalds 			if ((err = fib_lookup(&fl, &res)) != 0)
5271da177e4SLinus Torvalds 				return err;
5281da177e4SLinus Torvalds 		}
5291da177e4SLinus Torvalds 		err = -EINVAL;
5301da177e4SLinus Torvalds 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
5311da177e4SLinus Torvalds 			goto out;
5321da177e4SLinus Torvalds 		nh->nh_scope = res.scope;
5331da177e4SLinus Torvalds 		nh->nh_oif = FIB_RES_OIF(res);
5341da177e4SLinus Torvalds 		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
5351da177e4SLinus Torvalds 			goto out;
5361da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5371da177e4SLinus Torvalds 		err = -ENETDOWN;
5381da177e4SLinus Torvalds 		if (!(nh->nh_dev->flags & IFF_UP))
5391da177e4SLinus Torvalds 			goto out;
5401da177e4SLinus Torvalds 		err = 0;
5411da177e4SLinus Torvalds out:
5421da177e4SLinus Torvalds 		fib_res_put(&res);
5431da177e4SLinus Torvalds 		return err;
5441da177e4SLinus Torvalds 	} else {
5451da177e4SLinus Torvalds 		struct in_device *in_dev;
5461da177e4SLinus Torvalds 
5471da177e4SLinus Torvalds 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
5481da177e4SLinus Torvalds 			return -EINVAL;
5491da177e4SLinus Torvalds 
5501da177e4SLinus Torvalds 		in_dev = inetdev_by_index(nh->nh_oif);
5511da177e4SLinus Torvalds 		if (in_dev == NULL)
5521da177e4SLinus Torvalds 			return -ENODEV;
5531da177e4SLinus Torvalds 		if (!(in_dev->dev->flags&IFF_UP)) {
5541da177e4SLinus Torvalds 			in_dev_put(in_dev);
5551da177e4SLinus Torvalds 			return -ENETDOWN;
5561da177e4SLinus Torvalds 		}
5571da177e4SLinus Torvalds 		nh->nh_dev = in_dev->dev;
5581da177e4SLinus Torvalds 		dev_hold(nh->nh_dev);
5591da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_HOST;
5601da177e4SLinus Torvalds 		in_dev_put(in_dev);
5611da177e4SLinus Torvalds 	}
5621da177e4SLinus Torvalds 	return 0;
5631da177e4SLinus Torvalds }
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds static inline unsigned int fib_laddr_hashfn(u32 val)
5661da177e4SLinus Torvalds {
5671da177e4SLinus Torvalds 	unsigned int mask = (fib_hash_size - 1);
5681da177e4SLinus Torvalds 
5691da177e4SLinus Torvalds 	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
5701da177e4SLinus Torvalds }
5711da177e4SLinus Torvalds 
5721da177e4SLinus Torvalds static struct hlist_head *fib_hash_alloc(int bytes)
5731da177e4SLinus Torvalds {
5741da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
5751da177e4SLinus Torvalds 		return kmalloc(bytes, GFP_KERNEL);
5761da177e4SLinus Torvalds 	else
5771da177e4SLinus Torvalds 		return (struct hlist_head *)
5781da177e4SLinus Torvalds 			__get_free_pages(GFP_KERNEL, get_order(bytes));
5791da177e4SLinus Torvalds }
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds static void fib_hash_free(struct hlist_head *hash, int bytes)
5821da177e4SLinus Torvalds {
5831da177e4SLinus Torvalds 	if (!hash)
5841da177e4SLinus Torvalds 		return;
5851da177e4SLinus Torvalds 
5861da177e4SLinus Torvalds 	if (bytes <= PAGE_SIZE)
5871da177e4SLinus Torvalds 		kfree(hash);
5881da177e4SLinus Torvalds 	else
5891da177e4SLinus Torvalds 		free_pages((unsigned long) hash, get_order(bytes));
5901da177e4SLinus Torvalds }
5911da177e4SLinus Torvalds 
5921da177e4SLinus Torvalds static void fib_hash_move(struct hlist_head *new_info_hash,
5931da177e4SLinus Torvalds 			  struct hlist_head *new_laddrhash,
5941da177e4SLinus Torvalds 			  unsigned int new_size)
5951da177e4SLinus Torvalds {
596b7656e7fSDavid S. Miller 	struct hlist_head *old_info_hash, *old_laddrhash;
5971da177e4SLinus Torvalds 	unsigned int old_size = fib_hash_size;
598b7656e7fSDavid S. Miller 	unsigned int i, bytes;
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds 	write_lock(&fib_info_lock);
601b7656e7fSDavid S. Miller 	old_info_hash = fib_info_hash;
602b7656e7fSDavid S. Miller 	old_laddrhash = fib_info_laddrhash;
6031da177e4SLinus Torvalds 	fib_hash_size = new_size;
6041da177e4SLinus Torvalds 
6051da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6061da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_hash[i];
6071da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6081da177e4SLinus Torvalds 		struct fib_info *fi;
6091da177e4SLinus Torvalds 
6101da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
6111da177e4SLinus Torvalds 			struct hlist_head *dest;
6121da177e4SLinus Torvalds 			unsigned int new_hash;
6131da177e4SLinus Torvalds 
6141da177e4SLinus Torvalds 			hlist_del(&fi->fib_hash);
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 			new_hash = fib_info_hashfn(fi);
6171da177e4SLinus Torvalds 			dest = &new_info_hash[new_hash];
6181da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_hash, dest);
6191da177e4SLinus Torvalds 		}
6201da177e4SLinus Torvalds 	}
6211da177e4SLinus Torvalds 	fib_info_hash = new_info_hash;
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 	for (i = 0; i < old_size; i++) {
6241da177e4SLinus Torvalds 		struct hlist_head *lhead = &fib_info_laddrhash[i];
6251da177e4SLinus Torvalds 		struct hlist_node *node, *n;
6261da177e4SLinus Torvalds 		struct fib_info *fi;
6271da177e4SLinus Torvalds 
6281da177e4SLinus Torvalds 		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
6291da177e4SLinus Torvalds 			struct hlist_head *ldest;
6301da177e4SLinus Torvalds 			unsigned int new_hash;
6311da177e4SLinus Torvalds 
6321da177e4SLinus Torvalds 			hlist_del(&fi->fib_lhash);
6331da177e4SLinus Torvalds 
6341da177e4SLinus Torvalds 			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
6351da177e4SLinus Torvalds 			ldest = &new_laddrhash[new_hash];
6361da177e4SLinus Torvalds 			hlist_add_head(&fi->fib_lhash, ldest);
6371da177e4SLinus Torvalds 		}
6381da177e4SLinus Torvalds 	}
6391da177e4SLinus Torvalds 	fib_info_laddrhash = new_laddrhash;
6401da177e4SLinus Torvalds 
6411da177e4SLinus Torvalds 	write_unlock(&fib_info_lock);
642b7656e7fSDavid S. Miller 
643b7656e7fSDavid S. Miller 	bytes = old_size * sizeof(struct hlist_head *);
644b7656e7fSDavid S. Miller 	fib_hash_free(old_info_hash, bytes);
645b7656e7fSDavid S. Miller 	fib_hash_free(old_laddrhash, bytes);
6461da177e4SLinus Torvalds }
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds struct fib_info *
6491da177e4SLinus Torvalds fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
6501da177e4SLinus Torvalds 		const struct nlmsghdr *nlh, int *errp)
6511da177e4SLinus Torvalds {
6521da177e4SLinus Torvalds 	int err;
6531da177e4SLinus Torvalds 	struct fib_info *fi = NULL;
6541da177e4SLinus Torvalds 	struct fib_info *ofi;
6551da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
6561da177e4SLinus Torvalds 	int nhs = 1;
6571da177e4SLinus Torvalds #else
6581da177e4SLinus Torvalds 	const int nhs = 1;
6591da177e4SLinus Torvalds #endif
6601da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
6611da177e4SLinus Torvalds 	u32 mp_alg = IP_MP_ALG_NONE;
6621da177e4SLinus Torvalds #endif
6631da177e4SLinus Torvalds 
6641da177e4SLinus Torvalds 	/* Fast check to catch the most weird cases */
6651da177e4SLinus Torvalds 	if (fib_props[r->rtm_type].scope > r->rtm_scope)
6661da177e4SLinus Torvalds 		goto err_inval;
6671da177e4SLinus Torvalds 
6681da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
6691da177e4SLinus Torvalds 	if (rta->rta_mp) {
6701da177e4SLinus Torvalds 		nhs = fib_count_nexthops(rta->rta_mp);
6711da177e4SLinus Torvalds 		if (nhs == 0)
6721da177e4SLinus Torvalds 			goto err_inval;
6731da177e4SLinus Torvalds 	}
6741da177e4SLinus Torvalds #endif
6751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
6761da177e4SLinus Torvalds 	if (rta->rta_mp_alg) {
6771da177e4SLinus Torvalds 		mp_alg = *rta->rta_mp_alg;
6781da177e4SLinus Torvalds 
6791da177e4SLinus Torvalds 		if (mp_alg < IP_MP_ALG_NONE ||
6801da177e4SLinus Torvalds 		    mp_alg > IP_MP_ALG_MAX)
6811da177e4SLinus Torvalds 			goto err_inval;
6821da177e4SLinus Torvalds 	}
6831da177e4SLinus Torvalds #endif
6841da177e4SLinus Torvalds 
6851da177e4SLinus Torvalds 	err = -ENOBUFS;
6861da177e4SLinus Torvalds 	if (fib_info_cnt >= fib_hash_size) {
6871da177e4SLinus Torvalds 		unsigned int new_size = fib_hash_size << 1;
6881da177e4SLinus Torvalds 		struct hlist_head *new_info_hash;
6891da177e4SLinus Torvalds 		struct hlist_head *new_laddrhash;
6901da177e4SLinus Torvalds 		unsigned int bytes;
6911da177e4SLinus Torvalds 
6921da177e4SLinus Torvalds 		if (!new_size)
6931da177e4SLinus Torvalds 			new_size = 1;
6941da177e4SLinus Torvalds 		bytes = new_size * sizeof(struct hlist_head *);
6951da177e4SLinus Torvalds 		new_info_hash = fib_hash_alloc(bytes);
6961da177e4SLinus Torvalds 		new_laddrhash = fib_hash_alloc(bytes);
6971da177e4SLinus Torvalds 		if (!new_info_hash || !new_laddrhash) {
6981da177e4SLinus Torvalds 			fib_hash_free(new_info_hash, bytes);
6991da177e4SLinus Torvalds 			fib_hash_free(new_laddrhash, bytes);
7001da177e4SLinus Torvalds 		} else {
7011da177e4SLinus Torvalds 			memset(new_info_hash, 0, bytes);
7021da177e4SLinus Torvalds 			memset(new_laddrhash, 0, bytes);
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
7051da177e4SLinus Torvalds 		}
7061da177e4SLinus Torvalds 
7071da177e4SLinus Torvalds 		if (!fib_hash_size)
7081da177e4SLinus Torvalds 			goto failure;
7091da177e4SLinus Torvalds 	}
7101da177e4SLinus Torvalds 
7111da177e4SLinus Torvalds 	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
7121da177e4SLinus Torvalds 	if (fi == NULL)
7131da177e4SLinus Torvalds 		goto failure;
7141da177e4SLinus Torvalds 	fib_info_cnt++;
7151da177e4SLinus Torvalds 	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
7161da177e4SLinus Torvalds 
7171da177e4SLinus Torvalds 	fi->fib_protocol = r->rtm_protocol;
7181da177e4SLinus Torvalds 
7191da177e4SLinus Torvalds 	fi->fib_nhs = nhs;
7201da177e4SLinus Torvalds 	change_nexthops(fi) {
7211da177e4SLinus Torvalds 		nh->nh_parent = fi;
7221da177e4SLinus Torvalds 	} endfor_nexthops(fi)
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds 	fi->fib_flags = r->rtm_flags;
7251da177e4SLinus Torvalds 	if (rta->rta_priority)
7261da177e4SLinus Torvalds 		fi->fib_priority = *rta->rta_priority;
7271da177e4SLinus Torvalds 	if (rta->rta_mx) {
7281da177e4SLinus Torvalds 		int attrlen = RTA_PAYLOAD(rta->rta_mx);
7291da177e4SLinus Torvalds 		struct rtattr *attr = RTA_DATA(rta->rta_mx);
7301da177e4SLinus Torvalds 
7311da177e4SLinus Torvalds 		while (RTA_OK(attr, attrlen)) {
7321da177e4SLinus Torvalds 			unsigned flavor = attr->rta_type;
7331da177e4SLinus Torvalds 			if (flavor) {
7341da177e4SLinus Torvalds 				if (flavor > RTAX_MAX)
7351da177e4SLinus Torvalds 					goto err_inval;
7361da177e4SLinus Torvalds 				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
7371da177e4SLinus Torvalds 			}
7381da177e4SLinus Torvalds 			attr = RTA_NEXT(attr, attrlen);
7391da177e4SLinus Torvalds 		}
7401da177e4SLinus Torvalds 	}
7411da177e4SLinus Torvalds 	if (rta->rta_prefsrc)
7421da177e4SLinus Torvalds 		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
7431da177e4SLinus Torvalds 
7441da177e4SLinus Torvalds 	if (rta->rta_mp) {
7451da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7461da177e4SLinus Torvalds 		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
7471da177e4SLinus Torvalds 			goto failure;
7481da177e4SLinus Torvalds 		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
7491da177e4SLinus Torvalds 			goto err_inval;
7501da177e4SLinus Torvalds 		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
7511da177e4SLinus Torvalds 			goto err_inval;
7521da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7531da177e4SLinus Torvalds 		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
7541da177e4SLinus Torvalds 			goto err_inval;
7551da177e4SLinus Torvalds #endif
7561da177e4SLinus Torvalds #else
7571da177e4SLinus Torvalds 		goto err_inval;
7581da177e4SLinus Torvalds #endif
7591da177e4SLinus Torvalds 	} else {
7601da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
7611da177e4SLinus Torvalds 		if (rta->rta_oif)
7621da177e4SLinus Torvalds 			nh->nh_oif = *rta->rta_oif;
7631da177e4SLinus Torvalds 		if (rta->rta_gw)
7641da177e4SLinus Torvalds 			memcpy(&nh->nh_gw, rta->rta_gw, 4);
7651da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
7661da177e4SLinus Torvalds 		if (rta->rta_flow)
7671da177e4SLinus Torvalds 			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
7681da177e4SLinus Torvalds #endif
7691da177e4SLinus Torvalds 		nh->nh_flags = r->rtm_flags;
7701da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
7711da177e4SLinus Torvalds 		nh->nh_weight = 1;
7721da177e4SLinus Torvalds #endif
7731da177e4SLinus Torvalds 	}
7741da177e4SLinus Torvalds 
7751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
7761da177e4SLinus Torvalds 	fi->fib_mp_alg = mp_alg;
7771da177e4SLinus Torvalds #endif
7781da177e4SLinus Torvalds 
7791da177e4SLinus Torvalds 	if (fib_props[r->rtm_type].error) {
7801da177e4SLinus Torvalds 		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
7811da177e4SLinus Torvalds 			goto err_inval;
7821da177e4SLinus Torvalds 		goto link_it;
7831da177e4SLinus Torvalds 	}
7841da177e4SLinus Torvalds 
7851da177e4SLinus Torvalds 	if (r->rtm_scope > RT_SCOPE_HOST)
7861da177e4SLinus Torvalds 		goto err_inval;
7871da177e4SLinus Torvalds 
7881da177e4SLinus Torvalds 	if (r->rtm_scope == RT_SCOPE_HOST) {
7891da177e4SLinus Torvalds 		struct fib_nh *nh = fi->fib_nh;
7901da177e4SLinus Torvalds 
7911da177e4SLinus Torvalds 		/* Local address is added. */
7921da177e4SLinus Torvalds 		if (nhs != 1 || nh->nh_gw)
7931da177e4SLinus Torvalds 			goto err_inval;
7941da177e4SLinus Torvalds 		nh->nh_scope = RT_SCOPE_NOWHERE;
7951da177e4SLinus Torvalds 		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
7961da177e4SLinus Torvalds 		err = -ENODEV;
7971da177e4SLinus Torvalds 		if (nh->nh_dev == NULL)
7981da177e4SLinus Torvalds 			goto failure;
7991da177e4SLinus Torvalds 	} else {
8001da177e4SLinus Torvalds 		change_nexthops(fi) {
8011da177e4SLinus Torvalds 			if ((err = fib_check_nh(r, fi, nh)) != 0)
8021da177e4SLinus Torvalds 				goto failure;
8031da177e4SLinus Torvalds 		} endfor_nexthops(fi)
8041da177e4SLinus Torvalds 	}
8051da177e4SLinus Torvalds 
8061da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
8071da177e4SLinus Torvalds 		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
8081da177e4SLinus Torvalds 		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
8091da177e4SLinus Torvalds 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
8101da177e4SLinus Torvalds 				goto err_inval;
8111da177e4SLinus Torvalds 	}
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds link_it:
8141da177e4SLinus Torvalds 	if ((ofi = fib_find_info(fi)) != NULL) {
8151da177e4SLinus Torvalds 		fi->fib_dead = 1;
8161da177e4SLinus Torvalds 		free_fib_info(fi);
8171da177e4SLinus Torvalds 		ofi->fib_treeref++;
8181da177e4SLinus Torvalds 		return ofi;
8191da177e4SLinus Torvalds 	}
8201da177e4SLinus Torvalds 
8211da177e4SLinus Torvalds 	fi->fib_treeref++;
8221da177e4SLinus Torvalds 	atomic_inc(&fi->fib_clntref);
8231da177e4SLinus Torvalds 	write_lock(&fib_info_lock);
8241da177e4SLinus Torvalds 	hlist_add_head(&fi->fib_hash,
8251da177e4SLinus Torvalds 		       &fib_info_hash[fib_info_hashfn(fi)]);
8261da177e4SLinus Torvalds 	if (fi->fib_prefsrc) {
8271da177e4SLinus Torvalds 		struct hlist_head *head;
8281da177e4SLinus Torvalds 
8291da177e4SLinus Torvalds 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
8301da177e4SLinus Torvalds 		hlist_add_head(&fi->fib_lhash, head);
8311da177e4SLinus Torvalds 	}
8321da177e4SLinus Torvalds 	change_nexthops(fi) {
8331da177e4SLinus Torvalds 		struct hlist_head *head;
8341da177e4SLinus Torvalds 		unsigned int hash;
8351da177e4SLinus Torvalds 
8361da177e4SLinus Torvalds 		if (!nh->nh_dev)
8371da177e4SLinus Torvalds 			continue;
8381da177e4SLinus Torvalds 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
8391da177e4SLinus Torvalds 		head = &fib_info_devhash[hash];
8401da177e4SLinus Torvalds 		hlist_add_head(&nh->nh_hash, head);
8411da177e4SLinus Torvalds 	} endfor_nexthops(fi)
8421da177e4SLinus Torvalds 	write_unlock(&fib_info_lock);
8431da177e4SLinus Torvalds 	return fi;
8441da177e4SLinus Torvalds 
8451da177e4SLinus Torvalds err_inval:
8461da177e4SLinus Torvalds 	err = -EINVAL;
8471da177e4SLinus Torvalds 
8481da177e4SLinus Torvalds failure:
8491da177e4SLinus Torvalds         *errp = err;
8501da177e4SLinus Torvalds         if (fi) {
8511da177e4SLinus Torvalds 		fi->fib_dead = 1;
8521da177e4SLinus Torvalds 		free_fib_info(fi);
8531da177e4SLinus Torvalds 	}
8541da177e4SLinus Torvalds 	return NULL;
8551da177e4SLinus Torvalds }
8561da177e4SLinus Torvalds 
857*e5b43760SRobert Olsson /* Note! fib_semantic_match intentionally uses  RCU list functions. */
8581da177e4SLinus Torvalds int fib_semantic_match(struct list_head *head, const struct flowi *flp,
8591da177e4SLinus Torvalds 		       struct fib_result *res, __u32 zone, __u32 mask,
8601da177e4SLinus Torvalds 			int prefixlen)
8611da177e4SLinus Torvalds {
8621da177e4SLinus Torvalds 	struct fib_alias *fa;
8631da177e4SLinus Torvalds 	int nh_sel = 0;
8641da177e4SLinus Torvalds 
865*e5b43760SRobert Olsson 	list_for_each_entry_rcu(fa, head, fa_list) {
8661da177e4SLinus Torvalds 		int err;
8671da177e4SLinus Torvalds 
8681da177e4SLinus Torvalds 		if (fa->fa_tos &&
8691da177e4SLinus Torvalds 		    fa->fa_tos != flp->fl4_tos)
8701da177e4SLinus Torvalds 			continue;
8711da177e4SLinus Torvalds 
8721da177e4SLinus Torvalds 		if (fa->fa_scope < flp->fl4_scope)
8731da177e4SLinus Torvalds 			continue;
8741da177e4SLinus Torvalds 
8751da177e4SLinus Torvalds 		fa->fa_state |= FA_S_ACCESSED;
8761da177e4SLinus Torvalds 
8771da177e4SLinus Torvalds 		err = fib_props[fa->fa_type].error;
8781da177e4SLinus Torvalds 		if (err == 0) {
8791da177e4SLinus Torvalds 			struct fib_info *fi = fa->fa_info;
8801da177e4SLinus Torvalds 
8811da177e4SLinus Torvalds 			if (fi->fib_flags & RTNH_F_DEAD)
8821da177e4SLinus Torvalds 				continue;
8831da177e4SLinus Torvalds 
8841da177e4SLinus Torvalds 			switch (fa->fa_type) {
8851da177e4SLinus Torvalds 			case RTN_UNICAST:
8861da177e4SLinus Torvalds 			case RTN_LOCAL:
8871da177e4SLinus Torvalds 			case RTN_BROADCAST:
8881da177e4SLinus Torvalds 			case RTN_ANYCAST:
8891da177e4SLinus Torvalds 			case RTN_MULTICAST:
8901da177e4SLinus Torvalds 				for_nexthops(fi) {
8911da177e4SLinus Torvalds 					if (nh->nh_flags&RTNH_F_DEAD)
8921da177e4SLinus Torvalds 						continue;
8931da177e4SLinus Torvalds 					if (!flp->oif || flp->oif == nh->nh_oif)
8941da177e4SLinus Torvalds 						break;
8951da177e4SLinus Torvalds 				}
8961da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
8971da177e4SLinus Torvalds 				if (nhsel < fi->fib_nhs) {
8981da177e4SLinus Torvalds 					nh_sel = nhsel;
8991da177e4SLinus Torvalds 					goto out_fill_res;
9001da177e4SLinus Torvalds 				}
9011da177e4SLinus Torvalds #else
9021da177e4SLinus Torvalds 				if (nhsel < 1) {
9031da177e4SLinus Torvalds 					goto out_fill_res;
9041da177e4SLinus Torvalds 				}
9051da177e4SLinus Torvalds #endif
9061da177e4SLinus Torvalds 				endfor_nexthops(fi);
9071da177e4SLinus Torvalds 				continue;
9081da177e4SLinus Torvalds 
9091da177e4SLinus Torvalds 			default:
9101da177e4SLinus Torvalds 				printk(KERN_DEBUG "impossible 102\n");
9111da177e4SLinus Torvalds 				return -EINVAL;
9121da177e4SLinus Torvalds 			};
9131da177e4SLinus Torvalds 		}
9141da177e4SLinus Torvalds 		return err;
9151da177e4SLinus Torvalds 	}
9161da177e4SLinus Torvalds 	return 1;
9171da177e4SLinus Torvalds 
9181da177e4SLinus Torvalds out_fill_res:
9191da177e4SLinus Torvalds 	res->prefixlen = prefixlen;
9201da177e4SLinus Torvalds 	res->nh_sel = nh_sel;
9211da177e4SLinus Torvalds 	res->type = fa->fa_type;
9221da177e4SLinus Torvalds 	res->scope = fa->fa_scope;
9231da177e4SLinus Torvalds 	res->fi = fa->fa_info;
9241da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
9251da177e4SLinus Torvalds 	res->netmask = mask;
9261da177e4SLinus Torvalds 	res->network = zone &
9271da177e4SLinus Torvalds 		(0xFFFFFFFF >> (32 - prefixlen));
9281da177e4SLinus Torvalds #endif
9291da177e4SLinus Torvalds 	atomic_inc(&res->fi->fib_clntref);
9301da177e4SLinus Torvalds 	return 0;
9311da177e4SLinus Torvalds }
9321da177e4SLinus Torvalds 
9331da177e4SLinus Torvalds /* Find appropriate source address to this destination */
9341da177e4SLinus Torvalds 
9351da177e4SLinus Torvalds u32 __fib_res_prefsrc(struct fib_result *res)
9361da177e4SLinus Torvalds {
9371da177e4SLinus Torvalds 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
9381da177e4SLinus Torvalds }
9391da177e4SLinus Torvalds 
9401da177e4SLinus Torvalds int
9411da177e4SLinus Torvalds fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
9421da177e4SLinus Torvalds 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
943b6544c0bSJamal Hadi Salim 	      struct fib_info *fi, unsigned int flags)
9441da177e4SLinus Torvalds {
9451da177e4SLinus Torvalds 	struct rtmsg *rtm;
9461da177e4SLinus Torvalds 	struct nlmsghdr  *nlh;
9471da177e4SLinus Torvalds 	unsigned char	 *b = skb->tail;
9481da177e4SLinus Torvalds 
949b6544c0bSJamal Hadi Salim 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
9501da177e4SLinus Torvalds 	rtm = NLMSG_DATA(nlh);
9511da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET;
9521da177e4SLinus Torvalds 	rtm->rtm_dst_len = dst_len;
9531da177e4SLinus Torvalds 	rtm->rtm_src_len = 0;
9541da177e4SLinus Torvalds 	rtm->rtm_tos = tos;
9551da177e4SLinus Torvalds 	rtm->rtm_table = tb_id;
9561da177e4SLinus Torvalds 	rtm->rtm_type = type;
9571da177e4SLinus Torvalds 	rtm->rtm_flags = fi->fib_flags;
9581da177e4SLinus Torvalds 	rtm->rtm_scope = scope;
9591da177e4SLinus Torvalds 	if (rtm->rtm_dst_len)
9601da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_DST, 4, dst);
9611da177e4SLinus Torvalds 	rtm->rtm_protocol = fi->fib_protocol;
9621da177e4SLinus Torvalds 	if (fi->fib_priority)
9631da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
9641da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE
9651da177e4SLinus Torvalds 	if (fi->fib_nh[0].nh_tclassid)
9661da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
9671da177e4SLinus Torvalds #endif
9681da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
9691da177e4SLinus Torvalds 		goto rtattr_failure;
9701da177e4SLinus Torvalds 	if (fi->fib_prefsrc)
9711da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
9721da177e4SLinus Torvalds 	if (fi->fib_nhs == 1) {
9731da177e4SLinus Torvalds 		if (fi->fib_nh->nh_gw)
9741da177e4SLinus Torvalds 			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
9751da177e4SLinus Torvalds 		if (fi->fib_nh->nh_oif)
9761da177e4SLinus Torvalds 			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
9771da177e4SLinus Torvalds 	}
9781da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
9791da177e4SLinus Torvalds 	if (fi->fib_nhs > 1) {
9801da177e4SLinus Torvalds 		struct rtnexthop *nhp;
9811da177e4SLinus Torvalds 		struct rtattr *mp_head;
9821da177e4SLinus Torvalds 		if (skb_tailroom(skb) <= RTA_SPACE(0))
9831da177e4SLinus Torvalds 			goto rtattr_failure;
9841da177e4SLinus Torvalds 		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 		for_nexthops(fi) {
9871da177e4SLinus Torvalds 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
9881da177e4SLinus Torvalds 				goto rtattr_failure;
9891da177e4SLinus Torvalds 			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
9901da177e4SLinus Torvalds 			nhp->rtnh_flags = nh->nh_flags & 0xFF;
9911da177e4SLinus Torvalds 			nhp->rtnh_hops = nh->nh_weight-1;
9921da177e4SLinus Torvalds 			nhp->rtnh_ifindex = nh->nh_oif;
9931da177e4SLinus Torvalds 			if (nh->nh_gw)
9941da177e4SLinus Torvalds 				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
9951da177e4SLinus Torvalds 			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
9961da177e4SLinus Torvalds 		} endfor_nexthops(fi);
9971da177e4SLinus Torvalds 		mp_head->rta_type = RTA_MULTIPATH;
9981da177e4SLinus Torvalds 		mp_head->rta_len = skb->tail - (u8*)mp_head;
9991da177e4SLinus Torvalds 	}
10001da177e4SLinus Torvalds #endif
10011da177e4SLinus Torvalds 	nlh->nlmsg_len = skb->tail - b;
10021da177e4SLinus Torvalds 	return skb->len;
10031da177e4SLinus Torvalds 
10041da177e4SLinus Torvalds nlmsg_failure:
10051da177e4SLinus Torvalds rtattr_failure:
10061da177e4SLinus Torvalds 	skb_trim(skb, b - skb->data);
10071da177e4SLinus Torvalds 	return -1;
10081da177e4SLinus Torvalds }
10091da177e4SLinus Torvalds 
10101da177e4SLinus Torvalds #ifndef CONFIG_IP_NOSIOCRT
10111da177e4SLinus Torvalds 
10121da177e4SLinus Torvalds int
10131da177e4SLinus Torvalds fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
10141da177e4SLinus Torvalds 		    struct kern_rta *rta, struct rtentry *r)
10151da177e4SLinus Torvalds {
10161da177e4SLinus Torvalds 	int    plen;
10171da177e4SLinus Torvalds 	u32    *ptr;
10181da177e4SLinus Torvalds 
10191da177e4SLinus Torvalds 	memset(rtm, 0, sizeof(*rtm));
10201da177e4SLinus Torvalds 	memset(rta, 0, sizeof(*rta));
10211da177e4SLinus Torvalds 
10221da177e4SLinus Torvalds 	if (r->rt_dst.sa_family != AF_INET)
10231da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
10241da177e4SLinus Torvalds 
10251da177e4SLinus Torvalds 	/* Check mask for validity:
10261da177e4SLinus Torvalds 	   a) it must be contiguous.
10271da177e4SLinus Torvalds 	   b) destination must have all host bits clear.
10281da177e4SLinus Torvalds 	   c) if application forgot to set correct family (AF_INET),
10291da177e4SLinus Torvalds 	      reject request unless it is absolutely clear i.e.
10301da177e4SLinus Torvalds 	      both family and mask are zero.
10311da177e4SLinus Torvalds 	 */
10321da177e4SLinus Torvalds 	plen = 32;
10331da177e4SLinus Torvalds 	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
10341da177e4SLinus Torvalds 	if (!(r->rt_flags&RTF_HOST)) {
10351da177e4SLinus Torvalds 		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
10361da177e4SLinus Torvalds 		if (r->rt_genmask.sa_family != AF_INET) {
10371da177e4SLinus Torvalds 			if (mask || r->rt_genmask.sa_family)
10381da177e4SLinus Torvalds 				return -EAFNOSUPPORT;
10391da177e4SLinus Torvalds 		}
10401da177e4SLinus Torvalds 		if (bad_mask(mask, *ptr))
10411da177e4SLinus Torvalds 			return -EINVAL;
10421da177e4SLinus Torvalds 		plen = inet_mask_len(mask);
10431da177e4SLinus Torvalds 	}
10441da177e4SLinus Torvalds 
10451da177e4SLinus Torvalds 	nl->nlmsg_flags = NLM_F_REQUEST;
10469ed19f33SJamal Hadi Salim 	nl->nlmsg_pid = current->pid;
10471da177e4SLinus Torvalds 	nl->nlmsg_seq = 0;
10481da177e4SLinus Torvalds 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
10491da177e4SLinus Torvalds 	if (cmd == SIOCDELRT) {
10501da177e4SLinus Torvalds 		nl->nlmsg_type = RTM_DELROUTE;
10511da177e4SLinus Torvalds 		nl->nlmsg_flags = 0;
10521da177e4SLinus Torvalds 	} else {
10531da177e4SLinus Torvalds 		nl->nlmsg_type = RTM_NEWROUTE;
10541da177e4SLinus Torvalds 		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
10551da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_BOOT;
10561da177e4SLinus Torvalds 	}
10571da177e4SLinus Torvalds 
10581da177e4SLinus Torvalds 	rtm->rtm_dst_len = plen;
10591da177e4SLinus Torvalds 	rta->rta_dst = ptr;
10601da177e4SLinus Torvalds 
10611da177e4SLinus Torvalds 	if (r->rt_metric) {
10621da177e4SLinus Torvalds 		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
10631da177e4SLinus Torvalds 		rta->rta_priority = (u32*)&r->rt_pad3;
10641da177e4SLinus Torvalds 	}
10651da177e4SLinus Torvalds 	if (r->rt_flags&RTF_REJECT) {
10661da177e4SLinus Torvalds 		rtm->rtm_scope = RT_SCOPE_HOST;
10671da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNREACHABLE;
10681da177e4SLinus Torvalds 		return 0;
10691da177e4SLinus Torvalds 	}
10701da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_NOWHERE;
10711da177e4SLinus Torvalds 	rtm->rtm_type = RTN_UNICAST;
10721da177e4SLinus Torvalds 
10731da177e4SLinus Torvalds 	if (r->rt_dev) {
10741da177e4SLinus Torvalds 		char *colon;
10751da177e4SLinus Torvalds 		struct net_device *dev;
10761da177e4SLinus Torvalds 		char   devname[IFNAMSIZ];
10771da177e4SLinus Torvalds 
10781da177e4SLinus Torvalds 		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
10791da177e4SLinus Torvalds 			return -EFAULT;
10801da177e4SLinus Torvalds 		devname[IFNAMSIZ-1] = 0;
10811da177e4SLinus Torvalds 		colon = strchr(devname, ':');
10821da177e4SLinus Torvalds 		if (colon)
10831da177e4SLinus Torvalds 			*colon = 0;
10841da177e4SLinus Torvalds 		dev = __dev_get_by_name(devname);
10851da177e4SLinus Torvalds 		if (!dev)
10861da177e4SLinus Torvalds 			return -ENODEV;
10871da177e4SLinus Torvalds 		rta->rta_oif = &dev->ifindex;
10881da177e4SLinus Torvalds 		if (colon) {
10891da177e4SLinus Torvalds 			struct in_ifaddr *ifa;
10901da177e4SLinus Torvalds 			struct in_device *in_dev = __in_dev_get(dev);
10911da177e4SLinus Torvalds 			if (!in_dev)
10921da177e4SLinus Torvalds 				return -ENODEV;
10931da177e4SLinus Torvalds 			*colon = ':';
10941da177e4SLinus Torvalds 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
10951da177e4SLinus Torvalds 				if (strcmp(ifa->ifa_label, devname) == 0)
10961da177e4SLinus Torvalds 					break;
10971da177e4SLinus Torvalds 			if (ifa == NULL)
10981da177e4SLinus Torvalds 				return -ENODEV;
10991da177e4SLinus Torvalds 			rta->rta_prefsrc = &ifa->ifa_local;
11001da177e4SLinus Torvalds 		}
11011da177e4SLinus Torvalds 	}
11021da177e4SLinus Torvalds 
11031da177e4SLinus Torvalds 	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
11041da177e4SLinus Torvalds 	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
11051da177e4SLinus Torvalds 		rta->rta_gw = ptr;
11061da177e4SLinus Torvalds 		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
11071da177e4SLinus Torvalds 			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
11081da177e4SLinus Torvalds 	}
11091da177e4SLinus Torvalds 
11101da177e4SLinus Torvalds 	if (cmd == SIOCDELRT)
11111da177e4SLinus Torvalds 		return 0;
11121da177e4SLinus Torvalds 
11131da177e4SLinus Torvalds 	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
11141da177e4SLinus Torvalds 		return -EINVAL;
11151da177e4SLinus Torvalds 
11161da177e4SLinus Torvalds 	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
11171da177e4SLinus Torvalds 		rtm->rtm_scope = RT_SCOPE_LINK;
11181da177e4SLinus Torvalds 
11191da177e4SLinus Torvalds 	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
11201da177e4SLinus Torvalds 		struct rtattr *rec;
11211da177e4SLinus Torvalds 		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
11221da177e4SLinus Torvalds 		if (mx == NULL)
11231da177e4SLinus Torvalds 			return -ENOMEM;
11241da177e4SLinus Torvalds 		rta->rta_mx = mx;
11251da177e4SLinus Torvalds 		mx->rta_type = RTA_METRICS;
11261da177e4SLinus Torvalds 		mx->rta_len  = RTA_LENGTH(0);
11271da177e4SLinus Torvalds 		if (r->rt_flags&RTF_MTU) {
11281da177e4SLinus Torvalds 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
11291da177e4SLinus Torvalds 			rec->rta_type = RTAX_ADVMSS;
11301da177e4SLinus Torvalds 			rec->rta_len = RTA_LENGTH(4);
11311da177e4SLinus Torvalds 			mx->rta_len += RTA_LENGTH(4);
11321da177e4SLinus Torvalds 			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
11331da177e4SLinus Torvalds 		}
11341da177e4SLinus Torvalds 		if (r->rt_flags&RTF_WINDOW) {
11351da177e4SLinus Torvalds 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
11361da177e4SLinus Torvalds 			rec->rta_type = RTAX_WINDOW;
11371da177e4SLinus Torvalds 			rec->rta_len = RTA_LENGTH(4);
11381da177e4SLinus Torvalds 			mx->rta_len += RTA_LENGTH(4);
11391da177e4SLinus Torvalds 			*(u32*)RTA_DATA(rec) = r->rt_window;
11401da177e4SLinus Torvalds 		}
11411da177e4SLinus Torvalds 		if (r->rt_flags&RTF_IRTT) {
11421da177e4SLinus Torvalds 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
11431da177e4SLinus Torvalds 			rec->rta_type = RTAX_RTT;
11441da177e4SLinus Torvalds 			rec->rta_len = RTA_LENGTH(4);
11451da177e4SLinus Torvalds 			mx->rta_len += RTA_LENGTH(4);
11461da177e4SLinus Torvalds 			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
11471da177e4SLinus Torvalds 		}
11481da177e4SLinus Torvalds 	}
11491da177e4SLinus Torvalds 	return 0;
11501da177e4SLinus Torvalds }
11511da177e4SLinus Torvalds 
11521da177e4SLinus Torvalds #endif
11531da177e4SLinus Torvalds 
11541da177e4SLinus Torvalds /*
11551da177e4SLinus Torvalds    Update FIB if:
11561da177e4SLinus Torvalds    - local address disappeared -> we must delete all the entries
11571da177e4SLinus Torvalds      referring to it.
11581da177e4SLinus Torvalds    - device went down -> we must shutdown all nexthops going via it.
11591da177e4SLinus Torvalds  */
11601da177e4SLinus Torvalds 
11611da177e4SLinus Torvalds int fib_sync_down(u32 local, struct net_device *dev, int force)
11621da177e4SLinus Torvalds {
11631da177e4SLinus Torvalds 	int ret = 0;
11641da177e4SLinus Torvalds 	int scope = RT_SCOPE_NOWHERE;
11651da177e4SLinus Torvalds 
11661da177e4SLinus Torvalds 	if (force)
11671da177e4SLinus Torvalds 		scope = -1;
11681da177e4SLinus Torvalds 
11691da177e4SLinus Torvalds 	if (local && fib_info_laddrhash) {
11701da177e4SLinus Torvalds 		unsigned int hash = fib_laddr_hashfn(local);
11711da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_laddrhash[hash];
11721da177e4SLinus Torvalds 		struct hlist_node *node;
11731da177e4SLinus Torvalds 		struct fib_info *fi;
11741da177e4SLinus Torvalds 
11751da177e4SLinus Torvalds 		hlist_for_each_entry(fi, node, head, fib_lhash) {
11761da177e4SLinus Torvalds 			if (fi->fib_prefsrc == local) {
11771da177e4SLinus Torvalds 				fi->fib_flags |= RTNH_F_DEAD;
11781da177e4SLinus Torvalds 				ret++;
11791da177e4SLinus Torvalds 			}
11801da177e4SLinus Torvalds 		}
11811da177e4SLinus Torvalds 	}
11821da177e4SLinus Torvalds 
11831da177e4SLinus Torvalds 	if (dev) {
11841da177e4SLinus Torvalds 		struct fib_info *prev_fi = NULL;
11851da177e4SLinus Torvalds 		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
11861da177e4SLinus Torvalds 		struct hlist_head *head = &fib_info_devhash[hash];
11871da177e4SLinus Torvalds 		struct hlist_node *node;
11881da177e4SLinus Torvalds 		struct fib_nh *nh;
11891da177e4SLinus Torvalds 
11901da177e4SLinus Torvalds 		hlist_for_each_entry(nh, node, head, nh_hash) {
11911da177e4SLinus Torvalds 			struct fib_info *fi = nh->nh_parent;
11921da177e4SLinus Torvalds 			int dead;
11931da177e4SLinus Torvalds 
11941da177e4SLinus Torvalds 			BUG_ON(!fi->fib_nhs);
11951da177e4SLinus Torvalds 			if (nh->nh_dev != dev || fi == prev_fi)
11961da177e4SLinus Torvalds 				continue;
11971da177e4SLinus Torvalds 			prev_fi = fi;
11981da177e4SLinus Torvalds 			dead = 0;
11991da177e4SLinus Torvalds 			change_nexthops(fi) {
12001da177e4SLinus Torvalds 				if (nh->nh_flags&RTNH_F_DEAD)
12011da177e4SLinus Torvalds 					dead++;
12021da177e4SLinus Torvalds 				else if (nh->nh_dev == dev &&
12031da177e4SLinus Torvalds 					 nh->nh_scope != scope) {
12041da177e4SLinus Torvalds 					nh->nh_flags |= RTNH_F_DEAD;
12051da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
12061da177e4SLinus Torvalds 					spin_lock_bh(&fib_multipath_lock);
12071da177e4SLinus Torvalds 					fi->fib_power -= nh->nh_power;
12081da177e4SLinus Torvalds 					nh->nh_power = 0;
12091da177e4SLinus Torvalds 					spin_unlock_bh(&fib_multipath_lock);
12101da177e4SLinus Torvalds #endif
12111da177e4SLinus Torvalds 					dead++;
12121da177e4SLinus Torvalds 				}
12131da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
12141da177e4SLinus Torvalds 				if (force > 1 && nh->nh_dev == dev) {
12151da177e4SLinus Torvalds 					dead = fi->fib_nhs;
12161da177e4SLinus Torvalds 					break;
12171da177e4SLinus Torvalds 				}
12181da177e4SLinus Torvalds #endif
12191da177e4SLinus Torvalds 			} endfor_nexthops(fi)
12201da177e4SLinus Torvalds 			if (dead == fi->fib_nhs) {
12211da177e4SLinus Torvalds 				fi->fib_flags |= RTNH_F_DEAD;
12221da177e4SLinus Torvalds 				ret++;
12231da177e4SLinus Torvalds 			}
12241da177e4SLinus Torvalds 		}
12251da177e4SLinus Torvalds 	}
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds 	return ret;
12281da177e4SLinus Torvalds }
12291da177e4SLinus Torvalds 
12301da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
12311da177e4SLinus Torvalds 
12321da177e4SLinus Torvalds /*
12331da177e4SLinus Torvalds    Dead device goes up. We wake up dead nexthops.
12341da177e4SLinus Torvalds    It takes sense only on multipath routes.
12351da177e4SLinus Torvalds  */
12361da177e4SLinus Torvalds 
12371da177e4SLinus Torvalds int fib_sync_up(struct net_device *dev)
12381da177e4SLinus Torvalds {
12391da177e4SLinus Torvalds 	struct fib_info *prev_fi;
12401da177e4SLinus Torvalds 	unsigned int hash;
12411da177e4SLinus Torvalds 	struct hlist_head *head;
12421da177e4SLinus Torvalds 	struct hlist_node *node;
12431da177e4SLinus Torvalds 	struct fib_nh *nh;
12441da177e4SLinus Torvalds 	int ret;
12451da177e4SLinus Torvalds 
12461da177e4SLinus Torvalds 	if (!(dev->flags&IFF_UP))
12471da177e4SLinus Torvalds 		return 0;
12481da177e4SLinus Torvalds 
12491da177e4SLinus Torvalds 	prev_fi = NULL;
12501da177e4SLinus Torvalds 	hash = fib_devindex_hashfn(dev->ifindex);
12511da177e4SLinus Torvalds 	head = &fib_info_devhash[hash];
12521da177e4SLinus Torvalds 	ret = 0;
12531da177e4SLinus Torvalds 
12541da177e4SLinus Torvalds 	hlist_for_each_entry(nh, node, head, nh_hash) {
12551da177e4SLinus Torvalds 		struct fib_info *fi = nh->nh_parent;
12561da177e4SLinus Torvalds 		int alive;
12571da177e4SLinus Torvalds 
12581da177e4SLinus Torvalds 		BUG_ON(!fi->fib_nhs);
12591da177e4SLinus Torvalds 		if (nh->nh_dev != dev || fi == prev_fi)
12601da177e4SLinus Torvalds 			continue;
12611da177e4SLinus Torvalds 
12621da177e4SLinus Torvalds 		prev_fi = fi;
12631da177e4SLinus Torvalds 		alive = 0;
12641da177e4SLinus Torvalds 		change_nexthops(fi) {
12651da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
12661da177e4SLinus Torvalds 				alive++;
12671da177e4SLinus Torvalds 				continue;
12681da177e4SLinus Torvalds 			}
12691da177e4SLinus Torvalds 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
12701da177e4SLinus Torvalds 				continue;
12711da177e4SLinus Torvalds 			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
12721da177e4SLinus Torvalds 				continue;
12731da177e4SLinus Torvalds 			alive++;
12741da177e4SLinus Torvalds 			spin_lock_bh(&fib_multipath_lock);
12751da177e4SLinus Torvalds 			nh->nh_power = 0;
12761da177e4SLinus Torvalds 			nh->nh_flags &= ~RTNH_F_DEAD;
12771da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
12781da177e4SLinus Torvalds 		} endfor_nexthops(fi)
12791da177e4SLinus Torvalds 
12801da177e4SLinus Torvalds 		if (alive > 0) {
12811da177e4SLinus Torvalds 			fi->fib_flags &= ~RTNH_F_DEAD;
12821da177e4SLinus Torvalds 			ret++;
12831da177e4SLinus Torvalds 		}
12841da177e4SLinus Torvalds 	}
12851da177e4SLinus Torvalds 
12861da177e4SLinus Torvalds 	return ret;
12871da177e4SLinus Torvalds }
12881da177e4SLinus Torvalds 
12891da177e4SLinus Torvalds /*
12901da177e4SLinus Torvalds    The algorithm is suboptimal, but it provides really
12911da177e4SLinus Torvalds    fair weighted route distribution.
12921da177e4SLinus Torvalds  */
12931da177e4SLinus Torvalds 
12941da177e4SLinus Torvalds void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
12951da177e4SLinus Torvalds {
12961da177e4SLinus Torvalds 	struct fib_info *fi = res->fi;
12971da177e4SLinus Torvalds 	int w;
12981da177e4SLinus Torvalds 
12991da177e4SLinus Torvalds 	spin_lock_bh(&fib_multipath_lock);
13001da177e4SLinus Torvalds 	if (fi->fib_power <= 0) {
13011da177e4SLinus Torvalds 		int power = 0;
13021da177e4SLinus Torvalds 		change_nexthops(fi) {
13031da177e4SLinus Torvalds 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
13041da177e4SLinus Torvalds 				power += nh->nh_weight;
13051da177e4SLinus Torvalds 				nh->nh_power = nh->nh_weight;
13061da177e4SLinus Torvalds 			}
13071da177e4SLinus Torvalds 		} endfor_nexthops(fi);
13081da177e4SLinus Torvalds 		fi->fib_power = power;
13091da177e4SLinus Torvalds 		if (power <= 0) {
13101da177e4SLinus Torvalds 			spin_unlock_bh(&fib_multipath_lock);
13111da177e4SLinus Torvalds 			/* Race condition: route has just become dead. */
13121da177e4SLinus Torvalds 			res->nh_sel = 0;
13131da177e4SLinus Torvalds 			return;
13141da177e4SLinus Torvalds 		}
13151da177e4SLinus Torvalds 	}
13161da177e4SLinus Torvalds 
13171da177e4SLinus Torvalds 
13181da177e4SLinus Torvalds 	/* w should be random number [0..fi->fib_power-1],
13191da177e4SLinus Torvalds 	   it is pretty bad approximation.
13201da177e4SLinus Torvalds 	 */
13211da177e4SLinus Torvalds 
13221da177e4SLinus Torvalds 	w = jiffies % fi->fib_power;
13231da177e4SLinus Torvalds 
13241da177e4SLinus Torvalds 	change_nexthops(fi) {
13251da177e4SLinus Torvalds 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
13261da177e4SLinus Torvalds 			if ((w -= nh->nh_power) <= 0) {
13271da177e4SLinus Torvalds 				nh->nh_power--;
13281da177e4SLinus Torvalds 				fi->fib_power--;
13291da177e4SLinus Torvalds 				res->nh_sel = nhsel;
13301da177e4SLinus Torvalds 				spin_unlock_bh(&fib_multipath_lock);
13311da177e4SLinus Torvalds 				return;
13321da177e4SLinus Torvalds 			}
13331da177e4SLinus Torvalds 		}
13341da177e4SLinus Torvalds 	} endfor_nexthops(fi);
13351da177e4SLinus Torvalds 
13361da177e4SLinus Torvalds 	/* Race condition: route has just become dead. */
13371da177e4SLinus Torvalds 	res->nh_sel = 0;
13381da177e4SLinus Torvalds 	spin_unlock_bh(&fib_multipath_lock);
13391da177e4SLinus Torvalds }
13401da177e4SLinus Torvalds #endif
1341