xref: /linux/net/ipv4/fib_semantics.c (revision f24e9f586b377749dff37554696cf3a105540c94)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/arp.h>
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/ip_fib.h>
46 #include <net/ip_mp_alg.h>
47 
48 #include "fib_lookup.h"
49 
50 #define FSprintk(a...)
51 
52 static DEFINE_RWLOCK(fib_info_lock);
53 static struct hlist_head *fib_info_hash;
54 static struct hlist_head *fib_info_laddrhash;
55 static unsigned int fib_hash_size;
56 static unsigned int fib_info_cnt;
57 
58 #define DEVINDEX_HASHBITS 8
59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61 
62 #ifdef CONFIG_IP_ROUTE_MULTIPATH
63 
64 static DEFINE_SPINLOCK(fib_multipath_lock);
65 
66 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
67 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
68 
69 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
70 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
71 
72 #else /* CONFIG_IP_ROUTE_MULTIPATH */
73 
74 /* Hope that gcc will optimize away the dummy loop */
75 
76 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
77 for (nhsel=0; nhsel < 1; nhsel++)
78 
79 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
80 for (nhsel=0; nhsel < 1; nhsel++)
81 
82 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
83 
84 #define endfor_nexthops(fi) }
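
/*
 * Usage sketch (illustrative): the macros above open a block that
 * declares nhsel and nh, so every walk over the nexthops must be
 * closed with endfor_nexthops(), which supplies the matching brace:
 *
 *	for_nexthops(fi) {
 *		if (nh->nh_flags & RTNH_F_DEAD)
 *			continue;
 *		printk(KERN_DEBUG "nexthop %d via oif %d\n",
 *		       nhsel, nh->nh_oif);
 *	} endfor_nexthops(fi);
 */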
85 
86 
87 static const struct
88 {
89 	int	error;
90 	u8	scope;
91 } fib_props[RTA_MAX + 1] = {
92 	{
93 		.error	= 0,
94 		.scope	= RT_SCOPE_NOWHERE,
95 	},	/* RTN_UNSPEC */
96 	{
97 		.error	= 0,
98 		.scope	= RT_SCOPE_UNIVERSE,
99 	},	/* RTN_UNICAST */
100 	{
101 		.error	= 0,
102 		.scope	= RT_SCOPE_HOST,
103 	},	/* RTN_LOCAL */
104 	{
105 		.error	= 0,
106 		.scope	= RT_SCOPE_LINK,
107 	},	/* RTN_BROADCAST */
108 	{
109 		.error	= 0,
110 		.scope	= RT_SCOPE_LINK,
111 	},	/* RTN_ANYCAST */
112 	{
113 		.error	= 0,
114 		.scope	= RT_SCOPE_UNIVERSE,
115 	},	/* RTN_MULTICAST */
116 	{
117 		.error	= -EINVAL,
118 		.scope	= RT_SCOPE_UNIVERSE,
119 	},	/* RTN_BLACKHOLE */
120 	{
121 		.error	= -EHOSTUNREACH,
122 		.scope	= RT_SCOPE_UNIVERSE,
123 	},	/* RTN_UNREACHABLE */
124 	{
125 		.error	= -EACCES,
126 		.scope	= RT_SCOPE_UNIVERSE,
127 	},	/* RTN_PROHIBIT */
128 	{
129 		.error	= -EAGAIN,
130 		.scope	= RT_SCOPE_UNIVERSE,
131 	},	/* RTN_THROW */
132 	{
133 		.error	= -EINVAL,
134 		.scope	= RT_SCOPE_NOWHERE,
135 	},	/* RTN_NAT */
136 	{
137 		.error	= -EINVAL,
138 		.scope	= RT_SCOPE_NOWHERE,
139 	},	/* RTN_XRESOLVE */
140 };
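
/*
 * For example, a lookup that matches an RTN_PROHIBIT route fails with
 * -EACCES and an RTN_UNREACHABLE route with -EHOSTUNREACH, while the
 * error-free types fall through to nexthop selection; see
 * fib_semantic_match() below.
 */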
141 
142 
143 /* Release a nexthop info record */
144 
145 void free_fib_info(struct fib_info *fi)
146 {
147 	if (fi->fib_dead == 0) {
148 		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149 		return;
150 	}
151 	change_nexthops(fi) {
152 		if (nh->nh_dev)
153 			dev_put(nh->nh_dev);
154 		nh->nh_dev = NULL;
155 	} endfor_nexthops(fi);
156 	fib_info_cnt--;
157 	kfree(fi);
158 }
159 
160 void fib_release_info(struct fib_info *fi)
161 {
162 	write_lock_bh(&fib_info_lock);
163 	if (fi && --fi->fib_treeref == 0) {
164 		hlist_del(&fi->fib_hash);
165 		if (fi->fib_prefsrc)
166 			hlist_del(&fi->fib_lhash);
167 		change_nexthops(fi) {
168 			if (!nh->nh_dev)
169 				continue;
170 			hlist_del(&nh->nh_hash);
171 		} endfor_nexthops(fi)
172 		fi->fib_dead = 1;
173 		fib_info_put(fi);
174 	}
175 	write_unlock_bh(&fib_info_lock);
176 }
177 
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179 {
180 	const struct fib_nh *onh = ofi->fib_nh;
181 
182 	for_nexthops(fi) {
183 		if (nh->nh_oif != onh->nh_oif ||
184 		    nh->nh_gw  != onh->nh_gw ||
185 		    nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187 		    nh->nh_weight != onh->nh_weight ||
188 #endif
189 #ifdef CONFIG_NET_CLS_ROUTE
190 		    nh->nh_tclassid != onh->nh_tclassid ||
191 #endif
192 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 			return -1;
194 		onh++;
195 	} endfor_nexthops(fi);
196 	return 0;
197 }
198 
199 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200 {
201 	unsigned int mask = (fib_hash_size - 1);
202 	unsigned int val = fi->fib_nhs;
203 
204 	val ^= fi->fib_protocol;
205 	val ^= fi->fib_prefsrc;
206 	val ^= fi->fib_priority;
207 
208 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209 }
210 
211 static struct fib_info *fib_find_info(const struct fib_info *nfi)
212 {
213 	struct hlist_head *head;
214 	struct hlist_node *node;
215 	struct fib_info *fi;
216 	unsigned int hash;
217 
218 	hash = fib_info_hashfn(nfi);
219 	head = &fib_info_hash[hash];
220 
221 	hlist_for_each_entry(fi, node, head, fib_hash) {
222 		if (fi->fib_nhs != nfi->fib_nhs)
223 			continue;
224 		if (nfi->fib_protocol == fi->fib_protocol &&
225 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
226 		    nfi->fib_priority == fi->fib_priority &&
227 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
228 			   sizeof(fi->fib_metrics)) == 0 &&
229 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231 			return fi;
232 	}
233 
234 	return NULL;
235 }
236 
237 static inline unsigned int fib_devindex_hashfn(unsigned int val)
238 {
239 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
240 
241 	return (val ^
242 		(val >> DEVINDEX_HASHBITS) ^
243 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
244 }
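
/*
 * Worked example (plain arithmetic): for ifindex 0x12345 the xor-fold
 * mixes the bits above DEVINDEX_HASHBITS back into the low byte:
 *
 *	0x12345 ^ (0x12345 >> 8) ^ (0x12345 >> 16)
 *		= 0x12345 ^ 0x123 ^ 0x1 = 0x12267
 *	0x12267 & 0xFF = 0x67
 *
 * so nexthops on that device land in bucket 0x67 of fib_info_devhash[].
 */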
245 
246 /* Check that the gateway is already configured.
247    Used only by the redirect-accepting routine.
248  */
249 
250 int ip_fib_check_default(u32 gw, struct net_device *dev)
251 {
252 	struct hlist_head *head;
253 	struct hlist_node *node;
254 	struct fib_nh *nh;
255 	unsigned int hash;
256 
257 	read_lock(&fib_info_lock);
258 
259 	hash = fib_devindex_hashfn(dev->ifindex);
260 	head = &fib_info_devhash[hash];
261 	hlist_for_each_entry(nh, node, head, nh_hash) {
262 		if (nh->nh_dev == dev &&
263 		    nh->nh_gw == gw &&
264 		    !(nh->nh_flags&RTNH_F_DEAD)) {
265 			read_unlock(&fib_info_lock);
266 			return 0;
267 		}
268 	}
269 
270 	read_unlock(&fib_info_lock);
271 
272 	return -1;
273 }
274 
275 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
276 	       int z, int tb_id,
277 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
278 {
279 	struct sk_buff *skb;
280 	u32 pid = req ? req->pid : n->nlmsg_pid;
281 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
282 
283 	skb = alloc_skb(size, GFP_KERNEL);
284 	if (!skb)
285 		return;
286 
287 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
288 			  fa->fa_type, fa->fa_scope, &key, z,
289 			  fa->fa_tos,
290 			  fa->fa_info, 0) < 0) {
291 		kfree_skb(skb);
292 		return;
293 	}
294 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
295 	if (n->nlmsg_flags&NLM_F_ECHO)
296 		atomic_inc(&skb->users);
297 	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
298 	if (n->nlmsg_flags&NLM_F_ECHO)
299 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
300 }
301 
302 /* Return the first fib alias matching TOS with
303  * priority less than or equal to PRIO.
304  */
305 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
306 {
307 	if (fah) {
308 		struct fib_alias *fa;
309 		list_for_each_entry(fa, fah, fa_list) {
310 			if (fa->fa_tos > tos)
311 				continue;
312 			if (fa->fa_info->fib_priority >= prio ||
313 			    fa->fa_tos < tos)
314 				return fa;
315 		}
316 	}
317 	return NULL;
318 }
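
/*
 * Illustrative walk, assuming fah is kept sorted by falling fa_tos and,
 * within one TOS, by rising fib_priority (as the table insertion code
 * maintains): for the list
 *
 *	{tos=0x10, prio=0}, {tos=0x10, prio=1}, {tos=0, prio=0}
 *
 * fib_find_alias(fah, 0x10, 1) skips the first entry (prio 0 < 1) and
 * returns the second, while fib_find_alias(fah, 0, 0) returns the third.
 */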
319 
320 int fib_detect_death(struct fib_info *fi, int order,
321 		     struct fib_info **last_resort, int *last_idx, int *dflt)
322 {
323 	struct neighbour *n;
324 	int state = NUD_NONE;
325 
326 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
327 	if (n) {
328 		state = n->nud_state;
329 		neigh_release(n);
330 	}
331 	if (state==NUD_REACHABLE)
332 		return 0;
333 	if ((state&NUD_VALID) && order != *dflt)
334 		return 0;
335 	if ((state&NUD_VALID) ||
336 	    (*last_idx<0 && order > *dflt)) {
337 		*last_resort = fi;
338 		*last_idx = order;
339 	}
340 	return 1;
341 }
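
/*
 * Sketch of the intended use: the per-table select_default routines scan
 * candidate default routes in order and call this for each one.  A gateway
 * with a REACHABLE neighbour entry wins immediately (return 0); a dead or
 * unknown one returns 1 but may still be remembered via *last_resort and
 * *last_idx, so that some route is picked even when every gateway's ARP
 * state has gone stale.
 */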
342 
343 #ifdef CONFIG_IP_ROUTE_MULTIPATH
344 
345 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
346 {
347 	while (RTA_OK(attr,attrlen)) {
348 		if (attr->rta_type == type)
349 			return *(u32*)RTA_DATA(attr);
350 		attr = RTA_NEXT(attr, attrlen);
351 	}
352 	return 0;
353 }
354 
355 static int
356 fib_count_nexthops(struct rtattr *rta)
357 {
358 	int nhs = 0;
359 	struct rtnexthop *nhp = RTA_DATA(rta);
360 	int nhlen = RTA_PAYLOAD(rta);
361 
362 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
363 		if ((nhlen -= nhp->rtnh_len) < 0)
364 			return 0;
365 		nhs++;
366 		nhp = RTNH_NEXT(nhp);
367 	}
368 	return nhs;
369 }
370 
371 static int
372 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
373 {
374 	struct rtnexthop *nhp = RTA_DATA(rta);
375 	int nhlen = RTA_PAYLOAD(rta);
376 
377 	change_nexthops(fi) {
378 		int attrlen = nhlen - sizeof(struct rtnexthop);
379 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
380 			return -EINVAL;
381 		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
382 		nh->nh_oif = nhp->rtnh_ifindex;
383 		nh->nh_weight = nhp->rtnh_hops + 1;
384 		if (attrlen) {
385 			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
386 #ifdef CONFIG_NET_CLS_ROUTE
387 			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
388 #endif
389 		}
390 		nhp = RTNH_NEXT(nhp);
391 	} endfor_nexthops(fi);
392 	return 0;
393 }
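
/*
 * The RTA_MULTIPATH payload parsed above is a flat run of variable-length
 * nexthop records (sketch of the wire layout):
 *
 *	struct rtattr (RTA_MULTIPATH)
 *	    struct rtnexthop { rtnh_len, rtnh_flags, rtnh_hops, rtnh_ifindex }
 *	        struct rtattr (RTA_GATEWAY) + 4-byte IPv4 address
 *	    struct rtnexthop { ... }
 *	        ...
 *
 * rtnh_len covers each rtnexthop header plus its nested attributes, which
 * is why the walks here advance with RTNH_NEXT() and re-check nhlen on
 * every step.
 */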
394 
395 #endif
396 
397 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
398 		 struct fib_info *fi)
399 {
400 #ifdef CONFIG_IP_ROUTE_MULTIPATH
401 	struct rtnexthop *nhp;
402 	int nhlen;
403 #endif
404 
405 	if (rta->rta_priority &&
406 	    *rta->rta_priority != fi->fib_priority)
407 		return 1;
408 
409 	if (rta->rta_oif || rta->rta_gw) {
410 		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
411 		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
412 			return 0;
413 		return 1;
414 	}
415 
416 #ifdef CONFIG_IP_ROUTE_MULTIPATH
417 	if (rta->rta_mp == NULL)
418 		return 0;
419 	nhp = RTA_DATA(rta->rta_mp);
420 	nhlen = RTA_PAYLOAD(rta->rta_mp);
421 
422 	for_nexthops(fi) {
423 		int attrlen = nhlen - sizeof(struct rtnexthop);
424 		u32 gw;
425 
426 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
427 			return -EINVAL;
428 		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
429 			return 1;
430 		if (attrlen) {
431 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
432 			if (gw && gw != nh->nh_gw)
433 				return 1;
434 #ifdef CONFIG_NET_CLS_ROUTE
435 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
436 			if (gw && gw != nh->nh_tclassid)
437 				return 1;
438 #endif
439 		}
440 		nhp = RTNH_NEXT(nhp);
441 	} endfor_nexthops(fi);
442 #endif
443 	return 0;
444 }
445 
446 
447 /*
448    Picture
449    -------
450 
451    The semantics of nexthops are very messy for historical reasons.
452    We have to take into account that:
453    a) a gateway can actually be a local interface address,
454       so that a gatewayed route is direct;
455    b) a gateway must be an on-link address, possibly
456       described not by an ifaddr but by a direct route;
457    c) if both gateway and interface are specified, they must not
458       contradict each other;
459    d) if we use tunnel routes, the gateway could be off-link.
460 
461    Attempting to reconcile all of these (alas, self-contradictory)
462    conditions results in pretty ugly and hairy code with obscure logic.
463 
464    I chose to generalize it instead, so that the size
465    of the code hardly increases, but it becomes
466    much more general.
467    Every prefix is assigned a "scope" value: "host" is a local address,
468    "link" is a direct route,
469    [ ... "site" ... "interior" ... ]
470    and "universe" is a true gateway route with global meaning.
471 
472    Every prefix refers to a set of "nexthop"s (gw, oif),
473    where each gw must have a narrower scope. This recursion stops
474    when the gw has LOCAL scope or when the "nexthop" is declared ONLINK,
475    which means that the gw is forced to be on-link.
476 
477    The code is still hairy, but now it is apparently logically
478    consistent and very flexible. E.g., as a by-product, it allows
479    independent exterior and interior routing processes to
480    coexist in peace.
481 
482    Normally it looks like the following:
483 
484    {universe prefix}  -> (gw, oif) [scope link]
485                           |
486 			  |-> {link prefix} -> (gw, oif) [scope local]
487 			                        |
488 						|-> {local prefix} (terminal node)
489  */
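
/*
 * A concrete instance of the picture above (illustrative addresses):
 *
 *	default via 10.0.0.1            [universe prefix, nexthop scope link]
 *	10.0.0.0/24 dev eth0            [link prefix, nexthop scope host]
 *	local 10.0.0.2 dev eth0         [local prefix, terminal node]
 *
 * fib_check_nh() below resolves the gateway 10.0.0.1 through the
 * 10.0.0.0/24 route, so the default route's nexthop ends up with
 * RT_SCOPE_LINK, and the /24 in turn terminates at the local address.
 */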
490 
491 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
492 {
493 	int err;
494 
495 	if (nh->nh_gw) {
496 		struct fib_result res;
497 
498 #ifdef CONFIG_IP_ROUTE_PERVASIVE
499 		if (nh->nh_flags&RTNH_F_PERVASIVE)
500 			return 0;
501 #endif
502 		if (nh->nh_flags&RTNH_F_ONLINK) {
503 			struct net_device *dev;
504 
505 			if (r->rtm_scope >= RT_SCOPE_LINK)
506 				return -EINVAL;
507 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508 				return -EINVAL;
509 			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
510 				return -ENODEV;
511 			if (!(dev->flags&IFF_UP))
512 				return -ENETDOWN;
513 			nh->nh_dev = dev;
514 			dev_hold(dev);
515 			nh->nh_scope = RT_SCOPE_LINK;
516 			return 0;
517 		}
518 		{
519 			struct flowi fl = { .nl_u = { .ip4_u =
520 						      { .daddr = nh->nh_gw,
521 							.scope = r->rtm_scope + 1 } },
522 					    .oif = nh->nh_oif };
523 
524 			/* Clamping to RT_SCOPE_LINK is not strictly necessary, but seeing why requires a bit of thinking */
525 			if (fl.fl4_scope < RT_SCOPE_LINK)
526 				fl.fl4_scope = RT_SCOPE_LINK;
527 			if ((err = fib_lookup(&fl, &res)) != 0)
528 				return err;
529 		}
530 		err = -EINVAL;
531 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
532 			goto out;
533 		nh->nh_scope = res.scope;
534 		nh->nh_oif = FIB_RES_OIF(res);
535 		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
536 			goto out;
537 		dev_hold(nh->nh_dev);
538 		err = -ENETDOWN;
539 		if (!(nh->nh_dev->flags & IFF_UP))
540 			goto out;
541 		err = 0;
542 out:
543 		fib_res_put(&res);
544 		return err;
545 	} else {
546 		struct in_device *in_dev;
547 
548 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
549 			return -EINVAL;
550 
551 		in_dev = inetdev_by_index(nh->nh_oif);
552 		if (in_dev == NULL)
553 			return -ENODEV;
554 		if (!(in_dev->dev->flags&IFF_UP)) {
555 			in_dev_put(in_dev);
556 			return -ENETDOWN;
557 		}
558 		nh->nh_dev = in_dev->dev;
559 		dev_hold(nh->nh_dev);
560 		nh->nh_scope = RT_SCOPE_HOST;
561 		in_dev_put(in_dev);
562 	}
563 	return 0;
564 }
565 
566 static inline unsigned int fib_laddr_hashfn(u32 val)
567 {
568 	unsigned int mask = (fib_hash_size - 1);
569 
570 	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
571 }
572 
573 static struct hlist_head *fib_hash_alloc(int bytes)
574 {
575 	if (bytes <= PAGE_SIZE)
576 		return kmalloc(bytes, GFP_KERNEL);
577 	else
578 		return (struct hlist_head *)
579 			__get_free_pages(GFP_KERNEL, get_order(bytes));
580 }
581 
582 static void fib_hash_free(struct hlist_head *hash, int bytes)
583 {
584 	if (!hash)
585 		return;
586 
587 	if (bytes <= PAGE_SIZE)
588 		kfree(hash);
589 	else
590 		free_pages((unsigned long) hash, get_order(bytes));
591 }
592 
593 static void fib_hash_move(struct hlist_head *new_info_hash,
594 			  struct hlist_head *new_laddrhash,
595 			  unsigned int new_size)
596 {
597 	struct hlist_head *old_info_hash, *old_laddrhash;
598 	unsigned int old_size = fib_hash_size;
599 	unsigned int i, bytes;
600 
601 	write_lock_bh(&fib_info_lock);
602 	old_info_hash = fib_info_hash;
603 	old_laddrhash = fib_info_laddrhash;
604 	fib_hash_size = new_size;
605 
606 	for (i = 0; i < old_size; i++) {
607 		struct hlist_head *head = &fib_info_hash[i];
608 		struct hlist_node *node, *n;
609 		struct fib_info *fi;
610 
611 		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
612 			struct hlist_head *dest;
613 			unsigned int new_hash;
614 
615 			hlist_del(&fi->fib_hash);
616 
617 			new_hash = fib_info_hashfn(fi);
618 			dest = &new_info_hash[new_hash];
619 			hlist_add_head(&fi->fib_hash, dest);
620 		}
621 	}
622 	fib_info_hash = new_info_hash;
623 
624 	for (i = 0; i < old_size; i++) {
625 		struct hlist_head *lhead = &fib_info_laddrhash[i];
626 		struct hlist_node *node, *n;
627 		struct fib_info *fi;
628 
629 		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
630 			struct hlist_head *ldest;
631 			unsigned int new_hash;
632 
633 			hlist_del(&fi->fib_lhash);
634 
635 			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
636 			ldest = &new_laddrhash[new_hash];
637 			hlist_add_head(&fi->fib_lhash, ldest);
638 		}
639 	}
640 	fib_info_laddrhash = new_laddrhash;
641 
642 	write_unlock_bh(&fib_info_lock);
643 
644 	bytes = old_size * sizeof(struct hlist_head);
645 	fib_hash_free(old_info_hash, bytes);
646 	fib_hash_free(old_laddrhash, bytes);
647 }
648 
649 struct fib_info *
650 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
651 		const struct nlmsghdr *nlh, int *errp)
652 {
653 	int err;
654 	struct fib_info *fi = NULL;
655 	struct fib_info *ofi;
656 #ifdef CONFIG_IP_ROUTE_MULTIPATH
657 	int nhs = 1;
658 #else
659 	const int nhs = 1;
660 #endif
661 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
662 	u32 mp_alg = IP_MP_ALG_NONE;
663 #endif
664 
665 	/* Fast check to catch the weirdest cases */
666 	if (fib_props[r->rtm_type].scope > r->rtm_scope)
667 		goto err_inval;
668 
669 #ifdef CONFIG_IP_ROUTE_MULTIPATH
670 	if (rta->rta_mp) {
671 		nhs = fib_count_nexthops(rta->rta_mp);
672 		if (nhs == 0)
673 			goto err_inval;
674 	}
675 #endif
676 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
677 	if (rta->rta_mp_alg) {
678 		mp_alg = *rta->rta_mp_alg;
679 
680 		/* mp_alg is unsigned, so only the upper bound can trip */
681 		if (mp_alg > IP_MP_ALG_MAX)
682 			goto err_inval;
683 	}
684 #endif
685 
686 	err = -ENOBUFS;
687 	if (fib_info_cnt >= fib_hash_size) {
688 		unsigned int new_size = fib_hash_size << 1;
689 		struct hlist_head *new_info_hash;
690 		struct hlist_head *new_laddrhash;
691 		unsigned int bytes;
692 
693 		if (!new_size)
694 			new_size = 1;
695 		bytes = new_size * sizeof(struct hlist_head);
696 		new_info_hash = fib_hash_alloc(bytes);
697 		new_laddrhash = fib_hash_alloc(bytes);
698 		if (!new_info_hash || !new_laddrhash) {
699 			fib_hash_free(new_info_hash, bytes);
700 			fib_hash_free(new_laddrhash, bytes);
701 		} else {
702 			memset(new_info_hash, 0, bytes);
703 			memset(new_laddrhash, 0, bytes);
704 
705 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
706 		}
707 
708 		if (!fib_hash_size)
709 			goto failure;
710 	}
711 
712 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
713 	if (fi == NULL)
714 		goto failure;
715 	fib_info_cnt++;
716 
717 	fi->fib_protocol = r->rtm_protocol;
718 
719 	fi->fib_nhs = nhs;
720 	change_nexthops(fi) {
721 		nh->nh_parent = fi;
722 	} endfor_nexthops(fi)
723 
724 	fi->fib_flags = r->rtm_flags;
725 	if (rta->rta_priority)
726 		fi->fib_priority = *rta->rta_priority;
727 	if (rta->rta_mx) {
728 		int attrlen = RTA_PAYLOAD(rta->rta_mx);
729 		struct rtattr *attr = RTA_DATA(rta->rta_mx);
730 
731 		while (RTA_OK(attr, attrlen)) {
732 			unsigned flavor = attr->rta_type;
733 			if (flavor) {
734 				if (flavor > RTAX_MAX)
735 					goto err_inval;
736 				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737 			}
738 			attr = RTA_NEXT(attr, attrlen);
739 		}
740 	}
741 	if (rta->rta_prefsrc)
742 		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743 
744 	if (rta->rta_mp) {
745 #ifdef CONFIG_IP_ROUTE_MULTIPATH
746 		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747 			goto failure;
748 		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749 			goto err_inval;
750 		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751 			goto err_inval;
752 #ifdef CONFIG_NET_CLS_ROUTE
753 		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754 			goto err_inval;
755 #endif
756 #else
757 		goto err_inval;
758 #endif
759 	} else {
760 		struct fib_nh *nh = fi->fib_nh;
761 		if (rta->rta_oif)
762 			nh->nh_oif = *rta->rta_oif;
763 		if (rta->rta_gw)
764 			memcpy(&nh->nh_gw, rta->rta_gw, 4);
765 #ifdef CONFIG_NET_CLS_ROUTE
766 		if (rta->rta_flow)
767 			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768 #endif
769 		nh->nh_flags = r->rtm_flags;
770 #ifdef CONFIG_IP_ROUTE_MULTIPATH
771 		nh->nh_weight = 1;
772 #endif
773 	}
774 
775 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776 	fi->fib_mp_alg = mp_alg;
777 #endif
778 
779 	if (fib_props[r->rtm_type].error) {
780 		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781 			goto err_inval;
782 		goto link_it;
783 	}
784 
785 	if (r->rtm_scope > RT_SCOPE_HOST)
786 		goto err_inval;
787 
788 	if (r->rtm_scope == RT_SCOPE_HOST) {
789 		struct fib_nh *nh = fi->fib_nh;
790 
791 		/* Local address is added. */
792 		if (nhs != 1 || nh->nh_gw)
793 			goto err_inval;
794 		nh->nh_scope = RT_SCOPE_NOWHERE;
795 		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796 		err = -ENODEV;
797 		if (nh->nh_dev == NULL)
798 			goto failure;
799 	} else {
800 		change_nexthops(fi) {
801 			if ((err = fib_check_nh(r, fi, nh)) != 0)
802 				goto failure;
803 		} endfor_nexthops(fi)
804 	}
805 
806 	if (fi->fib_prefsrc) {
807 		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808 		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810 				goto err_inval;
811 	}
812 
813 link_it:
814 	if ((ofi = fib_find_info(fi)) != NULL) {
815 		fi->fib_dead = 1;
816 		free_fib_info(fi);
817 		ofi->fib_treeref++;
818 		return ofi;
819 	}
820 
821 	fi->fib_treeref++;
822 	atomic_inc(&fi->fib_clntref);
823 	write_lock_bh(&fib_info_lock);
824 	hlist_add_head(&fi->fib_hash,
825 		       &fib_info_hash[fib_info_hashfn(fi)]);
826 	if (fi->fib_prefsrc) {
827 		struct hlist_head *head;
828 
829 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830 		hlist_add_head(&fi->fib_lhash, head);
831 	}
832 	change_nexthops(fi) {
833 		struct hlist_head *head;
834 		unsigned int hash;
835 
836 		if (!nh->nh_dev)
837 			continue;
838 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839 		head = &fib_info_devhash[hash];
840 		hlist_add_head(&nh->nh_hash, head);
841 	} endfor_nexthops(fi)
842 	write_unlock_bh(&fib_info_lock);
843 	return fi;
844 
845 err_inval:
846 	err = -EINVAL;
847 
848 failure:
849 	*errp = err;
850 	if (fi) {
851 		fi->fib_dead = 1;
852 		free_fib_info(fi);
853 	}
854 	return NULL;
855 }
856 
857 /* Note! fib_semantic_match intentionally uses RCU list functions. */
858 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
859 		       struct fib_result *res, __u32 zone, __u32 mask,
860 			int prefixlen)
861 {
862 	struct fib_alias *fa;
863 	int nh_sel = 0;
864 
865 	list_for_each_entry_rcu(fa, head, fa_list) {
866 		int err;
867 
868 		if (fa->fa_tos &&
869 		    fa->fa_tos != flp->fl4_tos)
870 			continue;
871 
872 		if (fa->fa_scope < flp->fl4_scope)
873 			continue;
874 
875 		fa->fa_state |= FA_S_ACCESSED;
876 
877 		err = fib_props[fa->fa_type].error;
878 		if (err == 0) {
879 			struct fib_info *fi = fa->fa_info;
880 
881 			if (fi->fib_flags & RTNH_F_DEAD)
882 				continue;
883 
884 			switch (fa->fa_type) {
885 			case RTN_UNICAST:
886 			case RTN_LOCAL:
887 			case RTN_BROADCAST:
888 			case RTN_ANYCAST:
889 			case RTN_MULTICAST:
890 				for_nexthops(fi) {
891 					if (nh->nh_flags&RTNH_F_DEAD)
892 						continue;
893 					if (!flp->oif || flp->oif == nh->nh_oif)
894 						break;
895 				}
896 #ifdef CONFIG_IP_ROUTE_MULTIPATH
897 				if (nhsel < fi->fib_nhs) {
898 					nh_sel = nhsel;
899 					goto out_fill_res;
900 				}
901 #else
902 				if (nhsel < 1) {
903 					goto out_fill_res;
904 				}
905 #endif
906 				endfor_nexthops(fi);
907 				continue;
908 
909 			default:
910 				printk(KERN_DEBUG "impossible 102\n");
911 				return -EINVAL;
912 			}
913 		}
914 		return err;
915 	}
916 	return 1;
917 
918 out_fill_res:
919 	res->prefixlen = prefixlen;
920 	res->nh_sel = nh_sel;
921 	res->type = fa->fa_type;
922 	res->scope = fa->fa_scope;
923 	res->fi = fa->fa_info;
924 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
925 	res->netmask = mask;
926 	res->network = zone &
927 		(0xFFFFFFFF >> (32 - prefixlen));
928 #endif
929 	atomic_inc(&res->fi->fib_clntref);
930 	return 0;
931 }
932 
933 /* Find an appropriate source address for this destination */
934 
935 u32 __fib_res_prefsrc(struct fib_result *res)
936 {
937 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938 }
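
/*
 * A minimal sketch of the intent (inet_select_addr() lives in devinet.c):
 * pick a primary address on the result's output device whose scope is at
 * least as tight as res->scope, preferring one on the same subnet as the
 * gateway, so that locally generated traffic gets a sensible source
 * address.
 */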
939 
940 int
941 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
942 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
943 	      struct fib_info *fi, unsigned int flags)
944 {
945 	struct rtmsg *rtm;
946 	struct nlmsghdr  *nlh;
947 	unsigned char	 *b = skb->tail;
948 
949 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
950 	rtm = NLMSG_DATA(nlh);
951 	rtm->rtm_family = AF_INET;
952 	rtm->rtm_dst_len = dst_len;
953 	rtm->rtm_src_len = 0;
954 	rtm->rtm_tos = tos;
955 	rtm->rtm_table = tb_id;
956 	rtm->rtm_type = type;
957 	rtm->rtm_flags = fi->fib_flags;
958 	rtm->rtm_scope = scope;
959 	if (rtm->rtm_dst_len)
960 		RTA_PUT(skb, RTA_DST, 4, dst);
961 	rtm->rtm_protocol = fi->fib_protocol;
962 	if (fi->fib_priority)
963 		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
964 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
965 		goto rtattr_failure;
966 	if (fi->fib_prefsrc)
967 		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
968 	if (fi->fib_nhs == 1) {
969 		if (fi->fib_nh->nh_gw)
970 			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
971 		if (fi->fib_nh->nh_oif)
972 			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
973 #ifdef CONFIG_NET_CLS_ROUTE
974 		if (fi->fib_nh[0].nh_tclassid)
975 			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
976 #endif
977 	}
978 #ifdef CONFIG_IP_ROUTE_MULTIPATH
979 	if (fi->fib_nhs > 1) {
980 		struct rtnexthop *nhp;
981 		struct rtattr *mp_head;
982 		if (skb_tailroom(skb) <= RTA_SPACE(0))
983 			goto rtattr_failure;
984 		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
985 
986 		for_nexthops(fi) {
987 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
988 				goto rtattr_failure;
989 			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
990 			nhp->rtnh_flags = nh->nh_flags & 0xFF;
991 			nhp->rtnh_hops = nh->nh_weight-1;
992 			nhp->rtnh_ifindex = nh->nh_oif;
993 			if (nh->nh_gw)
994 				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
995 #ifdef CONFIG_NET_CLS_ROUTE
996 			if (nh->nh_tclassid)
997 				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
998 #endif
999 			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
1000 		} endfor_nexthops(fi);
1001 		mp_head->rta_type = RTA_MULTIPATH;
1002 		mp_head->rta_len = skb->tail - (u8*)mp_head;
1003 	}
1004 #endif
1005 	nlh->nlmsg_len = skb->tail - b;
1006 	return skb->len;
1007 
1008 nlmsg_failure:
1009 rtattr_failure:
1010 	skb_trim(skb, b - skb->data);
1011 	return -1;
1012 }
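
/*
 * For a single-nexthop unicast route, the message built above looks
 * roughly like this (illustrative layout, lengths elided):
 *
 *	struct nlmsghdr		RTM_NEWROUTE/RTM_DELROUTE
 *	struct rtmsg		{ family, dst_len, tos, table, type, scope, ... }
 *	RTA_DST			4-byte prefix (if dst_len != 0)
 *	RTA_PRIORITY		4 bytes, if set
 *	RTA_METRICS		nested RTAX_* attributes, if any
 *	RTA_PREFSRC		4 bytes, if set
 *	RTA_GATEWAY / RTA_OIF / RTA_FLOW	per-nexthop data
 *
 * Multipath routes carry the per-nexthop data inside RTA_MULTIPATH
 * instead, mirroring the layout consumed by fib_get_nhs() above.
 */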
1013 
1014 #ifndef CONFIG_IP_NOSIOCRT
1015 
1016 int
1017 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1018 		    struct kern_rta *rta, struct rtentry *r)
1019 {
1020 	int    plen;
1021 	u32    *ptr;
1022 
1023 	memset(rtm, 0, sizeof(*rtm));
1024 	memset(rta, 0, sizeof(*rta));
1025 
1026 	if (r->rt_dst.sa_family != AF_INET)
1027 		return -EAFNOSUPPORT;
1028 
1029 	/* Check the mask for validity:
1030 	   a) it must be contiguous.
1031 	   b) the destination must have all host bits clear.
1032 	   c) if the application forgot to set the correct family (AF_INET),
1033 	      reject the request unless it is absolutely clear, i.e.
1034 	      both family and mask are zero.
1035 	 */
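	/* For example, 255.255.255.0 is contiguous while 255.0.255.0 is
	   not, and a destination with host bits set under the given mask
	   is equally bogus; both are rejected by the bad_mask() check
	   below. */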
1036 	plen = 32;
1037 	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1038 	if (!(r->rt_flags&RTF_HOST)) {
1039 		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1040 		if (r->rt_genmask.sa_family != AF_INET) {
1041 			if (mask || r->rt_genmask.sa_family)
1042 				return -EAFNOSUPPORT;
1043 		}
1044 		if (bad_mask(mask, *ptr))
1045 			return -EINVAL;
1046 		plen = inet_mask_len(mask);
1047 	}
1048 
1049 	nl->nlmsg_flags = NLM_F_REQUEST;
1050 	nl->nlmsg_pid = 0;
1051 	nl->nlmsg_seq = 0;
1052 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1053 	if (cmd == SIOCDELRT) {
1054 		nl->nlmsg_type = RTM_DELROUTE;
1055 		nl->nlmsg_flags = 0;
1056 	} else {
1057 		nl->nlmsg_type = RTM_NEWROUTE;
1058 		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1059 		rtm->rtm_protocol = RTPROT_BOOT;
1060 	}
1061 
1062 	rtm->rtm_dst_len = plen;
1063 	rta->rta_dst = ptr;
1064 
1065 	if (r->rt_metric) {
1066 		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
1067 		rta->rta_priority = (u32*)&r->rt_pad3;
1068 	}
1069 	if (r->rt_flags&RTF_REJECT) {
1070 		rtm->rtm_scope = RT_SCOPE_HOST;
1071 		rtm->rtm_type = RTN_UNREACHABLE;
1072 		return 0;
1073 	}
1074 	rtm->rtm_scope = RT_SCOPE_NOWHERE;
1075 	rtm->rtm_type = RTN_UNICAST;
1076 
1077 	if (r->rt_dev) {
1078 		char *colon;
1079 		struct net_device *dev;
1080 		char   devname[IFNAMSIZ];
1081 
1082 		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1083 			return -EFAULT;
1084 		devname[IFNAMSIZ-1] = 0;
1085 		colon = strchr(devname, ':');
1086 		if (colon)
1087 			*colon = 0;
1088 		dev = __dev_get_by_name(devname);
1089 		if (!dev)
1090 			return -ENODEV;
1091 		rta->rta_oif = &dev->ifindex;
1092 		if (colon) {
1093 			struct in_ifaddr *ifa;
1094 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
1095 			if (!in_dev)
1096 				return -ENODEV;
1097 			*colon = ':';
1098 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1099 				if (strcmp(ifa->ifa_label, devname) == 0)
1100 					break;
1101 			if (ifa == NULL)
1102 				return -ENODEV;
1103 			rta->rta_prefsrc = &ifa->ifa_local;
1104 		}
1105 	}
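	/* For an aliased name such as "eth0:1", the code above resolves
	   the base device for rta_oif and then picks the ifaddr whose
	   label matches the alias; its local address becomes the route's
	   preferred source (rta_prefsrc). */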
1106 
1107 	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1108 	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1109 		rta->rta_gw = ptr;
1110 		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1111 			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1112 	}
1113 
1114 	if (cmd == SIOCDELRT)
1115 		return 0;
1116 
1117 	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1118 		return -EINVAL;
1119 
1120 	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1121 		rtm->rtm_scope = RT_SCOPE_LINK;
1122 
1123 	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1124 		struct rtattr *rec;
1125 		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1126 		if (mx == NULL)
1127 			return -ENOMEM;
1128 		rta->rta_mx = mx;
1129 		mx->rta_type = RTA_METRICS;
1130 		mx->rta_len  = RTA_LENGTH(0);
1131 		if (r->rt_flags&RTF_MTU) {
1132 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1133 			rec->rta_type = RTAX_ADVMSS;
1134 			rec->rta_len = RTA_LENGTH(4);
1135 			mx->rta_len += RTA_LENGTH(4);
1136 			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1137 		}
1138 		if (r->rt_flags&RTF_WINDOW) {
1139 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1140 			rec->rta_type = RTAX_WINDOW;
1141 			rec->rta_len = RTA_LENGTH(4);
1142 			mx->rta_len += RTA_LENGTH(4);
1143 			*(u32*)RTA_DATA(rec) = r->rt_window;
1144 		}
1145 		if (r->rt_flags&RTF_IRTT) {
1146 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1147 			rec->rta_type = RTAX_RTT;
1148 			rec->rta_len = RTA_LENGTH(4);
1149 			mx->rta_len += RTA_LENGTH(4);
1150 			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1151 		}
1152 	}
1153 	return 0;
1154 }
1155 
1156 #endif
1157 
1158 /*
1159    Update FIB if:
1160    - a local address disappeared -> we must delete all the entries
1161      referring to it.
1162    - a device went down -> we must shut down all nexthops going via it.
1163  */
1164 
1165 int fib_sync_down(u32 local, struct net_device *dev, int force)
1166 {
1167 	int ret = 0;
1168 	int scope = RT_SCOPE_NOWHERE;
1169 
1170 	if (force)
1171 		scope = -1;
1172 
1173 	if (local && fib_info_laddrhash) {
1174 		unsigned int hash = fib_laddr_hashfn(local);
1175 		struct hlist_head *head = &fib_info_laddrhash[hash];
1176 		struct hlist_node *node;
1177 		struct fib_info *fi;
1178 
1179 		hlist_for_each_entry(fi, node, head, fib_lhash) {
1180 			if (fi->fib_prefsrc == local) {
1181 				fi->fib_flags |= RTNH_F_DEAD;
1182 				ret++;
1183 			}
1184 		}
1185 	}
1186 
1187 	if (dev) {
1188 		struct fib_info *prev_fi = NULL;
1189 		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1190 		struct hlist_head *head = &fib_info_devhash[hash];
1191 		struct hlist_node *node;
1192 		struct fib_nh *nh;
1193 
1194 		hlist_for_each_entry(nh, node, head, nh_hash) {
1195 			struct fib_info *fi = nh->nh_parent;
1196 			int dead;
1197 
1198 			BUG_ON(!fi->fib_nhs);
1199 			if (nh->nh_dev != dev || fi == prev_fi)
1200 				continue;
1201 			prev_fi = fi;
1202 			dead = 0;
1203 			change_nexthops(fi) {
1204 				if (nh->nh_flags&RTNH_F_DEAD)
1205 					dead++;
1206 				else if (nh->nh_dev == dev &&
1207 					 nh->nh_scope != scope) {
1208 					nh->nh_flags |= RTNH_F_DEAD;
1209 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1210 					spin_lock_bh(&fib_multipath_lock);
1211 					fi->fib_power -= nh->nh_power;
1212 					nh->nh_power = 0;
1213 					spin_unlock_bh(&fib_multipath_lock);
1214 #endif
1215 					dead++;
1216 				}
1217 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1218 				if (force > 1 && nh->nh_dev == dev) {
1219 					dead = fi->fib_nhs;
1220 					break;
1221 				}
1222 #endif
1223 			} endfor_nexthops(fi)
1224 			if (dead == fi->fib_nhs) {
1225 				fi->fib_flags |= RTNH_F_DEAD;
1226 				ret++;
1227 			}
1228 		}
1229 	}
1230 
1231 	return ret;
1232 }
1233 
1234 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1235 
1236 /*
1237    A dead device goes up. We wake up its dead nexthops.
1238    This makes sense only for multipath routes.
1239  */
1240 
1241 int fib_sync_up(struct net_device *dev)
1242 {
1243 	struct fib_info *prev_fi;
1244 	unsigned int hash;
1245 	struct hlist_head *head;
1246 	struct hlist_node *node;
1247 	struct fib_nh *nh;
1248 	int ret;
1249 
1250 	if (!(dev->flags&IFF_UP))
1251 		return 0;
1252 
1253 	prev_fi = NULL;
1254 	hash = fib_devindex_hashfn(dev->ifindex);
1255 	head = &fib_info_devhash[hash];
1256 	ret = 0;
1257 
1258 	hlist_for_each_entry(nh, node, head, nh_hash) {
1259 		struct fib_info *fi = nh->nh_parent;
1260 		int alive;
1261 
1262 		BUG_ON(!fi->fib_nhs);
1263 		if (nh->nh_dev != dev || fi == prev_fi)
1264 			continue;
1265 
1266 		prev_fi = fi;
1267 		alive = 0;
1268 		change_nexthops(fi) {
1269 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
1270 				alive++;
1271 				continue;
1272 			}
1273 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1274 				continue;
1275 			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1276 				continue;
1277 			alive++;
1278 			spin_lock_bh(&fib_multipath_lock);
1279 			nh->nh_power = 0;
1280 			nh->nh_flags &= ~RTNH_F_DEAD;
1281 			spin_unlock_bh(&fib_multipath_lock);
1282 		} endfor_nexthops(fi)
1283 
1284 		if (alive > 0) {
1285 			fi->fib_flags &= ~RTNH_F_DEAD;
1286 			ret++;
1287 		}
1288 	}
1289 
1290 	return ret;
1291 }
1292 
1293 /*
1294    The algorithm is suboptimal, but it provides a really
1295    fair weighted route distribution.
1296  */
1297 
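/*
 * Worked example (illustrative): two live nexthops with weights 3 and 1.
 * When fi->fib_power drops to 0 it is recharged to 4 and the nh_power
 * values to {3, 1}.  Each call draws w in [0..fib_power-1], subtracts
 * nh_power values until w falls to 0 or below, and then decrements the
 * chosen nexthop's nh_power together with fib_power.  Over one full
 * recharge cycle of 4 selections the first nexthop is therefore picked
 * exactly 3 times and the second exactly once.
 */
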
1298 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1299 {
1300 	struct fib_info *fi = res->fi;
1301 	int w;
1302 
1303 	spin_lock_bh(&fib_multipath_lock);
1304 	if (fi->fib_power <= 0) {
1305 		int power = 0;
1306 		change_nexthops(fi) {
1307 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
1308 				power += nh->nh_weight;
1309 				nh->nh_power = nh->nh_weight;
1310 			}
1311 		} endfor_nexthops(fi);
1312 		fi->fib_power = power;
1313 		if (power <= 0) {
1314 			spin_unlock_bh(&fib_multipath_lock);
1315 			/* Race condition: route has just become dead. */
1316 			res->nh_sel = 0;
1317 			return;
1318 		}
1319 	}
1320 
1321 
1322 	/* w should be a random number in [0..fi->fib_power-1];
1323 	   jiffies is a pretty bad approximation.
1324 	 */
1325 
1326 	w = jiffies % fi->fib_power;
1327 
1328 	change_nexthops(fi) {
1329 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1330 			if ((w -= nh->nh_power) <= 0) {
1331 				nh->nh_power--;
1332 				fi->fib_power--;
1333 				res->nh_sel = nhsel;
1334 				spin_unlock_bh(&fib_multipath_lock);
1335 				return;
1336 			}
1337 		}
1338 	} endfor_nexthops(fi);
1339 
1340 	/* Race condition: route has just become dead. */
1341 	res->nh_sel = 0;
1342 	spin_unlock_bh(&fib_multipath_lock);
1343 }
1344 #endif
1345