xref: /linux/net/ipv4/fib_semantics.c (revision 60b2737de1b1ddfdb90f3ba622634eb49d6f3603)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 
47 #include "fib_lookup.h"
48 
/* Debug printk stub: compiled out by default. */
#define FSprintk(a...)

/* Protects fib_info_hash, fib_info_laddrhash, fib_info_devhash and the
 * counters below. */
static DEFINE_RWLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;	/* hash of all fib_infos */
static struct hlist_head *fib_info_laddrhash;	/* hash by preferred source addr */
static unsigned int fib_hash_size;		/* bucket count (power of two) */
static unsigned int fib_info_cnt;		/* number of live fib_infos */

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
/* Nexthops hashed by output device ifindex. */
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/* Iterate over all nexthops of a fib_info.  The loop body sees `nhsel'
 * (index) and `nh' (const pointer); close with endfor_nexthops(). */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

/* Like for_nexthops(), but `nh' is non-const so the body may write it. */
#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the block opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
84 
85 
/* Per-route-type properties, indexed by RTN_* type:
 * `error' is the errno fib_semantic_match() reports for the type
 * (0 means the route is usable); `scope' is the minimum scope a route
 * of that type may carry.
 * NOTE(review): the array is sized RTA_MAX + 1 but indexed by RTN_*
 * values; this relies on RTA_MAX >= RTN_XRESOLVE -- verify against
 * rtnetlink.h. */
static struct
{
	int	error;
	u8	scope;
} fib_props[RTA_MAX + 1] = {
        {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_UNSPEC */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNICAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},	/* RTN_LOCAL */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_BROADCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_ANYCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_MULTICAST */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_BLACKHOLE */
	{
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNREACHABLE */
	{
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_PROHIBIT */
	{
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_THROW */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_NAT */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_XRESOLVE */
};
140 
141 
142 /* Release a nexthop info record */
143 
void free_fib_info(struct fib_info *fi)
{
	/* The entry must have been marked dead by the releaser first;
	 * freeing a live one indicates a refcounting bug somewhere. */
	if (fi->fib_dead == 0) {
		printk("Freeing alive fib_info %p\n", fi);
		return;
	}
	/* Drop the device reference held by each nexthop. */
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}
158 
/* Drop one tree reference on @fi.  When the last tree reference goes
 * away the entry is unhashed from all three chains (info, prefsrc and
 * per-device), marked dead, and its lookup reference is put. */
void fib_release_info(struct fib_info *fi)
{
	write_lock(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			/* Nexthops without a device were never hashed. */
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	write_unlock(&fib_info_lock);
}
176 
/* Compare the nexthop arrays of @fi and @ofi field by field (oif,
 * gateway, scope and - where configured - weight and traffic class),
 * ignoring only the RTNH_F_DEAD flag.  Returns 0 when they are
 * equivalent, -1 otherwise.  The caller guarantees both entries have
 * the same number of nexthops. */
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw  != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}
197 
198 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
199 {
200 	unsigned int mask = (fib_hash_size - 1);
201 	unsigned int val = fi->fib_nhs;
202 
203 	val ^= fi->fib_protocol;
204 	val ^= fi->fib_prefsrc;
205 	val ^= fi->fib_priority;
206 
207 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
208 }
209 
/* Find an already-registered fib_info equivalent to @nfi: same
 * protocol, prefsrc, priority, metrics and flags (ignoring
 * RTNH_F_DEAD), and a matching nexthop list per nh_comp().
 * Returns the existing entry or NULL. */
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		/* Cheap mismatch reject before the full comparison. */
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}
235 
236 static inline unsigned int fib_devindex_hashfn(unsigned int val)
237 {
238 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
239 
240 	return (val ^
241 		(val >> DEVINDEX_HASHBITS) ^
242 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
243 }
244 
245 /* Check, that the gateway is already configured.
246    Used only by redirect accept routine.
247  */
248 
/* Returns 0 when (@gw, @dev) is an existing, alive nexthop of some
 * route, -1 otherwise. */
int ip_fib_check_default(u32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	read_lock(&fib_info_lock);

	/* Scan the per-device nexthop chain for a live entry via @gw. */
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags&RTNH_F_DEAD)) {
			read_unlock(&fib_info_lock);
			return 0;
		}
	}

	read_unlock(&fib_info_lock);

	return -1;
}
273 
/* Send an rtnetlink notification (@event) for a route change to the
 * RTMGRP_IPV4_ROUTE multicast group, and echo it back to the
 * requesting socket when NLM_F_ECHO is set.  Best effort: allocation
 * or fill failures are silently dropped. */
void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
	       int z, int tb_id,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	u32 pid = req ? req->pid : n->nlmsg_pid;
	/* rtmsg header plus 256 bytes of room for route attributes. */
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
			  fa->fa_type, fa->fa_scope, &key, z,
			  fa->fa_tos,
			  fa->fa_info, 0) < 0) {
		kfree_skb(skb);
		return;
	}
	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
	/* Take an extra skb reference so the unicast echo below can
	 * reuse the buffer after the broadcast consumed one. */
	if (n->nlmsg_flags&NLM_F_ECHO)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
	if (n->nlmsg_flags&NLM_F_ECHO)
		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
}
300 
301 /* Return the first fib alias matching TOS with
302  * priority less than or equal to PRIO.
303  */
304 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
305 {
306 	if (fah) {
307 		struct fib_alias *fa;
308 		list_for_each_entry(fa, fah, fa_list) {
309 			if (fa->fa_tos > tos)
310 				continue;
311 			if (fa->fa_info->fib_priority >= prio ||
312 			    fa->fa_tos < tos)
313 				return fa;
314 		}
315 	}
316 	return NULL;
317 }
318 
/* Heuristic liveness check for a (default) route's gateway based on
 * the ARP neighbour state of the first nexthop.
 * Returns 0 when the gateway is reachable (or valid and not the
 * current default), 1 when it looks dead; on returning 1 it may record
 * @fi in *last_resort/*last_idx as the caller's fallback candidate. */
int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int *dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state==NUD_REACHABLE)
		return 0;
	/* A merely valid entry is good enough unless this route is the
	 * currently selected default (order == *dflt). */
	if ((state&NUD_VALID) && order != *dflt)
		return 0;
	if ((state&NUD_VALID) ||
	    (*last_idx<0 && order > *dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}
341 
342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
343 
344 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
345 {
346 	while (RTA_OK(attr,attrlen)) {
347 		if (attr->rta_type == type)
348 			return *(u32*)RTA_DATA(attr);
349 		attr = RTA_NEXT(attr, attrlen);
350 	}
351 	return 0;
352 }
353 
354 static int
355 fib_count_nexthops(struct rtattr *rta)
356 {
357 	int nhs = 0;
358 	struct rtnexthop *nhp = RTA_DATA(rta);
359 	int nhlen = RTA_PAYLOAD(rta);
360 
361 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
362 		if ((nhlen -= nhp->rtnh_len) < 0)
363 			return 0;
364 		nhs++;
365 		nhp = RTNH_NEXT(nhp);
366 	};
367 	return nhs;
368 }
369 
/* Fill @fi's nexthop array from an RTA_MULTIPATH attribute.
 * Returns 0 on success, -EINVAL on a truncated or malformed nexthop
 * list.  The caller sized the array via fib_count_nexthops() on the
 * same attribute. */
static int
fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
{
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	change_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		/* High flag bits come from the route itself, the low
		 * byte from the per-nexthop spec. */
		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
		nh->nh_oif = nhp->rtnh_ifindex;
		nh->nh_weight = nhp->rtnh_hops + 1;
		if (attrlen) {
			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
#ifdef CONFIG_NET_CLS_ROUTE
			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
	return 0;
}
393 
394 #endif
395 
/* Check whether the nexthop specification of a route request matches
 * @fi.  Returns 0 on match, 1 on mismatch, -EINVAL when the multipath
 * attribute is malformed. */
int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
		 struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *nhp;
	int nhlen;
#endif

	if (rta->rta_priority &&
	    *rta->rta_priority != fi->fib_priority)
		return 1;

	/* Simple (non-multipath) spec: compare against the first nexthop
	 * only. */
	if (rta->rta_oif || rta->rta_gw) {
		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp == NULL)
		return 0;
	nhp = RTA_DATA(rta->rta_mp);
	nhlen = RTA_PAYLOAD(rta->rta_mp);

	/* Walk the request's nexthop records in parallel with @fi's. */
	for_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		u32 gw;

		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
			return 1;
		if (attrlen) {
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
			if (gw && gw != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
			if (gw && gw != nh->nh_tclassid)
				return 1;
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
#endif
	return 0;
}
444 
445 
446 /*
447    Picture
448    -------
449 
450    Semantics of nexthop is very messy by historical reasons.
451    We have to take into account, that:
452    a) gateway can be actually local interface address,
453       so that gatewayed route is direct.
454    b) gateway must be on-link address, possibly
455       described not by an ifaddr, but also by a direct route.
456    c) If both gateway and interface are specified, they should not
457       contradict.
458    d) If we use tunnel routes, gateway could be not on-link.
459 
460    Attempt to reconcile all of these (alas, self-contradictory) conditions
461    results in pretty ugly and hairy code with obscure logic.
462 
   I chose to generalize it instead, so that the size
   of the code does not increase practically, but it becomes
   much more general.
466    Every prefix is assigned a "scope" value: "host" is local address,
467    "link" is direct route,
468    [ ... "site" ... "interior" ... ]
469    and "universe" is true gateway route with global meaning.
470 
471    Every prefix refers to a set of "nexthop"s (gw, oif),
472    where gw must have narrower scope. This recursion stops
473    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474    which means that gw is forced to be on link.
475 
   Code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes to
   coexist in peace.
480 
481    Normally it looks as following.
482 
483    {universe prefix}  -> (gw, oif) [scope link]
484                           |
485 			  |-> {link prefix} -> (gw, oif) [scope local]
486 			                        |
487 						|-> {local prefix} (terminal node)
488  */
489 
/* Resolve and validate one nexthop of a route being added.
 * On success fills in nh->nh_dev (with a device reference held),
 * nh->nh_scope and possibly nh->nh_oif.  Returns 0 or a negative
 * errno. */
static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		/* Pervasive gateways skip the reachability check. */
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			/* ONLINK asserts the gateway is directly reachable
			 * on the given interface: only sanity-check scope,
			 * address type and device state. */
			struct net_device *dev;

			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			/* Otherwise the gateway itself must resolve in the
			 * FIB, with a strictly narrower scope. */
			struct flowi fl = { .nl_u = { .ip4_u =
						      { .daddr = nh->nh_gw,
							.scope = r->rtm_scope + 1 } },
					    .oif = nh->nh_oif };

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		/* No gateway: direct route.  The output interface must
		 * exist, carry IP and be up. */
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
564 
565 static inline unsigned int fib_laddr_hashfn(u32 val)
566 {
567 	unsigned int mask = (fib_hash_size - 1);
568 
569 	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
570 }
571 
572 static struct hlist_head *fib_hash_alloc(int bytes)
573 {
574 	if (bytes <= PAGE_SIZE)
575 		return kmalloc(bytes, GFP_KERNEL);
576 	else
577 		return (struct hlist_head *)
578 			__get_free_pages(GFP_KERNEL, get_order(bytes));
579 }
580 
581 static void fib_hash_free(struct hlist_head *hash, int bytes)
582 {
583 	if (!hash)
584 		return;
585 
586 	if (bytes <= PAGE_SIZE)
587 		kfree(hash);
588 	else
589 		free_pages((unsigned long) hash, get_order(bytes));
590 }
591 
/* Rehash every fib_info from the current tables into the freshly
 * allocated @new_info_hash/@new_laddrhash of @new_size buckets and
 * install them as the active tables.  Runs under fib_info_lock so
 * readers never see a half-moved table.
 * NOTE(review): the old tables are not freed here, and no visible
 * caller frees them either -- looks like a leak on resize; verify. */
static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	unsigned int old_size = fib_hash_size;
	unsigned int i;

	write_lock(&fib_info_lock);
	/* Must be updated before rehashing: the hash functions read it. */
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	/* Same procedure for the prefsrc (local address) hash. */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	write_unlock(&fib_info_lock);
}
640 
/* Build (or reuse) a fib_info from an rtnetlink route request.
 * Returns a referenced fib_info on success; on failure returns NULL
 * and stores the negative errno in *errp.  If an equivalent fib_info
 * is already hashed it is reused and the freshly built one discarded. */
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
		const struct nlmsghdr *nlh, int *errp)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int nhs = 1;
#else
	const int nhs = 1;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	u32 mp_alg = IP_MP_ALG_NONE;
#endif

	/* Fast check to catch the most weird cases */
	if (fib_props[r->rtm_type].scope > r->rtm_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp) {
		nhs = fib_count_nexthops(rta->rta_mp);
		if (nhs == 0)
			goto err_inval;
	}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (rta->rta_mp_alg) {
		mp_alg = *rta->rta_mp_alg;

		if (mp_alg < IP_MP_ALG_NONE ||
		    mp_alg > IP_MP_ALG_MAX)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	/* Grow the hash tables once the entry count reaches the current
	 * size.  A failed grow is tolerated unless there is no table at
	 * all yet. */
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		/* NOTE(review): sizeof(struct hlist_head *) -- presumably
		 * identical to sizeof(struct hlist_head); confirm. */
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else {
			memset(new_info_hash, 0, bytes);
			memset(new_laddrhash, 0, bytes);

			fib_hash_move(new_info_hash, new_laddrhash, new_size);
		}

		if (!fib_hash_size)
			goto failure;
	}

	/* fib_info and its nexthop array are one allocation. */
	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;
	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));

	fi->fib_protocol = r->rtm_protocol;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	fi->fib_flags = r->rtm_flags;
	if (rta->rta_priority)
		fi->fib_priority = *rta->rta_priority;
	/* Copy RTAX_* metrics out of the RTA_METRICS nest. */
	if (rta->rta_mx) {
		int attrlen = RTA_PAYLOAD(rta->rta_mx);
		struct rtattr *attr = RTA_DATA(rta->rta_mx);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}
	if (rta->rta_prefsrc)
		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);

	if (rta->rta_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
			goto failure;
		/* Flat oif/gw/flow attributes, if also present, must agree
		 * with the first nexthop of the multipath spec. */
		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
			goto err_inval;
		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
			goto err_inval;
#endif
#else
		/* Multipath attribute without multipath support. */
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;
		if (rta->rta_oif)
			nh->nh_oif = *rta->rta_oif;
		if (rta->rta_gw)
			memcpy(&nh->nh_gw, rta->rta_gw, 4);
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow)
			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
#endif
		nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	fi->fib_mp_alg = mp_alg;
#endif

	/* Error routes (blackhole/unreachable/...) carry no nexthops. */
	if (fib_props[r->rtm_type].error) {
		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
			goto err_inval;
		goto link_it;
	}

	if (r->rtm_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (r->rtm_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		/* Validate every nexthop; resolves device and scope. */
		change_nexthops(fi) {
			if ((err = fib_check_nh(r, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		/* The preferred source must be one of our local addresses,
		 * unless this is the local route for that address itself. */
		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	/* Reuse an equivalent entry if one is already hashed. */
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	write_lock(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	write_unlock(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
        *errp = err;
        if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}
	return NULL;
}
849 
/* Walk the alias list @head for a route usable by flow @flp.
 * Returns 0 and fills @res (taking a clntref on the fib_info) on a
 * usable match, 1 when nothing matches, or the route type's error
 * code (e.g. -EACCES for prohibit). */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __u32 zone, __u32 mask,
			int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry(fa, head, fa_list) {
		int err;

		/* A nonzero alias TOS must equal the flow's TOS exactly. */
		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				/* Pick the first alive nexthop honouring a
				 * requested output interface, if any. */
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			};
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	res->netmask = mask;
	res->network = zone &
		(0xFFFFFFFF >> (32 - prefixlen));
#endif
	/* The caller is responsible for the reference taken here. */
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}
924 
925 /* Find appropriate source address to this destination */
926 
/* Ask inet_select_addr() for a source address on the result's output
 * device with the result's scope. */
u32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}
931 
/* Fill @skb with an rtnetlink message describing one route.
 * Returns skb->len on success or -1 when the skb ran out of room (any
 * partially written message is trimmed away). */
int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* rollback point on failure */

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	/* Single nexthop: flat RTA_GATEWAY/RTA_OIF attributes. */
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Multiple nexthops: nest rtnexthop records in RTA_MULTIPATH. */
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			/* Record length is only known after the optional
			 * gateway attribute has been appended. */
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
1001 
1002 #ifndef CONFIG_IP_NOSIOCRT
1003 
/* Translate an old-style SIOCADDRT/SIOCDELRT rtentry into the
 * equivalent rtnetlink request (nlmsghdr + rtmsg + kern_rta).
 * Returns 0 on success or a negative errno.  On success, if metric
 * flags were set, rta->rta_mx points to a kmalloc'ed attribute block
 * owned by the caller. */
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int    plen;
	u32    *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check mask for validity:
	   a) it must be contiguous.
	   b) destination must have all host bits clear.
	   c) if application forgot to set correct family (AF_INET),
	      reject request unless it is absolutely clear i.e.
	      both family and mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = current->pid;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		/* Stash the adjusted metric in the rtentry's padding so
		 * rta_priority can point at persistent storage. */
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char   devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		/* "ethX:N" names an address alias: split off the label
		 * and use the alias' address as preferred source. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	/* Convert legacy MTU/window/irtt options into an RTA_METRICS
	 * attribute block sized for up to three 4-byte metrics. */
	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		struct rtattr *rec;
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len  = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* MTU is carried as advertised MSS: MTU minus 40
			 * bytes of IP + TCP headers. */
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* NOTE(review): irtt scaled by <<3 -- presumably a
			 * jiffies/units conversion; verify against route(8)
			 * semantics. */
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}
1143 
1144 #endif
1145 
1146 /*
1147    Update FIB if:
1148    - local address disappeared -> we must delete all the entries
1149      referring to it.
1150    - device went down -> we must shutdown all nexthops going via it.
1151  */
1152 
/*
 * fib_sync_down - mark FIB entries dead after a local address or device loss.
 * @local: preferred-source address that disappeared, or 0 if none
 *         (NOTE(review): presumably network byte order, as elsewhere in the
 *          IPv4 FIB code -- confirm against callers)
 * @dev:   device that went down, or NULL if none
 * @force: 0  - kill only nexthops via @dev whose scope != RT_SCOPE_NOWHERE;
 *         1  - kill every nexthop via @dev regardless of scope;
 *         >1 - additionally declare a whole multipath route dead as soon as
 *              any of its nexthops uses @dev
 *
 * Returns the number of fib_info entries newly flagged RTNH_F_DEAD.
 */
int fib_sync_down(u32 local, struct net_device *dev, int force)
{
	int ret = 0;
	/* Nexthops whose nh_scope equals 'scope' are spared; with force,
	   scope = -1 never matches, so every nexthop on @dev is killed. */
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		/* Kill every route whose preferred source address was @local. */
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			/* Several nexthops of one fib_info may hash to this
			   bucket; prev_fi ensures each fib_info is visited
			   at most once per walk. */
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			/* NB: change_nexthops() declares its own 'nh', which
			   deliberately shadows the hash-walk iterator above. */
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					/* Withdraw this nexthop's remaining
					   round-robin tokens from the route. */
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				/* force > 1: one dead nexthop via @dev kills
				   the whole multipath route. */
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			/* Route is dead only when every nexthop is dead. */
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}
1221 
1222 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1223 
1224 /*
1225    Dead device goes up. We wake up dead nexthops.
1226    It takes sense only on multipath routes.
1227  */
1228 
/*
 * fib_sync_up - revive dead nexthops when a device comes back up.
 * @dev: device that has (re)gained IFF_UP
 *
 * Walks every nexthop hashed on @dev's ifindex, clears RTNH_F_DEAD on
 * those that can be used again, and clears RTNH_F_DEAD on any fib_info
 * that ends up with at least one alive nexthop.
 *
 * Returns the number of fib_info entries brought back to life.
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	/* Nothing to revive if the device is not actually up. */
	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		/* Several nexthops of one fib_info may hash to this bucket;
		   prev_fi ensures each fib_info is processed only once. */
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		/* NB: change_nexthops() declares its own 'nh', which
		   deliberately shadows the hash-walk iterator above. */
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				/* Already alive -- just count it. */
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			/* Only revive nexthops on @dev, and only while the
			   device still has an IPv4 in_device attached. */
			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
				continue;
			alive++;
			/* Reset the round-robin token count under the
			   multipath lock before marking it usable. */
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}
1280 
1281 /*
1282    The algorithm is suboptimal, but it provides really
1283    fair weighted route distribution.
1284  */
1285 
/*
 * fib_select_multipath - pick one nexthop of a multipath route.
 * @flp: flow being routed (not consulted by this implementation)
 * @res: lookup result; res->nh_sel is set to the index of the chosen
 *       nexthop (0 on the dead-route race)
 *
 * Weighted round-robin: each alive nexthop starts a round with
 * nh_power = nh_weight "tokens", and fi->fib_power holds the total.
 * Every selection spends one token from the chosen nexthop and from the
 * total; when the total reaches zero the tokens are replenished, so over
 * a full round each nexthop is selected in proportion to its weight.
 */
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		/* Round exhausted (or first use): refill token counts
		   from the configured weights of the alive nexthops. */
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}


	/* w should be random number [0..fi->fib_power-1],
	   it is pretty bad approximation.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			/* Walk the token counts until w is used up; the
			   nexthop that takes w to or below zero wins and
			   pays one token. */
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
1332 #endif
1333