xref: /linux/net/ipv4/fib_semantics.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_arp.h>
35 #include <linux/proc_fs.h>
36 #include <linux/skbuff.h>
37 #include <linux/netlink.h>
38 #include <linux/init.h>
39 
40 #include <net/arp.h>
41 #include <net/ip.h>
42 #include <net/protocol.h>
43 #include <net/route.h>
44 #include <net/tcp.h>
45 #include <net/sock.h>
46 #include <net/ip_fib.h>
47 #include <net/ip_mp_alg.h>
48 
49 #include "fib_lookup.h"
50 
51 #define FSprintk(a...)
52 
53 static DEFINE_RWLOCK(fib_info_lock);
54 static struct hlist_head *fib_info_hash;
55 static struct hlist_head *fib_info_laddrhash;
56 static unsigned int fib_hash_size;
57 static unsigned int fib_info_cnt;
58 
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62 
63 #ifdef CONFIG_IP_ROUTE_MULTIPATH
64 
65 static DEFINE_SPINLOCK(fib_multipath_lock);
66 
67 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
68 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
69 
70 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
71 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
72 
73 #else /* CONFIG_IP_ROUTE_MULTIPATH */
74 
75 /* Hope, that gcc will optimize it to get rid of dummy loop */
76 
77 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
78 for (nhsel=0; nhsel < 1; nhsel++)
79 
80 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
81 for (nhsel=0; nhsel < 1; nhsel++)
82 
83 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
84 
85 #define endfor_nexthops(fi) }
86 
87 
88 static const struct
89 {
90 	int	error;
91 	u8	scope;
92 } fib_props[RTA_MAX + 1] = {
93         {
94 		.error	= 0,
95 		.scope	= RT_SCOPE_NOWHERE,
96 	},	/* RTN_UNSPEC */
97 	{
98 		.error	= 0,
99 		.scope	= RT_SCOPE_UNIVERSE,
100 	},	/* RTN_UNICAST */
101 	{
102 		.error	= 0,
103 		.scope	= RT_SCOPE_HOST,
104 	},	/* RTN_LOCAL */
105 	{
106 		.error	= 0,
107 		.scope	= RT_SCOPE_LINK,
108 	},	/* RTN_BROADCAST */
109 	{
110 		.error	= 0,
111 		.scope	= RT_SCOPE_LINK,
112 	},	/* RTN_ANYCAST */
113 	{
114 		.error	= 0,
115 		.scope	= RT_SCOPE_UNIVERSE,
116 	},	/* RTN_MULTICAST */
117 	{
118 		.error	= -EINVAL,
119 		.scope	= RT_SCOPE_UNIVERSE,
120 	},	/* RTN_BLACKHOLE */
121 	{
122 		.error	= -EHOSTUNREACH,
123 		.scope	= RT_SCOPE_UNIVERSE,
124 	},	/* RTN_UNREACHABLE */
125 	{
126 		.error	= -EACCES,
127 		.scope	= RT_SCOPE_UNIVERSE,
128 	},	/* RTN_PROHIBIT */
129 	{
130 		.error	= -EAGAIN,
131 		.scope	= RT_SCOPE_UNIVERSE,
132 	},	/* RTN_THROW */
133 	{
134 		.error	= -EINVAL,
135 		.scope	= RT_SCOPE_NOWHERE,
136 	},	/* RTN_NAT */
137 	{
138 		.error	= -EINVAL,
139 		.scope	= RT_SCOPE_NOWHERE,
140 	},	/* RTN_XRESOLVE */
141 };
142 
143 
144 /* Release a nexthop info record */
145 
void free_fib_info(struct fib_info *fi)
{
	/* A record must be marked dead (by fib_release_info() or the
	 * error path of fib_create_info()) before it may be freed;
	 * otherwise it is still linked into the hash tables. */
	if (fi->fib_dead == 0) {
		printk("Freeing alive fib_info %p\n", fi);
		return;
	}
	/* Drop the device reference held by each nexthop. */
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}
160 
/* Drop one tree reference on @fi.  When the last tree reference goes
 * away, unlink the record from the info/local-address/device hash
 * tables, mark it dead and drop the client reference so the final
 * fib_info_put() can actually free it. */
void fib_release_info(struct fib_info *fi)
{
	write_lock(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			/* Nexthops without a device were never hashed. */
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	write_unlock(&fib_info_lock);
}
178 
/* Compare the nexthop arrays of two fib_infos field by field, ignoring
 * only the RTNH_F_DEAD bit in the flags.  Returns 0 on a full match,
 * -1 otherwise.  The caller (fib_find_info) has already verified that
 * both records carry the same number of nexthops. */
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw  != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}
199 
200 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201 {
202 	unsigned int mask = (fib_hash_size - 1);
203 	unsigned int val = fi->fib_nhs;
204 
205 	val ^= fi->fib_protocol;
206 	val ^= fi->fib_prefsrc;
207 	val ^= fi->fib_priority;
208 
209 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210 }
211 
/* Look up an existing fib_info that exactly matches @nfi (protocol,
 * preferred source, priority, metrics, flags modulo RTNH_F_DEAD, and
 * all nexthops).  Used by fib_create_info() to share identical records.
 * Returns the matching record or NULL. */
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}
237 
238 static inline unsigned int fib_devindex_hashfn(unsigned int val)
239 {
240 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
241 
242 	return (val ^
243 		(val >> DEVINDEX_HASHBITS) ^
244 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
245 }
246 
247 /* Check, that the gateway is already configured.
248    Used only by redirect accept routine.
249  */
250 
251 int ip_fib_check_default(u32 gw, struct net_device *dev)
252 {
253 	struct hlist_head *head;
254 	struct hlist_node *node;
255 	struct fib_nh *nh;
256 	unsigned int hash;
257 
258 	read_lock(&fib_info_lock);
259 
260 	hash = fib_devindex_hashfn(dev->ifindex);
261 	head = &fib_info_devhash[hash];
262 	hlist_for_each_entry(nh, node, head, nh_hash) {
263 		if (nh->nh_dev == dev &&
264 		    nh->nh_gw == gw &&
265 		    !(nh->nh_flags&RTNH_F_DEAD)) {
266 			read_unlock(&fib_info_lock);
267 			return 0;
268 		}
269 	}
270 
271 	read_unlock(&fib_info_lock);
272 
273 	return -1;
274 }
275 
/* Build and broadcast an RTM_NEWROUTE/RTM_DELROUTE netlink notification
 * for the given alias.  Failures (allocation, dump) are silently
 * dropped: notifications are best-effort. */
void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
	       int z, int tb_id,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	u32 pid = req ? req->pid : n->nlmsg_pid;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
			  fa->fa_type, fa->fa_scope, &key, z,
			  fa->fa_tos,
			  fa->fa_info, 0) < 0) {
		kfree_skb(skb);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
	/* If the requester asked for an echo, take an extra reference:
	 * the broadcast consumes one, the unicast below the other. */
	if (n->nlmsg_flags&NLM_F_ECHO)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
	if (n->nlmsg_flags&NLM_F_ECHO)
		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
}
302 
303 /* Return the first fib alias matching TOS with
304  * priority less than or equal to PRIO.
305  */
306 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
307 {
308 	if (fah) {
309 		struct fib_alias *fa;
310 		list_for_each_entry(fa, fah, fa_list) {
311 			if (fa->fa_tos > tos)
312 				continue;
313 			if (fa->fa_info->fib_priority >= prio ||
314 			    fa->fa_tos < tos)
315 				return fa;
316 		}
317 	}
318 	return NULL;
319 }
320 
321 int fib_detect_death(struct fib_info *fi, int order,
322 		     struct fib_info **last_resort, int *last_idx, int *dflt)
323 {
324 	struct neighbour *n;
325 	int state = NUD_NONE;
326 
327 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
328 	if (n) {
329 		state = n->nud_state;
330 		neigh_release(n);
331 	}
332 	if (state==NUD_REACHABLE)
333 		return 0;
334 	if ((state&NUD_VALID) && order != *dflt)
335 		return 0;
336 	if ((state&NUD_VALID) ||
337 	    (*last_idx<0 && order > *dflt)) {
338 		*last_resort = fi;
339 		*last_idx = order;
340 	}
341 	return 1;
342 }
343 
344 #ifdef CONFIG_IP_ROUTE_MULTIPATH
345 
346 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
347 {
348 	while (RTA_OK(attr,attrlen)) {
349 		if (attr->rta_type == type)
350 			return *(u32*)RTA_DATA(attr);
351 		attr = RTA_NEXT(attr, attrlen);
352 	}
353 	return 0;
354 }
355 
356 static int
357 fib_count_nexthops(struct rtattr *rta)
358 {
359 	int nhs = 0;
360 	struct rtnexthop *nhp = RTA_DATA(rta);
361 	int nhlen = RTA_PAYLOAD(rta);
362 
363 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
364 		if ((nhlen -= nhp->rtnh_len) < 0)
365 			return 0;
366 		nhs++;
367 		nhp = RTNH_NEXT(nhp);
368 	};
369 	return nhs;
370 }
371 
/* Fill @fi's nexthop array from an RTA_MULTIPATH attribute.  One
 * rtnexthop entry (flags, oif, weight, optional gateway/classid) is
 * consumed per nexthop; returns -EINVAL on a truncated attribute. */
static int
fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
{
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	change_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		/* Upper flag bits come from the route, low byte from the
		 * per-nexthop entry. */
		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
		nh->nh_oif = nhp->rtnh_ifindex;
		nh->nh_weight = nhp->rtnh_hops + 1;
		if (attrlen) {
			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
#ifdef CONFIG_NET_CLS_ROUTE
			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
	return 0;
}
395 
396 #endif
397 
/* Check whether the nexthop specification in a netlink request matches
 * the nexthops of @fi.  Returns 0 on match, 1 on mismatch, and
 * -EINVAL for a malformed multipath attribute.  Unset request fields
 * (priority, oif, gw) act as wildcards. */
int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
		 struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *nhp;
	int nhlen;
#endif

	if (rta->rta_priority &&
	    *rta->rta_priority != fi->fib_priority)
		return 1;

	/* Single-nexthop style request: compare against the first (only)
	 * nexthop of the record. */
	if (rta->rta_oif || rta->rta_gw) {
		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp == NULL)
		return 0;
	nhp = RTA_DATA(rta->rta_mp);
	nhlen = RTA_PAYLOAD(rta->rta_mp);

	/* Multipath request: each rtnexthop entry must match the
	 * corresponding nexthop of the record in order. */
	for_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		u32 gw;

		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
			return 1;
		if (attrlen) {
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
			if (gw && gw != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
			if (gw && gw != nh->nh_tclassid)
				return 1;
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
#endif
	return 0;
}
446 
447 
448 /*
449    Picture
450    -------
451 
452    Semantics of nexthop is very messy by historical reasons.
453    We have to take into account, that:
454    a) gateway can be actually local interface address,
455       so that gatewayed route is direct.
456    b) gateway must be on-link address, possibly
457       described not by an ifaddr, but also by a direct route.
458    c) If both gateway and interface are specified, they should not
459       contradict.
460    d) If we use tunnel routes, gateway could be not on-link.
461 
462    Attempt to reconcile all of these (alas, self-contradictory) conditions
463    results in pretty ugly and hairy code with obscure logic.
464 
465    I chose to generalized it instead, so that the size
466    of code does not increase practically, but it becomes
467    much more general.
468    Every prefix is assigned a "scope" value: "host" is local address,
469    "link" is direct route,
470    [ ... "site" ... "interior" ... ]
471    and "universe" is true gateway route with global meaning.
472 
473    Every prefix refers to a set of "nexthop"s (gw, oif),
474    where gw must have narrower scope. This recursion stops
475    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
476    which means that gw is forced to be on link.
477 
478    Code is still hairy, but now it is apparently logically
479    consistent and very flexible. F.e. as by-product it allows
480    to co-exists in peace independent exterior and interior
481    routing processes.
482 
483    Normally it looks as following.
484 
485    {universe prefix}  -> (gw, oif) [scope link]
486                           |
487 			  |-> {link prefix} -> (gw, oif) [scope local]
488 			                        |
489 						|-> {local prefix} (terminal node)
490  */
491 
/* Validate one nexthop of a route being created and resolve its device
 * and scope (see the "Picture" comment above for the scope model).
 * Takes a device reference for the resolved nh_dev on success.
 * Returns 0 or a negative errno. */
static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		/* Pervasive nexthops skip validation entirely. */
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			struct net_device *dev;

			/* ONLINK forces the gateway to be on-link: the
			 * route must be wider than link scope, the gateway
			 * a unicast address, and the device explicit + up. */
			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			/* Recursively resolve the gateway with a strictly
			 * narrower scope than the route itself. */
			struct flowi fl = { .nl_u = { .ip4_u =
						      { .daddr = nh->nh_gw,
							.scope = r->rtm_scope + 1 } },
					    .oif = nh->nh_oif };

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		/* No gateway: a direct route.  Only the output interface
		 * needs to exist and be up. */
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
566 
567 static inline unsigned int fib_laddr_hashfn(u32 val)
568 {
569 	unsigned int mask = (fib_hash_size - 1);
570 
571 	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
572 }
573 
574 static struct hlist_head *fib_hash_alloc(int bytes)
575 {
576 	if (bytes <= PAGE_SIZE)
577 		return kmalloc(bytes, GFP_KERNEL);
578 	else
579 		return (struct hlist_head *)
580 			__get_free_pages(GFP_KERNEL, get_order(bytes));
581 }
582 
583 static void fib_hash_free(struct hlist_head *hash, int bytes)
584 {
585 	if (!hash)
586 		return;
587 
588 	if (bytes <= PAGE_SIZE)
589 		kfree(hash);
590 	else
591 		free_pages((unsigned long) hash, get_order(bytes));
592 }
593 
/* Rehash every fib_info from the old info/local-address tables into the
 * freshly allocated tables of @new_size buckets, then free the old
 * tables.  Runs under fib_info_lock so lookups never see a half-moved
 * state. */
static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	write_lock(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	/* Set the new size first: fib_info_hashfn()/fib_laddr_hashfn()
	 * below must mask with the new table size. */
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	write_unlock(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}
649 
/* Build a fib_info from a parsed netlink route request, sharing an
 * existing identical record where possible.  On success returns the
 * record with one tree reference and one client reference taken; on
 * failure returns NULL and stores a negative errno in *errp. */
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
		const struct nlmsghdr *nlh, int *errp)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int nhs = 1;
#else
	const int nhs = 1;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	u32 mp_alg = IP_MP_ALG_NONE;
#endif

	/* Fast check to catch the most weird cases */
	if (fib_props[r->rtm_type].scope > r->rtm_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp) {
		nhs = fib_count_nexthops(rta->rta_mp);
		if (nhs == 0)
			goto err_inval;
	}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (rta->rta_mp_alg) {
		mp_alg = *rta->rta_mp_alg;

		if (mp_alg < IP_MP_ALG_NONE ||
		    mp_alg > IP_MP_ALG_MAX)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	/* Grow the hash tables when the record count reaches the bucket
	 * count; allocation failure here is tolerated unless the table
	 * has never been allocated at all. */
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else {
			memset(new_info_hash, 0, bytes);
			memset(new_laddrhash, 0, bytes);

			fib_hash_move(new_info_hash, new_laddrhash, new_size);
		}

		if (!fib_hash_size)
			goto failure;
	}

	/* fib_info and its nexthop array live in one allocation. */
	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;
	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));

	fi->fib_protocol = r->rtm_protocol;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	fi->fib_flags = r->rtm_flags;
	if (rta->rta_priority)
		fi->fib_priority = *rta->rta_priority;
	/* Copy RTA_METRICS sub-attributes into fib_metrics, indexed by
	 * RTAX_* minus one. */
	if (rta->rta_mx) {
		int attrlen = RTA_PAYLOAD(rta->rta_mx);
		struct rtattr *attr = RTA_DATA(rta->rta_mx);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}
	if (rta->rta_prefsrc)
		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);

	if (rta->rta_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
			goto failure;
		/* Explicit oif/gw/flow in the request must agree with the
		 * first nexthop of the multipath attribute. */
		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
			goto err_inval;
		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;
		if (rta->rta_oif)
			nh->nh_oif = *rta->rta_oif;
		if (rta->rta_gw)
			memcpy(&nh->nh_gw, rta->rta_gw, 4);
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow)
			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
#endif
		nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	fi->fib_mp_alg = mp_alg;
#endif

	/* Error-type routes (blackhole, unreachable, ...) may not carry
	 * any nexthop information. */
	if (fib_props[r->rtm_type].error) {
		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
			goto err_inval;
		goto link_it;
	}

	if (r->rtm_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (r->rtm_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(r, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	/* A preferred source must be a local address, except when the
	 * route is the RTN_LOCAL route for that very address. */
	if (fi->fib_prefsrc) {
		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	/* Share an existing identical record instead of linking a new one. */
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	write_lock(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	write_unlock(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
        *errp = err;
        if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}
	return NULL;
}
858 
859 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
/* Note! fib_semantic_match intentionally uses  RCU list functions. */
/* Select from @head the first alias that matches the flow's TOS and
 * scope and has a usable nexthop, and fill *res.  Returns 0 on success
 * (with a client reference taken on res->fi), a negative errno for
 * error-type routes, and 1 when nothing matches. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __u32 zone, __u32 mask,
			int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		/* TOS 0 on the alias acts as a wildcard. */
		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				/* Pick the first live nexthop that is
				 * compatible with the requested oif. */
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			};
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	res->netmask = mask;
	res->network = zone &
		(0xFFFFFFFF >> (32 - prefixlen));
#endif
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}
934 
935 /* Find appropriate source address to this destination */
936 
937 u32 __fib_res_prefsrc(struct fib_result *res)
938 {
939 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940 }
941 
/* Serialize one route into @skb as an rtmsg netlink message.  Returns
 * skb->len on success, -1 when the skb runs out of room (NLMSG_NEW and
 * RTA_PUT jump to the failure labels below in that case). */
int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* for rollback on failure */

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	/* Single nexthop: flat RTA_GATEWAY/RTA_OIF attributes. */
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Multiple nexthops: nested RTA_MULTIPATH attribute, with the
	 * lengths patched up after each nexthop is appended. */
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Roll back everything written by this call. */
	skb_trim(skb, b - skb->data);
	return -1;
}
1011 
1012 #ifndef CONFIG_IP_NOSIOCRT
1013 
/* Convert a legacy SIOCADDRT/SIOCDELRT struct rtentry into the netlink
 * rtmsg + kern_rta representation used internally.  @rta fields end up
 * pointing either into *r or into a kmalloc'ed RTA_METRICS blob
 * (rta->rta_mx) that the caller must free.  Returns 0 or a negative
 * errno. */
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int    plen;
	u32    *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check mask for validity:
	   a) it must be contiguous.
	   b) destination must have all host bits clear.
	   c) if application forgot to set correct family (AF_INET),
	      reject request unless it is absolutely clear i.e.
	      both family and mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = 0;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		/* Legacy metric is offset by one; stash the adjusted value
		 * in the rtentry's padding so rta_priority can point at it. */
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char   devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		/* "ethX:label" selects an aliased address; split the label
		 * off before the device lookup. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	/* Translate the legacy per-route metrics into a nested
	 * RTA_METRICS attribute built in a temporary kmalloc buffer. */
	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		struct rtattr *rec;
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len  = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* MTU is converted to advertised MSS: minus 40 bytes
			 * of IPv4+TCP header. */
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* irtt is in jiffies scaled by 8 in RTAX_RTT units. */
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}
1153 
1154 #endif
1155 
1156 /*
1157    Update FIB if:
1158    - local address disappeared -> we must delete all the entries
1159      referring to it.
1160    - device went down -> we must shutdown all nexthops going via it.
1161  */
1162 
/*
 * fib_sync_down - mark FIB entries dead after an address or device loss.
 * @local: local (preferred source) address that disappeared, or 0.
 * @dev:   device that went down, or NULL.
 * @force: 0 for a plain link-down; non-zero disables the nexthop scope
 *         exemption, and force > 1 (multipath only) additionally kills the
 *         whole fib_info as soon as any of its nexthops uses @dev.
 *
 * Returns the number of fib_info entries newly marked RTNH_F_DEAD.
 */
int fib_sync_down(u32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	/* NOTE(review): with force set, scope = -1 can never equal an
	 * nh_scope value, so the "nh_scope != scope" test below matches
	 * every nexthop — presumably the intended "ignore scope" semantics;
	 * confirm against callers. */
	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		/* Kill every fib_info whose preferred source address was @local. */
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			/* A fib_info with several nexthops on @dev appears
			 * more than once on this chain; prev_fi skips the
			 * repeats so each fib_info is processed once. */
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			/* change_nexthops() declares its own 'nh' (and
			 * 'nhsel'), shadowing the hash-chain iterator above. */
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					/* Remove this nexthop's share from
					 * the weighted-distribution state. */
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				/* force > 1: the device is going away for
				 * good — declare the whole fib_info dead if
				 * any nexthop rides on it. */
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			/* The fib_info itself is dead only once every one of
			 * its nexthops is dead. */
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}
1231 
1232 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1233 
1234 /*
1235    Dead device goes up. We wake up dead nexthops.
1236    It takes sense only on multipath routes.
1237  */
1238 
/*
 * fib_sync_up - revive dead nexthops riding on @dev after it comes back up.
 *
 * Walks the per-device nexthop hash chain for @dev; every nexthop of a
 * matching fib_info that is dead but whose device is now up (and has IPv4
 * configuration) is resurrected.
 *
 * Returns the number of fib_info entries whose RTNH_F_DEAD flag was
 * cleared.
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	/* Nothing to revive while the device itself is still down. */
	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		/* prev_fi skips fib_infos already handled via an earlier
		 * nexthop of theirs on this same chain. */
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		/* change_nexthops() declares its own 'nh', shadowing the
		 * hash-chain iterator above. */
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			/* Skip nexthops whose own device is absent or down. */
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			/* Only revive nexthops on @dev, and only if @dev has
			 * an IPv4 in_device attached. */
			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			/* Reset the weighted-distribution share under the
			 * multipath lock before clearing the dead flag. */
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		/* One live nexthop is enough to bring the route back. */
		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}
1290 
1291 /*
1292    The algorithm is suboptimal, but it provides really
1293    fair weighted route distribution.
1294  */
1295 
/*
 * fib_select_multipath - choose one live nexthop of a multipath route.
 * @flp: flow being routed (unused here beyond the signature).
 * @res: lookup result; on return res->nh_sel holds the chosen nexthop index.
 *
 * Weighted distribution: each live nexthop carries a "power" budget refilled
 * from its configured weight whenever the route's total power is exhausted;
 * a pseudo-random draw then picks a nexthop proportionally to its remaining
 * power and decrements both budgets.  All state is protected by
 * fib_multipath_lock, which is released on every exit path.
 */
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		/* Budget exhausted: refill every live nexthop's power from
		 * its weight and recompute the total. */
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}


	/* w should be random number [0..fi->fib_power-1],
	   it is pretty bad approximation.
	 */

	w = jiffies % fi->fib_power;

	/* Walk nexthops, subtracting each live one's power from w; the
	 * nexthop that drives w to <= 0 wins and pays one unit of power.
	 * 'nh' and 'nhsel' are declared by change_nexthops(). */
	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
1342 #endif
1343