/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip_mp_alg.h>

#include "fib_lookup.h"

#define FSprintk(a...)

static DEFINE_RWLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
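
/* Nexthops are additionally chained by the ifindex of their device, so that
 * ip_fib_check_default() and fib_sync_down()/fib_sync_up() can quickly find
 * every nexthop that goes through a given device. */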

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hopefully gcc optimizes away this dummy one-pass loop. */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
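
/*
 * A minimal usage sketch of the iterator pair (the macros open a scope that
 * endfor_nexthops() closes, so the two must always appear together; "nh" and
 * "nhsel" are the names the macros introduce, and do_something() is only a
 * placeholder for illustration):
 *
 *	for_nexthops(fi) {
 *		if (nh->nh_flags & RTNH_F_DEAD)
 *			continue;
 *		do_something(nh, nhsel);
 *	} endfor_nexthops(fi);
 */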


static const struct
{
	int	error;
	u8	scope;
} fib_props[RTN_MAX + 1] = {
	{
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_UNSPEC */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNICAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},	/* RTN_LOCAL */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_BROADCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_ANYCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_MULTICAST */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_BLACKHOLE */
	{
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNREACHABLE */
	{
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_PROHIBIT */
	{
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_THROW */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_NAT */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_XRESOLVE */
};
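
/*
 * fib_props is indexed by route type (RTN_*): fib_create_info() checks
 * r->rtm_scope against the scope column, and fib_semantic_match() returns
 * the error column for error routes, e.g. a lookup that hits an
 * RTN_UNREACHABLE route fails with -EHOSTUNREACH.
 */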


/* Release a nexthop info record */

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
		return;
	}
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}

void fib_release_info(struct fib_info *fi)
{
	write_lock(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	write_unlock(&fib_info_lock);
}

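/* Compare the nexthop lists of two fib_infos; 0 means they are equivalent.
 * RTNH_F_DEAD is deliberately ignored, since liveness is transient state. */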
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw  != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}

static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= fi->fib_protocol;
	val ^= fi->fib_prefsrc;
	val ^= fi->fib_priority;

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}

static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}

static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}

/* Check that the gateway is already configured.
   Used only by the redirect acceptance routine.
 */

int ip_fib_check_default(u32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	read_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags&RTNH_F_DEAD)) {
			read_unlock(&fib_info_lock);
			return 0;
		}
	}

	read_unlock(&fib_info_lock);

	return -1;
}

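/* Notify listeners on the RTNLGRP_IPV4_ROUTE group about a route change;
 * if the request carried NLM_F_ECHO, a copy also goes back to the sender
 * (the extra skb->users reference keeps the skb alive across both sends). */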
void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
	       int z, int tb_id,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	u32 pid = req ? req->pid : n->nlmsg_pid;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
			  fa->fa_type, fa->fa_scope, &key, z,
			  fa->fa_tos,
			  fa->fa_info, 0) < 0) {
		kfree_skb(skb);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
	if (n->nlmsg_flags&NLM_F_ECHO)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
	if (n->nlmsg_flags&NLM_F_ECHO)
		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
}

/* Return the first fib alias whose TOS equals the given tos and whose
 * priority is greater than or equal to prio, or, failing that, the first
 * alias with a lower TOS; this is the match/insertion point for
 * (tos, prio) in the list.
 */
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
{
	if (fah) {
		struct fib_alias *fa;
		list_for_each_entry(fa, fah, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}

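/* Judge whether the route through fi looks dead, based on the ARP/NUD state
 * of its first gateway: 0 means the gateway still looks usable, 1 means it
 * does not, in which case the best fallback candidate seen so far is
 * remembered in *last_resort / *last_idx. */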
int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int *dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state==NUD_REACHABLE)
		return 0;
	if ((state&NUD_VALID) && order != *dflt)
		return 0;
	if ((state&NUD_VALID) ||
	    (*last_idx<0 && order > *dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
{
	while (RTA_OK(attr,attrlen)) {
		if (attr->rta_type == type)
			return *(u32*)RTA_DATA(attr);
		attr = RTA_NEXT(attr, attrlen);
	}
	return 0;
}

static int
fib_count_nexthops(struct rtattr *rta)
{
	int nhs = 0;
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	while (nhlen >= (int)sizeof(struct rtnexthop)) {
		if ((nhlen -= nhp->rtnh_len) < 0)
			return 0;
		nhs++;
		nhp = RTNH_NEXT(nhp);
	}
	return nhs;
}
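
/*
 * The RTA_MULTIPATH payload walked above (and in fib_get_nhs() below) is a
 * sequence of rtnexthop headers, each optionally followed by its own nested
 * attributes such as RTA_GATEWAY; rtnh_len covers the header plus those
 * attributes, so RTNH_NEXT() hops over both at once.
 */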

static int
fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
{
	struct rtnexthop *nhp = RTA_DATA(rta);
	int nhlen = RTA_PAYLOAD(rta);

	change_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
		nh->nh_oif = nhp->rtnh_ifindex;
		nh->nh_weight = nhp->rtnh_hops + 1;
		if (attrlen) {
			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
#ifdef CONFIG_NET_CLS_ROUTE
			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
	return 0;
}

#endif

int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
		 struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *nhp;
	int nhlen;
#endif

	if (rta->rta_priority &&
	    *rta->rta_priority != fi->fib_priority)
		return 1;

	if (rta->rta_oif || rta->rta_gw) {
		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp == NULL)
		return 0;
	nhp = RTA_DATA(rta->rta_mp);
	nhlen = RTA_PAYLOAD(rta->rta_mp);

	for_nexthops(fi) {
		int attrlen = nhlen - sizeof(struct rtnexthop);
		u32 gw;

		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
			return -EINVAL;
		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
			return 1;
		if (attrlen) {
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
			if (gw && gw != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
			if (gw && gw != nh->nh_tclassid)
				return 1;
#endif
		}
		nhp = RTNH_NEXT(nhp);
	} endfor_nexthops(fi);
#endif
	return 0;
}


/*
   Picture
   -------

   Nexthop semantics are messy for historical reasons. We have to take
   into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr but by a direct route.
   c) if both a gateway and an interface are specified, they must not
      contradict each other.
   d) with tunnel routes, the gateway may not be on-link.

   Attempting to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the code barely grows in
   size but becomes much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where the gw must have a narrower scope. This recursion stops
   when the gw has LOCAL scope or when the "nexthop" is declared ONLINK,
   which forces the gw to be on-link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes to
   coexist in peace.

   Normally it looks like the following:

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
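
/*
 * A worked example (addresses are illustrative only): for a route like
 * "default via 192.0.2.1 dev eth0", fib_check_nh() below looks up 192.0.2.1
 * with the lookup scope forced to at least link; that lookup matches eth0's
 * link-scope connected prefix, whose own nexthop was given host scope when
 * it was created, so the recursion terminates just as in the picture above.
 */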

static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			struct net_device *dev;

			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			struct flowi fl = { .nl_u = { .ip4_u =
						      { .daddr = nh->nh_gw,
							.scope = r->rtm_scope + 1 } },
					    .oif = nh->nh_oif };

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}

static inline unsigned int fib_laddr_hashfn(u32 val)
{
	unsigned int mask = (fib_hash_size - 1);

	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
}

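/* Hash tables that fit in a page come from kmalloc(); anything larger is
 * taken straight from the page allocator, and fib_hash_free() below picks
 * the matching release path. */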
static struct hlist_head *fib_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kmalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(bytes));
}

static void fib_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}

static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	write_lock(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	write_unlock(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}

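/*
 * Build a fib_info from an RTM_NEWROUTE request: grow the hash tables if
 * needed, fill in and validate the nexthops against the route scope, then
 * either link the new fib_info into the hashes or, when fib_find_info()
 * locates an equivalent one, drop the copy and reuse that one instead.
 */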
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
		const struct nlmsghdr *nlh, int *errp)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int nhs = 1;
#else
	const int nhs = 1;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	u32 mp_alg = IP_MP_ALG_NONE;
#endif

	/* A fast check to catch the weirdest cases */
	if (fib_props[r->rtm_type].scope > r->rtm_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp) {
		nhs = fib_count_nexthops(rta->rta_mp);
		if (nhs == 0)
			goto err_inval;
	}
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (rta->rta_mp_alg) {
		mp_alg = *rta->rta_mp_alg;

		if (mp_alg < IP_MP_ALG_NONE ||
		    mp_alg > IP_MP_ALG_MAX)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else {
			memset(new_info_hash, 0, bytes);
			memset(new_laddrhash, 0, bytes);

			fib_hash_move(new_info_hash, new_laddrhash, new_size);
		}

		if (!fib_hash_size)
			goto failure;
	}

	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;
	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));

	fi->fib_protocol = r->rtm_protocol;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	fi->fib_flags = r->rtm_flags;
	if (rta->rta_priority)
		fi->fib_priority = *rta->rta_priority;
	if (rta->rta_mx) {
		int attrlen = RTA_PAYLOAD(rta->rta_mx);
		struct rtattr *attr = RTA_DATA(rta->rta_mx);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}
	if (rta->rta_prefsrc)
		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);

	if (rta->rta_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
			goto failure;
		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
			goto err_inval;
		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;
		if (rta->rta_oif)
			nh->nh_oif = *rta->rta_oif;
		if (rta->rta_gw)
			memcpy(&nh->nh_gw, rta->rta_gw, 4);
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow)
			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
#endif
		nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	fi->fib_mp_alg = mp_alg;
#endif

	if (fib_props[r->rtm_type].error) {
		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
			goto err_inval;
		goto link_it;
	}

	if (r->rtm_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (r->rtm_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* A local address is being added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(r, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	write_lock(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	write_unlock(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	*errp = err;
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}
	return NULL;
}

/* Note! fib_semantic_match intentionally uses RCU list functions. */
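/* Returns 0 with *res filled in on a match, 1 when nothing matched, and a
 * negative error when an error route matched (blackhole, prohibit, ...). */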
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __u32 zone, __u32 mask,
		       int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			}
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	res->netmask = mask;
	res->network = zone &
		(0xFFFFFFFF >> (32 - prefixlen));
#endif
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}

/* Find the appropriate source address for this destination */

u32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}

int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#ifndef CONFIG_IP_NOSIOCRT

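/*
 * Translate a legacy SIOCADDRT/SIOCDELRT rtentry from the ioctl interface
 * into the rtmsg/kern_rta form used by the netlink code, so that the ioctl
 * path can reuse the same backend.
 */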
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int    plen;
	u32    *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check the mask for validity:
	   a) it must be contiguous.
	   b) the destination must have all host bits clear.
	   c) if the application forgot to set the correct family (AF_INET),
	      reject the request unless it is absolutely unambiguous, i.e.
	      both the family and the mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = 0;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char   devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		struct rtattr *rec;
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len  = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}

#endif

/*
   Update the FIB if:
   - a local address disappeared -> we must delete all the entries
     referring to it.
   - a device went down -> we must shut down all nexthops going via it.
 */

int fib_sync_down(u32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			change_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
   A dead device has come back up: wake up its dead nexthops.
   This only makes sense for multipath routes.
 */

int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags&IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
				continue;
			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}

/*
   The algorithm is suboptimal, but it provides a really
   fair weighted route distribution.
 */
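
/* Each live nexthop starts with nh_weight "tokens" in nh_power, and
 * fi->fib_power holds their sum; every selection below spends one token
 * from the chosen hop, so across one full cycle of fib_power selections
 * each hop is used exactly nh_weight times. */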

void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags&RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: the route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/* w should be a random number in [0..fi->fib_power-1];
	   jiffies is a pretty bad approximation of one.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: the route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif