xref: /linux/net/ipv4/fib_frontend.c (revision eb2bce7f5e7ac1ca6da434461217fadf3c688d2c)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
50 #define FFprint(a...) printk(KERN_DEBUG a)
51 
52 #ifndef CONFIG_IP_MULTIPLE_TABLES
53 
54 struct fib_table *ip_fib_local_table;
55 struct fib_table *ip_fib_main_table;
56 
57 #define FIB_TABLE_HASHSZ 1
58 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59 
60 #else
61 
62 #define FIB_TABLE_HASHSZ 256
63 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64 
65 struct fib_table *fib_new_table(u32 id)
66 {
67 	struct fib_table *tb;
68 	unsigned int h;
69 
70 	if (id == 0)
71 		id = RT_TABLE_MAIN;
72 	tb = fib_get_table(id);
73 	if (tb)
74 		return tb;
75 	tb = fib_hash_init(id);
76 	if (!tb)
77 		return NULL;
78 	h = id & (FIB_TABLE_HASHSZ - 1);
79 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80 	return tb;
81 }
82 
83 struct fib_table *fib_get_table(u32 id)
84 {
85 	struct fib_table *tb;
86 	struct hlist_node *node;
87 	unsigned int h;
88 
89 	if (id == 0)
90 		id = RT_TABLE_MAIN;
91 	h = id & (FIB_TABLE_HASHSZ - 1);
92 	rcu_read_lock();
93 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 		if (tb->tb_id == id) {
95 			rcu_read_unlock();
96 			return tb;
97 		}
98 	}
99 	rcu_read_unlock();
100 	return NULL;
101 }
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
103 
104 static void fib_flush(void)
105 {
106 	int flushed = 0;
107 	struct fib_table *tb;
108 	struct hlist_node *node;
109 	unsigned int h;
110 
111 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 			flushed += tb->tb_flush(tb);
114 	}
115 
116 	if (flushed)
117 		rt_cache_flush(-1);
118 }
119 
120 /*
121  *	Find the first device with a given source address.
122  */
123 
124 struct net_device * ip_dev_find(__be32 addr)
125 {
126 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 	struct fib_result res;
128 	struct net_device *dev = NULL;
129 
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
131 	res.r = NULL;
132 #endif
133 
134 	if (!ip_fib_local_table ||
135 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136 		return NULL;
137 	if (res.type != RTN_LOCAL)
138 		goto out;
139 	dev = FIB_RES_DEV(res);
140 
141 	if (dev)
142 		dev_hold(dev);
143 out:
144 	fib_res_put(&res);
145 	return dev;
146 }
147 
148 unsigned inet_addr_type(__be32 addr)
149 {
150 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 	struct fib_result	res;
152 	unsigned ret = RTN_BROADCAST;
153 
154 	if (ZERONET(addr) || BADCLASS(addr))
155 		return RTN_BROADCAST;
156 	if (MULTICAST(addr))
157 		return RTN_MULTICAST;
158 
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
160 	res.r = NULL;
161 #endif
162 
163 	if (ip_fib_local_table) {
164 		ret = RTN_UNICAST;
165 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166 						   &fl, &res)) {
167 			ret = res.type;
168 			fib_res_put(&res);
169 		}
170 	}
171 	return ret;
172 }
173 
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175    - (main) check, that source is valid i.e. not broadcast or our local
176      address.
177    - figure out what "logical" interface this packet arrived
178      and calculate "specific destination" address.
179    - check, that packet arrived from expected physical interface.
180  */
181 
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
184 {
185 	struct in_device *in_dev;
186 	struct flowi fl = { .nl_u = { .ip4_u =
187 				      { .daddr = src,
188 					.saddr = dst,
189 					.tos = tos } },
190 			    .iif = oif };
191 	struct fib_result res;
192 	int no_addr, rpf;
193 	int ret;
194 
195 	no_addr = rpf = 0;
196 	rcu_read_lock();
197 	in_dev = __in_dev_get_rcu(dev);
198 	if (in_dev) {
199 		no_addr = in_dev->ifa_list == NULL;
200 		rpf = IN_DEV_RPFILTER(in_dev);
201 	}
202 	rcu_read_unlock();
203 
204 	if (in_dev == NULL)
205 		goto e_inval;
206 
207 	if (fib_lookup(&fl, &res))
208 		goto last_resort;
209 	if (res.type != RTN_UNICAST)
210 		goto e_inval_res;
211 	*spec_dst = FIB_RES_PREFSRC(res);
212 	fib_combine_itag(itag, &res);
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215 #else
216 	if (FIB_RES_DEV(res) == dev)
217 #endif
218 	{
219 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220 		fib_res_put(&res);
221 		return ret;
222 	}
223 	fib_res_put(&res);
224 	if (no_addr)
225 		goto last_resort;
226 	if (rpf)
227 		goto e_inval;
228 	fl.oif = dev->ifindex;
229 
230 	ret = 0;
231 	if (fib_lookup(&fl, &res) == 0) {
232 		if (res.type == RTN_UNICAST) {
233 			*spec_dst = FIB_RES_PREFSRC(res);
234 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235 		}
236 		fib_res_put(&res);
237 	}
238 	return ret;
239 
240 last_resort:
241 	if (rpf)
242 		goto e_inval;
243 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244 	*itag = 0;
245 	return 0;
246 
247 e_inval_res:
248 	fib_res_put(&res);
249 e_inval:
250 	return -EINVAL;
251 }
252 
253 #ifndef CONFIG_IP_NOSIOCRT
254 
255 static inline __be32 sk_extract_addr(struct sockaddr *addr)
256 {
257 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
258 }
259 
260 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
261 {
262 	struct nlattr *nla;
263 
264 	nla = (struct nlattr *) ((char *) mx + len);
265 	nla->nla_type = type;
266 	nla->nla_len = nla_attr_size(4);
267 	*(u32 *) nla_data(nla) = value;
268 
269 	return len + nla_total_size(4);
270 }
271 
272 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 				 struct fib_config *cfg)
274 {
275 	__be32 addr;
276 	int plen;
277 
278 	memset(cfg, 0, sizeof(*cfg));
279 
280 	if (rt->rt_dst.sa_family != AF_INET)
281 		return -EAFNOSUPPORT;
282 
283 	/*
284 	 * Check mask for validity:
285 	 * a) it must be contiguous.
286 	 * b) destination must have all host bits clear.
287 	 * c) if application forgot to set correct family (AF_INET),
288 	 *    reject request unless it is absolutely clear i.e.
289 	 *    both family and mask are zero.
290 	 */
291 	plen = 32;
292 	addr = sk_extract_addr(&rt->rt_dst);
293 	if (!(rt->rt_flags & RTF_HOST)) {
294 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
295 
296 		if (rt->rt_genmask.sa_family != AF_INET) {
297 			if (mask || rt->rt_genmask.sa_family)
298 				return -EAFNOSUPPORT;
299 		}
300 
301 		if (bad_mask(mask, addr))
302 			return -EINVAL;
303 
304 		plen = inet_mask_len(mask);
305 	}
306 
307 	cfg->fc_dst_len = plen;
308 	cfg->fc_dst = addr;
309 
310 	if (cmd != SIOCDELRT) {
311 		cfg->fc_nlflags = NLM_F_CREATE;
312 		cfg->fc_protocol = RTPROT_BOOT;
313 	}
314 
315 	if (rt->rt_metric)
316 		cfg->fc_priority = rt->rt_metric - 1;
317 
318 	if (rt->rt_flags & RTF_REJECT) {
319 		cfg->fc_scope = RT_SCOPE_HOST;
320 		cfg->fc_type = RTN_UNREACHABLE;
321 		return 0;
322 	}
323 
324 	cfg->fc_scope = RT_SCOPE_NOWHERE;
325 	cfg->fc_type = RTN_UNICAST;
326 
327 	if (rt->rt_dev) {
328 		char *colon;
329 		struct net_device *dev;
330 		char devname[IFNAMSIZ];
331 
332 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333 			return -EFAULT;
334 
335 		devname[IFNAMSIZ-1] = 0;
336 		colon = strchr(devname, ':');
337 		if (colon)
338 			*colon = 0;
339 		dev = __dev_get_by_name(devname);
340 		if (!dev)
341 			return -ENODEV;
342 		cfg->fc_oif = dev->ifindex;
343 		if (colon) {
344 			struct in_ifaddr *ifa;
345 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
346 			if (!in_dev)
347 				return -ENODEV;
348 			*colon = ':';
349 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 				if (strcmp(ifa->ifa_label, devname) == 0)
351 					break;
352 			if (ifa == NULL)
353 				return -ENODEV;
354 			cfg->fc_prefsrc = ifa->ifa_local;
355 		}
356 	}
357 
358 	addr = sk_extract_addr(&rt->rt_gateway);
359 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
360 		cfg->fc_gw = addr;
361 		if (rt->rt_flags & RTF_GATEWAY &&
362 		    inet_addr_type(addr) == RTN_UNICAST)
363 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
364 	}
365 
366 	if (cmd == SIOCDELRT)
367 		return 0;
368 
369 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370 		return -EINVAL;
371 
372 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 		cfg->fc_scope = RT_SCOPE_LINK;
374 
375 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
376 		struct nlattr *mx;
377 		int len = 0;
378 
379 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
380 		if (mx == NULL)
381 			return -ENOMEM;
382 
383 		if (rt->rt_flags & RTF_MTU)
384 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
385 
386 		if (rt->rt_flags & RTF_WINDOW)
387 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
388 
389 		if (rt->rt_flags & RTF_IRTT)
390 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391 
392 		cfg->fc_mx = mx;
393 		cfg->fc_mx_len = len;
394 	}
395 
396 	return 0;
397 }
398 
399 /*
400  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
401  */
402 
403 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
404 {
405 	struct fib_config cfg;
406 	struct rtentry rt;
407 	int err;
408 
409 	switch (cmd) {
410 	case SIOCADDRT:		/* Add a route */
411 	case SIOCDELRT:		/* Delete a route */
412 		if (!capable(CAP_NET_ADMIN))
413 			return -EPERM;
414 
415 		if (copy_from_user(&rt, arg, sizeof(rt)))
416 			return -EFAULT;
417 
418 		rtnl_lock();
419 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
420 		if (err == 0) {
421 			struct fib_table *tb;
422 
423 			if (cmd == SIOCDELRT) {
424 				tb = fib_get_table(cfg.fc_table);
425 				if (tb)
426 					err = tb->tb_delete(tb, &cfg);
427 				else
428 					err = -ESRCH;
429 			} else {
430 				tb = fib_new_table(cfg.fc_table);
431 				if (tb)
432 					err = tb->tb_insert(tb, &cfg);
433 				else
434 					err = -ENOBUFS;
435 			}
436 
437 			/* allocated by rtentry_to_fib_config() */
438 			kfree(cfg.fc_mx);
439 		}
440 		rtnl_unlock();
441 		return err;
442 	}
443 	return -EINVAL;
444 }
445 
446 #else
447 
/*
 * Routing ioctls are compiled out (CONFIG_IP_NOSIOCRT): reject every
 * command with -EINVAL.
 *
 * @arg carries the same __user annotation as the full implementation so
 * that both variants present an identical prototype to callers and to
 * address-space checkers; it is never dereferenced here.
 */
int ip_rt_ioctl(unsigned int cmd, void __user *arg)
{
	return -EINVAL;
}
452 
453 #endif
454 
455 struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
456 	[RTA_DST]		= { .type = NLA_U32 },
457 	[RTA_SRC]		= { .type = NLA_U32 },
458 	[RTA_IIF]		= { .type = NLA_U32 },
459 	[RTA_OIF]		= { .type = NLA_U32 },
460 	[RTA_GATEWAY]		= { .type = NLA_U32 },
461 	[RTA_PRIORITY]		= { .type = NLA_U32 },
462 	[RTA_PREFSRC]		= { .type = NLA_U32 },
463 	[RTA_METRICS]		= { .type = NLA_NESTED },
464 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466 	[RTA_FLOW]		= { .type = NLA_U32 },
467 	[RTA_MP_ALGO]		= { .type = NLA_U32 },
468 };
469 
470 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
471 			     struct fib_config *cfg)
472 {
473 	struct nlattr *attr;
474 	int err, remaining;
475 	struct rtmsg *rtm;
476 
477 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
478 	if (err < 0)
479 		goto errout;
480 
481 	memset(cfg, 0, sizeof(*cfg));
482 
483 	rtm = nlmsg_data(nlh);
484 	cfg->fc_dst_len = rtm->rtm_dst_len;
485 	cfg->fc_tos = rtm->rtm_tos;
486 	cfg->fc_table = rtm->rtm_table;
487 	cfg->fc_protocol = rtm->rtm_protocol;
488 	cfg->fc_scope = rtm->rtm_scope;
489 	cfg->fc_type = rtm->rtm_type;
490 	cfg->fc_flags = rtm->rtm_flags;
491 	cfg->fc_nlflags = nlh->nlmsg_flags;
492 
493 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
494 	cfg->fc_nlinfo.nlh = nlh;
495 
496 	if (cfg->fc_type > RTN_MAX) {
497 		err = -EINVAL;
498 		goto errout;
499 	}
500 
501 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
502 		switch (attr->nla_type) {
503 		case RTA_DST:
504 			cfg->fc_dst = nla_get_be32(attr);
505 			break;
506 		case RTA_OIF:
507 			cfg->fc_oif = nla_get_u32(attr);
508 			break;
509 		case RTA_GATEWAY:
510 			cfg->fc_gw = nla_get_be32(attr);
511 			break;
512 		case RTA_PRIORITY:
513 			cfg->fc_priority = nla_get_u32(attr);
514 			break;
515 		case RTA_PREFSRC:
516 			cfg->fc_prefsrc = nla_get_be32(attr);
517 			break;
518 		case RTA_METRICS:
519 			cfg->fc_mx = nla_data(attr);
520 			cfg->fc_mx_len = nla_len(attr);
521 			break;
522 		case RTA_MULTIPATH:
523 			cfg->fc_mp = nla_data(attr);
524 			cfg->fc_mp_len = nla_len(attr);
525 			break;
526 		case RTA_FLOW:
527 			cfg->fc_flow = nla_get_u32(attr);
528 			break;
529 		case RTA_MP_ALGO:
530 			cfg->fc_mp_alg = nla_get_u32(attr);
531 			break;
532 		case RTA_TABLE:
533 			cfg->fc_table = nla_get_u32(attr);
534 			break;
535 		}
536 	}
537 
538 	return 0;
539 errout:
540 	return err;
541 }
542 
543 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
544 {
545 	struct fib_config cfg;
546 	struct fib_table *tb;
547 	int err;
548 
549 	err = rtm_to_fib_config(skb, nlh, &cfg);
550 	if (err < 0)
551 		goto errout;
552 
553 	tb = fib_get_table(cfg.fc_table);
554 	if (tb == NULL) {
555 		err = -ESRCH;
556 		goto errout;
557 	}
558 
559 	err = tb->tb_delete(tb, &cfg);
560 errout:
561 	return err;
562 }
563 
564 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
565 {
566 	struct fib_config cfg;
567 	struct fib_table *tb;
568 	int err;
569 
570 	err = rtm_to_fib_config(skb, nlh, &cfg);
571 	if (err < 0)
572 		goto errout;
573 
574 	tb = fib_new_table(cfg.fc_table);
575 	if (tb == NULL) {
576 		err = -ENOBUFS;
577 		goto errout;
578 	}
579 
580 	err = tb->tb_insert(tb, &cfg);
581 errout:
582 	return err;
583 }
584 
585 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
586 {
587 	unsigned int h, s_h;
588 	unsigned int e = 0, s_e;
589 	struct fib_table *tb;
590 	struct hlist_node *node;
591 	int dumped = 0;
592 
593 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
594 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
595 		return ip_rt_dump(skb, cb);
596 
597 	s_h = cb->args[0];
598 	s_e = cb->args[1];
599 
600 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
601 		e = 0;
602 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
603 			if (e < s_e)
604 				goto next;
605 			if (dumped)
606 				memset(&cb->args[2], 0, sizeof(cb->args) -
607 						 2 * sizeof(cb->args[0]));
608 			if (tb->tb_dump(tb, skb, cb) < 0)
609 				goto out;
610 			dumped = 1;
611 next:
612 			e++;
613 		}
614 	}
615 out:
616 	cb->args[1] = e;
617 	cb->args[0] = h;
618 
619 	return skb->len;
620 }
621 
622 /* Prepare and feed intra-kernel routing request.
623    Really, it should be netlink message, but :-( netlink
624    can be not configured, so that we feed it directly
625    to fib engine. It is legal, because all events occur
626    only when netlink is already locked.
627  */
628 
629 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
630 {
631 	struct fib_table *tb;
632 	struct fib_config cfg = {
633 		.fc_protocol = RTPROT_KERNEL,
634 		.fc_type = type,
635 		.fc_dst = dst,
636 		.fc_dst_len = dst_len,
637 		.fc_prefsrc = ifa->ifa_local,
638 		.fc_oif = ifa->ifa_dev->dev->ifindex,
639 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
640 	};
641 
642 	if (type == RTN_UNICAST)
643 		tb = fib_new_table(RT_TABLE_MAIN);
644 	else
645 		tb = fib_new_table(RT_TABLE_LOCAL);
646 
647 	if (tb == NULL)
648 		return;
649 
650 	cfg.fc_table = tb->tb_id;
651 
652 	if (type != RTN_LOCAL)
653 		cfg.fc_scope = RT_SCOPE_LINK;
654 	else
655 		cfg.fc_scope = RT_SCOPE_HOST;
656 
657 	if (cmd == RTM_NEWROUTE)
658 		tb->tb_insert(tb, &cfg);
659 	else
660 		tb->tb_delete(tb, &cfg);
661 }
662 
663 void fib_add_ifaddr(struct in_ifaddr *ifa)
664 {
665 	struct in_device *in_dev = ifa->ifa_dev;
666 	struct net_device *dev = in_dev->dev;
667 	struct in_ifaddr *prim = ifa;
668 	__be32 mask = ifa->ifa_mask;
669 	__be32 addr = ifa->ifa_local;
670 	__be32 prefix = ifa->ifa_address&mask;
671 
672 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
673 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
674 		if (prim == NULL) {
675 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
676 			return;
677 		}
678 	}
679 
680 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
681 
682 	if (!(dev->flags&IFF_UP))
683 		return;
684 
685 	/* Add broadcast address, if it is explicitly assigned. */
686 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
687 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
688 
689 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
690 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
691 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
692 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
693 
694 		/* Add network specific broadcasts, when it takes a sense */
695 		if (ifa->ifa_prefixlen < 31) {
696 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
697 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
698 		}
699 	}
700 }
701 
702 static void fib_del_ifaddr(struct in_ifaddr *ifa)
703 {
704 	struct in_device *in_dev = ifa->ifa_dev;
705 	struct net_device *dev = in_dev->dev;
706 	struct in_ifaddr *ifa1;
707 	struct in_ifaddr *prim = ifa;
708 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
709 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
710 #define LOCAL_OK	1
711 #define BRD_OK		2
712 #define BRD0_OK		4
713 #define BRD1_OK		8
714 	unsigned ok = 0;
715 
716 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
717 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
718 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
719 	else {
720 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
721 		if (prim == NULL) {
722 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
723 			return;
724 		}
725 	}
726 
727 	/* Deletion is more complicated than add.
728 	   We should take care of not to delete too much :-)
729 
730 	   Scan address list to be sure that addresses are really gone.
731 	 */
732 
733 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
734 		if (ifa->ifa_local == ifa1->ifa_local)
735 			ok |= LOCAL_OK;
736 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
737 			ok |= BRD_OK;
738 		if (brd == ifa1->ifa_broadcast)
739 			ok |= BRD1_OK;
740 		if (any == ifa1->ifa_broadcast)
741 			ok |= BRD0_OK;
742 	}
743 
744 	if (!(ok&BRD_OK))
745 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
746 	if (!(ok&BRD1_OK))
747 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
748 	if (!(ok&BRD0_OK))
749 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
750 	if (!(ok&LOCAL_OK)) {
751 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
752 
753 		/* Check, that this local address finally disappeared. */
754 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
755 			/* And the last, but not the least thing.
756 			   We must flush stray FIB entries.
757 
758 			   First of all, we scan fib_info list searching
759 			   for stray nexthop entries, then ignite fib_flush.
760 			*/
761 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
762 				fib_flush();
763 		}
764 	}
765 #undef LOCAL_OK
766 #undef BRD_OK
767 #undef BRD0_OK
768 #undef BRD1_OK
769 }
770 
771 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
772 {
773 
774 	struct fib_result       res;
775 	struct flowi            fl = { .mark = frn->fl_mark,
776 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
777 							    .tos = frn->fl_tos,
778 							    .scope = frn->fl_scope } } };
779 
780 #ifdef CONFIG_IP_MULTIPLE_TABLES
781 	res.r = NULL;
782 #endif
783 
784 	frn->err = -ENOENT;
785 	if (tb) {
786 		local_bh_disable();
787 
788 		frn->tb_id = tb->tb_id;
789 		frn->err = tb->tb_lookup(tb, &fl, &res);
790 
791 		if (!frn->err) {
792 			frn->prefixlen = res.prefixlen;
793 			frn->nh_sel = res.nh_sel;
794 			frn->type = res.type;
795 			frn->scope = res.scope;
796 			fib_res_put(&res);
797 		}
798 		local_bh_enable();
799 	}
800 }
801 
802 static void nl_fib_input(struct sock *sk, int len)
803 {
804 	struct sk_buff *skb = NULL;
805 	struct nlmsghdr *nlh = NULL;
806 	struct fib_result_nl *frn;
807 	u32 pid;
808 	struct fib_table *tb;
809 
810 	skb = skb_dequeue(&sk->sk_receive_queue);
811 	if (skb == NULL)
812 		return;
813 
814 	nlh = nlmsg_hdr(skb);
815 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
816 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
817 		kfree_skb(skb);
818 		return;
819 	}
820 
821 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
822 	tb = fib_get_table(frn->tb_id_in);
823 
824 	nl_fib_lookup(frn, tb);
825 
826 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
827 	NETLINK_CB(skb).pid = 0;         /* from kernel */
828 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
829 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
830 }
831 
832 static void nl_fib_lookup_init(void)
833 {
834       netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
835       			    THIS_MODULE);
836 }
837 
/*
 * Take IPv4 routing down on @dev: run fib_sync_down() for the device
 * (flushing the tables when it returns non-zero), flush the routing cache
 * and call arp_ifdown().  @force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
845 
846 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
847 {
848 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
849 
850 	switch (event) {
851 	case NETDEV_UP:
852 		fib_add_ifaddr(ifa);
853 #ifdef CONFIG_IP_ROUTE_MULTIPATH
854 		fib_sync_up(ifa->ifa_dev->dev);
855 #endif
856 		rt_cache_flush(-1);
857 		break;
858 	case NETDEV_DOWN:
859 		fib_del_ifaddr(ifa);
860 		if (ifa->ifa_dev->ifa_list == NULL) {
861 			/* Last address was deleted from this interface.
862 			   Disable IP.
863 			 */
864 			fib_disable_ip(ifa->ifa_dev->dev, 1);
865 		} else {
866 			rt_cache_flush(-1);
867 		}
868 		break;
869 	}
870 	return NOTIFY_DONE;
871 }
872 
873 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
874 {
875 	struct net_device *dev = ptr;
876 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
877 
878 	if (event == NETDEV_UNREGISTER) {
879 		fib_disable_ip(dev, 2);
880 		return NOTIFY_DONE;
881 	}
882 
883 	if (!in_dev)
884 		return NOTIFY_DONE;
885 
886 	switch (event) {
887 	case NETDEV_UP:
888 		for_ifa(in_dev) {
889 			fib_add_ifaddr(ifa);
890 		} endfor_ifa(in_dev);
891 #ifdef CONFIG_IP_ROUTE_MULTIPATH
892 		fib_sync_up(dev);
893 #endif
894 		rt_cache_flush(-1);
895 		break;
896 	case NETDEV_DOWN:
897 		fib_disable_ip(dev, 0);
898 		break;
899 	case NETDEV_CHANGEMTU:
900 	case NETDEV_CHANGE:
901 		rt_cache_flush(0);
902 		break;
903 	}
904 	return NOTIFY_DONE;
905 }
906 
907 static struct notifier_block fib_inetaddr_notifier = {
908 	.notifier_call =fib_inetaddr_event,
909 };
910 
911 static struct notifier_block fib_netdev_notifier = {
912 	.notifier_call =fib_netdev_event,
913 };
914 
915 void __init ip_fib_init(void)
916 {
917 	unsigned int i;
918 
919 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
920 		INIT_HLIST_HEAD(&fib_table_hash[i]);
921 #ifndef CONFIG_IP_MULTIPLE_TABLES
922 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
923 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
924 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
925 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
926 #else
927 	fib4_rules_init();
928 #endif
929 
930 	register_netdevice_notifier(&fib_netdev_notifier);
931 	register_inetaddr_notifier(&fib_inetaddr_notifier);
932 	nl_fib_lookup_init();
933 
934 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
935 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
936 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
937 }
938 
/* Symbols exported for use by modules. */
EXPORT_SYMBOL(ip_dev_find);
EXPORT_SYMBOL(inet_addr_type);
941