xref: /linux/net/ipv4/fib_frontend.c (revision 606d099cdd1080bbb50ea50dc52d98252f8f10a1)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/sched.h>
26 #include <linux/mm.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/sockios.h>
30 #include <linux/errno.h>
31 #include <linux/in.h>
32 #include <linux/inet.h>
33 #include <linux/inetdevice.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_addr.h>
36 #include <linux/if_arp.h>
37 #include <linux/skbuff.h>
38 #include <linux/netlink.h>
39 #include <linux/init.h>
40 #include <linux/list.h>
41 
42 #include <net/ip.h>
43 #include <net/protocol.h>
44 #include <net/route.h>
45 #include <net/tcp.h>
46 #include <net/sock.h>
47 #include <net/icmp.h>
48 #include <net/arp.h>
49 #include <net/ip_fib.h>
50 
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
51 #define FFprint(a...) printk(KERN_DEBUG a)
52 
53 #ifndef CONFIG_IP_MULTIPLE_TABLES
54 
/*
 * Without multiple-table support only these two tables exist; both are
 * created by ip_fib_init() and linked into the single hash bucket.
 */
55 struct fib_table *ip_fib_local_table;
56 struct fib_table *ip_fib_main_table;
57 
/* One bucket is enough: every table id hashes to index 0. */
58 #define FIB_TABLE_HASHSZ 1
59 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
60 
61 #else
62 
/*
 * Tables are hashed by (id & (FIB_TABLE_HASHSZ - 1)), see fib_new_table()
 * and fib_get_table().
 */
63 #define FIB_TABLE_HASHSZ 256
64 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
65 
66 struct fib_table *fib_new_table(u32 id)
67 {
68 	struct fib_table *tb;
69 	unsigned int h;
70 
71 	if (id == 0)
72 		id = RT_TABLE_MAIN;
73 	tb = fib_get_table(id);
74 	if (tb)
75 		return tb;
76 	tb = fib_hash_init(id);
77 	if (!tb)
78 		return NULL;
79 	h = id & (FIB_TABLE_HASHSZ - 1);
80 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
81 	return tb;
82 }
83 
84 struct fib_table *fib_get_table(u32 id)
85 {
86 	struct fib_table *tb;
87 	struct hlist_node *node;
88 	unsigned int h;
89 
90 	if (id == 0)
91 		id = RT_TABLE_MAIN;
92 	h = id & (FIB_TABLE_HASHSZ - 1);
93 	rcu_read_lock();
94 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
95 		if (tb->tb_id == id) {
96 			rcu_read_unlock();
97 			return tb;
98 		}
99 	}
100 	rcu_read_unlock();
101 	return NULL;
102 }
103 #endif /* CONFIG_IP_MULTIPLE_TABLES */
104 
105 static void fib_flush(void)
106 {
107 	int flushed = 0;
108 	struct fib_table *tb;
109 	struct hlist_node *node;
110 	unsigned int h;
111 
112 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
113 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
114 			flushed += tb->tb_flush(tb);
115 	}
116 
117 	if (flushed)
118 		rt_cache_flush(-1);
119 }
120 
121 /*
122  *	Find the first device with a given source address.
123  */
124 
125 struct net_device * ip_dev_find(__be32 addr)
126 {
127 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
128 	struct fib_result res;
129 	struct net_device *dev = NULL;
130 
131 #ifdef CONFIG_IP_MULTIPLE_TABLES
132 	res.r = NULL;
133 #endif
134 
135 	if (!ip_fib_local_table ||
136 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
137 		return NULL;
138 	if (res.type != RTN_LOCAL)
139 		goto out;
140 	dev = FIB_RES_DEV(res);
141 
142 	if (dev)
143 		dev_hold(dev);
144 out:
145 	fib_res_put(&res);
146 	return dev;
147 }
148 
149 unsigned inet_addr_type(__be32 addr)
150 {
151 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
152 	struct fib_result	res;
153 	unsigned ret = RTN_BROADCAST;
154 
155 	if (ZERONET(addr) || BADCLASS(addr))
156 		return RTN_BROADCAST;
157 	if (MULTICAST(addr))
158 		return RTN_MULTICAST;
159 
160 #ifdef CONFIG_IP_MULTIPLE_TABLES
161 	res.r = NULL;
162 #endif
163 
164 	if (ip_fib_local_table) {
165 		ret = RTN_UNICAST;
166 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
167 						   &fl, &res)) {
168 			ret = res.type;
169 			fib_res_put(&res);
170 		}
171 	}
172 	return ret;
173 }
174 
175 /* Given (packet source, input interface) and optional (dst, oif, tos):
176    - (main) check, that source is valid i.e. not broadcast or our local
177      address.
178    - figure out what "logical" interface this packet arrived
179      and calculate "specific destination" address.
180    - check, that packet arrived from expected physical interface.
181  */
182 
183 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
184 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
185 {
186 	struct in_device *in_dev;
187 	struct flowi fl = { .nl_u = { .ip4_u =
188 				      { .daddr = src,
189 					.saddr = dst,
190 					.tos = tos } },
191 			    .iif = oif };
192 	struct fib_result res;
193 	int no_addr, rpf;
194 	int ret;
195 
196 	no_addr = rpf = 0;
197 	rcu_read_lock();
198 	in_dev = __in_dev_get_rcu(dev);
199 	if (in_dev) {
200 		no_addr = in_dev->ifa_list == NULL;
201 		rpf = IN_DEV_RPFILTER(in_dev);
202 	}
203 	rcu_read_unlock();
204 
205 	if (in_dev == NULL)
206 		goto e_inval;
207 
208 	if (fib_lookup(&fl, &res))
209 		goto last_resort;
210 	if (res.type != RTN_UNICAST)
211 		goto e_inval_res;
212 	*spec_dst = FIB_RES_PREFSRC(res);
213 	fib_combine_itag(itag, &res);
214 #ifdef CONFIG_IP_ROUTE_MULTIPATH
215 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
216 #else
217 	if (FIB_RES_DEV(res) == dev)
218 #endif
219 	{
220 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
221 		fib_res_put(&res);
222 		return ret;
223 	}
224 	fib_res_put(&res);
225 	if (no_addr)
226 		goto last_resort;
227 	if (rpf)
228 		goto e_inval;
229 	fl.oif = dev->ifindex;
230 
231 	ret = 0;
232 	if (fib_lookup(&fl, &res) == 0) {
233 		if (res.type == RTN_UNICAST) {
234 			*spec_dst = FIB_RES_PREFSRC(res);
235 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
236 		}
237 		fib_res_put(&res);
238 	}
239 	return ret;
240 
241 last_resort:
242 	if (rpf)
243 		goto e_inval;
244 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
245 	*itag = 0;
246 	return 0;
247 
248 e_inval_res:
249 	fib_res_put(&res);
250 e_inval:
251 	return -EINVAL;
252 }
253 
254 #ifndef CONFIG_IP_NOSIOCRT
255 
256 static inline __be32 sk_extract_addr(struct sockaddr *addr)
257 {
258 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
259 }
260 
261 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262 {
263 	struct nlattr *nla;
264 
265 	nla = (struct nlattr *) ((char *) mx + len);
266 	nla->nla_type = type;
267 	nla->nla_len = nla_attr_size(4);
268 	*(u32 *) nla_data(nla) = value;
269 
270 	return len + nla_total_size(4);
271 }
272 
273 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
274 				 struct fib_config *cfg)
275 {
276 	__be32 addr;
277 	int plen;
278 
279 	memset(cfg, 0, sizeof(*cfg));
280 
281 	if (rt->rt_dst.sa_family != AF_INET)
282 		return -EAFNOSUPPORT;
283 
284 	/*
285 	 * Check mask for validity:
286 	 * a) it must be contiguous.
287 	 * b) destination must have all host bits clear.
288 	 * c) if application forgot to set correct family (AF_INET),
289 	 *    reject request unless it is absolutely clear i.e.
290 	 *    both family and mask are zero.
291 	 */
292 	plen = 32;
293 	addr = sk_extract_addr(&rt->rt_dst);
294 	if (!(rt->rt_flags & RTF_HOST)) {
295 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
296 
297 		if (rt->rt_genmask.sa_family != AF_INET) {
298 			if (mask || rt->rt_genmask.sa_family)
299 				return -EAFNOSUPPORT;
300 		}
301 
302 		if (bad_mask(mask, addr))
303 			return -EINVAL;
304 
305 		plen = inet_mask_len(mask);
306 	}
307 
308 	cfg->fc_dst_len = plen;
309 	cfg->fc_dst = addr;
310 
311 	if (cmd != SIOCDELRT) {
312 		cfg->fc_nlflags = NLM_F_CREATE;
313 		cfg->fc_protocol = RTPROT_BOOT;
314 	}
315 
316 	if (rt->rt_metric)
317 		cfg->fc_priority = rt->rt_metric - 1;
318 
319 	if (rt->rt_flags & RTF_REJECT) {
320 		cfg->fc_scope = RT_SCOPE_HOST;
321 		cfg->fc_type = RTN_UNREACHABLE;
322 		return 0;
323 	}
324 
325 	cfg->fc_scope = RT_SCOPE_NOWHERE;
326 	cfg->fc_type = RTN_UNICAST;
327 
328 	if (rt->rt_dev) {
329 		char *colon;
330 		struct net_device *dev;
331 		char devname[IFNAMSIZ];
332 
333 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
334 			return -EFAULT;
335 
336 		devname[IFNAMSIZ-1] = 0;
337 		colon = strchr(devname, ':');
338 		if (colon)
339 			*colon = 0;
340 		dev = __dev_get_by_name(devname);
341 		if (!dev)
342 			return -ENODEV;
343 		cfg->fc_oif = dev->ifindex;
344 		if (colon) {
345 			struct in_ifaddr *ifa;
346 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
347 			if (!in_dev)
348 				return -ENODEV;
349 			*colon = ':';
350 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
351 				if (strcmp(ifa->ifa_label, devname) == 0)
352 					break;
353 			if (ifa == NULL)
354 				return -ENODEV;
355 			cfg->fc_prefsrc = ifa->ifa_local;
356 		}
357 	}
358 
359 	addr = sk_extract_addr(&rt->rt_gateway);
360 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
361 		cfg->fc_gw = addr;
362 		if (rt->rt_flags & RTF_GATEWAY &&
363 		    inet_addr_type(addr) == RTN_UNICAST)
364 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
365 	}
366 
367 	if (cmd == SIOCDELRT)
368 		return 0;
369 
370 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
371 		return -EINVAL;
372 
373 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
374 		cfg->fc_scope = RT_SCOPE_LINK;
375 
376 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
377 		struct nlattr *mx;
378 		int len = 0;
379 
380 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
381  		if (mx == NULL)
382 			return -ENOMEM;
383 
384 		if (rt->rt_flags & RTF_MTU)
385 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386 
387 		if (rt->rt_flags & RTF_WINDOW)
388 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389 
390 		if (rt->rt_flags & RTF_IRTT)
391 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
392 
393 		cfg->fc_mx = mx;
394 		cfg->fc_mx_len = len;
395 	}
396 
397 	return 0;
398 }
399 
400 /*
401  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
402  */
403 
404 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405 {
406 	struct fib_config cfg;
407 	struct rtentry rt;
408 	int err;
409 
410 	switch (cmd) {
411 	case SIOCADDRT:		/* Add a route */
412 	case SIOCDELRT:		/* Delete a route */
413 		if (!capable(CAP_NET_ADMIN))
414 			return -EPERM;
415 
416 		if (copy_from_user(&rt, arg, sizeof(rt)))
417 			return -EFAULT;
418 
419 		rtnl_lock();
420 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
421 		if (err == 0) {
422 			struct fib_table *tb;
423 
424 			if (cmd == SIOCDELRT) {
425 				tb = fib_get_table(cfg.fc_table);
426 				if (tb)
427 					err = tb->tb_delete(tb, &cfg);
428 				else
429 					err = -ESRCH;
430 			} else {
431 				tb = fib_new_table(cfg.fc_table);
432 				if (tb)
433 					err = tb->tb_insert(tb, &cfg);
434 				else
435 					err = -ENOBUFS;
436 			}
437 
438 			/* allocated by rtentry_to_fib_config() */
439 			kfree(cfg.fc_mx);
440 		}
441 		rtnl_unlock();
442 		return err;
443 	}
444 	return -EINVAL;
445 }
446 
447 #else
448 
/*
 * Variant built when CONFIG_IP_NOSIOCRT is defined: every request is
 * rejected with -EINVAL and neither argument is looked at.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	(void) cmd;
	(void) arg;

	return -EINVAL;
}
453 
454 #endif
455 
456 struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
457 	[RTA_DST]		= { .type = NLA_U32 },
458 	[RTA_SRC]		= { .type = NLA_U32 },
459 	[RTA_IIF]		= { .type = NLA_U32 },
460 	[RTA_OIF]		= { .type = NLA_U32 },
461 	[RTA_GATEWAY]		= { .type = NLA_U32 },
462 	[RTA_PRIORITY]		= { .type = NLA_U32 },
463 	[RTA_PREFSRC]		= { .type = NLA_U32 },
464 	[RTA_METRICS]		= { .type = NLA_NESTED },
465 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
466 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
467 	[RTA_FLOW]		= { .type = NLA_U32 },
468 	[RTA_MP_ALGO]		= { .type = NLA_U32 },
469 };
470 
471 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
472 			     struct fib_config *cfg)
473 {
474 	struct nlattr *attr;
475 	int err, remaining;
476 	struct rtmsg *rtm;
477 
478 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
479 	if (err < 0)
480 		goto errout;
481 
482 	memset(cfg, 0, sizeof(*cfg));
483 
484 	rtm = nlmsg_data(nlh);
485 	cfg->fc_dst_len = rtm->rtm_dst_len;
486 	cfg->fc_tos = rtm->rtm_tos;
487 	cfg->fc_table = rtm->rtm_table;
488 	cfg->fc_protocol = rtm->rtm_protocol;
489 	cfg->fc_scope = rtm->rtm_scope;
490 	cfg->fc_type = rtm->rtm_type;
491 	cfg->fc_flags = rtm->rtm_flags;
492 	cfg->fc_nlflags = nlh->nlmsg_flags;
493 
494 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
495 	cfg->fc_nlinfo.nlh = nlh;
496 
497 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
498 		switch (attr->nla_type) {
499 		case RTA_DST:
500 			cfg->fc_dst = nla_get_be32(attr);
501 			break;
502 		case RTA_OIF:
503 			cfg->fc_oif = nla_get_u32(attr);
504 			break;
505 		case RTA_GATEWAY:
506 			cfg->fc_gw = nla_get_be32(attr);
507 			break;
508 		case RTA_PRIORITY:
509 			cfg->fc_priority = nla_get_u32(attr);
510 			break;
511 		case RTA_PREFSRC:
512 			cfg->fc_prefsrc = nla_get_be32(attr);
513 			break;
514 		case RTA_METRICS:
515 			cfg->fc_mx = nla_data(attr);
516 			cfg->fc_mx_len = nla_len(attr);
517 			break;
518 		case RTA_MULTIPATH:
519 			cfg->fc_mp = nla_data(attr);
520 			cfg->fc_mp_len = nla_len(attr);
521 			break;
522 		case RTA_FLOW:
523 			cfg->fc_flow = nla_get_u32(attr);
524 			break;
525 		case RTA_MP_ALGO:
526 			cfg->fc_mp_alg = nla_get_u32(attr);
527 			break;
528 		case RTA_TABLE:
529 			cfg->fc_table = nla_get_u32(attr);
530 			break;
531 		}
532 	}
533 
534 	return 0;
535 errout:
536 	return err;
537 }
538 
539 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540 {
541 	struct fib_config cfg;
542 	struct fib_table *tb;
543 	int err;
544 
545 	err = rtm_to_fib_config(skb, nlh, &cfg);
546 	if (err < 0)
547 		goto errout;
548 
549 	tb = fib_get_table(cfg.fc_table);
550 	if (tb == NULL) {
551 		err = -ESRCH;
552 		goto errout;
553 	}
554 
555 	err = tb->tb_delete(tb, &cfg);
556 errout:
557 	return err;
558 }
559 
560 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561 {
562 	struct fib_config cfg;
563 	struct fib_table *tb;
564 	int err;
565 
566 	err = rtm_to_fib_config(skb, nlh, &cfg);
567 	if (err < 0)
568 		goto errout;
569 
570 	tb = fib_new_table(cfg.fc_table);
571 	if (tb == NULL) {
572 		err = -ENOBUFS;
573 		goto errout;
574 	}
575 
576 	err = tb->tb_insert(tb, &cfg);
577 errout:
578 	return err;
579 }
580 
581 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
582 {
583 	unsigned int h, s_h;
584 	unsigned int e = 0, s_e;
585 	struct fib_table *tb;
586 	struct hlist_node *node;
587 	int dumped = 0;
588 
589 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
590 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
591 		return ip_rt_dump(skb, cb);
592 
593 	s_h = cb->args[0];
594 	s_e = cb->args[1];
595 
596 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597 		e = 0;
598 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
599 			if (e < s_e)
600 				goto next;
601 			if (dumped)
602 				memset(&cb->args[2], 0, sizeof(cb->args) -
603 				                 2 * sizeof(cb->args[0]));
604 			if (tb->tb_dump(tb, skb, cb) < 0)
605 				goto out;
606 			dumped = 1;
607 next:
608 			e++;
609 		}
610 	}
611 out:
612 	cb->args[1] = e;
613 	cb->args[0] = h;
614 
615 	return skb->len;
616 }
617 
618 /* Prepare and feed intra-kernel routing request.
619    Really, it should be netlink message, but :-( netlink
620    can be not configured, so that we feed it directly
621    to fib engine. It is legal, because all events occur
622    only when netlink is already locked.
623  */
624 
625 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626 {
627 	struct fib_table *tb;
628 	struct fib_config cfg = {
629 		.fc_protocol = RTPROT_KERNEL,
630 		.fc_type = type,
631 		.fc_dst = dst,
632 		.fc_dst_len = dst_len,
633 		.fc_prefsrc = ifa->ifa_local,
634 		.fc_oif = ifa->ifa_dev->dev->ifindex,
635 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
636 	};
637 
638 	if (type == RTN_UNICAST)
639 		tb = fib_new_table(RT_TABLE_MAIN);
640 	else
641 		tb = fib_new_table(RT_TABLE_LOCAL);
642 
643 	if (tb == NULL)
644 		return;
645 
646 	cfg.fc_table = tb->tb_id;
647 
648 	if (type != RTN_LOCAL)
649 		cfg.fc_scope = RT_SCOPE_LINK;
650 	else
651 		cfg.fc_scope = RT_SCOPE_HOST;
652 
653 	if (cmd == RTM_NEWROUTE)
654 		tb->tb_insert(tb, &cfg);
655 	else
656 		tb->tb_delete(tb, &cfg);
657 }
658 
659 void fib_add_ifaddr(struct in_ifaddr *ifa)
660 {
661 	struct in_device *in_dev = ifa->ifa_dev;
662 	struct net_device *dev = in_dev->dev;
663 	struct in_ifaddr *prim = ifa;
664 	__be32 mask = ifa->ifa_mask;
665 	__be32 addr = ifa->ifa_local;
666 	__be32 prefix = ifa->ifa_address&mask;
667 
668 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
669 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
670 		if (prim == NULL) {
671 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
672 			return;
673 		}
674 	}
675 
676 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
677 
678 	if (!(dev->flags&IFF_UP))
679 		return;
680 
681 	/* Add broadcast address, if it is explicitly assigned. */
682 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
684 
685 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
687 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
689 
690 		/* Add network specific broadcasts, when it takes a sense */
691 		if (ifa->ifa_prefixlen < 31) {
692 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
693 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
694 		}
695 	}
696 }
697 
698 static void fib_del_ifaddr(struct in_ifaddr *ifa)
699 {
700 	struct in_device *in_dev = ifa->ifa_dev;
701 	struct net_device *dev = in_dev->dev;
702 	struct in_ifaddr *ifa1;
703 	struct in_ifaddr *prim = ifa;
704 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
705 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
706 #define LOCAL_OK	1
707 #define BRD_OK		2
708 #define BRD0_OK		4
709 #define BRD1_OK		8
710 	unsigned ok = 0;
711 
712 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
713 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
714 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
715 	else {
716 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
717 		if (prim == NULL) {
718 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
719 			return;
720 		}
721 	}
722 
723 	/* Deletion is more complicated than add.
724 	   We should take care of not to delete too much :-)
725 
726 	   Scan address list to be sure that addresses are really gone.
727 	 */
728 
729 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
730 		if (ifa->ifa_local == ifa1->ifa_local)
731 			ok |= LOCAL_OK;
732 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
733 			ok |= BRD_OK;
734 		if (brd == ifa1->ifa_broadcast)
735 			ok |= BRD1_OK;
736 		if (any == ifa1->ifa_broadcast)
737 			ok |= BRD0_OK;
738 	}
739 
740 	if (!(ok&BRD_OK))
741 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742 	if (!(ok&BRD1_OK))
743 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
744 	if (!(ok&BRD0_OK))
745 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
746 	if (!(ok&LOCAL_OK)) {
747 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
748 
749 		/* Check, that this local address finally disappeared. */
750 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
751 			/* And the last, but not the least thing.
752 			   We must flush stray FIB entries.
753 
754 			   First of all, we scan fib_info list searching
755 			   for stray nexthop entries, then ignite fib_flush.
756 			*/
757 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
758 				fib_flush();
759 		}
760 	}
761 #undef LOCAL_OK
762 #undef BRD_OK
763 #undef BRD0_OK
764 #undef BRD1_OK
765 }
766 
767 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
768 {
769 
770 	struct fib_result       res;
771 	struct flowi            fl = { .mark = frn->fl_mark,
772 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
773 							    .tos = frn->fl_tos,
774 							    .scope = frn->fl_scope } } };
775 	if (tb) {
776 		local_bh_disable();
777 
778 		frn->tb_id = tb->tb_id;
779 		frn->err = tb->tb_lookup(tb, &fl, &res);
780 
781 		if (!frn->err) {
782 			frn->prefixlen = res.prefixlen;
783 			frn->nh_sel = res.nh_sel;
784 			frn->type = res.type;
785 			frn->scope = res.scope;
786 		}
787 		local_bh_enable();
788 	}
789 }
790 
791 static void nl_fib_input(struct sock *sk, int len)
792 {
793 	struct sk_buff *skb = NULL;
794         struct nlmsghdr *nlh = NULL;
795 	struct fib_result_nl *frn;
796 	u32 pid;
797 	struct fib_table *tb;
798 
799 	skb = skb_dequeue(&sk->sk_receive_queue);
800 	nlh = (struct nlmsghdr *)skb->data;
801 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
802 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
803 		kfree_skb(skb);
804 		return;
805 	}
806 
807 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
808 	tb = fib_get_table(frn->tb_id_in);
809 
810 	nl_fib_lookup(frn, tb);
811 
812 	pid = nlh->nlmsg_pid;           /*pid of sending process */
813 	NETLINK_CB(skb).pid = 0;         /* from kernel */
814 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
815 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
816 }
817 
818 static void nl_fib_lookup_init(void)
819 {
820       netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
821 }
822 
/*
 * Take @dev out of IPv4 routing: sync down the nexthops that use it
 * (flushing the FIB when fib_sync_down() returns non-zero), flush the
 * routing cache and call arp_ifdown() for the device.  @force is passed
 * through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
830 
831 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
832 {
833 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
834 
835 	switch (event) {
836 	case NETDEV_UP:
837 		fib_add_ifaddr(ifa);
838 #ifdef CONFIG_IP_ROUTE_MULTIPATH
839 		fib_sync_up(ifa->ifa_dev->dev);
840 #endif
841 		rt_cache_flush(-1);
842 		break;
843 	case NETDEV_DOWN:
844 		fib_del_ifaddr(ifa);
845 		if (ifa->ifa_dev->ifa_list == NULL) {
846 			/* Last address was deleted from this interface.
847 			   Disable IP.
848 			 */
849 			fib_disable_ip(ifa->ifa_dev->dev, 1);
850 		} else {
851 			rt_cache_flush(-1);
852 		}
853 		break;
854 	}
855 	return NOTIFY_DONE;
856 }
857 
858 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
859 {
860 	struct net_device *dev = ptr;
861 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
862 
863 	if (event == NETDEV_UNREGISTER) {
864 		fib_disable_ip(dev, 2);
865 		return NOTIFY_DONE;
866 	}
867 
868 	if (!in_dev)
869 		return NOTIFY_DONE;
870 
871 	switch (event) {
872 	case NETDEV_UP:
873 		for_ifa(in_dev) {
874 			fib_add_ifaddr(ifa);
875 		} endfor_ifa(in_dev);
876 #ifdef CONFIG_IP_ROUTE_MULTIPATH
877 		fib_sync_up(dev);
878 #endif
879 		rt_cache_flush(-1);
880 		break;
881 	case NETDEV_DOWN:
882 		fib_disable_ip(dev, 0);
883 		break;
884 	case NETDEV_CHANGEMTU:
885 	case NETDEV_CHANGE:
886 		rt_cache_flush(0);
887 		break;
888 	}
889 	return NOTIFY_DONE;
890 }
891 
/* Routes IPv4 address notifications to fib_inetaddr_event(); registered in ip_fib_init(). */
892 static struct notifier_block fib_inetaddr_notifier = {
893 	.notifier_call =fib_inetaddr_event,
894 };
895 
/* Routes network device notifications to fib_netdev_event(); registered in ip_fib_init(). */
896 static struct notifier_block fib_netdev_notifier = {
897 	.notifier_call =fib_netdev_event,
898 };
899 
900 void __init ip_fib_init(void)
901 {
902 	unsigned int i;
903 
904 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905 		INIT_HLIST_HEAD(&fib_table_hash[i]);
906 #ifndef CONFIG_IP_MULTIPLE_TABLES
907 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
908 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
909 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
910 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
911 #else
912 	fib4_rules_init();
913 #endif
914 
915 	register_netdevice_notifier(&fib_netdev_notifier);
916 	register_inetaddr_notifier(&fib_inetaddr_notifier);
917 	nl_fib_lookup_init();
918 }
919 
920 EXPORT_SYMBOL(inet_addr_type);
921 EXPORT_SYMBOL(ip_dev_find);
922