xref: /linux/net/ipv4/fib_frontend.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/netlink.h>
38 #include <linux/init.h>
39 #include <linux/list.h>
40 
41 #include <net/ip.h>
42 #include <net/protocol.h>
43 #include <net/route.h>
44 #include <net/tcp.h>
45 #include <net/sock.h>
46 #include <net/icmp.h>
47 #include <net/arp.h>
48 #include <net/ip_fib.h>
49 
/* Debug-level printk() wrapper. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/*
 * Without multiple-table support only these two tables exist; both are
 * created by ip_fib_init().
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* Single bucket: ip_fib_init() chains both tables on fib_table_hash[0]. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];

#else

/* Tables are bucketed by (table id & (FIB_TABLE_HASHSZ - 1)). */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64 
65 struct fib_table *fib_new_table(u32 id)
66 {
67 	struct fib_table *tb;
68 	unsigned int h;
69 
70 	if (id == 0)
71 		id = RT_TABLE_MAIN;
72 	tb = fib_get_table(id);
73 	if (tb)
74 		return tb;
75 	tb = fib_hash_init(id);
76 	if (!tb)
77 		return NULL;
78 	h = id & (FIB_TABLE_HASHSZ - 1);
79 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80 	return tb;
81 }
82 
83 struct fib_table *fib_get_table(u32 id)
84 {
85 	struct fib_table *tb;
86 	struct hlist_node *node;
87 	unsigned int h;
88 
89 	if (id == 0)
90 		id = RT_TABLE_MAIN;
91 	h = id & (FIB_TABLE_HASHSZ - 1);
92 	rcu_read_lock();
93 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 		if (tb->tb_id == id) {
95 			rcu_read_unlock();
96 			return tb;
97 		}
98 	}
99 	rcu_read_unlock();
100 	return NULL;
101 }
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
103 
104 static void fib_flush(void)
105 {
106 	int flushed = 0;
107 	struct fib_table *tb;
108 	struct hlist_node *node;
109 	unsigned int h;
110 
111 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 			flushed += tb->tb_flush(tb);
114 	}
115 
116 	if (flushed)
117 		rt_cache_flush(-1);
118 }
119 
120 /*
121  *	Find the first device with a given source address.
122  */
123 
124 struct net_device * ip_dev_find(__be32 addr)
125 {
126 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 	struct fib_result res;
128 	struct net_device *dev = NULL;
129 
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
131 	res.r = NULL;
132 #endif
133 
134 	if (!ip_fib_local_table ||
135 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136 		return NULL;
137 	if (res.type != RTN_LOCAL)
138 		goto out;
139 	dev = FIB_RES_DEV(res);
140 
141 	if (dev)
142 		dev_hold(dev);
143 out:
144 	fib_res_put(&res);
145 	return dev;
146 }
147 
148 unsigned inet_addr_type(__be32 addr)
149 {
150 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 	struct fib_result	res;
152 	unsigned ret = RTN_BROADCAST;
153 
154 	if (ZERONET(addr) || BADCLASS(addr))
155 		return RTN_BROADCAST;
156 	if (MULTICAST(addr))
157 		return RTN_MULTICAST;
158 
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
160 	res.r = NULL;
161 #endif
162 
163 	if (ip_fib_local_table) {
164 		ret = RTN_UNICAST;
165 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166 						   &fl, &res)) {
167 			ret = res.type;
168 			fib_res_put(&res);
169 		}
170 	}
171 	return ret;
172 }
173 
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175    - (main) check, that source is valid i.e. not broadcast or our local
176      address.
177    - figure out what "logical" interface this packet arrived
178      and calculate "specific destination" address.
179    - check, that packet arrived from expected physical interface.
180  */
181 
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
184 {
185 	struct in_device *in_dev;
186 	struct flowi fl = { .nl_u = { .ip4_u =
187 				      { .daddr = src,
188 					.saddr = dst,
189 					.tos = tos } },
190 			    .iif = oif };
191 	struct fib_result res;
192 	int no_addr, rpf;
193 	int ret;
194 
195 	no_addr = rpf = 0;
196 	rcu_read_lock();
197 	in_dev = __in_dev_get_rcu(dev);
198 	if (in_dev) {
199 		no_addr = in_dev->ifa_list == NULL;
200 		rpf = IN_DEV_RPFILTER(in_dev);
201 	}
202 	rcu_read_unlock();
203 
204 	if (in_dev == NULL)
205 		goto e_inval;
206 
207 	if (fib_lookup(&fl, &res))
208 		goto last_resort;
209 	if (res.type != RTN_UNICAST)
210 		goto e_inval_res;
211 	*spec_dst = FIB_RES_PREFSRC(res);
212 	fib_combine_itag(itag, &res);
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215 #else
216 	if (FIB_RES_DEV(res) == dev)
217 #endif
218 	{
219 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220 		fib_res_put(&res);
221 		return ret;
222 	}
223 	fib_res_put(&res);
224 	if (no_addr)
225 		goto last_resort;
226 	if (rpf)
227 		goto e_inval;
228 	fl.oif = dev->ifindex;
229 
230 	ret = 0;
231 	if (fib_lookup(&fl, &res) == 0) {
232 		if (res.type == RTN_UNICAST) {
233 			*spec_dst = FIB_RES_PREFSRC(res);
234 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235 		}
236 		fib_res_put(&res);
237 	}
238 	return ret;
239 
240 last_resort:
241 	if (rpf)
242 		goto e_inval;
243 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244 	*itag = 0;
245 	return 0;
246 
247 e_inval_res:
248 	fib_res_put(&res);
249 e_inval:
250 	return -EINVAL;
251 }
252 
253 #ifndef CONFIG_IP_NOSIOCRT
254 
255 static inline __be32 sk_extract_addr(struct sockaddr *addr)
256 {
257 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
258 }
259 
260 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
261 {
262 	struct nlattr *nla;
263 
264 	nla = (struct nlattr *) ((char *) mx + len);
265 	nla->nla_type = type;
266 	nla->nla_len = nla_attr_size(4);
267 	*(u32 *) nla_data(nla) = value;
268 
269 	return len + nla_total_size(4);
270 }
271 
272 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 				 struct fib_config *cfg)
274 {
275 	__be32 addr;
276 	int plen;
277 
278 	memset(cfg, 0, sizeof(*cfg));
279 
280 	if (rt->rt_dst.sa_family != AF_INET)
281 		return -EAFNOSUPPORT;
282 
283 	/*
284 	 * Check mask for validity:
285 	 * a) it must be contiguous.
286 	 * b) destination must have all host bits clear.
287 	 * c) if application forgot to set correct family (AF_INET),
288 	 *    reject request unless it is absolutely clear i.e.
289 	 *    both family and mask are zero.
290 	 */
291 	plen = 32;
292 	addr = sk_extract_addr(&rt->rt_dst);
293 	if (!(rt->rt_flags & RTF_HOST)) {
294 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
295 
296 		if (rt->rt_genmask.sa_family != AF_INET) {
297 			if (mask || rt->rt_genmask.sa_family)
298 				return -EAFNOSUPPORT;
299 		}
300 
301 		if (bad_mask(mask, addr))
302 			return -EINVAL;
303 
304 		plen = inet_mask_len(mask);
305 	}
306 
307 	cfg->fc_dst_len = plen;
308 	cfg->fc_dst = addr;
309 
310 	if (cmd != SIOCDELRT) {
311 		cfg->fc_nlflags = NLM_F_CREATE;
312 		cfg->fc_protocol = RTPROT_BOOT;
313 	}
314 
315 	if (rt->rt_metric)
316 		cfg->fc_priority = rt->rt_metric - 1;
317 
318 	if (rt->rt_flags & RTF_REJECT) {
319 		cfg->fc_scope = RT_SCOPE_HOST;
320 		cfg->fc_type = RTN_UNREACHABLE;
321 		return 0;
322 	}
323 
324 	cfg->fc_scope = RT_SCOPE_NOWHERE;
325 	cfg->fc_type = RTN_UNICAST;
326 
327 	if (rt->rt_dev) {
328 		char *colon;
329 		struct net_device *dev;
330 		char devname[IFNAMSIZ];
331 
332 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333 			return -EFAULT;
334 
335 		devname[IFNAMSIZ-1] = 0;
336 		colon = strchr(devname, ':');
337 		if (colon)
338 			*colon = 0;
339 		dev = __dev_get_by_name(devname);
340 		if (!dev)
341 			return -ENODEV;
342 		cfg->fc_oif = dev->ifindex;
343 		if (colon) {
344 			struct in_ifaddr *ifa;
345 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
346 			if (!in_dev)
347 				return -ENODEV;
348 			*colon = ':';
349 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 				if (strcmp(ifa->ifa_label, devname) == 0)
351 					break;
352 			if (ifa == NULL)
353 				return -ENODEV;
354 			cfg->fc_prefsrc = ifa->ifa_local;
355 		}
356 	}
357 
358 	addr = sk_extract_addr(&rt->rt_gateway);
359 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
360 		cfg->fc_gw = addr;
361 		if (rt->rt_flags & RTF_GATEWAY &&
362 		    inet_addr_type(addr) == RTN_UNICAST)
363 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
364 	}
365 
366 	if (cmd == SIOCDELRT)
367 		return 0;
368 
369 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370 		return -EINVAL;
371 
372 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 		cfg->fc_scope = RT_SCOPE_LINK;
374 
375 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
376 		struct nlattr *mx;
377 		int len = 0;
378 
379 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
380 		if (mx == NULL)
381 			return -ENOMEM;
382 
383 		if (rt->rt_flags & RTF_MTU)
384 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
385 
386 		if (rt->rt_flags & RTF_WINDOW)
387 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
388 
389 		if (rt->rt_flags & RTF_IRTT)
390 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391 
392 		cfg->fc_mx = mx;
393 		cfg->fc_mx_len = len;
394 	}
395 
396 	return 0;
397 }
398 
399 /*
400  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
401  */
402 
403 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
404 {
405 	struct fib_config cfg;
406 	struct rtentry rt;
407 	int err;
408 
409 	switch (cmd) {
410 	case SIOCADDRT:		/* Add a route */
411 	case SIOCDELRT:		/* Delete a route */
412 		if (!capable(CAP_NET_ADMIN))
413 			return -EPERM;
414 
415 		if (copy_from_user(&rt, arg, sizeof(rt)))
416 			return -EFAULT;
417 
418 		rtnl_lock();
419 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
420 		if (err == 0) {
421 			struct fib_table *tb;
422 
423 			if (cmd == SIOCDELRT) {
424 				tb = fib_get_table(cfg.fc_table);
425 				if (tb)
426 					err = tb->tb_delete(tb, &cfg);
427 				else
428 					err = -ESRCH;
429 			} else {
430 				tb = fib_new_table(cfg.fc_table);
431 				if (tb)
432 					err = tb->tb_insert(tb, &cfg);
433 				else
434 					err = -ENOBUFS;
435 			}
436 
437 			/* allocated by rtentry_to_fib_config() */
438 			kfree(cfg.fc_mx);
439 		}
440 		rtnl_unlock();
441 		return err;
442 	}
443 	return -EINVAL;
444 }
445 
446 #else
447 
/*
 * Stub used when CONFIG_IP_NOSIOCRT is set: the routing ioctls are not
 * available, so every request is rejected with -EINVAL.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	const int unsupported = -EINVAL;

	return unsupported;
}
452 
453 #endif
454 
455 struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
456 	[RTA_DST]		= { .type = NLA_U32 },
457 	[RTA_SRC]		= { .type = NLA_U32 },
458 	[RTA_IIF]		= { .type = NLA_U32 },
459 	[RTA_OIF]		= { .type = NLA_U32 },
460 	[RTA_GATEWAY]		= { .type = NLA_U32 },
461 	[RTA_PRIORITY]		= { .type = NLA_U32 },
462 	[RTA_PREFSRC]		= { .type = NLA_U32 },
463 	[RTA_METRICS]		= { .type = NLA_NESTED },
464 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466 	[RTA_FLOW]		= { .type = NLA_U32 },
467 	[RTA_MP_ALGO]		= { .type = NLA_U32 },
468 };
469 
470 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
471 			     struct fib_config *cfg)
472 {
473 	struct nlattr *attr;
474 	int err, remaining;
475 	struct rtmsg *rtm;
476 
477 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
478 	if (err < 0)
479 		goto errout;
480 
481 	memset(cfg, 0, sizeof(*cfg));
482 
483 	rtm = nlmsg_data(nlh);
484 	cfg->fc_dst_len = rtm->rtm_dst_len;
485 	cfg->fc_tos = rtm->rtm_tos;
486 	cfg->fc_table = rtm->rtm_table;
487 	cfg->fc_protocol = rtm->rtm_protocol;
488 	cfg->fc_scope = rtm->rtm_scope;
489 	cfg->fc_type = rtm->rtm_type;
490 	cfg->fc_flags = rtm->rtm_flags;
491 	cfg->fc_nlflags = nlh->nlmsg_flags;
492 
493 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
494 	cfg->fc_nlinfo.nlh = nlh;
495 
496 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
497 		switch (attr->nla_type) {
498 		case RTA_DST:
499 			cfg->fc_dst = nla_get_be32(attr);
500 			break;
501 		case RTA_OIF:
502 			cfg->fc_oif = nla_get_u32(attr);
503 			break;
504 		case RTA_GATEWAY:
505 			cfg->fc_gw = nla_get_be32(attr);
506 			break;
507 		case RTA_PRIORITY:
508 			cfg->fc_priority = nla_get_u32(attr);
509 			break;
510 		case RTA_PREFSRC:
511 			cfg->fc_prefsrc = nla_get_be32(attr);
512 			break;
513 		case RTA_METRICS:
514 			cfg->fc_mx = nla_data(attr);
515 			cfg->fc_mx_len = nla_len(attr);
516 			break;
517 		case RTA_MULTIPATH:
518 			cfg->fc_mp = nla_data(attr);
519 			cfg->fc_mp_len = nla_len(attr);
520 			break;
521 		case RTA_FLOW:
522 			cfg->fc_flow = nla_get_u32(attr);
523 			break;
524 		case RTA_MP_ALGO:
525 			cfg->fc_mp_alg = nla_get_u32(attr);
526 			break;
527 		case RTA_TABLE:
528 			cfg->fc_table = nla_get_u32(attr);
529 			break;
530 		}
531 	}
532 
533 	return 0;
534 errout:
535 	return err;
536 }
537 
538 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
539 {
540 	struct fib_config cfg;
541 	struct fib_table *tb;
542 	int err;
543 
544 	err = rtm_to_fib_config(skb, nlh, &cfg);
545 	if (err < 0)
546 		goto errout;
547 
548 	tb = fib_get_table(cfg.fc_table);
549 	if (tb == NULL) {
550 		err = -ESRCH;
551 		goto errout;
552 	}
553 
554 	err = tb->tb_delete(tb, &cfg);
555 errout:
556 	return err;
557 }
558 
559 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
560 {
561 	struct fib_config cfg;
562 	struct fib_table *tb;
563 	int err;
564 
565 	err = rtm_to_fib_config(skb, nlh, &cfg);
566 	if (err < 0)
567 		goto errout;
568 
569 	tb = fib_new_table(cfg.fc_table);
570 	if (tb == NULL) {
571 		err = -ENOBUFS;
572 		goto errout;
573 	}
574 
575 	err = tb->tb_insert(tb, &cfg);
576 errout:
577 	return err;
578 }
579 
580 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
581 {
582 	unsigned int h, s_h;
583 	unsigned int e = 0, s_e;
584 	struct fib_table *tb;
585 	struct hlist_node *node;
586 	int dumped = 0;
587 
588 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
589 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
590 		return ip_rt_dump(skb, cb);
591 
592 	s_h = cb->args[0];
593 	s_e = cb->args[1];
594 
595 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
596 		e = 0;
597 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
598 			if (e < s_e)
599 				goto next;
600 			if (dumped)
601 				memset(&cb->args[2], 0, sizeof(cb->args) -
602 						 2 * sizeof(cb->args[0]));
603 			if (tb->tb_dump(tb, skb, cb) < 0)
604 				goto out;
605 			dumped = 1;
606 next:
607 			e++;
608 		}
609 	}
610 out:
611 	cb->args[1] = e;
612 	cb->args[0] = h;
613 
614 	return skb->len;
615 }
616 
617 /* Prepare and feed intra-kernel routing request.
618    Really, it should be netlink message, but :-( netlink
619    can be not configured, so that we feed it directly
620    to fib engine. It is legal, because all events occur
621    only when netlink is already locked.
622  */
623 
624 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
625 {
626 	struct fib_table *tb;
627 	struct fib_config cfg = {
628 		.fc_protocol = RTPROT_KERNEL,
629 		.fc_type = type,
630 		.fc_dst = dst,
631 		.fc_dst_len = dst_len,
632 		.fc_prefsrc = ifa->ifa_local,
633 		.fc_oif = ifa->ifa_dev->dev->ifindex,
634 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
635 	};
636 
637 	if (type == RTN_UNICAST)
638 		tb = fib_new_table(RT_TABLE_MAIN);
639 	else
640 		tb = fib_new_table(RT_TABLE_LOCAL);
641 
642 	if (tb == NULL)
643 		return;
644 
645 	cfg.fc_table = tb->tb_id;
646 
647 	if (type != RTN_LOCAL)
648 		cfg.fc_scope = RT_SCOPE_LINK;
649 	else
650 		cfg.fc_scope = RT_SCOPE_HOST;
651 
652 	if (cmd == RTM_NEWROUTE)
653 		tb->tb_insert(tb, &cfg);
654 	else
655 		tb->tb_delete(tb, &cfg);
656 }
657 
658 void fib_add_ifaddr(struct in_ifaddr *ifa)
659 {
660 	struct in_device *in_dev = ifa->ifa_dev;
661 	struct net_device *dev = in_dev->dev;
662 	struct in_ifaddr *prim = ifa;
663 	__be32 mask = ifa->ifa_mask;
664 	__be32 addr = ifa->ifa_local;
665 	__be32 prefix = ifa->ifa_address&mask;
666 
667 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
668 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
669 		if (prim == NULL) {
670 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
671 			return;
672 		}
673 	}
674 
675 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
676 
677 	if (!(dev->flags&IFF_UP))
678 		return;
679 
680 	/* Add broadcast address, if it is explicitly assigned. */
681 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
682 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
683 
684 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
685 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
686 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
687 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
688 
689 		/* Add network specific broadcasts, when it takes a sense */
690 		if (ifa->ifa_prefixlen < 31) {
691 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
692 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
693 		}
694 	}
695 }
696 
697 static void fib_del_ifaddr(struct in_ifaddr *ifa)
698 {
699 	struct in_device *in_dev = ifa->ifa_dev;
700 	struct net_device *dev = in_dev->dev;
701 	struct in_ifaddr *ifa1;
702 	struct in_ifaddr *prim = ifa;
703 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
704 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
705 #define LOCAL_OK	1
706 #define BRD_OK		2
707 #define BRD0_OK		4
708 #define BRD1_OK		8
709 	unsigned ok = 0;
710 
711 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
712 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
713 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
714 	else {
715 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
716 		if (prim == NULL) {
717 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
718 			return;
719 		}
720 	}
721 
722 	/* Deletion is more complicated than add.
723 	   We should take care of not to delete too much :-)
724 
725 	   Scan address list to be sure that addresses are really gone.
726 	 */
727 
728 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
729 		if (ifa->ifa_local == ifa1->ifa_local)
730 			ok |= LOCAL_OK;
731 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
732 			ok |= BRD_OK;
733 		if (brd == ifa1->ifa_broadcast)
734 			ok |= BRD1_OK;
735 		if (any == ifa1->ifa_broadcast)
736 			ok |= BRD0_OK;
737 	}
738 
739 	if (!(ok&BRD_OK))
740 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
741 	if (!(ok&BRD1_OK))
742 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
743 	if (!(ok&BRD0_OK))
744 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
745 	if (!(ok&LOCAL_OK)) {
746 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
747 
748 		/* Check, that this local address finally disappeared. */
749 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
750 			/* And the last, but not the least thing.
751 			   We must flush stray FIB entries.
752 
753 			   First of all, we scan fib_info list searching
754 			   for stray nexthop entries, then ignite fib_flush.
755 			*/
756 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
757 				fib_flush();
758 		}
759 	}
760 #undef LOCAL_OK
761 #undef BRD_OK
762 #undef BRD0_OK
763 #undef BRD1_OK
764 }
765 
766 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
767 {
768 
769 	struct fib_result       res;
770 	struct flowi            fl = { .mark = frn->fl_mark,
771 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
772 							    .tos = frn->fl_tos,
773 							    .scope = frn->fl_scope } } };
774 	if (tb) {
775 		local_bh_disable();
776 
777 		frn->tb_id = tb->tb_id;
778 		frn->err = tb->tb_lookup(tb, &fl, &res);
779 
780 		if (!frn->err) {
781 			frn->prefixlen = res.prefixlen;
782 			frn->nh_sel = res.nh_sel;
783 			frn->type = res.type;
784 			frn->scope = res.scope;
785 		}
786 		local_bh_enable();
787 	}
788 }
789 
790 static void nl_fib_input(struct sock *sk, int len)
791 {
792 	struct sk_buff *skb = NULL;
793 	struct nlmsghdr *nlh = NULL;
794 	struct fib_result_nl *frn;
795 	u32 pid;
796 	struct fib_table *tb;
797 
798 	skb = skb_dequeue(&sk->sk_receive_queue);
799 	nlh = (struct nlmsghdr *)skb->data;
800 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
801 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
802 		kfree_skb(skb);
803 		return;
804 	}
805 
806 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
807 	tb = fib_get_table(frn->tb_id_in);
808 
809 	nl_fib_lookup(frn, tb);
810 
811 	pid = nlh->nlmsg_pid;           /*pid of sending process */
812 	NETLINK_CB(skb).pid = 0;         /* from kernel */
813 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
814 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
815 }
816 
817 static void nl_fib_lookup_init(void)
818 {
819       netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
820 }
821 
/*
 * Take IPv4 routing down on @dev: sync down the FIB entries using the
 * device (flushing the tables if fib_sync_down() reports anything), then
 * flush the routing cache and call arp_ifdown() for the device.
 * @force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
829 
830 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
831 {
832 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
833 
834 	switch (event) {
835 	case NETDEV_UP:
836 		fib_add_ifaddr(ifa);
837 #ifdef CONFIG_IP_ROUTE_MULTIPATH
838 		fib_sync_up(ifa->ifa_dev->dev);
839 #endif
840 		rt_cache_flush(-1);
841 		break;
842 	case NETDEV_DOWN:
843 		fib_del_ifaddr(ifa);
844 		if (ifa->ifa_dev->ifa_list == NULL) {
845 			/* Last address was deleted from this interface.
846 			   Disable IP.
847 			 */
848 			fib_disable_ip(ifa->ifa_dev->dev, 1);
849 		} else {
850 			rt_cache_flush(-1);
851 		}
852 		break;
853 	}
854 	return NOTIFY_DONE;
855 }
856 
857 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
858 {
859 	struct net_device *dev = ptr;
860 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
861 
862 	if (event == NETDEV_UNREGISTER) {
863 		fib_disable_ip(dev, 2);
864 		return NOTIFY_DONE;
865 	}
866 
867 	if (!in_dev)
868 		return NOTIFY_DONE;
869 
870 	switch (event) {
871 	case NETDEV_UP:
872 		for_ifa(in_dev) {
873 			fib_add_ifaddr(ifa);
874 		} endfor_ifa(in_dev);
875 #ifdef CONFIG_IP_ROUTE_MULTIPATH
876 		fib_sync_up(dev);
877 #endif
878 		rt_cache_flush(-1);
879 		break;
880 	case NETDEV_DOWN:
881 		fib_disable_ip(dev, 0);
882 		break;
883 	case NETDEV_CHANGEMTU:
884 	case NETDEV_CHANGE:
885 		rt_cache_flush(0);
886 		break;
887 	}
888 	return NOTIFY_DONE;
889 }
890 
891 static struct notifier_block fib_inetaddr_notifier = {
892 	.notifier_call =fib_inetaddr_event,
893 };
894 
895 static struct notifier_block fib_netdev_notifier = {
896 	.notifier_call =fib_netdev_event,
897 };
898 
899 void __init ip_fib_init(void)
900 {
901 	unsigned int i;
902 
903 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
904 		INIT_HLIST_HEAD(&fib_table_hash[i]);
905 #ifndef CONFIG_IP_MULTIPLE_TABLES
906 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
907 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
908 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
909 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
910 #else
911 	fib4_rules_init();
912 #endif
913 
914 	register_netdevice_notifier(&fib_netdev_notifier);
915 	register_inetaddr_notifier(&fib_inetaddr_notifier);
916 	nl_fib_lookup_init();
917 }
918 
/* Address classification and device lookup helpers exported to modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
921