xref: /linux/net/ipv4/fib_frontend.c (revision 98366c20a275e957416e9516db5dcb7195b4e101)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
50 #define FFprint(a...) printk(KERN_DEBUG a)
51 
52 static struct sock *fibnl;
53 
54 #ifndef CONFIG_IP_MULTIPLE_TABLES
55 
56 struct fib_table *ip_fib_local_table;
57 struct fib_table *ip_fib_main_table;
58 
59 #define FIB_TABLE_HASHSZ 1
60 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61 
62 #else
63 
64 #define FIB_TABLE_HASHSZ 256
65 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
66 
67 struct fib_table *fib_new_table(u32 id)
68 {
69 	struct fib_table *tb;
70 	unsigned int h;
71 
72 	if (id == 0)
73 		id = RT_TABLE_MAIN;
74 	tb = fib_get_table(id);
75 	if (tb)
76 		return tb;
77 	tb = fib_hash_init(id);
78 	if (!tb)
79 		return NULL;
80 	h = id & (FIB_TABLE_HASHSZ - 1);
81 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
82 	return tb;
83 }
84 
85 struct fib_table *fib_get_table(u32 id)
86 {
87 	struct fib_table *tb;
88 	struct hlist_node *node;
89 	unsigned int h;
90 
91 	if (id == 0)
92 		id = RT_TABLE_MAIN;
93 	h = id & (FIB_TABLE_HASHSZ - 1);
94 	rcu_read_lock();
95 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96 		if (tb->tb_id == id) {
97 			rcu_read_unlock();
98 			return tb;
99 		}
100 	}
101 	rcu_read_unlock();
102 	return NULL;
103 }
104 #endif /* CONFIG_IP_MULTIPLE_TABLES */
105 
106 static void fib_flush(void)
107 {
108 	int flushed = 0;
109 	struct fib_table *tb;
110 	struct hlist_node *node;
111 	unsigned int h;
112 
113 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115 			flushed += tb->tb_flush(tb);
116 	}
117 
118 	if (flushed)
119 		rt_cache_flush(-1);
120 }
121 
122 /*
123  *	Find the first device with a given source address.
124  */
125 
126 struct net_device * ip_dev_find(__be32 addr)
127 {
128 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 	struct fib_result res;
130 	struct net_device *dev = NULL;
131 	struct fib_table *local_table;
132 
133 #ifdef CONFIG_IP_MULTIPLE_TABLES
134 	res.r = NULL;
135 #endif
136 
137 	local_table = fib_get_table(RT_TABLE_LOCAL);
138 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
139 		return NULL;
140 	if (res.type != RTN_LOCAL)
141 		goto out;
142 	dev = FIB_RES_DEV(res);
143 
144 	if (dev)
145 		dev_hold(dev);
146 out:
147 	fib_res_put(&res);
148 	return dev;
149 }
150 
151 unsigned inet_addr_type(__be32 addr)
152 {
153 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
154 	struct fib_result	res;
155 	unsigned ret = RTN_BROADCAST;
156 	struct fib_table *local_table;
157 
158 	if (ZERONET(addr) || BADCLASS(addr))
159 		return RTN_BROADCAST;
160 	if (MULTICAST(addr))
161 		return RTN_MULTICAST;
162 
163 #ifdef CONFIG_IP_MULTIPLE_TABLES
164 	res.r = NULL;
165 #endif
166 
167 	local_table = fib_get_table(RT_TABLE_LOCAL);
168 	if (local_table) {
169 		ret = RTN_UNICAST;
170 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
171 			ret = res.type;
172 			fib_res_put(&res);
173 		}
174 	}
175 	return ret;
176 }
177 
178 /* Given (packet source, input interface) and optional (dst, oif, tos):
179    - (main) check, that source is valid i.e. not broadcast or our local
180      address.
181    - figure out what "logical" interface this packet arrived
182      and calculate "specific destination" address.
183    - check, that packet arrived from expected physical interface.
184  */
185 
186 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
187 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
188 {
189 	struct in_device *in_dev;
190 	struct flowi fl = { .nl_u = { .ip4_u =
191 				      { .daddr = src,
192 					.saddr = dst,
193 					.tos = tos } },
194 			    .iif = oif };
195 	struct fib_result res;
196 	int no_addr, rpf;
197 	int ret;
198 
199 	no_addr = rpf = 0;
200 	rcu_read_lock();
201 	in_dev = __in_dev_get_rcu(dev);
202 	if (in_dev) {
203 		no_addr = in_dev->ifa_list == NULL;
204 		rpf = IN_DEV_RPFILTER(in_dev);
205 	}
206 	rcu_read_unlock();
207 
208 	if (in_dev == NULL)
209 		goto e_inval;
210 
211 	if (fib_lookup(&fl, &res))
212 		goto last_resort;
213 	if (res.type != RTN_UNICAST)
214 		goto e_inval_res;
215 	*spec_dst = FIB_RES_PREFSRC(res);
216 	fib_combine_itag(itag, &res);
217 #ifdef CONFIG_IP_ROUTE_MULTIPATH
218 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
219 #else
220 	if (FIB_RES_DEV(res) == dev)
221 #endif
222 	{
223 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
224 		fib_res_put(&res);
225 		return ret;
226 	}
227 	fib_res_put(&res);
228 	if (no_addr)
229 		goto last_resort;
230 	if (rpf)
231 		goto e_inval;
232 	fl.oif = dev->ifindex;
233 
234 	ret = 0;
235 	if (fib_lookup(&fl, &res) == 0) {
236 		if (res.type == RTN_UNICAST) {
237 			*spec_dst = FIB_RES_PREFSRC(res);
238 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
239 		}
240 		fib_res_put(&res);
241 	}
242 	return ret;
243 
244 last_resort:
245 	if (rpf)
246 		goto e_inval;
247 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
248 	*itag = 0;
249 	return 0;
250 
251 e_inval_res:
252 	fib_res_put(&res);
253 e_inval:
254 	return -EINVAL;
255 }
256 
257 static inline __be32 sk_extract_addr(struct sockaddr *addr)
258 {
259 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
260 }
261 
262 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
263 {
264 	struct nlattr *nla;
265 
266 	nla = (struct nlattr *) ((char *) mx + len);
267 	nla->nla_type = type;
268 	nla->nla_len = nla_attr_size(4);
269 	*(u32 *) nla_data(nla) = value;
270 
271 	return len + nla_total_size(4);
272 }
273 
274 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
275 				 struct fib_config *cfg)
276 {
277 	__be32 addr;
278 	int plen;
279 
280 	memset(cfg, 0, sizeof(*cfg));
281 
282 	if (rt->rt_dst.sa_family != AF_INET)
283 		return -EAFNOSUPPORT;
284 
285 	/*
286 	 * Check mask for validity:
287 	 * a) it must be contiguous.
288 	 * b) destination must have all host bits clear.
289 	 * c) if application forgot to set correct family (AF_INET),
290 	 *    reject request unless it is absolutely clear i.e.
291 	 *    both family and mask are zero.
292 	 */
293 	plen = 32;
294 	addr = sk_extract_addr(&rt->rt_dst);
295 	if (!(rt->rt_flags & RTF_HOST)) {
296 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
297 
298 		if (rt->rt_genmask.sa_family != AF_INET) {
299 			if (mask || rt->rt_genmask.sa_family)
300 				return -EAFNOSUPPORT;
301 		}
302 
303 		if (bad_mask(mask, addr))
304 			return -EINVAL;
305 
306 		plen = inet_mask_len(mask);
307 	}
308 
309 	cfg->fc_dst_len = plen;
310 	cfg->fc_dst = addr;
311 
312 	if (cmd != SIOCDELRT) {
313 		cfg->fc_nlflags = NLM_F_CREATE;
314 		cfg->fc_protocol = RTPROT_BOOT;
315 	}
316 
317 	if (rt->rt_metric)
318 		cfg->fc_priority = rt->rt_metric - 1;
319 
320 	if (rt->rt_flags & RTF_REJECT) {
321 		cfg->fc_scope = RT_SCOPE_HOST;
322 		cfg->fc_type = RTN_UNREACHABLE;
323 		return 0;
324 	}
325 
326 	cfg->fc_scope = RT_SCOPE_NOWHERE;
327 	cfg->fc_type = RTN_UNICAST;
328 
329 	if (rt->rt_dev) {
330 		char *colon;
331 		struct net_device *dev;
332 		char devname[IFNAMSIZ];
333 
334 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
335 			return -EFAULT;
336 
337 		devname[IFNAMSIZ-1] = 0;
338 		colon = strchr(devname, ':');
339 		if (colon)
340 			*colon = 0;
341 		dev = __dev_get_by_name(&init_net, devname);
342 		if (!dev)
343 			return -ENODEV;
344 		cfg->fc_oif = dev->ifindex;
345 		if (colon) {
346 			struct in_ifaddr *ifa;
347 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
348 			if (!in_dev)
349 				return -ENODEV;
350 			*colon = ':';
351 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
352 				if (strcmp(ifa->ifa_label, devname) == 0)
353 					break;
354 			if (ifa == NULL)
355 				return -ENODEV;
356 			cfg->fc_prefsrc = ifa->ifa_local;
357 		}
358 	}
359 
360 	addr = sk_extract_addr(&rt->rt_gateway);
361 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
362 		cfg->fc_gw = addr;
363 		if (rt->rt_flags & RTF_GATEWAY &&
364 		    inet_addr_type(addr) == RTN_UNICAST)
365 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
366 	}
367 
368 	if (cmd == SIOCDELRT)
369 		return 0;
370 
371 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
372 		return -EINVAL;
373 
374 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
375 		cfg->fc_scope = RT_SCOPE_LINK;
376 
377 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
378 		struct nlattr *mx;
379 		int len = 0;
380 
381 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
382 		if (mx == NULL)
383 			return -ENOMEM;
384 
385 		if (rt->rt_flags & RTF_MTU)
386 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
387 
388 		if (rt->rt_flags & RTF_WINDOW)
389 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
390 
391 		if (rt->rt_flags & RTF_IRTT)
392 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
393 
394 		cfg->fc_mx = mx;
395 		cfg->fc_mx_len = len;
396 	}
397 
398 	return 0;
399 }
400 
401 /*
402  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
403  */
404 
405 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
406 {
407 	struct fib_config cfg;
408 	struct rtentry rt;
409 	int err;
410 
411 	switch (cmd) {
412 	case SIOCADDRT:		/* Add a route */
413 	case SIOCDELRT:		/* Delete a route */
414 		if (!capable(CAP_NET_ADMIN))
415 			return -EPERM;
416 
417 		if (copy_from_user(&rt, arg, sizeof(rt)))
418 			return -EFAULT;
419 
420 		rtnl_lock();
421 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
422 		if (err == 0) {
423 			struct fib_table *tb;
424 
425 			if (cmd == SIOCDELRT) {
426 				tb = fib_get_table(cfg.fc_table);
427 				if (tb)
428 					err = tb->tb_delete(tb, &cfg);
429 				else
430 					err = -ESRCH;
431 			} else {
432 				tb = fib_new_table(cfg.fc_table);
433 				if (tb)
434 					err = tb->tb_insert(tb, &cfg);
435 				else
436 					err = -ENOBUFS;
437 			}
438 
439 			/* allocated by rtentry_to_fib_config() */
440 			kfree(cfg.fc_mx);
441 		}
442 		rtnl_unlock();
443 		return err;
444 	}
445 	return -EINVAL;
446 }
447 
448 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
449 	[RTA_DST]		= { .type = NLA_U32 },
450 	[RTA_SRC]		= { .type = NLA_U32 },
451 	[RTA_IIF]		= { .type = NLA_U32 },
452 	[RTA_OIF]		= { .type = NLA_U32 },
453 	[RTA_GATEWAY]		= { .type = NLA_U32 },
454 	[RTA_PRIORITY]		= { .type = NLA_U32 },
455 	[RTA_PREFSRC]		= { .type = NLA_U32 },
456 	[RTA_METRICS]		= { .type = NLA_NESTED },
457 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
458 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
459 	[RTA_FLOW]		= { .type = NLA_U32 },
460 };
461 
462 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
463 			     struct fib_config *cfg)
464 {
465 	struct nlattr *attr;
466 	int err, remaining;
467 	struct rtmsg *rtm;
468 
469 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
470 	if (err < 0)
471 		goto errout;
472 
473 	memset(cfg, 0, sizeof(*cfg));
474 
475 	rtm = nlmsg_data(nlh);
476 	cfg->fc_dst_len = rtm->rtm_dst_len;
477 	cfg->fc_tos = rtm->rtm_tos;
478 	cfg->fc_table = rtm->rtm_table;
479 	cfg->fc_protocol = rtm->rtm_protocol;
480 	cfg->fc_scope = rtm->rtm_scope;
481 	cfg->fc_type = rtm->rtm_type;
482 	cfg->fc_flags = rtm->rtm_flags;
483 	cfg->fc_nlflags = nlh->nlmsg_flags;
484 
485 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
486 	cfg->fc_nlinfo.nlh = nlh;
487 
488 	if (cfg->fc_type > RTN_MAX) {
489 		err = -EINVAL;
490 		goto errout;
491 	}
492 
493 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
494 		switch (nla_type(attr)) {
495 		case RTA_DST:
496 			cfg->fc_dst = nla_get_be32(attr);
497 			break;
498 		case RTA_OIF:
499 			cfg->fc_oif = nla_get_u32(attr);
500 			break;
501 		case RTA_GATEWAY:
502 			cfg->fc_gw = nla_get_be32(attr);
503 			break;
504 		case RTA_PRIORITY:
505 			cfg->fc_priority = nla_get_u32(attr);
506 			break;
507 		case RTA_PREFSRC:
508 			cfg->fc_prefsrc = nla_get_be32(attr);
509 			break;
510 		case RTA_METRICS:
511 			cfg->fc_mx = nla_data(attr);
512 			cfg->fc_mx_len = nla_len(attr);
513 			break;
514 		case RTA_MULTIPATH:
515 			cfg->fc_mp = nla_data(attr);
516 			cfg->fc_mp_len = nla_len(attr);
517 			break;
518 		case RTA_FLOW:
519 			cfg->fc_flow = nla_get_u32(attr);
520 			break;
521 		case RTA_TABLE:
522 			cfg->fc_table = nla_get_u32(attr);
523 			break;
524 		}
525 	}
526 
527 	return 0;
528 errout:
529 	return err;
530 }
531 
532 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
533 {
534 	struct fib_config cfg;
535 	struct fib_table *tb;
536 	int err;
537 
538 	err = rtm_to_fib_config(skb, nlh, &cfg);
539 	if (err < 0)
540 		goto errout;
541 
542 	tb = fib_get_table(cfg.fc_table);
543 	if (tb == NULL) {
544 		err = -ESRCH;
545 		goto errout;
546 	}
547 
548 	err = tb->tb_delete(tb, &cfg);
549 errout:
550 	return err;
551 }
552 
553 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
554 {
555 	struct fib_config cfg;
556 	struct fib_table *tb;
557 	int err;
558 
559 	err = rtm_to_fib_config(skb, nlh, &cfg);
560 	if (err < 0)
561 		goto errout;
562 
563 	tb = fib_new_table(cfg.fc_table);
564 	if (tb == NULL) {
565 		err = -ENOBUFS;
566 		goto errout;
567 	}
568 
569 	err = tb->tb_insert(tb, &cfg);
570 errout:
571 	return err;
572 }
573 
574 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
575 {
576 	unsigned int h, s_h;
577 	unsigned int e = 0, s_e;
578 	struct fib_table *tb;
579 	struct hlist_node *node;
580 	int dumped = 0;
581 
582 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
584 		return ip_rt_dump(skb, cb);
585 
586 	s_h = cb->args[0];
587 	s_e = cb->args[1];
588 
589 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590 		e = 0;
591 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
592 			if (e < s_e)
593 				goto next;
594 			if (dumped)
595 				memset(&cb->args[2], 0, sizeof(cb->args) -
596 						 2 * sizeof(cb->args[0]));
597 			if (tb->tb_dump(tb, skb, cb) < 0)
598 				goto out;
599 			dumped = 1;
600 next:
601 			e++;
602 		}
603 	}
604 out:
605 	cb->args[1] = e;
606 	cb->args[0] = h;
607 
608 	return skb->len;
609 }
610 
611 /* Prepare and feed intra-kernel routing request.
612    Really, it should be netlink message, but :-( netlink
613    can be not configured, so that we feed it directly
614    to fib engine. It is legal, because all events occur
615    only when netlink is already locked.
616  */
617 
618 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
619 {
620 	struct fib_table *tb;
621 	struct fib_config cfg = {
622 		.fc_protocol = RTPROT_KERNEL,
623 		.fc_type = type,
624 		.fc_dst = dst,
625 		.fc_dst_len = dst_len,
626 		.fc_prefsrc = ifa->ifa_local,
627 		.fc_oif = ifa->ifa_dev->dev->ifindex,
628 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629 	};
630 
631 	if (type == RTN_UNICAST)
632 		tb = fib_new_table(RT_TABLE_MAIN);
633 	else
634 		tb = fib_new_table(RT_TABLE_LOCAL);
635 
636 	if (tb == NULL)
637 		return;
638 
639 	cfg.fc_table = tb->tb_id;
640 
641 	if (type != RTN_LOCAL)
642 		cfg.fc_scope = RT_SCOPE_LINK;
643 	else
644 		cfg.fc_scope = RT_SCOPE_HOST;
645 
646 	if (cmd == RTM_NEWROUTE)
647 		tb->tb_insert(tb, &cfg);
648 	else
649 		tb->tb_delete(tb, &cfg);
650 }
651 
652 void fib_add_ifaddr(struct in_ifaddr *ifa)
653 {
654 	struct in_device *in_dev = ifa->ifa_dev;
655 	struct net_device *dev = in_dev->dev;
656 	struct in_ifaddr *prim = ifa;
657 	__be32 mask = ifa->ifa_mask;
658 	__be32 addr = ifa->ifa_local;
659 	__be32 prefix = ifa->ifa_address&mask;
660 
661 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
663 		if (prim == NULL) {
664 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
665 			return;
666 		}
667 	}
668 
669 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
670 
671 	if (!(dev->flags&IFF_UP))
672 		return;
673 
674 	/* Add broadcast address, if it is explicitly assigned. */
675 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
676 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677 
678 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
682 
683 		/* Add network specific broadcasts, when it takes a sense */
684 		if (ifa->ifa_prefixlen < 31) {
685 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
687 		}
688 	}
689 }
690 
691 static void fib_del_ifaddr(struct in_ifaddr *ifa)
692 {
693 	struct in_device *in_dev = ifa->ifa_dev;
694 	struct net_device *dev = in_dev->dev;
695 	struct in_ifaddr *ifa1;
696 	struct in_ifaddr *prim = ifa;
697 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
698 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
699 #define LOCAL_OK	1
700 #define BRD_OK		2
701 #define BRD0_OK		4
702 #define BRD1_OK		8
703 	unsigned ok = 0;
704 
705 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
706 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
707 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
708 	else {
709 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
710 		if (prim == NULL) {
711 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
712 			return;
713 		}
714 	}
715 
716 	/* Deletion is more complicated than add.
717 	   We should take care of not to delete too much :-)
718 
719 	   Scan address list to be sure that addresses are really gone.
720 	 */
721 
722 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
723 		if (ifa->ifa_local == ifa1->ifa_local)
724 			ok |= LOCAL_OK;
725 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
726 			ok |= BRD_OK;
727 		if (brd == ifa1->ifa_broadcast)
728 			ok |= BRD1_OK;
729 		if (any == ifa1->ifa_broadcast)
730 			ok |= BRD0_OK;
731 	}
732 
733 	if (!(ok&BRD_OK))
734 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735 	if (!(ok&BRD1_OK))
736 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
737 	if (!(ok&BRD0_OK))
738 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
739 	if (!(ok&LOCAL_OK)) {
740 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
741 
742 		/* Check, that this local address finally disappeared. */
743 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
744 			/* And the last, but not the least thing.
745 			   We must flush stray FIB entries.
746 
747 			   First of all, we scan fib_info list searching
748 			   for stray nexthop entries, then ignite fib_flush.
749 			*/
750 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
751 				fib_flush();
752 		}
753 	}
754 #undef LOCAL_OK
755 #undef BRD_OK
756 #undef BRD0_OK
757 #undef BRD1_OK
758 }
759 
760 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761 {
762 
763 	struct fib_result       res;
764 	struct flowi            fl = { .mark = frn->fl_mark,
765 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
766 							    .tos = frn->fl_tos,
767 							    .scope = frn->fl_scope } } };
768 
769 #ifdef CONFIG_IP_MULTIPLE_TABLES
770 	res.r = NULL;
771 #endif
772 
773 	frn->err = -ENOENT;
774 	if (tb) {
775 		local_bh_disable();
776 
777 		frn->tb_id = tb->tb_id;
778 		frn->err = tb->tb_lookup(tb, &fl, &res);
779 
780 		if (!frn->err) {
781 			frn->prefixlen = res.prefixlen;
782 			frn->nh_sel = res.nh_sel;
783 			frn->type = res.type;
784 			frn->scope = res.scope;
785 			fib_res_put(&res);
786 		}
787 		local_bh_enable();
788 	}
789 }
790 
791 static void nl_fib_input(struct sk_buff *skb)
792 {
793 	struct fib_result_nl *frn;
794 	struct nlmsghdr *nlh;
795 	struct fib_table *tb;
796 	u32 pid;
797 
798 	nlh = nlmsg_hdr(skb);
799 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
800 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
801 		kfree_skb(skb);
802 		return;
803 	}
804 
805 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
806 	tb = fib_get_table(frn->tb_id_in);
807 
808 	nl_fib_lookup(frn, tb);
809 
810 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
811 	NETLINK_CB(skb).pid = 0;         /* from kernel */
812 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
813 	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
814 }
815 
816 static void nl_fib_lookup_init(void)
817 {
818 	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
819 				      nl_fib_input, NULL, THIS_MODULE);
820 }
821 
/*
 * Take IP routing state down for @dev: sync the FIB nexthops down
 * (flushing the tables if fib_sync_down() returns non-zero), flush
 * the routing cache and call arp_ifdown() for the device.
 * @force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
829 
830 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
831 {
832 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
833 
834 	switch (event) {
835 	case NETDEV_UP:
836 		fib_add_ifaddr(ifa);
837 #ifdef CONFIG_IP_ROUTE_MULTIPATH
838 		fib_sync_up(ifa->ifa_dev->dev);
839 #endif
840 		rt_cache_flush(-1);
841 		break;
842 	case NETDEV_DOWN:
843 		fib_del_ifaddr(ifa);
844 		if (ifa->ifa_dev->ifa_list == NULL) {
845 			/* Last address was deleted from this interface.
846 			   Disable IP.
847 			 */
848 			fib_disable_ip(ifa->ifa_dev->dev, 1);
849 		} else {
850 			rt_cache_flush(-1);
851 		}
852 		break;
853 	}
854 	return NOTIFY_DONE;
855 }
856 
857 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
858 {
859 	struct net_device *dev = ptr;
860 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
861 
862 	if (dev->nd_net != &init_net)
863 		return NOTIFY_DONE;
864 
865 	if (event == NETDEV_UNREGISTER) {
866 		fib_disable_ip(dev, 2);
867 		return NOTIFY_DONE;
868 	}
869 
870 	if (!in_dev)
871 		return NOTIFY_DONE;
872 
873 	switch (event) {
874 	case NETDEV_UP:
875 		for_ifa(in_dev) {
876 			fib_add_ifaddr(ifa);
877 		} endfor_ifa(in_dev);
878 #ifdef CONFIG_IP_ROUTE_MULTIPATH
879 		fib_sync_up(dev);
880 #endif
881 		rt_cache_flush(-1);
882 		break;
883 	case NETDEV_DOWN:
884 		fib_disable_ip(dev, 0);
885 		break;
886 	case NETDEV_CHANGEMTU:
887 	case NETDEV_CHANGE:
888 		rt_cache_flush(0);
889 		break;
890 	}
891 	return NOTIFY_DONE;
892 }
893 
894 static struct notifier_block fib_inetaddr_notifier = {
895 	.notifier_call =fib_inetaddr_event,
896 };
897 
898 static struct notifier_block fib_netdev_notifier = {
899 	.notifier_call =fib_netdev_event,
900 };
901 
902 void __init ip_fib_init(void)
903 {
904 	unsigned int i;
905 
906 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
907 		INIT_HLIST_HEAD(&fib_table_hash[i]);
908 #ifndef CONFIG_IP_MULTIPLE_TABLES
909 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
910 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
911 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
912 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
913 #else
914 	fib4_rules_init();
915 #endif
916 
917 	register_netdevice_notifier(&fib_netdev_notifier);
918 	register_inetaddr_notifier(&fib_inetaddr_notifier);
919 	nl_fib_lookup_init();
920 
921 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
922 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
923 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
924 }
925 
/* Symbols made available to modules. */
EXPORT_SYMBOL(ip_dev_find);
EXPORT_SYMBOL(inet_addr_type);
928