xref: /linux/net/ipv4/fib_frontend.c (revision 7f3edee81fbd49114c28057512906f169caa0bed)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
/* Debug helper: passes its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)
51 
/*
 * Kernel-side NETLINK_FIB_LOOKUP socket.  Created by nl_fib_lookup_init()
 * and used by nl_fib_input() as the sender of lookup replies.
 */
static struct sock *fibnl;
53 
54 #ifndef CONFIG_IP_MULTIPLE_TABLES
55 
/*
 * Without CONFIG_IP_MULTIPLE_TABLES only these two tables exist; both
 * are created by fib4_rules_init().
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* One bucket is enough: fib4_rules_init() links both tables into slot 0. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61 
62 static void __init fib4_rules_init(void)
63 {
64 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
67 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68 }
69 #else
70 
/*
 * Number of buckets in the table hash.  Table ids are mapped to a bucket
 * with "id & (FIB_TABLE_HASHSZ - 1)", so this has to be a power of two.
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
73 
74 struct fib_table *fib_new_table(u32 id)
75 {
76 	struct fib_table *tb;
77 	unsigned int h;
78 
79 	if (id == 0)
80 		id = RT_TABLE_MAIN;
81 	tb = fib_get_table(id);
82 	if (tb)
83 		return tb;
84 	tb = fib_hash_init(id);
85 	if (!tb)
86 		return NULL;
87 	h = id & (FIB_TABLE_HASHSZ - 1);
88 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
89 	return tb;
90 }
91 
92 struct fib_table *fib_get_table(u32 id)
93 {
94 	struct fib_table *tb;
95 	struct hlist_node *node;
96 	unsigned int h;
97 
98 	if (id == 0)
99 		id = RT_TABLE_MAIN;
100 	h = id & (FIB_TABLE_HASHSZ - 1);
101 	rcu_read_lock();
102 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103 		if (tb->tb_id == id) {
104 			rcu_read_unlock();
105 			return tb;
106 		}
107 	}
108 	rcu_read_unlock();
109 	return NULL;
110 }
111 #endif /* CONFIG_IP_MULTIPLE_TABLES */
112 
113 static void fib_flush(void)
114 {
115 	int flushed = 0;
116 	struct fib_table *tb;
117 	struct hlist_node *node;
118 	unsigned int h;
119 
120 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122 			flushed += tb->tb_flush(tb);
123 	}
124 
125 	if (flushed)
126 		rt_cache_flush(-1);
127 }
128 
129 /*
130  *	Find the first device with a given source address.
131  */
132 
133 struct net_device * ip_dev_find(__be32 addr)
134 {
135 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136 	struct fib_result res;
137 	struct net_device *dev = NULL;
138 	struct fib_table *local_table;
139 
140 #ifdef CONFIG_IP_MULTIPLE_TABLES
141 	res.r = NULL;
142 #endif
143 
144 	local_table = fib_get_table(RT_TABLE_LOCAL);
145 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
146 		return NULL;
147 	if (res.type != RTN_LOCAL)
148 		goto out;
149 	dev = FIB_RES_DEV(res);
150 
151 	if (dev)
152 		dev_hold(dev);
153 out:
154 	fib_res_put(&res);
155 	return dev;
156 }
157 
158 unsigned inet_addr_type(__be32 addr)
159 {
160 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
161 	struct fib_result	res;
162 	unsigned ret = RTN_BROADCAST;
163 	struct fib_table *local_table;
164 
165 	if (ZERONET(addr) || BADCLASS(addr))
166 		return RTN_BROADCAST;
167 	if (MULTICAST(addr))
168 		return RTN_MULTICAST;
169 
170 #ifdef CONFIG_IP_MULTIPLE_TABLES
171 	res.r = NULL;
172 #endif
173 
174 	local_table = fib_get_table(RT_TABLE_LOCAL);
175 	if (local_table) {
176 		ret = RTN_UNICAST;
177 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
178 			ret = res.type;
179 			fib_res_put(&res);
180 		}
181 	}
182 	return ret;
183 }
184 
185 /* Given (packet source, input interface) and optional (dst, oif, tos):
186    - (main) check, that source is valid i.e. not broadcast or our local
187      address.
188    - figure out what "logical" interface this packet arrived
189      and calculate "specific destination" address.
190    - check, that packet arrived from expected physical interface.
191  */
192 
193 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
194 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
195 {
196 	struct in_device *in_dev;
197 	struct flowi fl = { .nl_u = { .ip4_u =
198 				      { .daddr = src,
199 					.saddr = dst,
200 					.tos = tos } },
201 			    .iif = oif };
202 	struct fib_result res;
203 	int no_addr, rpf;
204 	int ret;
205 
206 	no_addr = rpf = 0;
207 	rcu_read_lock();
208 	in_dev = __in_dev_get_rcu(dev);
209 	if (in_dev) {
210 		no_addr = in_dev->ifa_list == NULL;
211 		rpf = IN_DEV_RPFILTER(in_dev);
212 	}
213 	rcu_read_unlock();
214 
215 	if (in_dev == NULL)
216 		goto e_inval;
217 
218 	if (fib_lookup(&fl, &res))
219 		goto last_resort;
220 	if (res.type != RTN_UNICAST)
221 		goto e_inval_res;
222 	*spec_dst = FIB_RES_PREFSRC(res);
223 	fib_combine_itag(itag, &res);
224 #ifdef CONFIG_IP_ROUTE_MULTIPATH
225 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
226 #else
227 	if (FIB_RES_DEV(res) == dev)
228 #endif
229 	{
230 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
231 		fib_res_put(&res);
232 		return ret;
233 	}
234 	fib_res_put(&res);
235 	if (no_addr)
236 		goto last_resort;
237 	if (rpf)
238 		goto e_inval;
239 	fl.oif = dev->ifindex;
240 
241 	ret = 0;
242 	if (fib_lookup(&fl, &res) == 0) {
243 		if (res.type == RTN_UNICAST) {
244 			*spec_dst = FIB_RES_PREFSRC(res);
245 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
246 		}
247 		fib_res_put(&res);
248 	}
249 	return ret;
250 
251 last_resort:
252 	if (rpf)
253 		goto e_inval;
254 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
255 	*itag = 0;
256 	return 0;
257 
258 e_inval_res:
259 	fib_res_put(&res);
260 e_inval:
261 	return -EINVAL;
262 }
263 
264 static inline __be32 sk_extract_addr(struct sockaddr *addr)
265 {
266 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
267 }
268 
269 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
270 {
271 	struct nlattr *nla;
272 
273 	nla = (struct nlattr *) ((char *) mx + len);
274 	nla->nla_type = type;
275 	nla->nla_len = nla_attr_size(4);
276 	*(u32 *) nla_data(nla) = value;
277 
278 	return len + nla_total_size(4);
279 }
280 
281 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
282 				 struct fib_config *cfg)
283 {
284 	__be32 addr;
285 	int plen;
286 
287 	memset(cfg, 0, sizeof(*cfg));
288 
289 	if (rt->rt_dst.sa_family != AF_INET)
290 		return -EAFNOSUPPORT;
291 
292 	/*
293 	 * Check mask for validity:
294 	 * a) it must be contiguous.
295 	 * b) destination must have all host bits clear.
296 	 * c) if application forgot to set correct family (AF_INET),
297 	 *    reject request unless it is absolutely clear i.e.
298 	 *    both family and mask are zero.
299 	 */
300 	plen = 32;
301 	addr = sk_extract_addr(&rt->rt_dst);
302 	if (!(rt->rt_flags & RTF_HOST)) {
303 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
304 
305 		if (rt->rt_genmask.sa_family != AF_INET) {
306 			if (mask || rt->rt_genmask.sa_family)
307 				return -EAFNOSUPPORT;
308 		}
309 
310 		if (bad_mask(mask, addr))
311 			return -EINVAL;
312 
313 		plen = inet_mask_len(mask);
314 	}
315 
316 	cfg->fc_dst_len = plen;
317 	cfg->fc_dst = addr;
318 
319 	if (cmd != SIOCDELRT) {
320 		cfg->fc_nlflags = NLM_F_CREATE;
321 		cfg->fc_protocol = RTPROT_BOOT;
322 	}
323 
324 	if (rt->rt_metric)
325 		cfg->fc_priority = rt->rt_metric - 1;
326 
327 	if (rt->rt_flags & RTF_REJECT) {
328 		cfg->fc_scope = RT_SCOPE_HOST;
329 		cfg->fc_type = RTN_UNREACHABLE;
330 		return 0;
331 	}
332 
333 	cfg->fc_scope = RT_SCOPE_NOWHERE;
334 	cfg->fc_type = RTN_UNICAST;
335 
336 	if (rt->rt_dev) {
337 		char *colon;
338 		struct net_device *dev;
339 		char devname[IFNAMSIZ];
340 
341 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
342 			return -EFAULT;
343 
344 		devname[IFNAMSIZ-1] = 0;
345 		colon = strchr(devname, ':');
346 		if (colon)
347 			*colon = 0;
348 		dev = __dev_get_by_name(&init_net, devname);
349 		if (!dev)
350 			return -ENODEV;
351 		cfg->fc_oif = dev->ifindex;
352 		if (colon) {
353 			struct in_ifaddr *ifa;
354 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
355 			if (!in_dev)
356 				return -ENODEV;
357 			*colon = ':';
358 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
359 				if (strcmp(ifa->ifa_label, devname) == 0)
360 					break;
361 			if (ifa == NULL)
362 				return -ENODEV;
363 			cfg->fc_prefsrc = ifa->ifa_local;
364 		}
365 	}
366 
367 	addr = sk_extract_addr(&rt->rt_gateway);
368 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
369 		cfg->fc_gw = addr;
370 		if (rt->rt_flags & RTF_GATEWAY &&
371 		    inet_addr_type(addr) == RTN_UNICAST)
372 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
373 	}
374 
375 	if (cmd == SIOCDELRT)
376 		return 0;
377 
378 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
379 		return -EINVAL;
380 
381 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
382 		cfg->fc_scope = RT_SCOPE_LINK;
383 
384 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
385 		struct nlattr *mx;
386 		int len = 0;
387 
388 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
389 		if (mx == NULL)
390 			return -ENOMEM;
391 
392 		if (rt->rt_flags & RTF_MTU)
393 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
394 
395 		if (rt->rt_flags & RTF_WINDOW)
396 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
397 
398 		if (rt->rt_flags & RTF_IRTT)
399 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
400 
401 		cfg->fc_mx = mx;
402 		cfg->fc_mx_len = len;
403 	}
404 
405 	return 0;
406 }
407 
408 /*
409  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
410  */
411 
412 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
413 {
414 	struct fib_config cfg;
415 	struct rtentry rt;
416 	int err;
417 
418 	switch (cmd) {
419 	case SIOCADDRT:		/* Add a route */
420 	case SIOCDELRT:		/* Delete a route */
421 		if (!capable(CAP_NET_ADMIN))
422 			return -EPERM;
423 
424 		if (copy_from_user(&rt, arg, sizeof(rt)))
425 			return -EFAULT;
426 
427 		rtnl_lock();
428 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
429 		if (err == 0) {
430 			struct fib_table *tb;
431 
432 			if (cmd == SIOCDELRT) {
433 				tb = fib_get_table(cfg.fc_table);
434 				if (tb)
435 					err = tb->tb_delete(tb, &cfg);
436 				else
437 					err = -ESRCH;
438 			} else {
439 				tb = fib_new_table(cfg.fc_table);
440 				if (tb)
441 					err = tb->tb_insert(tb, &cfg);
442 				else
443 					err = -ENOBUFS;
444 			}
445 
446 			/* allocated by rtentry_to_fib_config() */
447 			kfree(cfg.fc_mx);
448 		}
449 		rtnl_unlock();
450 		return err;
451 	}
452 	return -EINVAL;
453 }
454 
455 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
456 	[RTA_DST]		= { .type = NLA_U32 },
457 	[RTA_SRC]		= { .type = NLA_U32 },
458 	[RTA_IIF]		= { .type = NLA_U32 },
459 	[RTA_OIF]		= { .type = NLA_U32 },
460 	[RTA_GATEWAY]		= { .type = NLA_U32 },
461 	[RTA_PRIORITY]		= { .type = NLA_U32 },
462 	[RTA_PREFSRC]		= { .type = NLA_U32 },
463 	[RTA_METRICS]		= { .type = NLA_NESTED },
464 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
465 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
466 	[RTA_FLOW]		= { .type = NLA_U32 },
467 };
468 
469 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
470 			     struct fib_config *cfg)
471 {
472 	struct nlattr *attr;
473 	int err, remaining;
474 	struct rtmsg *rtm;
475 
476 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
477 	if (err < 0)
478 		goto errout;
479 
480 	memset(cfg, 0, sizeof(*cfg));
481 
482 	rtm = nlmsg_data(nlh);
483 	cfg->fc_dst_len = rtm->rtm_dst_len;
484 	cfg->fc_tos = rtm->rtm_tos;
485 	cfg->fc_table = rtm->rtm_table;
486 	cfg->fc_protocol = rtm->rtm_protocol;
487 	cfg->fc_scope = rtm->rtm_scope;
488 	cfg->fc_type = rtm->rtm_type;
489 	cfg->fc_flags = rtm->rtm_flags;
490 	cfg->fc_nlflags = nlh->nlmsg_flags;
491 
492 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
493 	cfg->fc_nlinfo.nlh = nlh;
494 
495 	if (cfg->fc_type > RTN_MAX) {
496 		err = -EINVAL;
497 		goto errout;
498 	}
499 
500 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
501 		switch (nla_type(attr)) {
502 		case RTA_DST:
503 			cfg->fc_dst = nla_get_be32(attr);
504 			break;
505 		case RTA_OIF:
506 			cfg->fc_oif = nla_get_u32(attr);
507 			break;
508 		case RTA_GATEWAY:
509 			cfg->fc_gw = nla_get_be32(attr);
510 			break;
511 		case RTA_PRIORITY:
512 			cfg->fc_priority = nla_get_u32(attr);
513 			break;
514 		case RTA_PREFSRC:
515 			cfg->fc_prefsrc = nla_get_be32(attr);
516 			break;
517 		case RTA_METRICS:
518 			cfg->fc_mx = nla_data(attr);
519 			cfg->fc_mx_len = nla_len(attr);
520 			break;
521 		case RTA_MULTIPATH:
522 			cfg->fc_mp = nla_data(attr);
523 			cfg->fc_mp_len = nla_len(attr);
524 			break;
525 		case RTA_FLOW:
526 			cfg->fc_flow = nla_get_u32(attr);
527 			break;
528 		case RTA_TABLE:
529 			cfg->fc_table = nla_get_u32(attr);
530 			break;
531 		}
532 	}
533 
534 	return 0;
535 errout:
536 	return err;
537 }
538 
539 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540 {
541 	struct fib_config cfg;
542 	struct fib_table *tb;
543 	int err;
544 
545 	err = rtm_to_fib_config(skb, nlh, &cfg);
546 	if (err < 0)
547 		goto errout;
548 
549 	tb = fib_get_table(cfg.fc_table);
550 	if (tb == NULL) {
551 		err = -ESRCH;
552 		goto errout;
553 	}
554 
555 	err = tb->tb_delete(tb, &cfg);
556 errout:
557 	return err;
558 }
559 
560 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561 {
562 	struct fib_config cfg;
563 	struct fib_table *tb;
564 	int err;
565 
566 	err = rtm_to_fib_config(skb, nlh, &cfg);
567 	if (err < 0)
568 		goto errout;
569 
570 	tb = fib_new_table(cfg.fc_table);
571 	if (tb == NULL) {
572 		err = -ENOBUFS;
573 		goto errout;
574 	}
575 
576 	err = tb->tb_insert(tb, &cfg);
577 errout:
578 	return err;
579 }
580 
581 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
582 {
583 	unsigned int h, s_h;
584 	unsigned int e = 0, s_e;
585 	struct fib_table *tb;
586 	struct hlist_node *node;
587 	int dumped = 0;
588 
589 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
590 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
591 		return ip_rt_dump(skb, cb);
592 
593 	s_h = cb->args[0];
594 	s_e = cb->args[1];
595 
596 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597 		e = 0;
598 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
599 			if (e < s_e)
600 				goto next;
601 			if (dumped)
602 				memset(&cb->args[2], 0, sizeof(cb->args) -
603 						 2 * sizeof(cb->args[0]));
604 			if (tb->tb_dump(tb, skb, cb) < 0)
605 				goto out;
606 			dumped = 1;
607 next:
608 			e++;
609 		}
610 	}
611 out:
612 	cb->args[1] = e;
613 	cb->args[0] = h;
614 
615 	return skb->len;
616 }
617 
618 /* Prepare and feed intra-kernel routing request.
619    Really, it should be netlink message, but :-( netlink
620    can be not configured, so that we feed it directly
621    to fib engine. It is legal, because all events occur
622    only when netlink is already locked.
623  */
624 
625 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626 {
627 	struct fib_table *tb;
628 	struct fib_config cfg = {
629 		.fc_protocol = RTPROT_KERNEL,
630 		.fc_type = type,
631 		.fc_dst = dst,
632 		.fc_dst_len = dst_len,
633 		.fc_prefsrc = ifa->ifa_local,
634 		.fc_oif = ifa->ifa_dev->dev->ifindex,
635 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
636 	};
637 
638 	if (type == RTN_UNICAST)
639 		tb = fib_new_table(RT_TABLE_MAIN);
640 	else
641 		tb = fib_new_table(RT_TABLE_LOCAL);
642 
643 	if (tb == NULL)
644 		return;
645 
646 	cfg.fc_table = tb->tb_id;
647 
648 	if (type != RTN_LOCAL)
649 		cfg.fc_scope = RT_SCOPE_LINK;
650 	else
651 		cfg.fc_scope = RT_SCOPE_HOST;
652 
653 	if (cmd == RTM_NEWROUTE)
654 		tb->tb_insert(tb, &cfg);
655 	else
656 		tb->tb_delete(tb, &cfg);
657 }
658 
659 void fib_add_ifaddr(struct in_ifaddr *ifa)
660 {
661 	struct in_device *in_dev = ifa->ifa_dev;
662 	struct net_device *dev = in_dev->dev;
663 	struct in_ifaddr *prim = ifa;
664 	__be32 mask = ifa->ifa_mask;
665 	__be32 addr = ifa->ifa_local;
666 	__be32 prefix = ifa->ifa_address&mask;
667 
668 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
669 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
670 		if (prim == NULL) {
671 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
672 			return;
673 		}
674 	}
675 
676 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
677 
678 	if (!(dev->flags&IFF_UP))
679 		return;
680 
681 	/* Add broadcast address, if it is explicitly assigned. */
682 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
684 
685 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
687 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
689 
690 		/* Add network specific broadcasts, when it takes a sense */
691 		if (ifa->ifa_prefixlen < 31) {
692 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
693 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
694 		}
695 	}
696 }
697 
698 static void fib_del_ifaddr(struct in_ifaddr *ifa)
699 {
700 	struct in_device *in_dev = ifa->ifa_dev;
701 	struct net_device *dev = in_dev->dev;
702 	struct in_ifaddr *ifa1;
703 	struct in_ifaddr *prim = ifa;
704 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
705 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
706 #define LOCAL_OK	1
707 #define BRD_OK		2
708 #define BRD0_OK		4
709 #define BRD1_OK		8
710 	unsigned ok = 0;
711 
712 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
713 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
714 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
715 	else {
716 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
717 		if (prim == NULL) {
718 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
719 			return;
720 		}
721 	}
722 
723 	/* Deletion is more complicated than add.
724 	   We should take care of not to delete too much :-)
725 
726 	   Scan address list to be sure that addresses are really gone.
727 	 */
728 
729 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
730 		if (ifa->ifa_local == ifa1->ifa_local)
731 			ok |= LOCAL_OK;
732 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
733 			ok |= BRD_OK;
734 		if (brd == ifa1->ifa_broadcast)
735 			ok |= BRD1_OK;
736 		if (any == ifa1->ifa_broadcast)
737 			ok |= BRD0_OK;
738 	}
739 
740 	if (!(ok&BRD_OK))
741 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742 	if (!(ok&BRD1_OK))
743 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
744 	if (!(ok&BRD0_OK))
745 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
746 	if (!(ok&LOCAL_OK)) {
747 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
748 
749 		/* Check, that this local address finally disappeared. */
750 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
751 			/* And the last, but not the least thing.
752 			   We must flush stray FIB entries.
753 
754 			   First of all, we scan fib_info list searching
755 			   for stray nexthop entries, then ignite fib_flush.
756 			*/
757 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
758 				fib_flush();
759 		}
760 	}
761 #undef LOCAL_OK
762 #undef BRD_OK
763 #undef BRD0_OK
764 #undef BRD1_OK
765 }
766 
767 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
768 {
769 
770 	struct fib_result       res;
771 	struct flowi            fl = { .mark = frn->fl_mark,
772 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
773 							    .tos = frn->fl_tos,
774 							    .scope = frn->fl_scope } } };
775 
776 #ifdef CONFIG_IP_MULTIPLE_TABLES
777 	res.r = NULL;
778 #endif
779 
780 	frn->err = -ENOENT;
781 	if (tb) {
782 		local_bh_disable();
783 
784 		frn->tb_id = tb->tb_id;
785 		frn->err = tb->tb_lookup(tb, &fl, &res);
786 
787 		if (!frn->err) {
788 			frn->prefixlen = res.prefixlen;
789 			frn->nh_sel = res.nh_sel;
790 			frn->type = res.type;
791 			frn->scope = res.scope;
792 			fib_res_put(&res);
793 		}
794 		local_bh_enable();
795 	}
796 }
797 
798 static void nl_fib_input(struct sk_buff *skb)
799 {
800 	struct fib_result_nl *frn;
801 	struct nlmsghdr *nlh;
802 	struct fib_table *tb;
803 	u32 pid;
804 
805 	nlh = nlmsg_hdr(skb);
806 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
807 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
808 		return;
809 
810 	skb = skb_clone(skb, GFP_KERNEL);
811 	if (skb == NULL)
812 		return;
813 	nlh = nlmsg_hdr(skb);
814 
815 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
816 	tb = fib_get_table(frn->tb_id_in);
817 
818 	nl_fib_lookup(frn, tb);
819 
820 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
821 	NETLINK_CB(skb).pid = 0;         /* from kernel */
822 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
823 	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
824 }
825 
826 static void nl_fib_lookup_init(void)
827 {
828 	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
829 				      nl_fib_input, NULL, THIS_MODULE);
830 }
831 
/*
 * Take down IP routing state for a device: sync the nexthops that use
 * it (flushing the FIB tables if that reports changes), then flush the
 * routing cache and the device's ARP state.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
839 
840 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
841 {
842 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
843 
844 	switch (event) {
845 	case NETDEV_UP:
846 		fib_add_ifaddr(ifa);
847 #ifdef CONFIG_IP_ROUTE_MULTIPATH
848 		fib_sync_up(ifa->ifa_dev->dev);
849 #endif
850 		rt_cache_flush(-1);
851 		break;
852 	case NETDEV_DOWN:
853 		fib_del_ifaddr(ifa);
854 		if (ifa->ifa_dev->ifa_list == NULL) {
855 			/* Last address was deleted from this interface.
856 			   Disable IP.
857 			 */
858 			fib_disable_ip(ifa->ifa_dev->dev, 1);
859 		} else {
860 			rt_cache_flush(-1);
861 		}
862 		break;
863 	}
864 	return NOTIFY_DONE;
865 }
866 
867 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
868 {
869 	struct net_device *dev = ptr;
870 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
871 
872 	if (dev->nd_net != &init_net)
873 		return NOTIFY_DONE;
874 
875 	if (event == NETDEV_UNREGISTER) {
876 		fib_disable_ip(dev, 2);
877 		return NOTIFY_DONE;
878 	}
879 
880 	if (!in_dev)
881 		return NOTIFY_DONE;
882 
883 	switch (event) {
884 	case NETDEV_UP:
885 		for_ifa(in_dev) {
886 			fib_add_ifaddr(ifa);
887 		} endfor_ifa(in_dev);
888 #ifdef CONFIG_IP_ROUTE_MULTIPATH
889 		fib_sync_up(dev);
890 #endif
891 		rt_cache_flush(-1);
892 		break;
893 	case NETDEV_DOWN:
894 		fib_disable_ip(dev, 0);
895 		break;
896 	case NETDEV_CHANGEMTU:
897 	case NETDEV_CHANGE:
898 		rt_cache_flush(0);
899 		break;
900 	}
901 	return NOTIFY_DONE;
902 }
903 
/* Notifier block that routes inetaddr events to fib_inetaddr_event(). */
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call =fib_inetaddr_event,
};
907 
/* Notifier block that routes netdevice events to fib_netdev_event(). */
static struct notifier_block fib_netdev_notifier = {
	.notifier_call =fib_netdev_event,
};
911 
912 void __init ip_fib_init(void)
913 {
914 	unsigned int i;
915 
916 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
917 		INIT_HLIST_HEAD(&fib_table_hash[i]);
918 
919 	fib4_rules_init();
920 
921 	register_netdevice_notifier(&fib_netdev_notifier);
922 	register_inetaddr_notifier(&fib_inetaddr_notifier);
923 	nl_fib_lookup_init();
924 
925 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
926 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
927 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
928 }
929 
/* Address classification and device lookup are exported for use by modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
932