xref: /linux/net/ipv4/fib_frontend.c (revision dfc349402de8e95f6a42e8341e9ea193b718eee3)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  *		This program is free software; you can redistribute it and/or
11  *		modify it under the terms of the GNU General Public License
12  *		as published by the Free Software Foundation; either version
13  *		2 of the License, or (at your option) any later version.
14  */
15 
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/in.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 
38 #include <net/ip.h>
39 #include <net/protocol.h>
40 #include <net/route.h>
41 #include <net/tcp.h>
42 #include <net/sock.h>
43 #include <net/arp.h>
44 #include <net/ip_fib.h>
45 #include <net/rtnetlink.h>
46 
47 #ifndef CONFIG_IP_MULTIPLE_TABLES
48 
49 static int __net_init fib4_rules_init(struct net *net)
50 {
51 	struct fib_table *local_table, *main_table;
52 
53 	local_table = fib_hash_table(RT_TABLE_LOCAL);
54 	if (local_table == NULL)
55 		return -ENOMEM;
56 
57 	main_table  = fib_hash_table(RT_TABLE_MAIN);
58 	if (main_table == NULL)
59 		goto fail;
60 
61 	hlist_add_head_rcu(&local_table->tb_hlist,
62 				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
63 	hlist_add_head_rcu(&main_table->tb_hlist,
64 				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
65 	return 0;
66 
67 fail:
68 	kfree(local_table);
69 	return -ENOMEM;
70 }
71 #else
72 
73 struct fib_table *fib_new_table(struct net *net, u32 id)
74 {
75 	struct fib_table *tb;
76 	unsigned int h;
77 
78 	if (id == 0)
79 		id = RT_TABLE_MAIN;
80 	tb = fib_get_table(net, id);
81 	if (tb)
82 		return tb;
83 
84 	tb = fib_hash_table(id);
85 	if (!tb)
86 		return NULL;
87 	h = id & (FIB_TABLE_HASHSZ - 1);
88 	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89 	return tb;
90 }
91 
92 struct fib_table *fib_get_table(struct net *net, u32 id)
93 {
94 	struct fib_table *tb;
95 	struct hlist_node *node;
96 	struct hlist_head *head;
97 	unsigned int h;
98 
99 	if (id == 0)
100 		id = RT_TABLE_MAIN;
101 	h = id & (FIB_TABLE_HASHSZ - 1);
102 
103 	rcu_read_lock();
104 	head = &net->ipv4.fib_table_hash[h];
105 	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
106 		if (tb->tb_id == id) {
107 			rcu_read_unlock();
108 			return tb;
109 		}
110 	}
111 	rcu_read_unlock();
112 	return NULL;
113 }
114 #endif /* CONFIG_IP_MULTIPLE_TABLES */
115 
116 void fib_select_default(struct net *net,
117 			const struct flowi *flp, struct fib_result *res)
118 {
119 	struct fib_table *tb;
120 	int table = RT_TABLE_MAIN;
121 #ifdef CONFIG_IP_MULTIPLE_TABLES
122 	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 		return;
124 	table = res->r->table;
125 #endif
126 	tb = fib_get_table(net, table);
127 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128 		tb->tb_select_default(tb, flp, res);
129 }
130 
131 static void fib_flush(struct net *net)
132 {
133 	int flushed = 0;
134 	struct fib_table *tb;
135 	struct hlist_node *node;
136 	struct hlist_head *head;
137 	unsigned int h;
138 
139 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140 		head = &net->ipv4.fib_table_hash[h];
141 		hlist_for_each_entry(tb, node, head, tb_hlist)
142 			flushed += tb->tb_flush(tb);
143 	}
144 
145 	if (flushed)
146 		rt_cache_flush(net, -1);
147 }
148 
149 /*
150  *	Find the first device with a given source address.
151  */
152 
153 struct net_device * ip_dev_find(struct net *net, __be32 addr)
154 {
155 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 	struct fib_result res;
157 	struct net_device *dev = NULL;
158 	struct fib_table *local_table;
159 
160 #ifdef CONFIG_IP_MULTIPLE_TABLES
161 	res.r = NULL;
162 #endif
163 
164 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
165 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
166 		return NULL;
167 	if (res.type != RTN_LOCAL)
168 		goto out;
169 	dev = FIB_RES_DEV(res);
170 
171 	if (dev)
172 		dev_hold(dev);
173 out:
174 	fib_res_put(&res);
175 	return dev;
176 }
177 
178 /*
179  * Find address type as if only "dev" was present in the system. If
180  * on_dev is NULL then all interfaces are taken into consideration.
181  */
182 static inline unsigned __inet_dev_addr_type(struct net *net,
183 					    const struct net_device *dev,
184 					    __be32 addr)
185 {
186 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 	struct fib_result	res;
188 	unsigned ret = RTN_BROADCAST;
189 	struct fib_table *local_table;
190 
191 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
192 		return RTN_BROADCAST;
193 	if (ipv4_is_multicast(addr))
194 		return RTN_MULTICAST;
195 
196 #ifdef CONFIG_IP_MULTIPLE_TABLES
197 	res.r = NULL;
198 #endif
199 
200 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
201 	if (local_table) {
202 		ret = RTN_UNICAST;
203 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
204 			if (!dev || dev == res.fi->fib_dev)
205 				ret = res.type;
206 			fib_res_put(&res);
207 		}
208 	}
209 	return ret;
210 }
211 
212 unsigned int inet_addr_type(struct net *net, __be32 addr)
213 {
214 	return __inet_dev_addr_type(net, NULL, addr);
215 }
216 
217 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 				__be32 addr)
219 {
220        return __inet_dev_addr_type(net, dev, addr);
221 }
222 
223 /* Given (packet source, input interface) and optional (dst, oif, tos):
224    - (main) check, that source is valid i.e. not broadcast or our local
225      address.
226    - figure out what "logical" interface this packet arrived
227      and calculate "specific destination" address.
228    - check, that packet arrived from expected physical interface.
229  */
230 
231 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232 			struct net_device *dev, __be32 *spec_dst,
233 			u32 *itag, u32 mark)
234 {
235 	struct in_device *in_dev;
236 	struct flowi fl = { .nl_u = { .ip4_u =
237 				      { .daddr = src,
238 					.saddr = dst,
239 					.tos = tos } },
240 			    .mark = mark,
241 			    .iif = oif };
242 
243 	struct fib_result res;
244 	int no_addr, rpf;
245 	int ret;
246 	struct net *net;
247 
248 	no_addr = rpf = 0;
249 	rcu_read_lock();
250 	in_dev = __in_dev_get_rcu(dev);
251 	if (in_dev) {
252 		no_addr = in_dev->ifa_list == NULL;
253 		rpf = IN_DEV_RPFILTER(in_dev);
254 	}
255 	rcu_read_unlock();
256 
257 	if (in_dev == NULL)
258 		goto e_inval;
259 
260 	net = dev_net(dev);
261 	if (fib_lookup(net, &fl, &res))
262 		goto last_resort;
263 	if (res.type != RTN_UNICAST)
264 		goto e_inval_res;
265 	*spec_dst = FIB_RES_PREFSRC(res);
266 	fib_combine_itag(itag, &res);
267 #ifdef CONFIG_IP_ROUTE_MULTIPATH
268 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
269 #else
270 	if (FIB_RES_DEV(res) == dev)
271 #endif
272 	{
273 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274 		fib_res_put(&res);
275 		return ret;
276 	}
277 	fib_res_put(&res);
278 	if (no_addr)
279 		goto last_resort;
280 	if (rpf == 1)
281 		goto e_inval;
282 	fl.oif = dev->ifindex;
283 
284 	ret = 0;
285 	if (fib_lookup(net, &fl, &res) == 0) {
286 		if (res.type == RTN_UNICAST) {
287 			*spec_dst = FIB_RES_PREFSRC(res);
288 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
289 		}
290 		fib_res_put(&res);
291 	}
292 	return ret;
293 
294 last_resort:
295 	if (rpf)
296 		goto e_inval;
297 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
298 	*itag = 0;
299 	return 0;
300 
301 e_inval_res:
302 	fib_res_put(&res);
303 e_inval:
304 	return -EINVAL;
305 }
306 
/* Read the IPv4 address out of @addr, viewed as a struct sockaddr_in. */
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;

	return sin->sin_addr.s_addr;
}
311 
312 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
313 {
314 	struct nlattr *nla;
315 
316 	nla = (struct nlattr *) ((char *) mx + len);
317 	nla->nla_type = type;
318 	nla->nla_len = nla_attr_size(4);
319 	*(u32 *) nla_data(nla) = value;
320 
321 	return len + nla_total_size(4);
322 }
323 
324 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
325 				 struct fib_config *cfg)
326 {
327 	__be32 addr;
328 	int plen;
329 
330 	memset(cfg, 0, sizeof(*cfg));
331 	cfg->fc_nlinfo.nl_net = net;
332 
333 	if (rt->rt_dst.sa_family != AF_INET)
334 		return -EAFNOSUPPORT;
335 
336 	/*
337 	 * Check mask for validity:
338 	 * a) it must be contiguous.
339 	 * b) destination must have all host bits clear.
340 	 * c) if application forgot to set correct family (AF_INET),
341 	 *    reject request unless it is absolutely clear i.e.
342 	 *    both family and mask are zero.
343 	 */
344 	plen = 32;
345 	addr = sk_extract_addr(&rt->rt_dst);
346 	if (!(rt->rt_flags & RTF_HOST)) {
347 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
348 
349 		if (rt->rt_genmask.sa_family != AF_INET) {
350 			if (mask || rt->rt_genmask.sa_family)
351 				return -EAFNOSUPPORT;
352 		}
353 
354 		if (bad_mask(mask, addr))
355 			return -EINVAL;
356 
357 		plen = inet_mask_len(mask);
358 	}
359 
360 	cfg->fc_dst_len = plen;
361 	cfg->fc_dst = addr;
362 
363 	if (cmd != SIOCDELRT) {
364 		cfg->fc_nlflags = NLM_F_CREATE;
365 		cfg->fc_protocol = RTPROT_BOOT;
366 	}
367 
368 	if (rt->rt_metric)
369 		cfg->fc_priority = rt->rt_metric - 1;
370 
371 	if (rt->rt_flags & RTF_REJECT) {
372 		cfg->fc_scope = RT_SCOPE_HOST;
373 		cfg->fc_type = RTN_UNREACHABLE;
374 		return 0;
375 	}
376 
377 	cfg->fc_scope = RT_SCOPE_NOWHERE;
378 	cfg->fc_type = RTN_UNICAST;
379 
380 	if (rt->rt_dev) {
381 		char *colon;
382 		struct net_device *dev;
383 		char devname[IFNAMSIZ];
384 
385 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
386 			return -EFAULT;
387 
388 		devname[IFNAMSIZ-1] = 0;
389 		colon = strchr(devname, ':');
390 		if (colon)
391 			*colon = 0;
392 		dev = __dev_get_by_name(net, devname);
393 		if (!dev)
394 			return -ENODEV;
395 		cfg->fc_oif = dev->ifindex;
396 		if (colon) {
397 			struct in_ifaddr *ifa;
398 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
399 			if (!in_dev)
400 				return -ENODEV;
401 			*colon = ':';
402 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
403 				if (strcmp(ifa->ifa_label, devname) == 0)
404 					break;
405 			if (ifa == NULL)
406 				return -ENODEV;
407 			cfg->fc_prefsrc = ifa->ifa_local;
408 		}
409 	}
410 
411 	addr = sk_extract_addr(&rt->rt_gateway);
412 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
413 		cfg->fc_gw = addr;
414 		if (rt->rt_flags & RTF_GATEWAY &&
415 		    inet_addr_type(net, addr) == RTN_UNICAST)
416 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
417 	}
418 
419 	if (cmd == SIOCDELRT)
420 		return 0;
421 
422 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
423 		return -EINVAL;
424 
425 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
426 		cfg->fc_scope = RT_SCOPE_LINK;
427 
428 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
429 		struct nlattr *mx;
430 		int len = 0;
431 
432 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
433 		if (mx == NULL)
434 			return -ENOMEM;
435 
436 		if (rt->rt_flags & RTF_MTU)
437 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
438 
439 		if (rt->rt_flags & RTF_WINDOW)
440 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
441 
442 		if (rt->rt_flags & RTF_IRTT)
443 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
444 
445 		cfg->fc_mx = mx;
446 		cfg->fc_mx_len = len;
447 	}
448 
449 	return 0;
450 }
451 
452 /*
453  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
454  */
455 
456 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
457 {
458 	struct fib_config cfg;
459 	struct rtentry rt;
460 	int err;
461 
462 	switch (cmd) {
463 	case SIOCADDRT:		/* Add a route */
464 	case SIOCDELRT:		/* Delete a route */
465 		if (!capable(CAP_NET_ADMIN))
466 			return -EPERM;
467 
468 		if (copy_from_user(&rt, arg, sizeof(rt)))
469 			return -EFAULT;
470 
471 		rtnl_lock();
472 		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
473 		if (err == 0) {
474 			struct fib_table *tb;
475 
476 			if (cmd == SIOCDELRT) {
477 				tb = fib_get_table(net, cfg.fc_table);
478 				if (tb)
479 					err = tb->tb_delete(tb, &cfg);
480 				else
481 					err = -ESRCH;
482 			} else {
483 				tb = fib_new_table(net, cfg.fc_table);
484 				if (tb)
485 					err = tb->tb_insert(tb, &cfg);
486 				else
487 					err = -ENOBUFS;
488 			}
489 
490 			/* allocated by rtentry_to_fib_config() */
491 			kfree(cfg.fc_mx);
492 		}
493 		rtnl_unlock();
494 		return err;
495 	}
496 	return -EINVAL;
497 }
498 
499 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
500 	[RTA_DST]		= { .type = NLA_U32 },
501 	[RTA_SRC]		= { .type = NLA_U32 },
502 	[RTA_IIF]		= { .type = NLA_U32 },
503 	[RTA_OIF]		= { .type = NLA_U32 },
504 	[RTA_GATEWAY]		= { .type = NLA_U32 },
505 	[RTA_PRIORITY]		= { .type = NLA_U32 },
506 	[RTA_PREFSRC]		= { .type = NLA_U32 },
507 	[RTA_METRICS]		= { .type = NLA_NESTED },
508 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
509 	[RTA_FLOW]		= { .type = NLA_U32 },
510 };
511 
512 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
513 			    struct nlmsghdr *nlh, struct fib_config *cfg)
514 {
515 	struct nlattr *attr;
516 	int err, remaining;
517 	struct rtmsg *rtm;
518 
519 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
520 	if (err < 0)
521 		goto errout;
522 
523 	memset(cfg, 0, sizeof(*cfg));
524 
525 	rtm = nlmsg_data(nlh);
526 	cfg->fc_dst_len = rtm->rtm_dst_len;
527 	cfg->fc_tos = rtm->rtm_tos;
528 	cfg->fc_table = rtm->rtm_table;
529 	cfg->fc_protocol = rtm->rtm_protocol;
530 	cfg->fc_scope = rtm->rtm_scope;
531 	cfg->fc_type = rtm->rtm_type;
532 	cfg->fc_flags = rtm->rtm_flags;
533 	cfg->fc_nlflags = nlh->nlmsg_flags;
534 
535 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
536 	cfg->fc_nlinfo.nlh = nlh;
537 	cfg->fc_nlinfo.nl_net = net;
538 
539 	if (cfg->fc_type > RTN_MAX) {
540 		err = -EINVAL;
541 		goto errout;
542 	}
543 
544 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
545 		switch (nla_type(attr)) {
546 		case RTA_DST:
547 			cfg->fc_dst = nla_get_be32(attr);
548 			break;
549 		case RTA_OIF:
550 			cfg->fc_oif = nla_get_u32(attr);
551 			break;
552 		case RTA_GATEWAY:
553 			cfg->fc_gw = nla_get_be32(attr);
554 			break;
555 		case RTA_PRIORITY:
556 			cfg->fc_priority = nla_get_u32(attr);
557 			break;
558 		case RTA_PREFSRC:
559 			cfg->fc_prefsrc = nla_get_be32(attr);
560 			break;
561 		case RTA_METRICS:
562 			cfg->fc_mx = nla_data(attr);
563 			cfg->fc_mx_len = nla_len(attr);
564 			break;
565 		case RTA_MULTIPATH:
566 			cfg->fc_mp = nla_data(attr);
567 			cfg->fc_mp_len = nla_len(attr);
568 			break;
569 		case RTA_FLOW:
570 			cfg->fc_flow = nla_get_u32(attr);
571 			break;
572 		case RTA_TABLE:
573 			cfg->fc_table = nla_get_u32(attr);
574 			break;
575 		}
576 	}
577 
578 	return 0;
579 errout:
580 	return err;
581 }
582 
583 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
584 {
585 	struct net *net = sock_net(skb->sk);
586 	struct fib_config cfg;
587 	struct fib_table *tb;
588 	int err;
589 
590 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
591 	if (err < 0)
592 		goto errout;
593 
594 	tb = fib_get_table(net, cfg.fc_table);
595 	if (tb == NULL) {
596 		err = -ESRCH;
597 		goto errout;
598 	}
599 
600 	err = tb->tb_delete(tb, &cfg);
601 errout:
602 	return err;
603 }
604 
605 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
606 {
607 	struct net *net = sock_net(skb->sk);
608 	struct fib_config cfg;
609 	struct fib_table *tb;
610 	int err;
611 
612 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
613 	if (err < 0)
614 		goto errout;
615 
616 	tb = fib_new_table(net, cfg.fc_table);
617 	if (tb == NULL) {
618 		err = -ENOBUFS;
619 		goto errout;
620 	}
621 
622 	err = tb->tb_insert(tb, &cfg);
623 errout:
624 	return err;
625 }
626 
627 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
628 {
629 	struct net *net = sock_net(skb->sk);
630 	unsigned int h, s_h;
631 	unsigned int e = 0, s_e;
632 	struct fib_table *tb;
633 	struct hlist_node *node;
634 	struct hlist_head *head;
635 	int dumped = 0;
636 
637 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
638 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
639 		return ip_rt_dump(skb, cb);
640 
641 	s_h = cb->args[0];
642 	s_e = cb->args[1];
643 
644 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
645 		e = 0;
646 		head = &net->ipv4.fib_table_hash[h];
647 		hlist_for_each_entry(tb, node, head, tb_hlist) {
648 			if (e < s_e)
649 				goto next;
650 			if (dumped)
651 				memset(&cb->args[2], 0, sizeof(cb->args) -
652 						 2 * sizeof(cb->args[0]));
653 			if (tb->tb_dump(tb, skb, cb) < 0)
654 				goto out;
655 			dumped = 1;
656 next:
657 			e++;
658 		}
659 	}
660 out:
661 	cb->args[1] = e;
662 	cb->args[0] = h;
663 
664 	return skb->len;
665 }
666 
667 /* Prepare and feed intra-kernel routing request.
668    Really, it should be netlink message, but :-( netlink
669    can be not configured, so that we feed it directly
670    to fib engine. It is legal, because all events occur
671    only when netlink is already locked.
672  */
673 
674 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
675 {
676 	struct net *net = dev_net(ifa->ifa_dev->dev);
677 	struct fib_table *tb;
678 	struct fib_config cfg = {
679 		.fc_protocol = RTPROT_KERNEL,
680 		.fc_type = type,
681 		.fc_dst = dst,
682 		.fc_dst_len = dst_len,
683 		.fc_prefsrc = ifa->ifa_local,
684 		.fc_oif = ifa->ifa_dev->dev->ifindex,
685 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
686 		.fc_nlinfo = {
687 			.nl_net = net,
688 		},
689 	};
690 
691 	if (type == RTN_UNICAST)
692 		tb = fib_new_table(net, RT_TABLE_MAIN);
693 	else
694 		tb = fib_new_table(net, RT_TABLE_LOCAL);
695 
696 	if (tb == NULL)
697 		return;
698 
699 	cfg.fc_table = tb->tb_id;
700 
701 	if (type != RTN_LOCAL)
702 		cfg.fc_scope = RT_SCOPE_LINK;
703 	else
704 		cfg.fc_scope = RT_SCOPE_HOST;
705 
706 	if (cmd == RTM_NEWROUTE)
707 		tb->tb_insert(tb, &cfg);
708 	else
709 		tb->tb_delete(tb, &cfg);
710 }
711 
712 void fib_add_ifaddr(struct in_ifaddr *ifa)
713 {
714 	struct in_device *in_dev = ifa->ifa_dev;
715 	struct net_device *dev = in_dev->dev;
716 	struct in_ifaddr *prim = ifa;
717 	__be32 mask = ifa->ifa_mask;
718 	__be32 addr = ifa->ifa_local;
719 	__be32 prefix = ifa->ifa_address&mask;
720 
721 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
722 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
723 		if (prim == NULL) {
724 			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
725 			return;
726 		}
727 	}
728 
729 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
730 
731 	if (!(dev->flags&IFF_UP))
732 		return;
733 
734 	/* Add broadcast address, if it is explicitly assigned. */
735 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
736 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
737 
738 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
739 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
740 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
741 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
742 
743 		/* Add network specific broadcasts, when it takes a sense */
744 		if (ifa->ifa_prefixlen < 31) {
745 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
746 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
747 		}
748 	}
749 }
750 
751 static void fib_del_ifaddr(struct in_ifaddr *ifa)
752 {
753 	struct in_device *in_dev = ifa->ifa_dev;
754 	struct net_device *dev = in_dev->dev;
755 	struct in_ifaddr *ifa1;
756 	struct in_ifaddr *prim = ifa;
757 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
758 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
759 #define LOCAL_OK	1
760 #define BRD_OK		2
761 #define BRD0_OK		4
762 #define BRD1_OK		8
763 	unsigned ok = 0;
764 
765 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
766 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
767 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
768 	else {
769 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
770 		if (prim == NULL) {
771 			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
772 			return;
773 		}
774 	}
775 
776 	/* Deletion is more complicated than add.
777 	   We should take care of not to delete too much :-)
778 
779 	   Scan address list to be sure that addresses are really gone.
780 	 */
781 
782 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
783 		if (ifa->ifa_local == ifa1->ifa_local)
784 			ok |= LOCAL_OK;
785 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
786 			ok |= BRD_OK;
787 		if (brd == ifa1->ifa_broadcast)
788 			ok |= BRD1_OK;
789 		if (any == ifa1->ifa_broadcast)
790 			ok |= BRD0_OK;
791 	}
792 
793 	if (!(ok&BRD_OK))
794 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
795 	if (!(ok&BRD1_OK))
796 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
797 	if (!(ok&BRD0_OK))
798 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
799 	if (!(ok&LOCAL_OK)) {
800 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
801 
802 		/* Check, that this local address finally disappeared. */
803 		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
804 			/* And the last, but not the least thing.
805 			   We must flush stray FIB entries.
806 
807 			   First of all, we scan fib_info list searching
808 			   for stray nexthop entries, then ignite fib_flush.
809 			*/
810 			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
811 				fib_flush(dev_net(dev));
812 		}
813 	}
814 #undef LOCAL_OK
815 #undef BRD_OK
816 #undef BRD0_OK
817 #undef BRD1_OK
818 }
819 
820 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
821 {
822 
823 	struct fib_result       res;
824 	struct flowi            fl = { .mark = frn->fl_mark,
825 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
826 							    .tos = frn->fl_tos,
827 							    .scope = frn->fl_scope } } };
828 
829 #ifdef CONFIG_IP_MULTIPLE_TABLES
830 	res.r = NULL;
831 #endif
832 
833 	frn->err = -ENOENT;
834 	if (tb) {
835 		local_bh_disable();
836 
837 		frn->tb_id = tb->tb_id;
838 		frn->err = tb->tb_lookup(tb, &fl, &res);
839 
840 		if (!frn->err) {
841 			frn->prefixlen = res.prefixlen;
842 			frn->nh_sel = res.nh_sel;
843 			frn->type = res.type;
844 			frn->scope = res.scope;
845 			fib_res_put(&res);
846 		}
847 		local_bh_enable();
848 	}
849 }
850 
851 static void nl_fib_input(struct sk_buff *skb)
852 {
853 	struct net *net;
854 	struct fib_result_nl *frn;
855 	struct nlmsghdr *nlh;
856 	struct fib_table *tb;
857 	u32 pid;
858 
859 	net = sock_net(skb->sk);
860 	nlh = nlmsg_hdr(skb);
861 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
862 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
863 		return;
864 
865 	skb = skb_clone(skb, GFP_KERNEL);
866 	if (skb == NULL)
867 		return;
868 	nlh = nlmsg_hdr(skb);
869 
870 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
871 	tb = fib_get_table(net, frn->tb_id_in);
872 
873 	nl_fib_lookup(frn, tb);
874 
875 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
876 	NETLINK_CB(skb).pid = 0;         /* from kernel */
877 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
878 	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
879 }
880 
881 static int nl_fib_lookup_init(struct net *net)
882 {
883 	struct sock *sk;
884 	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
885 				   nl_fib_input, NULL, THIS_MODULE);
886 	if (sk == NULL)
887 		return -EAFNOSUPPORT;
888 	net->ipv4.fibnl = sk;
889 	return 0;
890 }
891 
892 static void nl_fib_lookup_exit(struct net *net)
893 {
894 	netlink_kernel_release(net->ipv4.fibnl);
895 	net->ipv4.fibnl = NULL;
896 }
897 
/*
 * Take IPv4 routing down on @dev: sync the FIB for the device (flushing
 * the tables if that reports changes), flush the routing cache and shut
 * down ARP on the device.  @force is passed through to
 * fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), 0);
	arp_ifdown(dev);
}
905 
906 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
907 {
908 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
909 	struct net_device *dev = ifa->ifa_dev->dev;
910 
911 	switch (event) {
912 	case NETDEV_UP:
913 		fib_add_ifaddr(ifa);
914 #ifdef CONFIG_IP_ROUTE_MULTIPATH
915 		fib_sync_up(dev);
916 #endif
917 		rt_cache_flush(dev_net(dev), -1);
918 		break;
919 	case NETDEV_DOWN:
920 		fib_del_ifaddr(ifa);
921 		if (ifa->ifa_dev->ifa_list == NULL) {
922 			/* Last address was deleted from this interface.
923 			   Disable IP.
924 			 */
925 			fib_disable_ip(dev, 1);
926 		} else {
927 			rt_cache_flush(dev_net(dev), -1);
928 		}
929 		break;
930 	}
931 	return NOTIFY_DONE;
932 }
933 
934 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
935 {
936 	struct net_device *dev = ptr;
937 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
938 
939 	if (event == NETDEV_UNREGISTER) {
940 		fib_disable_ip(dev, 2);
941 		return NOTIFY_DONE;
942 	}
943 
944 	if (!in_dev)
945 		return NOTIFY_DONE;
946 
947 	switch (event) {
948 	case NETDEV_UP:
949 		for_ifa(in_dev) {
950 			fib_add_ifaddr(ifa);
951 		} endfor_ifa(in_dev);
952 #ifdef CONFIG_IP_ROUTE_MULTIPATH
953 		fib_sync_up(dev);
954 #endif
955 		rt_cache_flush(dev_net(dev), -1);
956 		break;
957 	case NETDEV_DOWN:
958 		fib_disable_ip(dev, 0);
959 		break;
960 	case NETDEV_CHANGEMTU:
961 	case NETDEV_CHANGE:
962 		rt_cache_flush(dev_net(dev), 0);
963 		break;
964 	}
965 	return NOTIFY_DONE;
966 }
967 
968 static struct notifier_block fib_inetaddr_notifier = {
969 	.notifier_call = fib_inetaddr_event,
970 };
971 
972 static struct notifier_block fib_netdev_notifier = {
973 	.notifier_call = fib_netdev_event,
974 };
975 
976 static int __net_init ip_fib_net_init(struct net *net)
977 {
978 	int err;
979 	unsigned int i;
980 
981 	net->ipv4.fib_table_hash = kzalloc(
982 			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
983 	if (net->ipv4.fib_table_hash == NULL)
984 		return -ENOMEM;
985 
986 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
987 		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
988 
989 	err = fib4_rules_init(net);
990 	if (err < 0)
991 		goto fail;
992 	return 0;
993 
994 fail:
995 	kfree(net->ipv4.fib_table_hash);
996 	return err;
997 }
998 
999 static void __net_exit ip_fib_net_exit(struct net *net)
1000 {
1001 	unsigned int i;
1002 
1003 #ifdef CONFIG_IP_MULTIPLE_TABLES
1004 	fib4_rules_exit(net);
1005 #endif
1006 
1007 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1008 		struct fib_table *tb;
1009 		struct hlist_head *head;
1010 		struct hlist_node *node, *tmp;
1011 
1012 		head = &net->ipv4.fib_table_hash[i];
1013 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1014 			hlist_del(node);
1015 			tb->tb_flush(tb);
1016 			kfree(tb);
1017 		}
1018 	}
1019 	kfree(net->ipv4.fib_table_hash);
1020 }
1021 
1022 static int __net_init fib_net_init(struct net *net)
1023 {
1024 	int error;
1025 
1026 	error = ip_fib_net_init(net);
1027 	if (error < 0)
1028 		goto out;
1029 	error = nl_fib_lookup_init(net);
1030 	if (error < 0)
1031 		goto out_nlfl;
1032 	error = fib_proc_init(net);
1033 	if (error < 0)
1034 		goto out_proc;
1035 out:
1036 	return error;
1037 
1038 out_proc:
1039 	nl_fib_lookup_exit(net);
1040 out_nlfl:
1041 	ip_fib_net_exit(net);
1042 	goto out;
1043 }
1044 
1045 static void __net_exit fib_net_exit(struct net *net)
1046 {
1047 	fib_proc_exit(net);
1048 	nl_fib_lookup_exit(net);
1049 	ip_fib_net_exit(net);
1050 }
1051 
1052 static struct pernet_operations fib_net_ops = {
1053 	.init = fib_net_init,
1054 	.exit = fib_net_exit,
1055 };
1056 
1057 void __init ip_fib_init(void)
1058 {
1059 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1060 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1061 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1062 
1063 	register_pernet_subsys(&fib_net_ops);
1064 	register_netdevice_notifier(&fib_netdev_notifier);
1065 	register_inetaddr_notifier(&fib_inetaddr_notifier);
1066 
1067 	fib_hash_init();
1068 }
1069 
/* Symbols exported from this file with EXPORT_SYMBOL(). */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1073