xref: /linux/net/ipv4/fib_frontend.c (revision a33f32244d8550da8b4a26e277ce07d5c6d158b5)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  *		This program is free software; you can redistribute it and/or
11  *		modify it under the terms of the GNU General Public License
12  *		as published by the Free Software Foundation; either version
13  *		2 of the License, or (at your option) any later version.
14  */
15 
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/in.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 #include <linux/slab.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/arp.h>
45 #include <net/ip_fib.h>
46 #include <net/rtnetlink.h>
47 
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
49 
50 static int __net_init fib4_rules_init(struct net *net)
51 {
52 	struct fib_table *local_table, *main_table;
53 
54 	local_table = fib_hash_table(RT_TABLE_LOCAL);
55 	if (local_table == NULL)
56 		return -ENOMEM;
57 
58 	main_table  = fib_hash_table(RT_TABLE_MAIN);
59 	if (main_table == NULL)
60 		goto fail;
61 
62 	hlist_add_head_rcu(&local_table->tb_hlist,
63 				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64 	hlist_add_head_rcu(&main_table->tb_hlist,
65 				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66 	return 0;
67 
68 fail:
69 	kfree(local_table);
70 	return -ENOMEM;
71 }
72 #else
73 
74 struct fib_table *fib_new_table(struct net *net, u32 id)
75 {
76 	struct fib_table *tb;
77 	unsigned int h;
78 
79 	if (id == 0)
80 		id = RT_TABLE_MAIN;
81 	tb = fib_get_table(net, id);
82 	if (tb)
83 		return tb;
84 
85 	tb = fib_hash_table(id);
86 	if (!tb)
87 		return NULL;
88 	h = id & (FIB_TABLE_HASHSZ - 1);
89 	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90 	return tb;
91 }
92 
93 struct fib_table *fib_get_table(struct net *net, u32 id)
94 {
95 	struct fib_table *tb;
96 	struct hlist_node *node;
97 	struct hlist_head *head;
98 	unsigned int h;
99 
100 	if (id == 0)
101 		id = RT_TABLE_MAIN;
102 	h = id & (FIB_TABLE_HASHSZ - 1);
103 
104 	rcu_read_lock();
105 	head = &net->ipv4.fib_table_hash[h];
106 	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107 		if (tb->tb_id == id) {
108 			rcu_read_unlock();
109 			return tb;
110 		}
111 	}
112 	rcu_read_unlock();
113 	return NULL;
114 }
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
116 
117 void fib_select_default(struct net *net,
118 			const struct flowi *flp, struct fib_result *res)
119 {
120 	struct fib_table *tb;
121 	int table = RT_TABLE_MAIN;
122 #ifdef CONFIG_IP_MULTIPLE_TABLES
123 	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 		return;
125 	table = res->r->table;
126 #endif
127 	tb = fib_get_table(net, table);
128 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 		fib_table_select_default(tb, flp, res);
130 }
131 
132 static void fib_flush(struct net *net)
133 {
134 	int flushed = 0;
135 	struct fib_table *tb;
136 	struct hlist_node *node;
137 	struct hlist_head *head;
138 	unsigned int h;
139 
140 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141 		head = &net->ipv4.fib_table_hash[h];
142 		hlist_for_each_entry(tb, node, head, tb_hlist)
143 			flushed += fib_table_flush(tb);
144 	}
145 
146 	if (flushed)
147 		rt_cache_flush(net, -1);
148 }
149 
150 /*
151  *	Find the first device with a given source address.
152  */
153 
154 struct net_device * ip_dev_find(struct net *net, __be32 addr)
155 {
156 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 	struct fib_result res;
158 	struct net_device *dev = NULL;
159 	struct fib_table *local_table;
160 
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
162 	res.r = NULL;
163 #endif
164 
165 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 	if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 		return NULL;
168 	if (res.type != RTN_LOCAL)
169 		goto out;
170 	dev = FIB_RES_DEV(res);
171 
172 	if (dev)
173 		dev_hold(dev);
174 out:
175 	fib_res_put(&res);
176 	return dev;
177 }
178 
179 /*
180  * Find address type as if only "dev" was present in the system. If
181  * on_dev is NULL then all interfaces are taken into consideration.
182  */
183 static inline unsigned __inet_dev_addr_type(struct net *net,
184 					    const struct net_device *dev,
185 					    __be32 addr)
186 {
187 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 	struct fib_result	res;
189 	unsigned ret = RTN_BROADCAST;
190 	struct fib_table *local_table;
191 
192 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
193 		return RTN_BROADCAST;
194 	if (ipv4_is_multicast(addr))
195 		return RTN_MULTICAST;
196 
197 #ifdef CONFIG_IP_MULTIPLE_TABLES
198 	res.r = NULL;
199 #endif
200 
201 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
202 	if (local_table) {
203 		ret = RTN_UNICAST;
204 		if (!fib_table_lookup(local_table, &fl, &res)) {
205 			if (!dev || dev == res.fi->fib_dev)
206 				ret = res.type;
207 			fib_res_put(&res);
208 		}
209 	}
210 	return ret;
211 }
212 
213 unsigned int inet_addr_type(struct net *net, __be32 addr)
214 {
215 	return __inet_dev_addr_type(net, NULL, addr);
216 }
217 
218 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 				__be32 addr)
220 {
221        return __inet_dev_addr_type(net, dev, addr);
222 }
223 
224 /* Given (packet source, input interface) and optional (dst, oif, tos):
225    - (main) check, that source is valid i.e. not broadcast or our local
226      address.
227    - figure out what "logical" interface this packet arrived
228      and calculate "specific destination" address.
229    - check, that packet arrived from expected physical interface.
230  */
231 
232 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
233 			struct net_device *dev, __be32 *spec_dst,
234 			u32 *itag, u32 mark)
235 {
236 	struct in_device *in_dev;
237 	struct flowi fl = { .nl_u = { .ip4_u =
238 				      { .daddr = src,
239 					.saddr = dst,
240 					.tos = tos } },
241 			    .mark = mark,
242 			    .iif = oif };
243 
244 	struct fib_result res;
245 	int no_addr, rpf, accept_local;
246 	int ret;
247 	struct net *net;
248 
249 	no_addr = rpf = accept_local = 0;
250 	rcu_read_lock();
251 	in_dev = __in_dev_get_rcu(dev);
252 	if (in_dev) {
253 		no_addr = in_dev->ifa_list == NULL;
254 		rpf = IN_DEV_RPFILTER(in_dev);
255 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
256 		if (mark && !IN_DEV_SRC_VMARK(in_dev))
257 			fl.mark = 0;
258 	}
259 	rcu_read_unlock();
260 
261 	if (in_dev == NULL)
262 		goto e_inval;
263 
264 	net = dev_net(dev);
265 	if (fib_lookup(net, &fl, &res))
266 		goto last_resort;
267 	if (res.type != RTN_UNICAST) {
268 		if (res.type != RTN_LOCAL || !accept_local)
269 			goto e_inval_res;
270 	}
271 	*spec_dst = FIB_RES_PREFSRC(res);
272 	fib_combine_itag(itag, &res);
273 #ifdef CONFIG_IP_ROUTE_MULTIPATH
274 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275 #else
276 	if (FIB_RES_DEV(res) == dev)
277 #endif
278 	{
279 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280 		fib_res_put(&res);
281 		return ret;
282 	}
283 	fib_res_put(&res);
284 	if (no_addr)
285 		goto last_resort;
286 	if (rpf == 1)
287 		goto e_inval;
288 	fl.oif = dev->ifindex;
289 
290 	ret = 0;
291 	if (fib_lookup(net, &fl, &res) == 0) {
292 		if (res.type == RTN_UNICAST) {
293 			*spec_dst = FIB_RES_PREFSRC(res);
294 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295 		}
296 		fib_res_put(&res);
297 	}
298 	return ret;
299 
300 last_resort:
301 	if (rpf)
302 		goto e_inval;
303 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 	*itag = 0;
305 	return 0;
306 
307 e_inval_res:
308 	fib_res_put(&res);
309 e_inval:
310 	return -EINVAL;
311 }
312 
313 static inline __be32 sk_extract_addr(struct sockaddr *addr)
314 {
315 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
316 }
317 
318 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
319 {
320 	struct nlattr *nla;
321 
322 	nla = (struct nlattr *) ((char *) mx + len);
323 	nla->nla_type = type;
324 	nla->nla_len = nla_attr_size(4);
325 	*(u32 *) nla_data(nla) = value;
326 
327 	return len + nla_total_size(4);
328 }
329 
330 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
331 				 struct fib_config *cfg)
332 {
333 	__be32 addr;
334 	int plen;
335 
336 	memset(cfg, 0, sizeof(*cfg));
337 	cfg->fc_nlinfo.nl_net = net;
338 
339 	if (rt->rt_dst.sa_family != AF_INET)
340 		return -EAFNOSUPPORT;
341 
342 	/*
343 	 * Check mask for validity:
344 	 * a) it must be contiguous.
345 	 * b) destination must have all host bits clear.
346 	 * c) if application forgot to set correct family (AF_INET),
347 	 *    reject request unless it is absolutely clear i.e.
348 	 *    both family and mask are zero.
349 	 */
350 	plen = 32;
351 	addr = sk_extract_addr(&rt->rt_dst);
352 	if (!(rt->rt_flags & RTF_HOST)) {
353 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
354 
355 		if (rt->rt_genmask.sa_family != AF_INET) {
356 			if (mask || rt->rt_genmask.sa_family)
357 				return -EAFNOSUPPORT;
358 		}
359 
360 		if (bad_mask(mask, addr))
361 			return -EINVAL;
362 
363 		plen = inet_mask_len(mask);
364 	}
365 
366 	cfg->fc_dst_len = plen;
367 	cfg->fc_dst = addr;
368 
369 	if (cmd != SIOCDELRT) {
370 		cfg->fc_nlflags = NLM_F_CREATE;
371 		cfg->fc_protocol = RTPROT_BOOT;
372 	}
373 
374 	if (rt->rt_metric)
375 		cfg->fc_priority = rt->rt_metric - 1;
376 
377 	if (rt->rt_flags & RTF_REJECT) {
378 		cfg->fc_scope = RT_SCOPE_HOST;
379 		cfg->fc_type = RTN_UNREACHABLE;
380 		return 0;
381 	}
382 
383 	cfg->fc_scope = RT_SCOPE_NOWHERE;
384 	cfg->fc_type = RTN_UNICAST;
385 
386 	if (rt->rt_dev) {
387 		char *colon;
388 		struct net_device *dev;
389 		char devname[IFNAMSIZ];
390 
391 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
392 			return -EFAULT;
393 
394 		devname[IFNAMSIZ-1] = 0;
395 		colon = strchr(devname, ':');
396 		if (colon)
397 			*colon = 0;
398 		dev = __dev_get_by_name(net, devname);
399 		if (!dev)
400 			return -ENODEV;
401 		cfg->fc_oif = dev->ifindex;
402 		if (colon) {
403 			struct in_ifaddr *ifa;
404 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
405 			if (!in_dev)
406 				return -ENODEV;
407 			*colon = ':';
408 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
409 				if (strcmp(ifa->ifa_label, devname) == 0)
410 					break;
411 			if (ifa == NULL)
412 				return -ENODEV;
413 			cfg->fc_prefsrc = ifa->ifa_local;
414 		}
415 	}
416 
417 	addr = sk_extract_addr(&rt->rt_gateway);
418 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
419 		cfg->fc_gw = addr;
420 		if (rt->rt_flags & RTF_GATEWAY &&
421 		    inet_addr_type(net, addr) == RTN_UNICAST)
422 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
423 	}
424 
425 	if (cmd == SIOCDELRT)
426 		return 0;
427 
428 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
429 		return -EINVAL;
430 
431 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
432 		cfg->fc_scope = RT_SCOPE_LINK;
433 
434 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
435 		struct nlattr *mx;
436 		int len = 0;
437 
438 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
439 		if (mx == NULL)
440 			return -ENOMEM;
441 
442 		if (rt->rt_flags & RTF_MTU)
443 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
444 
445 		if (rt->rt_flags & RTF_WINDOW)
446 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
447 
448 		if (rt->rt_flags & RTF_IRTT)
449 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
450 
451 		cfg->fc_mx = mx;
452 		cfg->fc_mx_len = len;
453 	}
454 
455 	return 0;
456 }
457 
458 /*
459  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
460  */
461 
462 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
463 {
464 	struct fib_config cfg;
465 	struct rtentry rt;
466 	int err;
467 
468 	switch (cmd) {
469 	case SIOCADDRT:		/* Add a route */
470 	case SIOCDELRT:		/* Delete a route */
471 		if (!capable(CAP_NET_ADMIN))
472 			return -EPERM;
473 
474 		if (copy_from_user(&rt, arg, sizeof(rt)))
475 			return -EFAULT;
476 
477 		rtnl_lock();
478 		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
479 		if (err == 0) {
480 			struct fib_table *tb;
481 
482 			if (cmd == SIOCDELRT) {
483 				tb = fib_get_table(net, cfg.fc_table);
484 				if (tb)
485 					err = fib_table_delete(tb, &cfg);
486 				else
487 					err = -ESRCH;
488 			} else {
489 				tb = fib_new_table(net, cfg.fc_table);
490 				if (tb)
491 					err = fib_table_insert(tb, &cfg);
492 				else
493 					err = -ENOBUFS;
494 			}
495 
496 			/* allocated by rtentry_to_fib_config() */
497 			kfree(cfg.fc_mx);
498 		}
499 		rtnl_unlock();
500 		return err;
501 	}
502 	return -EINVAL;
503 }
504 
505 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
506 	[RTA_DST]		= { .type = NLA_U32 },
507 	[RTA_SRC]		= { .type = NLA_U32 },
508 	[RTA_IIF]		= { .type = NLA_U32 },
509 	[RTA_OIF]		= { .type = NLA_U32 },
510 	[RTA_GATEWAY]		= { .type = NLA_U32 },
511 	[RTA_PRIORITY]		= { .type = NLA_U32 },
512 	[RTA_PREFSRC]		= { .type = NLA_U32 },
513 	[RTA_METRICS]		= { .type = NLA_NESTED },
514 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
515 	[RTA_FLOW]		= { .type = NLA_U32 },
516 };
517 
518 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
519 			    struct nlmsghdr *nlh, struct fib_config *cfg)
520 {
521 	struct nlattr *attr;
522 	int err, remaining;
523 	struct rtmsg *rtm;
524 
525 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
526 	if (err < 0)
527 		goto errout;
528 
529 	memset(cfg, 0, sizeof(*cfg));
530 
531 	rtm = nlmsg_data(nlh);
532 	cfg->fc_dst_len = rtm->rtm_dst_len;
533 	cfg->fc_tos = rtm->rtm_tos;
534 	cfg->fc_table = rtm->rtm_table;
535 	cfg->fc_protocol = rtm->rtm_protocol;
536 	cfg->fc_scope = rtm->rtm_scope;
537 	cfg->fc_type = rtm->rtm_type;
538 	cfg->fc_flags = rtm->rtm_flags;
539 	cfg->fc_nlflags = nlh->nlmsg_flags;
540 
541 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
542 	cfg->fc_nlinfo.nlh = nlh;
543 	cfg->fc_nlinfo.nl_net = net;
544 
545 	if (cfg->fc_type > RTN_MAX) {
546 		err = -EINVAL;
547 		goto errout;
548 	}
549 
550 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
551 		switch (nla_type(attr)) {
552 		case RTA_DST:
553 			cfg->fc_dst = nla_get_be32(attr);
554 			break;
555 		case RTA_OIF:
556 			cfg->fc_oif = nla_get_u32(attr);
557 			break;
558 		case RTA_GATEWAY:
559 			cfg->fc_gw = nla_get_be32(attr);
560 			break;
561 		case RTA_PRIORITY:
562 			cfg->fc_priority = nla_get_u32(attr);
563 			break;
564 		case RTA_PREFSRC:
565 			cfg->fc_prefsrc = nla_get_be32(attr);
566 			break;
567 		case RTA_METRICS:
568 			cfg->fc_mx = nla_data(attr);
569 			cfg->fc_mx_len = nla_len(attr);
570 			break;
571 		case RTA_MULTIPATH:
572 			cfg->fc_mp = nla_data(attr);
573 			cfg->fc_mp_len = nla_len(attr);
574 			break;
575 		case RTA_FLOW:
576 			cfg->fc_flow = nla_get_u32(attr);
577 			break;
578 		case RTA_TABLE:
579 			cfg->fc_table = nla_get_u32(attr);
580 			break;
581 		}
582 	}
583 
584 	return 0;
585 errout:
586 	return err;
587 }
588 
589 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
590 {
591 	struct net *net = sock_net(skb->sk);
592 	struct fib_config cfg;
593 	struct fib_table *tb;
594 	int err;
595 
596 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
597 	if (err < 0)
598 		goto errout;
599 
600 	tb = fib_get_table(net, cfg.fc_table);
601 	if (tb == NULL) {
602 		err = -ESRCH;
603 		goto errout;
604 	}
605 
606 	err = fib_table_delete(tb, &cfg);
607 errout:
608 	return err;
609 }
610 
611 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
612 {
613 	struct net *net = sock_net(skb->sk);
614 	struct fib_config cfg;
615 	struct fib_table *tb;
616 	int err;
617 
618 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
619 	if (err < 0)
620 		goto errout;
621 
622 	tb = fib_new_table(net, cfg.fc_table);
623 	if (tb == NULL) {
624 		err = -ENOBUFS;
625 		goto errout;
626 	}
627 
628 	err = fib_table_insert(tb, &cfg);
629 errout:
630 	return err;
631 }
632 
633 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
634 {
635 	struct net *net = sock_net(skb->sk);
636 	unsigned int h, s_h;
637 	unsigned int e = 0, s_e;
638 	struct fib_table *tb;
639 	struct hlist_node *node;
640 	struct hlist_head *head;
641 	int dumped = 0;
642 
643 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
644 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
645 		return ip_rt_dump(skb, cb);
646 
647 	s_h = cb->args[0];
648 	s_e = cb->args[1];
649 
650 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
651 		e = 0;
652 		head = &net->ipv4.fib_table_hash[h];
653 		hlist_for_each_entry(tb, node, head, tb_hlist) {
654 			if (e < s_e)
655 				goto next;
656 			if (dumped)
657 				memset(&cb->args[2], 0, sizeof(cb->args) -
658 						 2 * sizeof(cb->args[0]));
659 			if (fib_table_dump(tb, skb, cb) < 0)
660 				goto out;
661 			dumped = 1;
662 next:
663 			e++;
664 		}
665 	}
666 out:
667 	cb->args[1] = e;
668 	cb->args[0] = h;
669 
670 	return skb->len;
671 }
672 
673 /* Prepare and feed intra-kernel routing request.
674    Really, it should be netlink message, but :-( netlink
675    can be not configured, so that we feed it directly
676    to fib engine. It is legal, because all events occur
677    only when netlink is already locked.
678  */
679 
680 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
681 {
682 	struct net *net = dev_net(ifa->ifa_dev->dev);
683 	struct fib_table *tb;
684 	struct fib_config cfg = {
685 		.fc_protocol = RTPROT_KERNEL,
686 		.fc_type = type,
687 		.fc_dst = dst,
688 		.fc_dst_len = dst_len,
689 		.fc_prefsrc = ifa->ifa_local,
690 		.fc_oif = ifa->ifa_dev->dev->ifindex,
691 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
692 		.fc_nlinfo = {
693 			.nl_net = net,
694 		},
695 	};
696 
697 	if (type == RTN_UNICAST)
698 		tb = fib_new_table(net, RT_TABLE_MAIN);
699 	else
700 		tb = fib_new_table(net, RT_TABLE_LOCAL);
701 
702 	if (tb == NULL)
703 		return;
704 
705 	cfg.fc_table = tb->tb_id;
706 
707 	if (type != RTN_LOCAL)
708 		cfg.fc_scope = RT_SCOPE_LINK;
709 	else
710 		cfg.fc_scope = RT_SCOPE_HOST;
711 
712 	if (cmd == RTM_NEWROUTE)
713 		fib_table_insert(tb, &cfg);
714 	else
715 		fib_table_delete(tb, &cfg);
716 }
717 
718 void fib_add_ifaddr(struct in_ifaddr *ifa)
719 {
720 	struct in_device *in_dev = ifa->ifa_dev;
721 	struct net_device *dev = in_dev->dev;
722 	struct in_ifaddr *prim = ifa;
723 	__be32 mask = ifa->ifa_mask;
724 	__be32 addr = ifa->ifa_local;
725 	__be32 prefix = ifa->ifa_address&mask;
726 
727 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
728 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
729 		if (prim == NULL) {
730 			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
731 			return;
732 		}
733 	}
734 
735 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
736 
737 	if (!(dev->flags&IFF_UP))
738 		return;
739 
740 	/* Add broadcast address, if it is explicitly assigned. */
741 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
742 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
743 
744 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
745 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
746 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
747 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
748 
749 		/* Add network specific broadcasts, when it takes a sense */
750 		if (ifa->ifa_prefixlen < 31) {
751 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
752 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
753 		}
754 	}
755 }
756 
757 static void fib_del_ifaddr(struct in_ifaddr *ifa)
758 {
759 	struct in_device *in_dev = ifa->ifa_dev;
760 	struct net_device *dev = in_dev->dev;
761 	struct in_ifaddr *ifa1;
762 	struct in_ifaddr *prim = ifa;
763 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
764 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
765 #define LOCAL_OK	1
766 #define BRD_OK		2
767 #define BRD0_OK		4
768 #define BRD1_OK		8
769 	unsigned ok = 0;
770 
771 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
772 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
773 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
774 	else {
775 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
776 		if (prim == NULL) {
777 			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
778 			return;
779 		}
780 	}
781 
782 	/* Deletion is more complicated than add.
783 	   We should take care of not to delete too much :-)
784 
785 	   Scan address list to be sure that addresses are really gone.
786 	 */
787 
788 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
789 		if (ifa->ifa_local == ifa1->ifa_local)
790 			ok |= LOCAL_OK;
791 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
792 			ok |= BRD_OK;
793 		if (brd == ifa1->ifa_broadcast)
794 			ok |= BRD1_OK;
795 		if (any == ifa1->ifa_broadcast)
796 			ok |= BRD0_OK;
797 	}
798 
799 	if (!(ok&BRD_OK))
800 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
801 	if (!(ok&BRD1_OK))
802 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
803 	if (!(ok&BRD0_OK))
804 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
805 	if (!(ok&LOCAL_OK)) {
806 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
807 
808 		/* Check, that this local address finally disappeared. */
809 		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
810 			/* And the last, but not the least thing.
811 			   We must flush stray FIB entries.
812 
813 			   First of all, we scan fib_info list searching
814 			   for stray nexthop entries, then ignite fib_flush.
815 			*/
816 			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
817 				fib_flush(dev_net(dev));
818 		}
819 	}
820 #undef LOCAL_OK
821 #undef BRD_OK
822 #undef BRD0_OK
823 #undef BRD1_OK
824 }
825 
826 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
827 {
828 
829 	struct fib_result       res;
830 	struct flowi            fl = { .mark = frn->fl_mark,
831 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
832 							    .tos = frn->fl_tos,
833 							    .scope = frn->fl_scope } } };
834 
835 #ifdef CONFIG_IP_MULTIPLE_TABLES
836 	res.r = NULL;
837 #endif
838 
839 	frn->err = -ENOENT;
840 	if (tb) {
841 		local_bh_disable();
842 
843 		frn->tb_id = tb->tb_id;
844 		frn->err = fib_table_lookup(tb, &fl, &res);
845 
846 		if (!frn->err) {
847 			frn->prefixlen = res.prefixlen;
848 			frn->nh_sel = res.nh_sel;
849 			frn->type = res.type;
850 			frn->scope = res.scope;
851 			fib_res_put(&res);
852 		}
853 		local_bh_enable();
854 	}
855 }
856 
857 static void nl_fib_input(struct sk_buff *skb)
858 {
859 	struct net *net;
860 	struct fib_result_nl *frn;
861 	struct nlmsghdr *nlh;
862 	struct fib_table *tb;
863 	u32 pid;
864 
865 	net = sock_net(skb->sk);
866 	nlh = nlmsg_hdr(skb);
867 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
868 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
869 		return;
870 
871 	skb = skb_clone(skb, GFP_KERNEL);
872 	if (skb == NULL)
873 		return;
874 	nlh = nlmsg_hdr(skb);
875 
876 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
877 	tb = fib_get_table(net, frn->tb_id_in);
878 
879 	nl_fib_lookup(frn, tb);
880 
881 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
882 	NETLINK_CB(skb).pid = 0;         /* from kernel */
883 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
884 	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
885 }
886 
887 static int __net_init nl_fib_lookup_init(struct net *net)
888 {
889 	struct sock *sk;
890 	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
891 				   nl_fib_input, NULL, THIS_MODULE);
892 	if (sk == NULL)
893 		return -EAFNOSUPPORT;
894 	net->ipv4.fibnl = sk;
895 	return 0;
896 }
897 
898 static void nl_fib_lookup_exit(struct net *net)
899 {
900 	netlink_kernel_release(net->ipv4.fibnl);
901 	net->ipv4.fibnl = NULL;
902 }
903 
/*
 * Take IPv4 routing down on @dev: sync down its nexthops (flushing the
 * tables if that reports changes), flush the routing cache with @delay
 * and notify ARP that the device is down.  @force is passed through to
 * fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
911 
912 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
913 {
914 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
915 	struct net_device *dev = ifa->ifa_dev->dev;
916 
917 	switch (event) {
918 	case NETDEV_UP:
919 		fib_add_ifaddr(ifa);
920 #ifdef CONFIG_IP_ROUTE_MULTIPATH
921 		fib_sync_up(dev);
922 #endif
923 		rt_cache_flush(dev_net(dev), -1);
924 		break;
925 	case NETDEV_DOWN:
926 		fib_del_ifaddr(ifa);
927 		if (ifa->ifa_dev->ifa_list == NULL) {
928 			/* Last address was deleted from this interface.
929 			   Disable IP.
930 			 */
931 			fib_disable_ip(dev, 1, 0);
932 		} else {
933 			rt_cache_flush(dev_net(dev), -1);
934 		}
935 		break;
936 	}
937 	return NOTIFY_DONE;
938 }
939 
940 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
941 {
942 	struct net_device *dev = ptr;
943 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
944 
945 	if (event == NETDEV_UNREGISTER) {
946 		fib_disable_ip(dev, 2, -1);
947 		return NOTIFY_DONE;
948 	}
949 
950 	if (!in_dev)
951 		return NOTIFY_DONE;
952 
953 	switch (event) {
954 	case NETDEV_UP:
955 		for_ifa(in_dev) {
956 			fib_add_ifaddr(ifa);
957 		} endfor_ifa(in_dev);
958 #ifdef CONFIG_IP_ROUTE_MULTIPATH
959 		fib_sync_up(dev);
960 #endif
961 		rt_cache_flush(dev_net(dev), -1);
962 		break;
963 	case NETDEV_DOWN:
964 		fib_disable_ip(dev, 0, 0);
965 		break;
966 	case NETDEV_CHANGEMTU:
967 	case NETDEV_CHANGE:
968 		rt_cache_flush(dev_net(dev), 0);
969 		break;
970 	case NETDEV_UNREGISTER_BATCH:
971 		rt_cache_flush_batch();
972 		break;
973 	}
974 	return NOTIFY_DONE;
975 }
976 
977 static struct notifier_block fib_inetaddr_notifier = {
978 	.notifier_call = fib_inetaddr_event,
979 };
980 
981 static struct notifier_block fib_netdev_notifier = {
982 	.notifier_call = fib_netdev_event,
983 };
984 
985 static int __net_init ip_fib_net_init(struct net *net)
986 {
987 	int err;
988 	unsigned int i;
989 
990 	net->ipv4.fib_table_hash = kzalloc(
991 			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
992 	if (net->ipv4.fib_table_hash == NULL)
993 		return -ENOMEM;
994 
995 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
996 		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
997 
998 	err = fib4_rules_init(net);
999 	if (err < 0)
1000 		goto fail;
1001 	return 0;
1002 
1003 fail:
1004 	kfree(net->ipv4.fib_table_hash);
1005 	return err;
1006 }
1007 
1008 static void ip_fib_net_exit(struct net *net)
1009 {
1010 	unsigned int i;
1011 
1012 #ifdef CONFIG_IP_MULTIPLE_TABLES
1013 	fib4_rules_exit(net);
1014 #endif
1015 
1016 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1017 		struct fib_table *tb;
1018 		struct hlist_head *head;
1019 		struct hlist_node *node, *tmp;
1020 
1021 		head = &net->ipv4.fib_table_hash[i];
1022 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1023 			hlist_del(node);
1024 			fib_table_flush(tb);
1025 			kfree(tb);
1026 		}
1027 	}
1028 	kfree(net->ipv4.fib_table_hash);
1029 }
1030 
1031 static int __net_init fib_net_init(struct net *net)
1032 {
1033 	int error;
1034 
1035 	error = ip_fib_net_init(net);
1036 	if (error < 0)
1037 		goto out;
1038 	error = nl_fib_lookup_init(net);
1039 	if (error < 0)
1040 		goto out_nlfl;
1041 	error = fib_proc_init(net);
1042 	if (error < 0)
1043 		goto out_proc;
1044 out:
1045 	return error;
1046 
1047 out_proc:
1048 	nl_fib_lookup_exit(net);
1049 out_nlfl:
1050 	ip_fib_net_exit(net);
1051 	goto out;
1052 }
1053 
1054 static void __net_exit fib_net_exit(struct net *net)
1055 {
1056 	fib_proc_exit(net);
1057 	nl_fib_lookup_exit(net);
1058 	ip_fib_net_exit(net);
1059 }
1060 
1061 static struct pernet_operations fib_net_ops = {
1062 	.init = fib_net_init,
1063 	.exit = fib_net_exit,
1064 };
1065 
1066 void __init ip_fib_init(void)
1067 {
1068 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1069 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1070 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1071 
1072 	register_pernet_subsys(&fib_net_ops);
1073 	register_netdevice_notifier(&fib_netdev_notifier);
1074 	register_inetaddr_notifier(&fib_inetaddr_notifier);
1075 
1076 	fib_hash_init();
1077 }
1078 
1079 EXPORT_SYMBOL(inet_addr_type);
1080 EXPORT_SYMBOL(inet_dev_addr_type);
1081 EXPORT_SYMBOL(ip_dev_find);
1082