xref: /linux/net/ipv6/route.c (revision 2c1ba398ac9da3305815f6ae8e95ae2b9fd3b5ff)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66 
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74 
75 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sk_buff *skb);
88 static void		ip6_link_failure(struct sk_buff *skb);
89 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 
91 #ifdef CONFIG_IPV6_ROUTE_INFO
92 static struct rt6_info *rt6_add_route_info(struct net *net,
93 					   const struct in6_addr *prefix, int prefixlen,
94 					   const struct in6_addr *gwaddr, int ifindex,
95 					   unsigned pref);
96 static struct rt6_info *rt6_get_route_info(struct net *net,
97 					   const struct in6_addr *prefix, int prefixlen,
98 					   const struct in6_addr *gwaddr, int ifindex);
99 #endif
100 
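/*
 * Copy-on-write for dst metrics: when a shared (read-only) metrics block
 * must be modified, bind an inet_peer to the route and switch
 * dst->_metrics over to the peer's private metrics array, copying the
 * old values on first use.  The cmpxchg() resolves races with other
 * writers.
 */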
101 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 {
103 	struct rt6_info *rt = (struct rt6_info *) dst;
104 	struct inet_peer *peer;
105 	u32 *p = NULL;
106 
107 	if (!rt->rt6i_peer)
108 		rt6_bind_peer(rt, 1);
109 
110 	peer = rt->rt6i_peer;
111 	if (peer) {
112 		u32 *old_p = __DST_METRICS_PTR(old);
113 		unsigned long prev, new;
114 
115 		p = peer->metrics;
116 		if (inet_metrics_new(peer))
117 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118 
119 		new = (unsigned long) p;
120 		prev = cmpxchg(&dst->_metrics, old, new);
121 
122 		if (prev != old) {
123 			p = __DST_METRICS_PTR(prev);
124 			if (prev & DST_METRICS_READ_ONLY)
125 				p = NULL;
126 		}
127 	}
128 	return p;
129 }
130 
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
132 {
133 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
134 }
135 
136 static struct dst_ops ip6_dst_ops_template = {
137 	.family			=	AF_INET6,
138 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
139 	.gc			=	ip6_dst_gc,
140 	.gc_thresh		=	1024,
141 	.check			=	ip6_dst_check,
142 	.default_advmss		=	ip6_default_advmss,
143 	.default_mtu		=	ip6_default_mtu,
144 	.cow_metrics		=	ipv6_cow_metrics,
145 	.destroy		=	ip6_dst_destroy,
146 	.ifdown			=	ip6_dst_ifdown,
147 	.negative_advice	=	ip6_negative_advice,
148 	.link_failure		=	ip6_link_failure,
149 	.update_pmtu		=	ip6_rt_update_pmtu,
150 	.local_out		=	__ip6_local_out,
151 	.neigh_lookup		=	ip6_neigh_lookup,
152 };
153 
154 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
155 {
156 	return 0;
157 }
158 
159 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
160 {
161 }
162 
163 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
164 					 unsigned long old)
165 {
166 	return NULL;
167 }
168 
169 static struct dst_ops ip6_dst_blackhole_ops = {
170 	.family			=	AF_INET6,
171 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
172 	.destroy		=	ip6_dst_destroy,
173 	.check			=	ip6_dst_check,
174 	.default_mtu		=	ip6_blackhole_default_mtu,
175 	.default_advmss		=	ip6_default_advmss,
176 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
177 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
178 	.neigh_lookup		=	ip6_neigh_lookup,
179 };
180 
181 static const u32 ip6_template_metrics[RTAX_MAX] = {
182 	[RTAX_HOPLIMIT - 1] = 255,
183 };
184 
185 static struct rt6_info ip6_null_entry_template = {
186 	.dst = {
187 		.__refcnt	= ATOMIC_INIT(1),
188 		.__use		= 1,
189 		.obsolete	= -1,
190 		.error		= -ENETUNREACH,
191 		.input		= ip6_pkt_discard,
192 		.output		= ip6_pkt_discard_out,
193 	},
194 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
195 	.rt6i_protocol  = RTPROT_KERNEL,
196 	.rt6i_metric	= ~(u32) 0,
197 	.rt6i_ref	= ATOMIC_INIT(1),
198 };
199 
200 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
201 
202 static int ip6_pkt_prohibit(struct sk_buff *skb);
203 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
204 
205 static struct rt6_info ip6_prohibit_entry_template = {
206 	.dst = {
207 		.__refcnt	= ATOMIC_INIT(1),
208 		.__use		= 1,
209 		.obsolete	= -1,
210 		.error		= -EACCES,
211 		.input		= ip6_pkt_prohibit,
212 		.output		= ip6_pkt_prohibit_out,
213 	},
214 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
215 	.rt6i_protocol  = RTPROT_KERNEL,
216 	.rt6i_metric	= ~(u32) 0,
217 	.rt6i_ref	= ATOMIC_INIT(1),
218 };
219 
220 static struct rt6_info ip6_blk_hole_entry_template = {
221 	.dst = {
222 		.__refcnt	= ATOMIC_INIT(1),
223 		.__use		= 1,
224 		.obsolete	= -1,
225 		.error		= -EINVAL,
226 		.input		= dst_discard,
227 		.output		= dst_discard,
228 	},
229 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
230 	.rt6i_protocol  = RTPROT_KERNEL,
231 	.rt6i_metric	= ~(u32) 0,
232 	.rt6i_ref	= ATOMIC_INIT(1),
233 };
234 
235 #endif
236 
237 /* allocate dst with ip6_dst_ops */
238 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
239 					     struct net_device *dev,
240 					     int flags)
241 {
242 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
243 
244 	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
245 
246 	return rt;
247 }
248 
249 static void ip6_dst_destroy(struct dst_entry *dst)
250 {
251 	struct rt6_info *rt = (struct rt6_info *)dst;
252 	struct inet6_dev *idev = rt->rt6i_idev;
253 	struct inet_peer *peer = rt->rt6i_peer;
254 
255 	if (idev != NULL) {
256 		rt->rt6i_idev = NULL;
257 		in6_dev_put(idev);
258 	}
259 	if (peer) {
260 		rt->rt6i_peer = NULL;
261 		inet_putpeer(peer);
262 	}
263 }
264 
265 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
266 
267 static u32 rt6_peer_genid(void)
268 {
269 	return atomic_read(&__rt6_peer_genid);
270 }
271 
272 void rt6_bind_peer(struct rt6_info *rt, int create)
273 {
274 	struct inet_peer *peer;
275 
276 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
277 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
278 		inet_putpeer(peer);
279 	else
280 		rt->rt6i_peer_genid = rt6_peer_genid();
281 }
282 
283 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 			   int how)
285 {
286 	struct rt6_info *rt = (struct rt6_info *)dst;
287 	struct inet6_dev *idev = rt->rt6i_idev;
288 	struct net_device *loopback_dev =
289 		dev_net(dev)->loopback_dev;
290 
291 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
292 		struct inet6_dev *loopback_idev =
293 			in6_dev_get(loopback_dev);
294 		if (loopback_idev != NULL) {
295 			rt->rt6i_idev = loopback_idev;
296 			in6_dev_put(idev);
297 		}
298 	}
299 }
300 
301 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
302 {
303 	return (rt->rt6i_flags & RTF_EXPIRES) &&
304 		time_after(jiffies, rt->rt6i_expires);
305 }
306 
307 static inline int rt6_need_strict(const struct in6_addr *daddr)
308 {
309 	return ipv6_addr_type(daddr) &
310 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
311 }
312 
313 /*
314  *	Route lookup. Any table->tb6_lock is implied.
315  */
316 
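/*
 * rt6_device_match() walks the routes sharing a fib6 leaf and picks the
 * one whose device matches the requested output interface, keeping a
 * suitable loopback route as a fallback.  With no output interface it
 * instead picks the first route for which the source address checks out
 * as a local address.  If RT6_LOOKUP_F_IFACE is set and nothing matches
 * the given interface, the null entry is returned.
 */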
317 static inline struct rt6_info *rt6_device_match(struct net *net,
318 						    struct rt6_info *rt,
319 						    const struct in6_addr *saddr,
320 						    int oif,
321 						    int flags)
322 {
323 	struct rt6_info *local = NULL;
324 	struct rt6_info *sprt;
325 
326 	if (!oif && ipv6_addr_any(saddr))
327 		goto out;
328 
329 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
330 		struct net_device *dev = sprt->rt6i_dev;
331 
332 		if (oif) {
333 			if (dev->ifindex == oif)
334 				return sprt;
335 			if (dev->flags & IFF_LOOPBACK) {
336 				if (sprt->rt6i_idev == NULL ||
337 				    sprt->rt6i_idev->dev->ifindex != oif) {
338 					if (flags & RT6_LOOKUP_F_IFACE && oif)
339 						continue;
340 					if (local && (!oif ||
341 						      local->rt6i_idev->dev->ifindex == oif))
342 						continue;
343 				}
344 				local = sprt;
345 			}
346 		} else {
347 			if (ipv6_chk_addr(net, saddr, dev,
348 					  flags & RT6_LOOKUP_F_IFACE))
349 				return sprt;
350 		}
351 	}
352 
353 	if (oif) {
354 		if (local)
355 			return local;
356 
357 		if (flags & RT6_LOOKUP_F_IFACE)
358 			return net->ipv6.ip6_null_entry;
359 	}
360 out:
361 	return rt;
362 }
363 
364 #ifdef CONFIG_IPV6_ROUTER_PREF
365 static void rt6_probe(struct rt6_info *rt)
366 {
367 	struct neighbour *neigh;
368 	/*
369 	 * Okay, this does not seem to be appropriate
370 	 * for now, however, we need to check if it
371 	 * is really so; aka Router Reachability Probing.
372 	 *
373 	 * Router Reachability Probe MUST be rate-limited
374 	 * to no more than one per minute.
375 	 */
376 	rcu_read_lock();
377 	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
378 	if (!neigh || (neigh->nud_state & NUD_VALID))
379 		goto out;
380 	read_lock_bh(&neigh->lock);
381 	if (!(neigh->nud_state & NUD_VALID) &&
382 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
383 		struct in6_addr mcaddr;
384 		struct in6_addr *target;
385 
386 		neigh->updated = jiffies;
387 		read_unlock_bh(&neigh->lock);
388 
389 		target = (struct in6_addr *)&neigh->primary_key;
390 		addrconf_addr_solict_mult(target, &mcaddr);
391 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
392 	} else {
393 		read_unlock_bh(&neigh->lock);
394 	}
395 out:
396 	rcu_read_unlock();
397 }
398 #else
399 static inline void rt6_probe(struct rt6_info *rt)
400 {
401 }
402 #endif
403 
404 /*
405  * Default Router Selection (RFC 2461 6.3.6)
406  */
407 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
408 {
409 	struct net_device *dev = rt->rt6i_dev;
410 	if (!oif || dev->ifindex == oif)
411 		return 2;
412 	if ((dev->flags & IFF_LOOPBACK) &&
413 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
414 		return 1;
415 	return 0;
416 }
417 
418 static inline int rt6_check_neigh(struct rt6_info *rt)
419 {
420 	struct neighbour *neigh;
421 	int m;
422 
423 	rcu_read_lock();
424 	neigh = dst_get_neighbour(&rt->dst);
425 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
426 	    !(rt->rt6i_flags & RTF_GATEWAY))
427 		m = 1;
428 	else if (neigh) {
429 		read_lock_bh(&neigh->lock);
430 		if (neigh->nud_state & NUD_VALID)
431 			m = 2;
432 #ifdef CONFIG_IPV6_ROUTER_PREF
433 		else if (neigh->nud_state & NUD_FAILED)
434 			m = 0;
435 #endif
436 		else
437 			m = 1;
438 		read_unlock_bh(&neigh->lock);
439 	} else
440 		m = 0;
441 	rcu_read_unlock();
442 	return m;
443 }
444 
445 static int rt6_score_route(struct rt6_info *rt, int oif,
446 			   int strict)
447 {
448 	int m, n;
449 
450 	m = rt6_check_dev(rt, oif);
451 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
452 		return -1;
453 #ifdef CONFIG_IPV6_ROUTER_PREF
454 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
455 #endif
456 	n = rt6_check_neigh(rt);
457 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
458 		return -1;
459 	return m;
460 }
461 
462 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
463 				   int *mpri, struct rt6_info *match)
464 {
465 	int m;
466 
467 	if (rt6_check_expired(rt))
468 		goto out;
469 
470 	m = rt6_score_route(rt, oif, strict);
471 	if (m < 0)
472 		goto out;
473 
474 	if (m > *mpri) {
475 		if (strict & RT6_LOOKUP_F_REACHABLE)
476 			rt6_probe(match);
477 		*mpri = m;
478 		match = rt;
479 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
480 		rt6_probe(rt);
481 	}
482 
483 out:
484 	return match;
485 }
486 
487 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
488 				     struct rt6_info *rr_head,
489 				     u32 metric, int oif, int strict)
490 {
491 	struct rt6_info *rt, *match;
492 	int mpri = -1;
493 
494 	match = NULL;
495 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
496 	     rt = rt->dst.rt6_next)
497 		match = find_match(rt, oif, strict, &mpri, match);
498 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
499 	     rt = rt->dst.rt6_next)
500 		match = find_match(rt, oif, strict, &mpri, match);
501 
502 	return match;
503 }
504 
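/*
 * rt6_select() implements the router selection policy: starting from
 * fn->rr_ptr, score each route of the same metric and keep the best one.
 * When the reachability requirement leaves no match, rr_ptr is advanced
 * so that the next lookup round-robins to the following route of equal
 * metric.
 */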
505 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
506 {
507 	struct rt6_info *match, *rt0;
508 	struct net *net;
509 
510 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
511 		  __func__, fn->leaf, oif);
512 
513 	rt0 = fn->rr_ptr;
514 	if (!rt0)
515 		fn->rr_ptr = rt0 = fn->leaf;
516 
517 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
518 
519 	if (!match &&
520 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
521 		struct rt6_info *next = rt0->dst.rt6_next;
522 
523 		/* no entries matched; do round-robin */
524 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
525 			next = fn->leaf;
526 
527 		if (next != rt0)
528 			fn->rr_ptr = next;
529 	}
530 
531 	RT6_TRACE("%s() => %p\n",
532 		  __func__, match);
533 
534 	net = dev_net(rt0->rt6i_dev);
535 	return match ? match : net->ipv6.ip6_null_entry;
536 }
537 
538 #ifdef CONFIG_IPV6_ROUTE_INFO
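/*
 * Process a Route Information Option (RFC 4191) received in a Router
 * Advertisement: validate the option, then add, update or, on a zero
 * lifetime, delete the RTF_ROUTEINFO route for the advertised prefix,
 * preference and lifetime.
 */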
539 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
540 		  const struct in6_addr *gwaddr)
541 {
542 	struct net *net = dev_net(dev);
543 	struct route_info *rinfo = (struct route_info *) opt;
544 	struct in6_addr prefix_buf, *prefix;
545 	unsigned int pref;
546 	unsigned long lifetime;
547 	struct rt6_info *rt;
548 
549 	if (len < sizeof(struct route_info)) {
550 		return -EINVAL;
551 	}
552 
553 	/* Sanity check for prefix_len and length */
554 	if (rinfo->length > 3) {
555 		return -EINVAL;
556 	} else if (rinfo->prefix_len > 128) {
557 		return -EINVAL;
558 	} else if (rinfo->prefix_len > 64) {
559 		if (rinfo->length < 2) {
560 			return -EINVAL;
561 		}
562 	} else if (rinfo->prefix_len > 0) {
563 		if (rinfo->length < 1) {
564 			return -EINVAL;
565 		}
566 	}
567 
568 	pref = rinfo->route_pref;
569 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
570 		return -EINVAL;
571 
572 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
573 
574 	if (rinfo->length == 3)
575 		prefix = (struct in6_addr *)rinfo->prefix;
576 	else {
577 		/* this function is safe */
578 		ipv6_addr_prefix(&prefix_buf,
579 				 (struct in6_addr *)rinfo->prefix,
580 				 rinfo->prefix_len);
581 		prefix = &prefix_buf;
582 	}
583 
584 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
585 				dev->ifindex);
586 
587 	if (rt && !lifetime) {
588 		ip6_del_rt(rt);
589 		rt = NULL;
590 	}
591 
592 	if (!rt && lifetime)
593 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
594 					pref);
595 	else if (rt)
596 		rt->rt6i_flags = RTF_ROUTEINFO |
597 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
598 
599 	if (rt) {
600 		if (!addrconf_finite_timeout(lifetime)) {
601 			rt->rt6i_flags &= ~RTF_EXPIRES;
602 		} else {
603 			rt->rt6i_expires = jiffies + HZ * lifetime;
604 			rt->rt6i_flags |= RTF_EXPIRES;
605 		}
606 		dst_release(&rt->dst);
607 	}
608 	return 0;
609 }
610 #endif
611 
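/*
 * BACKTRACK: if the lookup ended up on the null entry, walk back up the
 * fib6 tree.  At each level either descend into the parent's source
 * address subtree (when one exists and we are not already inside it) or
 * move to the parent itself, until a node carrying route info is found
 * (restart the selection there) or the tree root is reached (bail out
 * through the "out" label).
 */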
612 #define BACKTRACK(__net, saddr)			\
613 do { \
614 	if (rt == __net->ipv6.ip6_null_entry) {	\
615 		struct fib6_node *pn; \
616 		while (1) { \
617 			if (fn->fn_flags & RTN_TL_ROOT) \
618 				goto out; \
619 			pn = fn->parent; \
620 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
621 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
622 			else \
623 				fn = pn; \
624 			if (fn->fn_flags & RTN_RTINFO) \
625 				goto restart; \
626 		} \
627 	} \
628 } while(0)
629 
630 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
631 					     struct fib6_table *table,
632 					     struct flowi6 *fl6, int flags)
633 {
634 	struct fib6_node *fn;
635 	struct rt6_info *rt;
636 
637 	read_lock_bh(&table->tb6_lock);
638 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
639 restart:
640 	rt = fn->leaf;
641 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
642 	BACKTRACK(net, &fl6->saddr);
643 out:
644 	dst_use(&rt->dst, jiffies);
645 	read_unlock_bh(&table->tb6_lock);
646 	return rt;
647 
648 }
649 
650 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
651 			    const struct in6_addr *saddr, int oif, int strict)
652 {
653 	struct flowi6 fl6 = {
654 		.flowi6_oif = oif,
655 		.daddr = *daddr,
656 	};
657 	struct dst_entry *dst;
658 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
659 
660 	if (saddr) {
661 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
662 		flags |= RT6_LOOKUP_F_HAS_SADDR;
663 	}
664 
665 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
666 	if (dst->error == 0)
667 		return (struct rt6_info *) dst;
668 
669 	dst_release(dst);
670 
671 	return NULL;
672 }
673 
674 EXPORT_SYMBOL(rt6_lookup);
675 
676 /* ip6_ins_rt is called with FREE table->tb6_lock.
677    It takes a new route entry; if the addition fails for any reason, the
678    route is freed. In any case, if the caller does not hold a reference
679    to it, it may be destroyed.
680  */
681 
682 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
683 {
684 	int err;
685 	struct fib6_table *table;
686 
687 	table = rt->rt6i_table;
688 	write_lock_bh(&table->tb6_lock);
689 	err = fib6_add(&table->tb6_root, rt, info);
690 	write_unlock_bh(&table->tb6_lock);
691 
692 	return err;
693 }
694 
695 int ip6_ins_rt(struct rt6_info *rt)
696 {
697 	struct nl_info info = {
698 		.nl_net = dev_net(rt->rt6i_dev),
699 	};
700 	return __ip6_ins_rt(rt, &info);
701 }
702 
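/*
 * rt6_alloc_cow() makes a per-destination (/128, RTF_CACHE) copy of a
 * route and binds a neighbour entry for its nexthop; for non-gateway
 * routes the destination itself becomes the gateway and may be flagged
 * RTF_ANYCAST.  On neighbour table overflow one forced dst GC pass is
 * attempted (only outside softirq context) before the clone is freed.
 */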
703 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
704 				      const struct in6_addr *daddr,
705 				      const struct in6_addr *saddr)
706 {
707 	struct rt6_info *rt;
708 
709 	/*
710 	 *	Clone the route.
711 	 */
712 
713 	rt = ip6_rt_copy(ort, daddr);
714 
715 	if (rt) {
716 		struct neighbour *neigh;
717 		int attempts = !in_softirq();
718 
719 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
720 			if (rt->rt6i_dst.plen != 128 &&
721 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
722 				rt->rt6i_flags |= RTF_ANYCAST;
723 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
724 		}
725 
726 		rt->rt6i_dst.plen = 128;
727 		rt->rt6i_flags |= RTF_CACHE;
728 		rt->dst.flags |= DST_HOST;
729 
730 #ifdef CONFIG_IPV6_SUBTREES
731 		if (rt->rt6i_src.plen && saddr) {
732 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
733 			rt->rt6i_src.plen = 128;
734 		}
735 #endif
736 
737 	retry:
738 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
739 		if (IS_ERR(neigh)) {
740 			struct net *net = dev_net(rt->rt6i_dev);
741 			int saved_rt_min_interval =
742 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
743 			int saved_rt_elasticity =
744 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
745 
746 			if (attempts-- > 0) {
747 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
748 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
749 
750 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
751 
752 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
753 					saved_rt_elasticity;
754 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
755 					saved_rt_min_interval;
756 				goto retry;
757 			}
758 
759 			if (net_ratelimit())
760 				printk(KERN_WARNING
761 				       "ipv6: Neighbour table overflow.\n");
762 			dst_free(&rt->dst);
763 			return NULL;
764 		}
765 		dst_set_neighbour(&rt->dst, neigh);
766 
767 	}
768 
769 	return rt;
770 }
771 
772 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
773 					const struct in6_addr *daddr)
774 {
775 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
776 
777 	if (rt) {
778 		rt->rt6i_dst.plen = 128;
779 		rt->rt6i_flags |= RTF_CACHE;
780 		rt->dst.flags |= DST_HOST;
781 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
782 	}
783 	return rt;
784 }
785 
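/*
 * ip6_pol_route() is the common resolver behind the input and output
 * lookups.  It selects the best route for the flow and, unless that
 * route is already a cached host route (or the null entry), creates a
 * per-destination clone (COW when a nexthop neighbour still has to be
 * bound, a plain clone otherwise), inserts it into the table and retries
 * a few times if another CPU races the insertion.  If a pass restricted
 * to reachable routers finds nothing, the lookup is redone without that
 * restriction.
 */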
786 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
787 				      struct flowi6 *fl6, int flags)
788 {
789 	struct fib6_node *fn;
790 	struct rt6_info *rt, *nrt;
791 	int strict = 0;
792 	int attempts = 3;
793 	int err;
794 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
795 
796 	strict |= flags & RT6_LOOKUP_F_IFACE;
797 
798 relookup:
799 	read_lock_bh(&table->tb6_lock);
800 
801 restart_2:
802 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
803 
804 restart:
805 	rt = rt6_select(fn, oif, strict | reachable);
806 
807 	BACKTRACK(net, &fl6->saddr);
808 	if (rt == net->ipv6.ip6_null_entry ||
809 	    rt->rt6i_flags & RTF_CACHE)
810 		goto out;
811 
812 	dst_hold(&rt->dst);
813 	read_unlock_bh(&table->tb6_lock);
814 
815 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
816 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
817 	else if (!(rt->dst.flags & DST_HOST))
818 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
819 	else
820 		goto out2;
821 
822 	dst_release(&rt->dst);
823 	rt = nrt ? : net->ipv6.ip6_null_entry;
824 
825 	dst_hold(&rt->dst);
826 	if (nrt) {
827 		err = ip6_ins_rt(nrt);
828 		if (!err)
829 			goto out2;
830 	}
831 
832 	if (--attempts <= 0)
833 		goto out2;
834 
835 	/*
836 	 * Race condition! In the gap while table->tb6_lock was
837 	 * released, someone could have inserted this route.  Relookup.
838 	 */
839 	dst_release(&rt->dst);
840 	goto relookup;
841 
842 out:
843 	if (reachable) {
844 		reachable = 0;
845 		goto restart_2;
846 	}
847 	dst_hold(&rt->dst);
848 	read_unlock_bh(&table->tb6_lock);
849 out2:
850 	rt->dst.lastuse = jiffies;
851 	rt->dst.__use++;
852 
853 	return rt;
854 }
855 
856 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
857 					    struct flowi6 *fl6, int flags)
858 {
859 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
860 }
861 
862 void ip6_route_input(struct sk_buff *skb)
863 {
864 	const struct ipv6hdr *iph = ipv6_hdr(skb);
865 	struct net *net = dev_net(skb->dev);
866 	int flags = RT6_LOOKUP_F_HAS_SADDR;
867 	struct flowi6 fl6 = {
868 		.flowi6_iif = skb->dev->ifindex,
869 		.daddr = iph->daddr,
870 		.saddr = iph->saddr,
871 		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
872 		.flowi6_mark = skb->mark,
873 		.flowi6_proto = iph->nexthdr,
874 	};
875 
876 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
877 		flags |= RT6_LOOKUP_F_IFACE;
878 
879 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
880 }
881 
882 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
883 					     struct flowi6 *fl6, int flags)
884 {
885 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
886 }
887 
888 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
889 				    struct flowi6 *fl6)
890 {
891 	int flags = 0;
892 
893 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
894 		flags |= RT6_LOOKUP_F_IFACE;
895 
896 	if (!ipv6_addr_any(&fl6->saddr))
897 		flags |= RT6_LOOKUP_F_HAS_SADDR;
898 	else if (sk)
899 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
900 
901 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
902 }
903 
904 EXPORT_SYMBOL(ip6_route_output);
905 
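/*
 * ip6_blackhole_route() creates a standalone copy of a route under
 * ip6_dst_blackhole_ops: the copy keeps the metrics, gateway and routing
 * keys of the original, but its input and output handlers simply discard
 * packets.  The reference to the original dst is dropped.
 */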
906 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
907 {
908 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
909 	struct dst_entry *new = NULL;
910 
911 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
912 	if (rt) {
913 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
914 
915 		new = &rt->dst;
916 
917 		new->__use = 1;
918 		new->input = dst_discard;
919 		new->output = dst_discard;
920 
921 		if (dst_metrics_read_only(&ort->dst))
922 			new->_metrics = ort->dst._metrics;
923 		else
924 			dst_copy_metrics(new, &ort->dst);
925 		rt->rt6i_idev = ort->rt6i_idev;
926 		if (rt->rt6i_idev)
927 			in6_dev_hold(rt->rt6i_idev);
928 		rt->rt6i_expires = 0;
929 
930 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
931 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
932 		rt->rt6i_metric = 0;
933 
934 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
935 #ifdef CONFIG_IPV6_SUBTREES
936 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
937 #endif
938 
939 		dst_free(new);
940 	}
941 
942 	dst_release(dst_orig);
943 	return new ? new : ERR_PTR(-ENOMEM);
944 }
945 
946 /*
947  *	Destination cache support functions
948  */
949 
950 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
951 {
952 	struct rt6_info *rt;
953 
954 	rt = (struct rt6_info *) dst;
955 
956 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
957 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
958 			if (!rt->rt6i_peer)
959 				rt6_bind_peer(rt, 0);
960 			rt->rt6i_peer_genid = rt6_peer_genid();
961 		}
962 		return dst;
963 	}
964 	return NULL;
965 }
966 
967 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
968 {
969 	struct rt6_info *rt = (struct rt6_info *) dst;
970 
971 	if (rt) {
972 		if (rt->rt6i_flags & RTF_CACHE) {
973 			if (rt6_check_expired(rt)) {
974 				ip6_del_rt(rt);
975 				dst = NULL;
976 			}
977 		} else {
978 			dst_release(dst);
979 			dst = NULL;
980 		}
981 	}
982 	return dst;
983 }
984 
985 static void ip6_link_failure(struct sk_buff *skb)
986 {
987 	struct rt6_info *rt;
988 
989 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
990 
991 	rt = (struct rt6_info *) skb_dst(skb);
992 	if (rt) {
993 		if (rt->rt6i_flags&RTF_CACHE) {
994 			dst_set_expires(&rt->dst, 0);
995 			rt->rt6i_flags |= RTF_EXPIRES;
996 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
997 			rt->rt6i_node->fn_sernum = -1;
998 	}
999 }
1000 
1001 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1002 {
1003 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1004 
1005 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1006 		rt6->rt6i_flags |= RTF_MODIFIED;
1007 		if (mtu < IPV6_MIN_MTU) {
1008 			u32 features = dst_metric(dst, RTAX_FEATURES);
1009 			mtu = IPV6_MIN_MTU;
1010 			features |= RTAX_FEATURE_ALLFRAG;
1011 			dst_metric_set(dst, RTAX_FEATURES, features);
1012 		}
1013 		dst_metric_set(dst, RTAX_MTU, mtu);
1014 	}
1015 }
1016 
1017 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1018 {
1019 	struct net_device *dev = dst->dev;
1020 	unsigned int mtu = dst_mtu(dst);
1021 	struct net *net = dev_net(dev);
1022 
1023 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1024 
1025 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1026 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1027 
1028 	/*
1029 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1030 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1031 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1032 	 * rely only on pmtu discovery"
1033 	 */
1034 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1035 		mtu = IPV6_MAXPLEN;
1036 	return mtu;
1037 }
1038 
1039 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1040 {
1041 	unsigned int mtu = IPV6_MIN_MTU;
1042 	struct inet6_dev *idev;
1043 
1044 	rcu_read_lock();
1045 	idev = __in6_dev_get(dst->dev);
1046 	if (idev)
1047 		mtu = idev->cnf.mtu6;
1048 	rcu_read_unlock();
1049 
1050 	return mtu;
1051 }
1052 
1053 static struct dst_entry *icmp6_dst_gc_list;
1054 static DEFINE_SPINLOCK(icmp6_dst_lock);
1055 
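/*
 * dst entries allocated for ICMPv6/ndisc are not inserted into the FIB;
 * they are chained on icmp6_dst_gc_list above and reclaimed by
 * icmp6_dst_gc() once their refcount drops to zero.
 */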
1056 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1057 				  struct neighbour *neigh,
1058 				  const struct in6_addr *addr)
1059 {
1060 	struct rt6_info *rt;
1061 	struct inet6_dev *idev = in6_dev_get(dev);
1062 	struct net *net = dev_net(dev);
1063 
1064 	if (unlikely(idev == NULL))
1065 		return NULL;
1066 
1067 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1068 	if (unlikely(rt == NULL)) {
1069 		in6_dev_put(idev);
1070 		goto out;
1071 	}
1072 
1073 	if (neigh)
1074 		neigh_hold(neigh);
1075 	else {
1076 		neigh = ndisc_get_neigh(dev, addr);
1077 		if (IS_ERR(neigh))
1078 			neigh = NULL;
1079 	}
1080 
1081 	rt->rt6i_idev     = idev;
1082 	dst_set_neighbour(&rt->dst, neigh);
1083 	atomic_set(&rt->dst.__refcnt, 1);
1084 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1085 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1086 	rt->dst.output  = ip6_output;
1087 
1088 	spin_lock_bh(&icmp6_dst_lock);
1089 	rt->dst.next = icmp6_dst_gc_list;
1090 	icmp6_dst_gc_list = &rt->dst;
1091 	spin_unlock_bh(&icmp6_dst_lock);
1092 
1093 	fib6_force_start_gc(net);
1094 
1095 out:
1096 	return &rt->dst;
1097 }
1098 
1099 int icmp6_dst_gc(void)
1100 {
1101 	struct dst_entry *dst, **pprev;
1102 	int more = 0;
1103 
1104 	spin_lock_bh(&icmp6_dst_lock);
1105 	pprev = &icmp6_dst_gc_list;
1106 
1107 	while ((dst = *pprev) != NULL) {
1108 		if (!atomic_read(&dst->__refcnt)) {
1109 			*pprev = dst->next;
1110 			dst_free(dst);
1111 		} else {
1112 			pprev = &dst->next;
1113 			++more;
1114 		}
1115 	}
1116 
1117 	spin_unlock_bh(&icmp6_dst_lock);
1118 
1119 	return more;
1120 }
1121 
1122 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1123 			    void *arg)
1124 {
1125 	struct dst_entry *dst, **pprev;
1126 
1127 	spin_lock_bh(&icmp6_dst_lock);
1128 	pprev = &icmp6_dst_gc_list;
1129 	while ((dst = *pprev) != NULL) {
1130 		struct rt6_info *rt = (struct rt6_info *) dst;
1131 		if (func(rt, arg)) {
1132 			*pprev = dst->next;
1133 			dst_free(dst);
1134 		} else {
1135 			pprev = &dst->next;
1136 		}
1137 	}
1138 	spin_unlock_bh(&icmp6_dst_lock);
1139 }
1140 
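/*
 * dst_ops garbage collector.  A pass is skipped while the minimum
 * interval since the last run has not elapsed, unless the cache has
 * outgrown ip6_rt_max_size.  ip6_rt_gc_expire is bumped each time a pass
 * runs and decays by the elasticity factor on every call, so collection
 * becomes more aggressive under sustained pressure.  Returns nonzero
 * while the table is still over its maximum size.
 */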
1141 static int ip6_dst_gc(struct dst_ops *ops)
1142 {
1143 	unsigned long now = jiffies;
1144 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1145 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1146 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1147 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1148 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1149 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1150 	int entries;
1151 
1152 	entries = dst_entries_get_fast(ops);
1153 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1154 	    entries <= rt_max_size)
1155 		goto out;
1156 
1157 	net->ipv6.ip6_rt_gc_expire++;
1158 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1159 	net->ipv6.ip6_rt_last_gc = now;
1160 	entries = dst_entries_get_slow(ops);
1161 	if (entries < ops->gc_thresh)
1162 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1163 out:
1164 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1165 	return entries > rt_max_size;
1166 }
1167 
1168 /* Clean host part of a prefix. Not necessary in radix tree,
1169    but results in cleaner routing tables.
1170 
1171    Remove it only once everything is known to work!
1172  */
1173 
1174 int ip6_dst_hoplimit(struct dst_entry *dst)
1175 {
1176 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1177 	if (hoplimit == 0) {
1178 		struct net_device *dev = dst->dev;
1179 		struct inet6_dev *idev;
1180 
1181 		rcu_read_lock();
1182 		idev = __in6_dev_get(dev);
1183 		if (idev)
1184 			hoplimit = idev->cnf.hop_limit;
1185 		else
1186 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1187 		rcu_read_unlock();
1188 	}
1189 	return hoplimit;
1190 }
1191 EXPORT_SYMBOL(ip6_dst_hoplimit);
1192 
1193 /*
1194  *	Create a route from a fib6_config and insert it into its FIB table.
1195  */
1196 
1197 int ip6_route_add(struct fib6_config *cfg)
1198 {
1199 	int err;
1200 	struct net *net = cfg->fc_nlinfo.nl_net;
1201 	struct rt6_info *rt = NULL;
1202 	struct net_device *dev = NULL;
1203 	struct inet6_dev *idev = NULL;
1204 	struct fib6_table *table;
1205 	int addr_type;
1206 
1207 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1208 		return -EINVAL;
1209 #ifndef CONFIG_IPV6_SUBTREES
1210 	if (cfg->fc_src_len)
1211 		return -EINVAL;
1212 #endif
1213 	if (cfg->fc_ifindex) {
1214 		err = -ENODEV;
1215 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1216 		if (!dev)
1217 			goto out;
1218 		idev = in6_dev_get(dev);
1219 		if (!idev)
1220 			goto out;
1221 	}
1222 
1223 	if (cfg->fc_metric == 0)
1224 		cfg->fc_metric = IP6_RT_PRIO_USER;
1225 
1226 	table = fib6_new_table(net, cfg->fc_table);
1227 	if (table == NULL) {
1228 		err = -ENOBUFS;
1229 		goto out;
1230 	}
1231 
1232 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1233 
1234 	if (rt == NULL) {
1235 		err = -ENOMEM;
1236 		goto out;
1237 	}
1238 
1239 	rt->dst.obsolete = -1;
1240 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1241 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1242 				0;
1243 
1244 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1245 		cfg->fc_protocol = RTPROT_BOOT;
1246 	rt->rt6i_protocol = cfg->fc_protocol;
1247 
1248 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1249 
1250 	if (addr_type & IPV6_ADDR_MULTICAST)
1251 		rt->dst.input = ip6_mc_input;
1252 	else if (cfg->fc_flags & RTF_LOCAL)
1253 		rt->dst.input = ip6_input;
1254 	else
1255 		rt->dst.input = ip6_forward;
1256 
1257 	rt->dst.output = ip6_output;
1258 
1259 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1260 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1261 	if (rt->rt6i_dst.plen == 128)
1262 	       rt->dst.flags |= DST_HOST;
1263 
1264 #ifdef CONFIG_IPV6_SUBTREES
1265 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1266 	rt->rt6i_src.plen = cfg->fc_src_len;
1267 #endif
1268 
1269 	rt->rt6i_metric = cfg->fc_metric;
1270 
1271 	/* We cannot add true routes via loopback here,
1272 	   they would result in kernel looping; promote them to reject routes
1273 	 */
1274 	if ((cfg->fc_flags & RTF_REJECT) ||
1275 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1276 					      && !(cfg->fc_flags&RTF_LOCAL))) {
1277 		/* hold loopback dev/idev if we haven't done so. */
1278 		if (dev != net->loopback_dev) {
1279 			if (dev) {
1280 				dev_put(dev);
1281 				in6_dev_put(idev);
1282 			}
1283 			dev = net->loopback_dev;
1284 			dev_hold(dev);
1285 			idev = in6_dev_get(dev);
1286 			if (!idev) {
1287 				err = -ENODEV;
1288 				goto out;
1289 			}
1290 		}
1291 		rt->dst.output = ip6_pkt_discard_out;
1292 		rt->dst.input = ip6_pkt_discard;
1293 		rt->dst.error = -ENETUNREACH;
1294 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1295 		goto install_route;
1296 	}
1297 
1298 	if (cfg->fc_flags & RTF_GATEWAY) {
1299 		const struct in6_addr *gw_addr;
1300 		int gwa_type;
1301 
1302 		gw_addr = &cfg->fc_gateway;
1303 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1304 		gwa_type = ipv6_addr_type(gw_addr);
1305 
1306 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1307 			struct rt6_info *grt;
1308 
1309 			/* IPv6 strictly inhibits using non-link-local
1310 			   addresses as the nexthop address.
1311 			   Otherwise, the router will not be able to send redirects.
1312 			   It is very good, but in some (rare!) circumstances
1313 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1314 			   some exceptions. --ANK
1315 			 */
1316 			err = -EINVAL;
1317 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1318 				goto out;
1319 
1320 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1321 
1322 			err = -EHOSTUNREACH;
1323 			if (grt == NULL)
1324 				goto out;
1325 			if (dev) {
1326 				if (dev != grt->rt6i_dev) {
1327 					dst_release(&grt->dst);
1328 					goto out;
1329 				}
1330 			} else {
1331 				dev = grt->rt6i_dev;
1332 				idev = grt->rt6i_idev;
1333 				dev_hold(dev);
1334 				in6_dev_hold(grt->rt6i_idev);
1335 			}
1336 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1337 				err = 0;
1338 			dst_release(&grt->dst);
1339 
1340 			if (err)
1341 				goto out;
1342 		}
1343 		err = -EINVAL;
1344 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1345 			goto out;
1346 	}
1347 
1348 	err = -ENODEV;
1349 	if (dev == NULL)
1350 		goto out;
1351 
1352 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1353 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1354 			err = -EINVAL;
1355 			goto out;
1356 		}
1357 		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1358 		rt->rt6i_prefsrc.plen = 128;
1359 	} else
1360 		rt->rt6i_prefsrc.plen = 0;
1361 
1362 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1363 		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1364 		if (IS_ERR(n)) {
1365 			err = PTR_ERR(n);
1366 			goto out;
1367 		}
1368 		dst_set_neighbour(&rt->dst, n);
1369 	}
1370 
1371 	rt->rt6i_flags = cfg->fc_flags;
1372 
1373 install_route:
1374 	if (cfg->fc_mx) {
1375 		struct nlattr *nla;
1376 		int remaining;
1377 
1378 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1379 			int type = nla_type(nla);
1380 
1381 			if (type) {
1382 				if (type > RTAX_MAX) {
1383 					err = -EINVAL;
1384 					goto out;
1385 				}
1386 
1387 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1388 			}
1389 		}
1390 	}
1391 
1392 	rt->dst.dev = dev;
1393 	rt->rt6i_idev = idev;
1394 	rt->rt6i_table = table;
1395 
1396 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1397 
1398 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1399 
1400 out:
1401 	if (dev)
1402 		dev_put(dev);
1403 	if (idev)
1404 		in6_dev_put(idev);
1405 	if (rt)
1406 		dst_free(&rt->dst);
1407 	return err;
1408 }
1409 
1410 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1411 {
1412 	int err;
1413 	struct fib6_table *table;
1414 	struct net *net = dev_net(rt->rt6i_dev);
1415 
1416 	if (rt == net->ipv6.ip6_null_entry)
1417 		return -ENOENT;
1418 
1419 	table = rt->rt6i_table;
1420 	write_lock_bh(&table->tb6_lock);
1421 
1422 	err = fib6_del(rt, info);
1423 	dst_release(&rt->dst);
1424 
1425 	write_unlock_bh(&table->tb6_lock);
1426 
1427 	return err;
1428 }
1429 
1430 int ip6_del_rt(struct rt6_info *rt)
1431 {
1432 	struct nl_info info = {
1433 		.nl_net = dev_net(rt->rt6i_dev),
1434 	};
1435 	return __ip6_del_rt(rt, &info);
1436 }
1437 
1438 static int ip6_route_del(struct fib6_config *cfg)
1439 {
1440 	struct fib6_table *table;
1441 	struct fib6_node *fn;
1442 	struct rt6_info *rt;
1443 	int err = -ESRCH;
1444 
1445 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1446 	if (table == NULL)
1447 		return err;
1448 
1449 	read_lock_bh(&table->tb6_lock);
1450 
1451 	fn = fib6_locate(&table->tb6_root,
1452 			 &cfg->fc_dst, cfg->fc_dst_len,
1453 			 &cfg->fc_src, cfg->fc_src_len);
1454 
1455 	if (fn) {
1456 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1457 			if (cfg->fc_ifindex &&
1458 			    (rt->rt6i_dev == NULL ||
1459 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1460 				continue;
1461 			if (cfg->fc_flags & RTF_GATEWAY &&
1462 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1463 				continue;
1464 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1465 				continue;
1466 			dst_hold(&rt->dst);
1467 			read_unlock_bh(&table->tb6_lock);
1468 
1469 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1470 		}
1471 	}
1472 	read_unlock_bh(&table->tb6_lock);
1473 
1474 	return err;
1475 }
1476 
1477 /*
1478  *	Handle redirects
1479  */
1480 struct ip6rd_flowi {
1481 	struct flowi6 fl6;
1482 	struct in6_addr gateway;
1483 };
1484 
1485 static struct rt6_info *__ip6_route_redirect(struct net *net,
1486 					     struct fib6_table *table,
1487 					     struct flowi6 *fl6,
1488 					     int flags)
1489 {
1490 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1491 	struct rt6_info *rt;
1492 	struct fib6_node *fn;
1493 
1494 	/*
1495 	 * Get the "current" route for this destination and
1496 	 * check if the redirect has come from the appropriate router.
1497 	 *
1498 	 * RFC 2461 specifies that redirects should only be
1499 	 * accepted if they come from the nexthop to the target.
1500 	 * Due to the way the routes are chosen, this notion
1501 	 * is a bit fuzzy and one might need to check all possible
1502 	 * routes.
1503 	 */
1504 
1505 	read_lock_bh(&table->tb6_lock);
1506 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1507 restart:
1508 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1509 		/*
1510 		 * Current route is on-link; redirect is always invalid.
1511 		 *
1512 		 * It seems the previous statement is not true. It could
1513 		 * be a node which regards us as on-link (e.g. a proxy ndisc host),
1514 		 * but then the router serving it might decide that we should
1515 		 * know the truth 8)8) --ANK (980726).
1516 		 */
1517 		if (rt6_check_expired(rt))
1518 			continue;
1519 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1520 			continue;
1521 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1522 			continue;
1523 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1524 			continue;
1525 		break;
1526 	}
1527 
1528 	if (!rt)
1529 		rt = net->ipv6.ip6_null_entry;
1530 	BACKTRACK(net, &fl6->saddr);
1531 out:
1532 	dst_hold(&rt->dst);
1533 
1534 	read_unlock_bh(&table->tb6_lock);
1535 
1536 	return rt;
1537 };
1538 
1539 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1540 					   const struct in6_addr *src,
1541 					   const struct in6_addr *gateway,
1542 					   struct net_device *dev)
1543 {
1544 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1545 	struct net *net = dev_net(dev);
1546 	struct ip6rd_flowi rdfl = {
1547 		.fl6 = {
1548 			.flowi6_oif = dev->ifindex,
1549 			.daddr = *dest,
1550 			.saddr = *src,
1551 		},
1552 	};
1553 
1554 	ipv6_addr_copy(&rdfl.gateway, gateway);
1555 
1556 	if (rt6_need_strict(dest))
1557 		flags |= RT6_LOOKUP_F_IFACE;
1558 
1559 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1560 						   flags, __ip6_route_redirect);
1561 }
1562 
1563 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1564 		  const struct in6_addr *saddr,
1565 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1566 {
1567 	struct rt6_info *rt, *nrt = NULL;
1568 	struct netevent_redirect netevent;
1569 	struct net *net = dev_net(neigh->dev);
1570 
1571 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1572 
1573 	if (rt == net->ipv6.ip6_null_entry) {
1574 		if (net_ratelimit())
1575 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1576 			       "for redirect target\n");
1577 		goto out;
1578 	}
1579 
1580 	/*
1581 	 *	We have finally decided to accept it.
1582 	 */
1583 
1584 	neigh_update(neigh, lladdr, NUD_STALE,
1585 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1586 		     NEIGH_UPDATE_F_OVERRIDE|
1587 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1588 				     NEIGH_UPDATE_F_ISROUTER))
1589 		     );
1590 
1591 	/*
1592 	 * Redirect received -> path was valid.
1593 	 * Look, redirects are sent only in response to data packets,
1594 	 * so this nexthop is apparently reachable. --ANK
1595 	 */
1596 	dst_confirm(&rt->dst);
1597 
1598 	/* Duplicate redirect: silently ignore. */
1599 	if (neigh == dst_get_neighbour_raw(&rt->dst))
1600 		goto out;
1601 
1602 	nrt = ip6_rt_copy(rt, dest);
1603 	if (nrt == NULL)
1604 		goto out;
1605 
1606 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1607 	if (on_link)
1608 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1609 
1610 	nrt->rt6i_dst.plen = 128;
1611 	nrt->dst.flags |= DST_HOST;
1612 
1613 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1614 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1615 
1616 	if (ip6_ins_rt(nrt))
1617 		goto out;
1618 
1619 	netevent.old = &rt->dst;
1620 	netevent.new = &nrt->dst;
1621 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1622 
1623 	if (rt->rt6i_flags&RTF_CACHE) {
1624 		ip6_del_rt(rt);
1625 		return;
1626 	}
1627 
1628 out:
1629 	dst_release(&rt->dst);
1630 }
1631 
1632 /*
1633  *	Handle ICMP "packet too big" messages
1634  *	i.e. Path MTU discovery
1635  */
1636 
1637 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1638 			     struct net *net, u32 pmtu, int ifindex)
1639 {
1640 	struct rt6_info *rt, *nrt;
1641 	int allfrag = 0;
1642 again:
1643 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1644 	if (rt == NULL)
1645 		return;
1646 
1647 	if (rt6_check_expired(rt)) {
1648 		ip6_del_rt(rt);
1649 		goto again;
1650 	}
1651 
1652 	if (pmtu >= dst_mtu(&rt->dst))
1653 		goto out;
1654 
1655 	if (pmtu < IPV6_MIN_MTU) {
1656 		/*
1657 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1658 		 * MTU (1280) and a fragment header should always be included
1659 		 * after a node receives a Packet Too Big message reporting a
1660 		 * PMTU less than the IPv6 Minimum Link MTU.
1661 		 */
1662 		pmtu = IPV6_MIN_MTU;
1663 		allfrag = 1;
1664 	}
1665 
1666 	/* New mtu received -> path was valid.
1667 	   Packet Too Big messages are sent only in response to data packets,
1668 	   so this nexthop is apparently reachable. --ANK
1669 	 */
1670 	dst_confirm(&rt->dst);
1671 
1672 	/* Host route. If it is static, it would be better
1673 	   not to override it, but to add a new one, so that
1674 	   when the cache entry expires the old pmtu
1675 	   is restored automatically.
1676 	 */
1677 	if (rt->rt6i_flags & RTF_CACHE) {
1678 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1679 		if (allfrag) {
1680 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1681 			features |= RTAX_FEATURE_ALLFRAG;
1682 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1683 		}
1684 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1685 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1686 		goto out;
1687 	}
1688 
1689 	/* Network route.
1690 	   Two cases are possible:
1691 	   1. It is a connected route. Action: COW it.
1692 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1693 	 */
1694 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1695 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1696 	else
1697 		nrt = rt6_alloc_clone(rt, daddr);
1698 
1699 	if (nrt) {
1700 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1701 		if (allfrag) {
1702 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1703 			features |= RTAX_FEATURE_ALLFRAG;
1704 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1705 		}
1706 
1707 		/* According to RFC 1981, detection of a PMTU increase shouldn't
1708 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1709 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1710 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1711 		 * and detection of a PMTU increase happens automatically.
1712 		 */
1713 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1714 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1715 
1716 		ip6_ins_rt(nrt);
1717 	}
1718 out:
1719 	dst_release(&rt->dst);
1720 }
1721 
1722 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1723 			struct net_device *dev, u32 pmtu)
1724 {
1725 	struct net *net = dev_net(dev);
1726 
1727 	/*
1728 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1729 	 * is sending along the path" that caused the Packet Too Big message.
1730 	 * Since it's not possible in the general case to determine which
1731 	 * interface was used to send the original packet, we update the MTU
1732 	 * on the interface that will be used to send future packets. We also
1733 	 * update the MTU on the interface that received the Packet Too Big in
1734 	 * case the original packet was forced out that interface with
1735 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1736 	 * correct behaviour, which would be to update the MTU on all
1737 	 * interfaces.
1738 	 */
1739 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1740 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1741 }
1742 
1743 /*
1744  *	Misc support functions
1745  */
1746 
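/*
 * ip6_rt_copy() allocates a fresh rt6_info for the given destination that
 * inherits the device, idev, metrics, gateway and flags (minus
 * RTF_EXPIRES) of the original route.  It is the common helper behind the
 * COW/clone cache entries and redirect routes.
 */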
1747 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1748 				    const struct in6_addr *dest)
1749 {
1750 	struct net *net = dev_net(ort->rt6i_dev);
1751 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1752 					    ort->dst.dev, 0);
1753 
1754 	if (rt) {
1755 		rt->dst.input = ort->dst.input;
1756 		rt->dst.output = ort->dst.output;
1757 
1758 		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1759 		rt->rt6i_dst.plen = ort->rt6i_dst.plen;
1760 		dst_copy_metrics(&rt->dst, &ort->dst);
1761 		rt->dst.error = ort->dst.error;
1762 		rt->rt6i_idev = ort->rt6i_idev;
1763 		if (rt->rt6i_idev)
1764 			in6_dev_hold(rt->rt6i_idev);
1765 		rt->dst.lastuse = jiffies;
1766 		rt->rt6i_expires = 0;
1767 
1768 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1769 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1770 		rt->rt6i_metric = 0;
1771 
1772 #ifdef CONFIG_IPV6_SUBTREES
1773 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1774 #endif
1775 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1776 		rt->rt6i_table = ort->rt6i_table;
1777 	}
1778 	return rt;
1779 }
1780 
1781 #ifdef CONFIG_IPV6_ROUTE_INFO
1782 static struct rt6_info *rt6_get_route_info(struct net *net,
1783 					   const struct in6_addr *prefix, int prefixlen,
1784 					   const struct in6_addr *gwaddr, int ifindex)
1785 {
1786 	struct fib6_node *fn;
1787 	struct rt6_info *rt = NULL;
1788 	struct fib6_table *table;
1789 
1790 	table = fib6_get_table(net, RT6_TABLE_INFO);
1791 	if (table == NULL)
1792 		return NULL;
1793 
1794 	write_lock_bh(&table->tb6_lock);
1795 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1796 	if (!fn)
1797 		goto out;
1798 
1799 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1800 		if (rt->rt6i_dev->ifindex != ifindex)
1801 			continue;
1802 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1803 			continue;
1804 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1805 			continue;
1806 		dst_hold(&rt->dst);
1807 		break;
1808 	}
1809 out:
1810 	write_unlock_bh(&table->tb6_lock);
1811 	return rt;
1812 }
1813 
1814 static struct rt6_info *rt6_add_route_info(struct net *net,
1815 					   const struct in6_addr *prefix, int prefixlen,
1816 					   const struct in6_addr *gwaddr, int ifindex,
1817 					   unsigned pref)
1818 {
1819 	struct fib6_config cfg = {
1820 		.fc_table	= RT6_TABLE_INFO,
1821 		.fc_metric	= IP6_RT_PRIO_USER,
1822 		.fc_ifindex	= ifindex,
1823 		.fc_dst_len	= prefixlen,
1824 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1825 				  RTF_UP | RTF_PREF(pref),
1826 		.fc_nlinfo.pid = 0,
1827 		.fc_nlinfo.nlh = NULL,
1828 		.fc_nlinfo.nl_net = net,
1829 	};
1830 
1831 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1832 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1833 
1834 	/* We should treat it as a default route if prefix length is 0. */
1835 	if (!prefixlen)
1836 		cfg.fc_flags |= RTF_DEFAULT;
1837 
1838 	ip6_route_add(&cfg);
1839 
1840 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1841 }
1842 #endif
1843 
1844 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1845 {
1846 	struct rt6_info *rt;
1847 	struct fib6_table *table;
1848 
1849 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1850 	if (table == NULL)
1851 		return NULL;
1852 
1853 	write_lock_bh(&table->tb6_lock);
1854 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1855 		if (dev == rt->rt6i_dev &&
1856 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1857 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1858 			break;
1859 	}
1860 	if (rt)
1861 		dst_hold(&rt->dst);
1862 	write_unlock_bh(&table->tb6_lock);
1863 	return rt;
1864 }
1865 
1866 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1867 				     struct net_device *dev,
1868 				     unsigned int pref)
1869 {
1870 	struct fib6_config cfg = {
1871 		.fc_table	= RT6_TABLE_DFLT,
1872 		.fc_metric	= IP6_RT_PRIO_USER,
1873 		.fc_ifindex	= dev->ifindex,
1874 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1875 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1876 		.fc_nlinfo.pid = 0,
1877 		.fc_nlinfo.nlh = NULL,
1878 		.fc_nlinfo.nl_net = dev_net(dev),
1879 	};
1880 
1881 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1882 
1883 	ip6_route_add(&cfg);
1884 
1885 	return rt6_get_dflt_router(gwaddr, dev);
1886 }
1887 
1888 void rt6_purge_dflt_routers(struct net *net)
1889 {
1890 	struct rt6_info *rt;
1891 	struct fib6_table *table;
1892 
1893 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1894 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1895 	if (table == NULL)
1896 		return;
1897 
1898 restart:
1899 	read_lock_bh(&table->tb6_lock);
1900 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1901 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1902 			dst_hold(&rt->dst);
1903 			read_unlock_bh(&table->tb6_lock);
1904 			ip6_del_rt(rt);
1905 			goto restart;
1906 		}
1907 	}
1908 	read_unlock_bh(&table->tb6_lock);
1909 }
1910 
1911 static void rtmsg_to_fib6_config(struct net *net,
1912 				 struct in6_rtmsg *rtmsg,
1913 				 struct fib6_config *cfg)
1914 {
1915 	memset(cfg, 0, sizeof(*cfg));
1916 
1917 	cfg->fc_table = RT6_TABLE_MAIN;
1918 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1919 	cfg->fc_metric = rtmsg->rtmsg_metric;
1920 	cfg->fc_expires = rtmsg->rtmsg_info;
1921 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1922 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1923 	cfg->fc_flags = rtmsg->rtmsg_flags;
1924 
1925 	cfg->fc_nlinfo.nl_net = net;
1926 
1927 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1928 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1929 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1930 }
1931 
1932 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1933 {
1934 	struct fib6_config cfg;
1935 	struct in6_rtmsg rtmsg;
1936 	int err;
1937 
1938 	switch(cmd) {
1939 	case SIOCADDRT:		/* Add a route */
1940 	case SIOCDELRT:		/* Delete a route */
1941 		if (!capable(CAP_NET_ADMIN))
1942 			return -EPERM;
1943 		err = copy_from_user(&rtmsg, arg,
1944 				     sizeof(struct in6_rtmsg));
1945 		if (err)
1946 			return -EFAULT;
1947 
1948 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1949 
1950 		rtnl_lock();
1951 		switch (cmd) {
1952 		case SIOCADDRT:
1953 			err = ip6_route_add(&cfg);
1954 			break;
1955 		case SIOCDELRT:
1956 			err = ip6_route_del(&cfg);
1957 			break;
1958 		default:
1959 			err = -EINVAL;
1960 		}
1961 		rtnl_unlock();
1962 
1963 		return err;
1964 	}
1965 
1966 	return -EINVAL;
1967 }
1968 
1969 /*
1970  *	Drop the packet on the floor
1971  */
1972 
1973 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1974 {
1975 	int type;
1976 	struct dst_entry *dst = skb_dst(skb);
1977 	switch (ipstats_mib_noroutes) {
1978 	case IPSTATS_MIB_INNOROUTES:
1979 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1980 		if (type == IPV6_ADDR_ANY) {
1981 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1982 				      IPSTATS_MIB_INADDRERRORS);
1983 			break;
1984 		}
1985 		/* FALLTHROUGH */
1986 	case IPSTATS_MIB_OUTNOROUTES:
1987 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1988 			      ipstats_mib_noroutes);
1989 		break;
1990 	}
1991 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1992 	kfree_skb(skb);
1993 	return 0;
1994 }
1995 
1996 static int ip6_pkt_discard(struct sk_buff *skb)
1997 {
1998 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1999 }
2000 
2001 static int ip6_pkt_discard_out(struct sk_buff *skb)
2002 {
2003 	skb->dev = skb_dst(skb)->dev;
2004 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2005 }
2006 
2007 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2008 
2009 static int ip6_pkt_prohibit(struct sk_buff *skb)
2010 {
2011 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2012 }
2013 
2014 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2015 {
2016 	skb->dev = skb_dst(skb)->dev;
2017 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2018 }
2019 
2020 #endif
2021 
2022 /*
2023  *	Allocate a dst for local (unicast / anycast) address.
2024  */
2025 
2026 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2027 				    const struct in6_addr *addr,
2028 				    int anycast)
2029 {
2030 	struct net *net = dev_net(idev->dev);
2031 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2032 					    net->loopback_dev, 0);
2033 	struct neighbour *neigh;
2034 
2035 	if (rt == NULL) {
2036 		if (net_ratelimit())
2037 			pr_warning("IPv6:  Maximum number of routes reached,"
2038 				   " consider increasing route/max_size.\n");
2039 		return ERR_PTR(-ENOMEM);
2040 	}
2041 
2042 	in6_dev_hold(idev);
2043 
2044 	rt->dst.flags |= DST_HOST;
2045 	rt->dst.input = ip6_input;
2046 	rt->dst.output = ip6_output;
2047 	rt->rt6i_idev = idev;
2048 	rt->dst.obsolete = -1;
2049 
2050 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2051 	if (anycast)
2052 		rt->rt6i_flags |= RTF_ANYCAST;
2053 	else
2054 		rt->rt6i_flags |= RTF_LOCAL;
2055 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2056 	if (IS_ERR(neigh)) {
2057 		dst_free(&rt->dst);
2058 
2059 		return ERR_CAST(neigh);
2060 	}
2061 	dst_set_neighbour(&rt->dst, neigh);
2062 
2063 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2064 	rt->rt6i_dst.plen = 128;
2065 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2066 
2067 	atomic_set(&rt->dst.__refcnt, 1);
2068 
2069 	return rt;
2070 }
2071 
2072 int ip6_route_get_saddr(struct net *net,
2073 			struct rt6_info *rt,
2074 			const struct in6_addr *daddr,
2075 			unsigned int prefs,
2076 			struct in6_addr *saddr)
2077 {
2078 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2079 	int err = 0;
2080 	if (rt->rt6i_prefsrc.plen)
2081 		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2082 	else
2083 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2084 					 daddr, prefs, saddr);
2085 	return err;
2086 }
2087 
2088 /* Remove a deleted IP address from prefsrc entries. */
2089 struct arg_dev_net_ip {
2090 	struct net_device *dev;
2091 	struct net *net;
2092 	struct in6_addr *addr;
2093 };
2094 
2095 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2096 {
2097 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2098 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2099 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2100 
2101 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2102 	    rt != net->ipv6.ip6_null_entry &&
2103 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2104 		/* remove prefsrc entry */
2105 		rt->rt6i_prefsrc.plen = 0;
2106 	}
2107 	return 0;
2108 }
2109 
2110 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2111 {
2112 	struct net *net = dev_net(ifp->idev->dev);
2113 	struct arg_dev_net_ip adni = {
2114 		.dev = ifp->idev->dev,
2115 		.net = net,
2116 		.addr = &ifp->addr,
2117 	};
2118 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2119 }
2120 
2121 struct arg_dev_net {
2122 	struct net_device *dev;
2123 	struct net *net;
2124 };
2125 
2126 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2127 {
2128 	const struct arg_dev_net *adn = arg;
2129 	const struct net_device *dev = adn->dev;
2130 
2131 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2132 	    rt != adn->net->ipv6.ip6_null_entry) {
2133 		RT6_TRACE("deleted by ifdown %p\n", rt);
2134 		return -1;
2135 	}
2136 	return 0;
2137 }
2138 
2139 void rt6_ifdown(struct net *net, struct net_device *dev)
2140 {
2141 	struct arg_dev_net adn = {
2142 		.dev = dev,
2143 		.net = net,
2144 	};
2145 
2146 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2147 	icmp6_clean_all(fib6_ifdown, &adn);
2148 }
2149 
2150 struct rt6_mtu_change_arg
2151 {
2152 	struct net_device *dev;
2153 	unsigned mtu;
2154 };
2155 
2156 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2157 {
2158 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2159 	struct inet6_dev *idev;
2160 
2161 	/* In IPv6, PMTU discovery is not optional,
2162 	   so the RTAX_MTU lock cannot disable it.
2163 	   We still use this lock to block changes
2164 	   caused by addrconf/ndisc.
2165 	*/
2166 
2167 	idev = __in6_dev_get(arg->dev);
2168 	if (idev == NULL)
2169 		return 0;
2170 
2171 	/* For an administrative MTU increase there is no way to discover
2172 	   an IPv6 PMTU increase, so the PMTU increase has to be applied here.
2173 	   Since RFC 1981 does not cover PMTU updates after an administrative
2174 	   MTU increase, updating the PMTU here is a MUST (e.g. jumbo frames).
2175 	 */
2176 	/*
2177 	   If the new MTU is less than the route PMTU, the new MTU will be the
2178 	   lowest MTU in the path; update the route PMTU to reflect the
2179 	   decrease.  If the new MTU is greater than the route PMTU, and the
2180 	   old MTU was the lowest MTU in the path, update the route PMTU to
2181 	   reflect the increase.  In that case, if another node's MTU is now
2182 	   the lowest in the path, a PACKET TOO BIG message will trigger
2183 	   PMTU discovery again.
2184 	 */
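	/*
	 * Worked example (numbers are illustrative only): if the device MTU
	 * is raised from 1500 to 9000, a route whose dst_mtu() is 1500 and
	 * equal to idev->cnf.mtu6 (normally still the pre-change device MTU
	 * when this walk runs) matches the second branch below and its PMTU
	 * is raised to 9000.  If the device MTU instead drops to 1280, any
	 * route with dst_mtu() >= 1280 matches the first branch and its PMTU
	 * is lowered to 1280.
	 */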
2185 	if (rt->rt6i_dev == arg->dev &&
2186 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2187 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2188 	     (dst_mtu(&rt->dst) < arg->mtu &&
2189 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2190 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2191 	}
2192 	return 0;
2193 }
2194 
2195 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2196 {
2197 	struct rt6_mtu_change_arg arg = {
2198 		.dev = dev,
2199 		.mtu = mtu,
2200 	};
2201 
2202 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2203 }
2204 
2205 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2206 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2207 	[RTA_OIF]               = { .type = NLA_U32 },
2208 	[RTA_IIF]		= { .type = NLA_U32 },
2209 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2210 	[RTA_METRICS]           = { .type = NLA_NESTED },
2211 };
2212 
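/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE request into a struct fib6_config:
 * rtm_table/RTA_TABLE select the table, RTA_DST/RTA_SRC/RTA_GATEWAY/
 * RTA_PREFSRC carry the addresses, RTA_OIF the output interface,
 * RTA_PRIORITY the metric and RTA_METRICS a nested block of RTAX_* values.
 * Returns 0 on success or a negative errno.
 */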
2213 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2214 			      struct fib6_config *cfg)
2215 {
2216 	struct rtmsg *rtm;
2217 	struct nlattr *tb[RTA_MAX+1];
2218 	int err;
2219 
2220 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2221 	if (err < 0)
2222 		goto errout;
2223 
2224 	err = -EINVAL;
2225 	rtm = nlmsg_data(nlh);
2226 	memset(cfg, 0, sizeof(*cfg));
2227 
2228 	cfg->fc_table = rtm->rtm_table;
2229 	cfg->fc_dst_len = rtm->rtm_dst_len;
2230 	cfg->fc_src_len = rtm->rtm_src_len;
2231 	cfg->fc_flags = RTF_UP;
2232 	cfg->fc_protocol = rtm->rtm_protocol;
2233 
2234 	if (rtm->rtm_type == RTN_UNREACHABLE)
2235 		cfg->fc_flags |= RTF_REJECT;
2236 
2237 	if (rtm->rtm_type == RTN_LOCAL)
2238 		cfg->fc_flags |= RTF_LOCAL;
2239 
2240 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2241 	cfg->fc_nlinfo.nlh = nlh;
2242 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2243 
2244 	if (tb[RTA_GATEWAY]) {
2245 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2246 		cfg->fc_flags |= RTF_GATEWAY;
2247 	}
2248 
2249 	if (tb[RTA_DST]) {
2250 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2251 
2252 		if (nla_len(tb[RTA_DST]) < plen)
2253 			goto errout;
2254 
2255 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2256 	}
2257 
2258 	if (tb[RTA_SRC]) {
2259 		int plen = (rtm->rtm_src_len + 7) >> 3;
2260 
2261 		if (nla_len(tb[RTA_SRC]) < plen)
2262 			goto errout;
2263 
2264 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2265 	}
2266 
2267 	if (tb[RTA_PREFSRC])
2268 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2269 
2270 	if (tb[RTA_OIF])
2271 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2272 
2273 	if (tb[RTA_PRIORITY])
2274 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2275 
2276 	if (tb[RTA_METRICS]) {
2277 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2278 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2279 	}
2280 
2281 	if (tb[RTA_TABLE])
2282 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2283 
2284 	err = 0;
2285 errout:
2286 	return err;
2287 }
2288 
2289 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2290 {
2291 	struct fib6_config cfg;
2292 	int err;
2293 
2294 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2295 	if (err < 0)
2296 		return err;
2297 
2298 	return ip6_route_del(&cfg);
2299 }
2300 
2301 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2302 {
2303 	struct fib6_config cfg;
2304 	int err;
2305 
2306 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2307 	if (err < 0)
2308 		return err;
2309 
2310 	return ip6_route_add(&cfg);
2311 }
2312 
2313 static inline size_t rt6_nlmsg_size(void)
2314 {
2315 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2316 	       + nla_total_size(16) /* RTA_SRC */
2317 	       + nla_total_size(16) /* RTA_DST */
2318 	       + nla_total_size(16) /* RTA_GATEWAY */
2319 	       + nla_total_size(16) /* RTA_PREFSRC */
2320 	       + nla_total_size(4) /* RTA_TABLE */
2321 	       + nla_total_size(4) /* RTA_IIF */
2322 	       + nla_total_size(4) /* RTA_OIF */
2323 	       + nla_total_size(4) /* RTA_PRIORITY */
2324 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2325 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2326 }
2327 
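/*
 * Fill one route message describing @rt into @skb.  In the common cases it
 * returns the value of nlmsg_end() on success, 1 when the route is skipped
 * because the caller asked for prefix routes only and this is not one, and
 * -EMSGSIZE (after cancelling the partial message) when the attributes do
 * not fit into @skb.
 */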
2328 static int rt6_fill_node(struct net *net,
2329 			 struct sk_buff *skb, struct rt6_info *rt,
2330 			 struct in6_addr *dst, struct in6_addr *src,
2331 			 int iif, int type, u32 pid, u32 seq,
2332 			 int prefix, int nowait, unsigned int flags)
2333 {
2334 	struct rtmsg *rtm;
2335 	struct nlmsghdr *nlh;
2336 	long expires;
2337 	u32 table;
2338 	struct neighbour *n;
2339 
2340 	if (prefix) {	/* user wants prefix routes only */
2341 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2342 			/* success since this is not a prefix route */
2343 			return 1;
2344 		}
2345 	}
2346 
2347 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2348 	if (nlh == NULL)
2349 		return -EMSGSIZE;
2350 
2351 	rtm = nlmsg_data(nlh);
2352 	rtm->rtm_family = AF_INET6;
2353 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2354 	rtm->rtm_src_len = rt->rt6i_src.plen;
2355 	rtm->rtm_tos = 0;
2356 	if (rt->rt6i_table)
2357 		table = rt->rt6i_table->tb6_id;
2358 	else
2359 		table = RT6_TABLE_UNSPEC;
2360 	rtm->rtm_table = table;
2361 	NLA_PUT_U32(skb, RTA_TABLE, table);
2362 	if (rt->rt6i_flags&RTF_REJECT)
2363 		rtm->rtm_type = RTN_UNREACHABLE;
2364 	else if (rt->rt6i_flags&RTF_LOCAL)
2365 		rtm->rtm_type = RTN_LOCAL;
2366 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2367 		rtm->rtm_type = RTN_LOCAL;
2368 	else
2369 		rtm->rtm_type = RTN_UNICAST;
2370 	rtm->rtm_flags = 0;
2371 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2372 	rtm->rtm_protocol = rt->rt6i_protocol;
2373 	if (rt->rt6i_flags&RTF_DYNAMIC)
2374 		rtm->rtm_protocol = RTPROT_REDIRECT;
2375 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2376 		rtm->rtm_protocol = RTPROT_KERNEL;
2377 	else if (rt->rt6i_flags&RTF_DEFAULT)
2378 		rtm->rtm_protocol = RTPROT_RA;
2379 
2380 	if (rt->rt6i_flags&RTF_CACHE)
2381 		rtm->rtm_flags |= RTM_F_CLONED;
2382 
2383 	if (dst) {
2384 		NLA_PUT(skb, RTA_DST, 16, dst);
2385 		rtm->rtm_dst_len = 128;
2386 	} else if (rtm->rtm_dst_len)
2387 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2388 #ifdef CONFIG_IPV6_SUBTREES
2389 	if (src) {
2390 		NLA_PUT(skb, RTA_SRC, 16, src);
2391 		rtm->rtm_src_len = 128;
2392 	} else if (rtm->rtm_src_len)
2393 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2394 #endif
2395 	if (iif) {
2396 #ifdef CONFIG_IPV6_MROUTE
2397 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2398 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2399 			if (err <= 0) {
2400 				if (!nowait) {
2401 					if (err == 0)
2402 						return 0;
2403 					goto nla_put_failure;
2404 				} else {
2405 					if (err == -EMSGSIZE)
2406 						goto nla_put_failure;
2407 				}
2408 			}
2409 		} else
2410 #endif
2411 			NLA_PUT_U32(skb, RTA_IIF, iif);
2412 	} else if (dst) {
2413 		struct in6_addr saddr_buf;
2414 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2415 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2416 	}
2417 
2418 	if (rt->rt6i_prefsrc.plen) {
2419 		struct in6_addr saddr_buf;
2420 		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2421 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2422 	}
2423 
2424 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2425 		goto nla_put_failure;
2426 
2427 	rcu_read_lock();
2428 	n = dst_get_neighbour(&rt->dst);
2429 	if (n)
2430 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2431 	rcu_read_unlock();
2432 
2433 	if (rt->dst.dev)
2434 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2435 
2436 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2437 
2438 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2439 		expires = 0;
2440 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2441 		expires = rt->rt6i_expires - jiffies;
2442 	else
2443 		expires = INT_MAX;
2444 
2445 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2446 			       expires, rt->dst.error) < 0)
2447 		goto nla_put_failure;
2448 
2449 	return nlmsg_end(skb, nlh);
2450 
2451 nla_put_failure:
2452 	nlmsg_cancel(skb, nlh);
2453 	return -EMSGSIZE;
2454 }
2455 
2456 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2457 {
2458 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2459 	int prefix;
2460 
2461 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2462 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2463 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2464 	} else
2465 		prefix = 0;
2466 
2467 	return rt6_fill_node(arg->net,
2468 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2469 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2470 		     prefix, 0, NLM_F_MULTI);
2471 }
2472 
2473 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2474 {
2475 	struct net *net = sock_net(in_skb->sk);
2476 	struct nlattr *tb[RTA_MAX+1];
2477 	struct rt6_info *rt;
2478 	struct sk_buff *skb;
2479 	struct rtmsg *rtm;
2480 	struct flowi6 fl6;
2481 	int err, iif = 0;
2482 
2483 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2484 	if (err < 0)
2485 		goto errout;
2486 
2487 	err = -EINVAL;
2488 	memset(&fl6, 0, sizeof(fl6));
2489 
2490 	if (tb[RTA_SRC]) {
2491 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2492 			goto errout;
2493 
2494 		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2495 	}
2496 
2497 	if (tb[RTA_DST]) {
2498 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2499 			goto errout;
2500 
2501 		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2502 	}
2503 
2504 	if (tb[RTA_IIF])
2505 		iif = nla_get_u32(tb[RTA_IIF]);
2506 
2507 	if (tb[RTA_OIF])
2508 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2509 
2510 	if (iif) {
2511 		struct net_device *dev;
2512 		dev = __dev_get_by_index(net, iif);
2513 		if (!dev) {
2514 			err = -ENODEV;
2515 			goto errout;
2516 		}
2517 	}
2518 
2519 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2520 	if (skb == NULL) {
2521 		err = -ENOBUFS;
2522 		goto errout;
2523 	}
2524 
2525 	/* Reserve room for dummy headers; this skb can pass
2526 	   through a good chunk of the routing engine.
2527 	 */
2528 	skb_reset_mac_header(skb);
2529 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2530 
2531 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2532 	skb_dst_set(skb, &rt->dst);
2533 
2534 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2535 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2536 			    nlh->nlmsg_seq, 0, 0, 0);
2537 	if (err < 0) {
2538 		kfree_skb(skb);
2539 		goto errout;
2540 	}
2541 
2542 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2543 errout:
2544 	return err;
2545 }
2546 
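/*
 * Notify RTNLGRP_IPV6_ROUTE listeners that @rt changed; @event is typically
 * RTM_NEWROUTE or RTM_DELROUTE.
 */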
2547 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2548 {
2549 	struct sk_buff *skb;
2550 	struct net *net = info->nl_net;
2551 	u32 seq;
2552 	int err;
2553 
2554 	err = -ENOBUFS;
2555 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2556 
2557 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2558 	if (skb == NULL)
2559 		goto errout;
2560 
2561 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2562 				event, info->pid, seq, 0, 0, 0);
2563 	if (err < 0) {
2564 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2565 		WARN_ON(err == -EMSGSIZE);
2566 		kfree_skb(skb);
2567 		goto errout;
2568 	}
2569 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2570 		    info->nlh, gfp_any());
2571 	return;
2572 errout:
2573 	if (err < 0)
2574 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2575 }
2576 
2577 static int ip6_route_dev_notify(struct notifier_block *this,
2578 				unsigned long event, void *data)
2579 {
2580 	struct net_device *dev = (struct net_device *)data;
2581 	struct net *net = dev_net(dev);
2582 
2583 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2584 		net->ipv6.ip6_null_entry->dst.dev = dev;
2585 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2586 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2587 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2588 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2589 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2590 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2591 #endif
2592 	}
2593 
2594 	return NOTIFY_OK;
2595 }
2596 
2597 /*
2598  *	/proc
2599  */
2600 
2601 #ifdef CONFIG_PROC_FS
2602 
2603 struct rt6_proc_arg
2604 {
2605 	char *buffer;
2606 	int offset;
2607 	int length;
2608 	int skip;
2609 	int len;
2610 };
2611 
2612 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2613 {
2614 	struct seq_file *m = p_arg;
2615 	struct neighbour *n;
2616 
2617 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2618 
2619 #ifdef CONFIG_IPV6_SUBTREES
2620 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2621 #else
2622 	seq_puts(m, "00000000000000000000000000000000 00 ");
2623 #endif
2624 	rcu_read_lock();
2625 	n = dst_get_neighbour(&rt->dst);
2626 	if (n) {
2627 		seq_printf(m, "%pi6", n->primary_key);
2628 	} else {
2629 		seq_puts(m, "00000000000000000000000000000000");
2630 	}
2631 	rcu_read_unlock();
2632 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2633 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2634 		   rt->dst.__use, rt->rt6i_flags,
2635 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2636 	return 0;
2637 }
2638 
2639 static int ipv6_route_show(struct seq_file *m, void *v)
2640 {
2641 	struct net *net = (struct net *)m->private;
2642 	fib6_clean_all(net, rt6_info_route, 0, m);
2643 	return 0;
2644 }
2645 
2646 static int ipv6_route_open(struct inode *inode, struct file *file)
2647 {
2648 	return single_open_net(inode, file, ipv6_route_show);
2649 }
2650 
2651 static const struct file_operations ipv6_route_proc_fops = {
2652 	.owner		= THIS_MODULE,
2653 	.open		= ipv6_route_open,
2654 	.read		= seq_read,
2655 	.llseek		= seq_lseek,
2656 	.release	= single_release_net,
2657 };
2658 
2659 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2660 {
2661 	struct net *net = (struct net *)seq->private;
2662 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2663 		   net->ipv6.rt6_stats->fib_nodes,
2664 		   net->ipv6.rt6_stats->fib_route_nodes,
2665 		   net->ipv6.rt6_stats->fib_rt_alloc,
2666 		   net->ipv6.rt6_stats->fib_rt_entries,
2667 		   net->ipv6.rt6_stats->fib_rt_cache,
2668 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2669 		   net->ipv6.rt6_stats->fib_discarded_routes);
2670 
2671 	return 0;
2672 }
2673 
2674 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2675 {
2676 	return single_open_net(inode, file, rt6_stats_seq_show);
2677 }
2678 
2679 static const struct file_operations rt6_stats_seq_fops = {
2680 	.owner	 = THIS_MODULE,
2681 	.open	 = rt6_stats_seq_open,
2682 	.read	 = seq_read,
2683 	.llseek	 = seq_lseek,
2684 	.release = single_release_net,
2685 };
2686 #endif	/* CONFIG_PROC_FS */
2687 
2688 #ifdef CONFIG_SYSCTL
2689 
2690 static
2691 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2692 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2693 {
2694 	struct net *net;
2695 	int delay;
2696 	if (!write)
2697 		return -EINVAL;
2698 
2699 	net = (struct net *)ctl->extra1;
2700 	delay = net->ipv6.sysctl.flush_delay;
2701 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2702 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2703 	return 0;
2704 }
2705 
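/*
 * Template for the per-namespace sysctls exposed under
 * /proc/sys/net/ipv6/route/.  ipv6_route_sysctl_init() below duplicates
 * this table for each namespace and repoints every .data field at the
 * corresponding member of that namespace's struct net.
 */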
2706 ctl_table ipv6_route_table_template[] = {
2707 	{
2708 		.procname	=	"flush",
2709 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2710 		.maxlen		=	sizeof(int),
2711 		.mode		=	0200,
2712 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2713 	},
2714 	{
2715 		.procname	=	"gc_thresh",
2716 		.data		=	&ip6_dst_ops_template.gc_thresh,
2717 		.maxlen		=	sizeof(int),
2718 		.mode		=	0644,
2719 		.proc_handler	=	proc_dointvec,
2720 	},
2721 	{
2722 		.procname	=	"max_size",
2723 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2724 		.maxlen		=	sizeof(int),
2725 		.mode		=	0644,
2726 		.proc_handler	=	proc_dointvec,
2727 	},
2728 	{
2729 		.procname	=	"gc_min_interval",
2730 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2731 		.maxlen		=	sizeof(int),
2732 		.mode		=	0644,
2733 		.proc_handler	=	proc_dointvec_jiffies,
2734 	},
2735 	{
2736 		.procname	=	"gc_timeout",
2737 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2738 		.maxlen		=	sizeof(int),
2739 		.mode		=	0644,
2740 		.proc_handler	=	proc_dointvec_jiffies,
2741 	},
2742 	{
2743 		.procname	=	"gc_interval",
2744 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2745 		.maxlen		=	sizeof(int),
2746 		.mode		=	0644,
2747 		.proc_handler	=	proc_dointvec_jiffies,
2748 	},
2749 	{
2750 		.procname	=	"gc_elasticity",
2751 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2752 		.maxlen		=	sizeof(int),
2753 		.mode		=	0644,
2754 		.proc_handler	=	proc_dointvec,
2755 	},
2756 	{
2757 		.procname	=	"mtu_expires",
2758 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2759 		.maxlen		=	sizeof(int),
2760 		.mode		=	0644,
2761 		.proc_handler	=	proc_dointvec_jiffies,
2762 	},
2763 	{
2764 		.procname	=	"min_adv_mss",
2765 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2766 		.maxlen		=	sizeof(int),
2767 		.mode		=	0644,
2768 		.proc_handler	=	proc_dointvec,
2769 	},
2770 	{
2771 		.procname	=	"gc_min_interval_ms",
2772 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2773 		.maxlen		=	sizeof(int),
2774 		.mode		=	0644,
2775 		.proc_handler	=	proc_dointvec_ms_jiffies,
2776 	},
2777 	{ }
2778 };
2779 
2780 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2781 {
2782 	struct ctl_table *table;
2783 
2784 	table = kmemdup(ipv6_route_table_template,
2785 			sizeof(ipv6_route_table_template),
2786 			GFP_KERNEL);
2787 
2788 	if (table) {
2789 		table[0].data = &net->ipv6.sysctl.flush_delay;
2790 		table[0].extra1 = net;
2791 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2792 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2793 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2794 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2795 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2796 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2797 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2798 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2799 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2800 	}
2801 
2802 	return table;
2803 }
2804 #endif
2805 
2806 static int __net_init ip6_route_net_init(struct net *net)
2807 {
2808 	int ret = -ENOMEM;
2809 
2810 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2811 	       sizeof(net->ipv6.ip6_dst_ops));
2812 
2813 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2814 		goto out_ip6_dst_ops;
2815 
2816 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2817 					   sizeof(*net->ipv6.ip6_null_entry),
2818 					   GFP_KERNEL);
2819 	if (!net->ipv6.ip6_null_entry)
2820 		goto out_ip6_dst_entries;
2821 	net->ipv6.ip6_null_entry->dst.path =
2822 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2823 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2824 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2825 			 ip6_template_metrics, true);
2826 
2827 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2828 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2829 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2830 					       GFP_KERNEL);
2831 	if (!net->ipv6.ip6_prohibit_entry)
2832 		goto out_ip6_null_entry;
2833 	net->ipv6.ip6_prohibit_entry->dst.path =
2834 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2835 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2836 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2837 			 ip6_template_metrics, true);
2838 
2839 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2840 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2841 					       GFP_KERNEL);
2842 	if (!net->ipv6.ip6_blk_hole_entry)
2843 		goto out_ip6_prohibit_entry;
2844 	net->ipv6.ip6_blk_hole_entry->dst.path =
2845 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2846 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2847 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2848 			 ip6_template_metrics, true);
2849 #endif
2850 
2851 	net->ipv6.sysctl.flush_delay = 0;
2852 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2853 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2854 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2855 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2856 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2857 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2858 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2859 
2860 #ifdef CONFIG_PROC_FS
2861 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2862 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2863 #endif
2864 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2865 
2866 	ret = 0;
2867 out:
2868 	return ret;
2869 
2870 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2871 out_ip6_prohibit_entry:
2872 	kfree(net->ipv6.ip6_prohibit_entry);
2873 out_ip6_null_entry:
2874 	kfree(net->ipv6.ip6_null_entry);
2875 #endif
2876 out_ip6_dst_entries:
2877 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2878 out_ip6_dst_ops:
2879 	goto out;
2880 }
2881 
2882 static void __net_exit ip6_route_net_exit(struct net *net)
2883 {
2884 #ifdef CONFIG_PROC_FS
2885 	proc_net_remove(net, "ipv6_route");
2886 	proc_net_remove(net, "rt6_stats");
2887 #endif
2888 	kfree(net->ipv6.ip6_null_entry);
2889 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2890 	kfree(net->ipv6.ip6_prohibit_entry);
2891 	kfree(net->ipv6.ip6_blk_hole_entry);
2892 #endif
2893 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2894 }
2895 
2896 static struct pernet_operations ip6_route_net_ops = {
2897 	.init = ip6_route_net_init,
2898 	.exit = ip6_route_net_exit,
2899 };
2900 
2901 static struct notifier_block ip6_route_dev_notifier = {
2902 	.notifier_call = ip6_route_dev_notify,
2903 	.priority = 0,
2904 };
2905 
2906 int __init ip6_route_init(void)
2907 {
2908 	int ret;
2909 
2910 	ret = -ENOMEM;
2911 	ip6_dst_ops_template.kmem_cachep =
2912 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2913 				  SLAB_HWCACHE_ALIGN, NULL);
2914 	if (!ip6_dst_ops_template.kmem_cachep)
2915 		goto out;
2916 
2917 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2918 	if (ret)
2919 		goto out_kmem_cache;
2920 
2921 	ret = register_pernet_subsys(&ip6_route_net_ops);
2922 	if (ret)
2923 		goto out_dst_entries;
2924 
2925 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2926 
2927 	/* The loopback device is registered before this code runs, so the
2928 	 * loopback reference in rt6_info is not taken automatically; take it
2929 	 * manually for init_net */
2930 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2931 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2933 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2934 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2935 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2936 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2937 #endif
2938 	ret = fib6_init();
2939 	if (ret)
2940 		goto out_register_subsys;
2941 
2942 	ret = xfrm6_init();
2943 	if (ret)
2944 		goto out_fib6_init;
2945 
2946 	ret = fib6_rules_init();
2947 	if (ret)
2948 		goto xfrm6_init;
2949 
2950 	ret = -ENOBUFS;
2951 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2952 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2953 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2954 		goto fib6_rules_init;
2955 
2956 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2957 	if (ret)
2958 		goto fib6_rules_init;
2959 
2960 out:
2961 	return ret;
2962 
2963 fib6_rules_init:
2964 	fib6_rules_cleanup();
2965 xfrm6_init:
2966 	xfrm6_fini();
2967 out_fib6_init:
2968 	fib6_gc_cleanup();
2969 out_register_subsys:
2970 	unregister_pernet_subsys(&ip6_route_net_ops);
2971 out_dst_entries:
2972 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2973 out_kmem_cache:
2974 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2975 	goto out;
2976 }
2977 
2978 void ip6_route_cleanup(void)
2979 {
2980 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2981 	fib6_rules_cleanup();
2982 	xfrm6_fini();
2983 	fib6_gc_cleanup();
2984 	unregister_pernet_subsys(&ip6_route_net_ops);
2985 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2987 }
2988