xref: /linux/net/ipv6/route.c (revision 9e8ba5f3ec35cba4fd8a8bebda548c4db2651e40)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75 
76 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 				    const struct in6_addr *dest);
78 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
80 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
81 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82 static void		ip6_dst_destroy(struct dst_entry *);
83 static void		ip6_dst_ifdown(struct dst_entry *,
84 				       struct net_device *dev, int how);
85 static int		 ip6_dst_gc(struct dst_ops *ops);
86 
87 static int		ip6_pkt_discard(struct sk_buff *skb);
88 static int		ip6_pkt_discard_out(struct sk_buff *skb);
89 static void		ip6_link_failure(struct sk_buff *skb);
90 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91 
92 #ifdef CONFIG_IPV6_ROUTE_INFO
93 static struct rt6_info *rt6_add_route_info(struct net *net,
94 					   const struct in6_addr *prefix, int prefixlen,
95 					   const struct in6_addr *gwaddr, int ifindex,
96 					   unsigned pref);
97 static struct rt6_info *rt6_get_route_info(struct net *net,
98 					   const struct in6_addr *prefix, int prefixlen,
99 					   const struct in6_addr *gwaddr, int ifindex);
100 #endif
101 
102 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103 {
104 	struct rt6_info *rt = (struct rt6_info *) dst;
105 	struct inet_peer *peer;
106 	u32 *p = NULL;
107 
108 	if (!(rt->dst.flags & DST_HOST))
109 		return NULL;
110 
111 	if (!rt->rt6i_peer)
112 		rt6_bind_peer(rt, 1);
113 
114 	peer = rt->rt6i_peer;
115 	if (peer) {
116 		u32 *old_p = __DST_METRICS_PTR(old);
117 		unsigned long prev, new;
118 
119 		p = peer->metrics;
120 		if (inet_metrics_new(peer))
121 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122 
123 		new = (unsigned long) p;
124 		prev = cmpxchg(&dst->_metrics, old, new);
125 
126 		if (prev != old) {
127 			p = __DST_METRICS_PTR(prev);
128 			if (prev & DST_METRICS_READ_ONLY)
129 				p = NULL;
130 		}
131 	}
132 	return p;
133 }
134 
135 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136 {
137 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138 }
139 
140 static struct dst_ops ip6_dst_ops_template = {
141 	.family			=	AF_INET6,
142 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
143 	.gc			=	ip6_dst_gc,
144 	.gc_thresh		=	1024,
145 	.check			=	ip6_dst_check,
146 	.default_advmss		=	ip6_default_advmss,
147 	.mtu			=	ip6_mtu,
148 	.cow_metrics		=	ipv6_cow_metrics,
149 	.destroy		=	ip6_dst_destroy,
150 	.ifdown			=	ip6_dst_ifdown,
151 	.negative_advice	=	ip6_negative_advice,
152 	.link_failure		=	ip6_link_failure,
153 	.update_pmtu		=	ip6_rt_update_pmtu,
154 	.local_out		=	__ip6_local_out,
155 	.neigh_lookup		=	ip6_neigh_lookup,
156 };
157 
158 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
159 {
160 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161 
162 	return mtu ? : dst->dev->mtu;
163 }
164 
165 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166 {
167 }
168 
169 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 					 unsigned long old)
171 {
172 	return NULL;
173 }
174 
175 static struct dst_ops ip6_dst_blackhole_ops = {
176 	.family			=	AF_INET6,
177 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
178 	.destroy		=	ip6_dst_destroy,
179 	.check			=	ip6_dst_check,
180 	.mtu			=	ip6_blackhole_mtu,
181 	.default_advmss		=	ip6_default_advmss,
182 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
183 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
184 	.neigh_lookup		=	ip6_neigh_lookup,
185 };
186 
187 static const u32 ip6_template_metrics[RTAX_MAX] = {
188 	[RTAX_HOPLIMIT - 1] = 255,
189 };
190 
191 static struct rt6_info ip6_null_entry_template = {
192 	.dst = {
193 		.__refcnt	= ATOMIC_INIT(1),
194 		.__use		= 1,
195 		.obsolete	= -1,
196 		.error		= -ENETUNREACH,
197 		.input		= ip6_pkt_discard,
198 		.output		= ip6_pkt_discard_out,
199 	},
200 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
201 	.rt6i_protocol  = RTPROT_KERNEL,
202 	.rt6i_metric	= ~(u32) 0,
203 	.rt6i_ref	= ATOMIC_INIT(1),
204 };
205 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Reject route reporting -EACCES: packets are handed to the
 * ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
};

/*
 * Reject route reporting -EINVAL: packets are handed to dst_discard in
 * both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
};

#endif
242 
243 /* allocate dst with ip6_dst_ops */
244 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
245 					     struct net_device *dev,
246 					     int flags)
247 {
248 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
249 
250 	if (rt)
251 		memset(&rt->rt6i_table, 0,
252 		       sizeof(*rt) - sizeof(struct dst_entry));
253 
254 	return rt;
255 }
256 
257 static void ip6_dst_destroy(struct dst_entry *dst)
258 {
259 	struct rt6_info *rt = (struct rt6_info *)dst;
260 	struct inet6_dev *idev = rt->rt6i_idev;
261 	struct inet_peer *peer = rt->rt6i_peer;
262 
263 	if (!(rt->dst.flags & DST_HOST))
264 		dst_destroy_metrics_generic(dst);
265 
266 	if (idev) {
267 		rt->rt6i_idev = NULL;
268 		in6_dev_put(idev);
269 	}
270 	if (peer) {
271 		rt->rt6i_peer = NULL;
272 		inet_putpeer(peer);
273 	}
274 }
275 
276 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277 
278 static u32 rt6_peer_genid(void)
279 {
280 	return atomic_read(&__rt6_peer_genid);
281 }
282 
283 void rt6_bind_peer(struct rt6_info *rt, int create)
284 {
285 	struct inet_peer *peer;
286 
287 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 		inet_putpeer(peer);
290 	else
291 		rt->rt6i_peer_genid = rt6_peer_genid();
292 }
293 
294 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 			   int how)
296 {
297 	struct rt6_info *rt = (struct rt6_info *)dst;
298 	struct inet6_dev *idev = rt->rt6i_idev;
299 	struct net_device *loopback_dev =
300 		dev_net(dev)->loopback_dev;
301 
302 	if (dev != loopback_dev && idev && idev->dev == dev) {
303 		struct inet6_dev *loopback_idev =
304 			in6_dev_get(loopback_dev);
305 		if (loopback_idev) {
306 			rt->rt6i_idev = loopback_idev;
307 			in6_dev_put(idev);
308 		}
309 	}
310 }
311 
312 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313 {
314 	return (rt->rt6i_flags & RTF_EXPIRES) &&
315 		time_after(jiffies, rt->rt6i_expires);
316 }
317 
318 static inline int rt6_need_strict(const struct in6_addr *daddr)
319 {
320 	return ipv6_addr_type(daddr) &
321 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
322 }
323 
324 /*
325  *	Route lookup. Any table->tb6_lock is implied.
326  */
327 
328 static inline struct rt6_info *rt6_device_match(struct net *net,
329 						    struct rt6_info *rt,
330 						    const struct in6_addr *saddr,
331 						    int oif,
332 						    int flags)
333 {
334 	struct rt6_info *local = NULL;
335 	struct rt6_info *sprt;
336 
337 	if (!oif && ipv6_addr_any(saddr))
338 		goto out;
339 
340 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
341 		struct net_device *dev = sprt->rt6i_dev;
342 
343 		if (oif) {
344 			if (dev->ifindex == oif)
345 				return sprt;
346 			if (dev->flags & IFF_LOOPBACK) {
347 				if (!sprt->rt6i_idev ||
348 				    sprt->rt6i_idev->dev->ifindex != oif) {
349 					if (flags & RT6_LOOKUP_F_IFACE && oif)
350 						continue;
351 					if (local && (!oif ||
352 						      local->rt6i_idev->dev->ifindex == oif))
353 						continue;
354 				}
355 				local = sprt;
356 			}
357 		} else {
358 			if (ipv6_chk_addr(net, saddr, dev,
359 					  flags & RT6_LOOKUP_F_IFACE))
360 				return sprt;
361 		}
362 	}
363 
364 	if (oif) {
365 		if (local)
366 			return local;
367 
368 		if (flags & RT6_LOOKUP_F_IFACE)
369 			return net->ipv6.ip6_null_entry;
370 	}
371 out:
372 	return rt;
373 }
374 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * neighbour of @rt when that neighbour is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit relies on neigh->updated, so the check and the
 * timestamp update are done under the neighbour's *write* lock: with only a
 * read lock two CPUs could both pass the time check, both store to
 * neigh->updated and both send a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; re-evaluated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414 
415 /*
416  * Default Router Selection (RFC 2461 6.3.6)
417  */
418 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
419 {
420 	struct net_device *dev = rt->rt6i_dev;
421 	if (!oif || dev->ifindex == oif)
422 		return 2;
423 	if ((dev->flags & IFF_LOOPBACK) &&
424 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 		return 1;
426 	return 0;
427 }
428 
429 static inline int rt6_check_neigh(struct rt6_info *rt)
430 {
431 	struct neighbour *neigh;
432 	int m;
433 
434 	rcu_read_lock();
435 	neigh = dst_get_neighbour_noref(&rt->dst);
436 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 	    !(rt->rt6i_flags & RTF_GATEWAY))
438 		m = 1;
439 	else if (neigh) {
440 		read_lock_bh(&neigh->lock);
441 		if (neigh->nud_state & NUD_VALID)
442 			m = 2;
443 #ifdef CONFIG_IPV6_ROUTER_PREF
444 		else if (neigh->nud_state & NUD_FAILED)
445 			m = 0;
446 #endif
447 		else
448 			m = 1;
449 		read_unlock_bh(&neigh->lock);
450 	} else
451 		m = 0;
452 	rcu_read_unlock();
453 	return m;
454 }
455 
456 static int rt6_score_route(struct rt6_info *rt, int oif,
457 			   int strict)
458 {
459 	int m, n;
460 
461 	m = rt6_check_dev(rt, oif);
462 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
463 		return -1;
464 #ifdef CONFIG_IPV6_ROUTER_PREF
465 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466 #endif
467 	n = rt6_check_neigh(rt);
468 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
469 		return -1;
470 	return m;
471 }
472 
473 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 				   int *mpri, struct rt6_info *match)
475 {
476 	int m;
477 
478 	if (rt6_check_expired(rt))
479 		goto out;
480 
481 	m = rt6_score_route(rt, oif, strict);
482 	if (m < 0)
483 		goto out;
484 
485 	if (m > *mpri) {
486 		if (strict & RT6_LOOKUP_F_REACHABLE)
487 			rt6_probe(match);
488 		*mpri = m;
489 		match = rt;
490 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 		rt6_probe(rt);
492 	}
493 
494 out:
495 	return match;
496 }
497 
498 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 				     struct rt6_info *rr_head,
500 				     u32 metric, int oif, int strict)
501 {
502 	struct rt6_info *rt, *match;
503 	int mpri = -1;
504 
505 	match = NULL;
506 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
507 	     rt = rt->dst.rt6_next)
508 		match = find_match(rt, oif, strict, &mpri, match);
509 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
510 	     rt = rt->dst.rt6_next)
511 		match = find_match(rt, oif, strict, &mpri, match);
512 
513 	return match;
514 }
515 
516 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517 {
518 	struct rt6_info *match, *rt0;
519 	struct net *net;
520 
521 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
522 		  __func__, fn->leaf, oif);
523 
524 	rt0 = fn->rr_ptr;
525 	if (!rt0)
526 		fn->rr_ptr = rt0 = fn->leaf;
527 
528 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
529 
530 	if (!match &&
531 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
532 		struct rt6_info *next = rt0->dst.rt6_next;
533 
534 		/* no entries matched; do round-robin */
535 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 			next = fn->leaf;
537 
538 		if (next != rt0)
539 			fn->rr_ptr = next;
540 	}
541 
542 	RT6_TRACE("%s() => %p\n",
543 		  __func__, match);
544 
545 	net = dev_net(rt0->rt6i_dev);
546 	return match ? match : net->ipv6.ip6_null_entry;
547 }
548 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising gateway
 *
 * Returns -EINVAL when the option is too short, its length/prefix_len
 * combination is inconsistent, or its preference is
 * ICMPV6_ROUTER_PREF_INVALID; 0 otherwise.
 *
 * An existing route is deleted when the lifetime is zero; a missing route
 * is added when the lifetime is non-zero; otherwise the existing route's
 * preference flags are refreshed.  Expiry follows the lifetime.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *)opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
622 
/*
 * BACKTRACK() - when the lookup produced the null entry, walk back up the
 * fib6 tree looking for another node that carries route info.
 *
 * The macro relies on the enclosing function providing locals named `rt`
 * and `fn` and labels `out` and `restart`:
 *   - reaching a node flagged RTN_TL_ROOT jumps to `out`;
 *   - each step moves to the parent, or into the parent's subtree (looked
 *     up by @saddr) when the parent has a subtree other than the current
 *     node;
 *   - a node flagged RTN_RTINFO jumps to `restart`.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
640 
641 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 					     struct fib6_table *table,
643 					     struct flowi6 *fl6, int flags)
644 {
645 	struct fib6_node *fn;
646 	struct rt6_info *rt;
647 
648 	read_lock_bh(&table->tb6_lock);
649 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
650 restart:
651 	rt = fn->leaf;
652 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 	BACKTRACK(net, &fl6->saddr);
654 out:
655 	dst_use(&rt->dst, jiffies);
656 	read_unlock_bh(&table->tb6_lock);
657 	return rt;
658 
659 }
660 
661 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 			    const struct in6_addr *saddr, int oif, int strict)
663 {
664 	struct flowi6 fl6 = {
665 		.flowi6_oif = oif,
666 		.daddr = *daddr,
667 	};
668 	struct dst_entry *dst;
669 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
670 
671 	if (saddr) {
672 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
673 		flags |= RT6_LOOKUP_F_HAS_SADDR;
674 	}
675 
676 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
677 	if (dst->error == 0)
678 		return (struct rt6_info *) dst;
679 
680 	dst_release(dst);
681 
682 	return NULL;
683 }
684 
685 EXPORT_SYMBOL(rt6_lookup);
686 
687 /* ip6_ins_rt is called with FREE table->tb6_lock.
688    It takes new route entry, the addition fails by any reason the
689    route is freed. In any case, if caller does not hold it, it may
690    be destroyed.
691  */
692 
693 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
694 {
695 	int err;
696 	struct fib6_table *table;
697 
698 	table = rt->rt6i_table;
699 	write_lock_bh(&table->tb6_lock);
700 	err = fib6_add(&table->tb6_root, rt, info);
701 	write_unlock_bh(&table->tb6_lock);
702 
703 	return err;
704 }
705 
706 int ip6_ins_rt(struct rt6_info *rt)
707 {
708 	struct nl_info info = {
709 		.nl_net = dev_net(rt->rt6i_dev),
710 	};
711 	return __ip6_ins_rt(rt, &info);
712 }
713 
714 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 				      const struct in6_addr *daddr,
716 				      const struct in6_addr *saddr)
717 {
718 	struct rt6_info *rt;
719 
720 	/*
721 	 *	Clone the route.
722 	 */
723 
724 	rt = ip6_rt_copy(ort, daddr);
725 
726 	if (rt) {
727 		struct neighbour *neigh;
728 		int attempts = !in_softirq();
729 
730 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
731 			if (ort->rt6i_dst.plen != 128 &&
732 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
733 				rt->rt6i_flags |= RTF_ANYCAST;
734 			rt->rt6i_gateway = *daddr;
735 		}
736 
737 		rt->rt6i_flags |= RTF_CACHE;
738 
739 #ifdef CONFIG_IPV6_SUBTREES
740 		if (rt->rt6i_src.plen && saddr) {
741 			rt->rt6i_src.addr = *saddr;
742 			rt->rt6i_src.plen = 128;
743 		}
744 #endif
745 
746 	retry:
747 		neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway,
748 					     rt->rt6i_dev);
749 		if (IS_ERR(neigh)) {
750 			struct net *net = dev_net(rt->rt6i_dev);
751 			int saved_rt_min_interval =
752 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
753 			int saved_rt_elasticity =
754 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
755 
756 			if (attempts-- > 0) {
757 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
758 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
759 
760 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
761 
762 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
763 					saved_rt_elasticity;
764 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
765 					saved_rt_min_interval;
766 				goto retry;
767 			}
768 
769 			if (net_ratelimit())
770 				printk(KERN_WARNING
771 				       "ipv6: Neighbour table overflow.\n");
772 			dst_free(&rt->dst);
773 			return NULL;
774 		}
775 		dst_set_neighbour(&rt->dst, neigh);
776 
777 	}
778 
779 	return rt;
780 }
781 
782 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
783 					const struct in6_addr *daddr)
784 {
785 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
786 
787 	if (rt) {
788 		rt->rt6i_flags |= RTF_CACHE;
789 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
790 	}
791 	return rt;
792 }
793 
794 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
795 				      struct flowi6 *fl6, int flags)
796 {
797 	struct fib6_node *fn;
798 	struct rt6_info *rt, *nrt;
799 	int strict = 0;
800 	int attempts = 3;
801 	int err;
802 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
803 
804 	strict |= flags & RT6_LOOKUP_F_IFACE;
805 
806 relookup:
807 	read_lock_bh(&table->tb6_lock);
808 
809 restart_2:
810 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
811 
812 restart:
813 	rt = rt6_select(fn, oif, strict | reachable);
814 
815 	BACKTRACK(net, &fl6->saddr);
816 	if (rt == net->ipv6.ip6_null_entry ||
817 	    rt->rt6i_flags & RTF_CACHE)
818 		goto out;
819 
820 	dst_hold(&rt->dst);
821 	read_unlock_bh(&table->tb6_lock);
822 
823 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
824 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
825 	else if (!(rt->dst.flags & DST_HOST))
826 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
827 	else
828 		goto out2;
829 
830 	dst_release(&rt->dst);
831 	rt = nrt ? : net->ipv6.ip6_null_entry;
832 
833 	dst_hold(&rt->dst);
834 	if (nrt) {
835 		err = ip6_ins_rt(nrt);
836 		if (!err)
837 			goto out2;
838 	}
839 
840 	if (--attempts <= 0)
841 		goto out2;
842 
843 	/*
844 	 * Race condition! In the gap, when table->tb6_lock was
845 	 * released someone could insert this route.  Relookup.
846 	 */
847 	dst_release(&rt->dst);
848 	goto relookup;
849 
850 out:
851 	if (reachable) {
852 		reachable = 0;
853 		goto restart_2;
854 	}
855 	dst_hold(&rt->dst);
856 	read_unlock_bh(&table->tb6_lock);
857 out2:
858 	rt->dst.lastuse = jiffies;
859 	rt->dst.__use++;
860 
861 	return rt;
862 }
863 
864 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
865 					    struct flowi6 *fl6, int flags)
866 {
867 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
868 }
869 
870 void ip6_route_input(struct sk_buff *skb)
871 {
872 	const struct ipv6hdr *iph = ipv6_hdr(skb);
873 	struct net *net = dev_net(skb->dev);
874 	int flags = RT6_LOOKUP_F_HAS_SADDR;
875 	struct flowi6 fl6 = {
876 		.flowi6_iif = skb->dev->ifindex,
877 		.daddr = iph->daddr,
878 		.saddr = iph->saddr,
879 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
880 		.flowi6_mark = skb->mark,
881 		.flowi6_proto = iph->nexthdr,
882 	};
883 
884 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
885 		flags |= RT6_LOOKUP_F_IFACE;
886 
887 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
888 }
889 
890 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
891 					     struct flowi6 *fl6, int flags)
892 {
893 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
894 }
895 
896 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
897 				    struct flowi6 *fl6)
898 {
899 	int flags = 0;
900 
901 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
902 		flags |= RT6_LOOKUP_F_IFACE;
903 
904 	if (!ipv6_addr_any(&fl6->saddr))
905 		flags |= RT6_LOOKUP_F_HAS_SADDR;
906 	else if (sk)
907 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
908 
909 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
910 }
911 
912 EXPORT_SYMBOL(ip6_route_output);
913 
914 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
915 {
916 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
917 	struct dst_entry *new = NULL;
918 
919 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
920 	if (rt) {
921 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
922 
923 		new = &rt->dst;
924 
925 		new->__use = 1;
926 		new->input = dst_discard;
927 		new->output = dst_discard;
928 
929 		if (dst_metrics_read_only(&ort->dst))
930 			new->_metrics = ort->dst._metrics;
931 		else
932 			dst_copy_metrics(new, &ort->dst);
933 		rt->rt6i_idev = ort->rt6i_idev;
934 		if (rt->rt6i_idev)
935 			in6_dev_hold(rt->rt6i_idev);
936 		rt->rt6i_expires = 0;
937 
938 		rt->rt6i_gateway = ort->rt6i_gateway;
939 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
940 		rt->rt6i_metric = 0;
941 
942 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
943 #ifdef CONFIG_IPV6_SUBTREES
944 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
945 #endif
946 
947 		dst_free(new);
948 	}
949 
950 	dst_release(dst_orig);
951 	return new ? new : ERR_PTR(-ENOMEM);
952 }
953 
954 /*
955  *	Destination cache support functions
956  */
957 
958 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
959 {
960 	struct rt6_info *rt;
961 
962 	rt = (struct rt6_info *) dst;
963 
964 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
965 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
966 			if (!rt->rt6i_peer)
967 				rt6_bind_peer(rt, 0);
968 			rt->rt6i_peer_genid = rt6_peer_genid();
969 		}
970 		return dst;
971 	}
972 	return NULL;
973 }
974 
975 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
976 {
977 	struct rt6_info *rt = (struct rt6_info *) dst;
978 
979 	if (rt) {
980 		if (rt->rt6i_flags & RTF_CACHE) {
981 			if (rt6_check_expired(rt)) {
982 				ip6_del_rt(rt);
983 				dst = NULL;
984 			}
985 		} else {
986 			dst_release(dst);
987 			dst = NULL;
988 		}
989 	}
990 	return dst;
991 }
992 
993 static void ip6_link_failure(struct sk_buff *skb)
994 {
995 	struct rt6_info *rt;
996 
997 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
998 
999 	rt = (struct rt6_info *) skb_dst(skb);
1000 	if (rt) {
1001 		if (rt->rt6i_flags & RTF_CACHE) {
1002 			dst_set_expires(&rt->dst, 0);
1003 			rt->rt6i_flags |= RTF_EXPIRES;
1004 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1005 			rt->rt6i_node->fn_sernum = -1;
1006 	}
1007 }
1008 
1009 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1010 {
1011 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1012 
1013 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1014 		rt6->rt6i_flags |= RTF_MODIFIED;
1015 		if (mtu < IPV6_MIN_MTU) {
1016 			u32 features = dst_metric(dst, RTAX_FEATURES);
1017 			mtu = IPV6_MIN_MTU;
1018 			features |= RTAX_FEATURE_ALLFRAG;
1019 			dst_metric_set(dst, RTAX_FEATURES, features);
1020 		}
1021 		dst_metric_set(dst, RTAX_MTU, mtu);
1022 	}
1023 }
1024 
1025 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1026 {
1027 	struct net_device *dev = dst->dev;
1028 	unsigned int mtu = dst_mtu(dst);
1029 	struct net *net = dev_net(dev);
1030 
1031 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1032 
1033 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1034 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1035 
1036 	/*
1037 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1038 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1039 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1040 	 * rely only on pmtu discovery"
1041 	 */
1042 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1043 		mtu = IPV6_MAXPLEN;
1044 	return mtu;
1045 }
1046 
1047 static unsigned int ip6_mtu(const struct dst_entry *dst)
1048 {
1049 	struct inet6_dev *idev;
1050 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1051 
1052 	if (mtu)
1053 		return mtu;
1054 
1055 	mtu = IPV6_MIN_MTU;
1056 
1057 	rcu_read_lock();
1058 	idev = __in6_dev_get(dst->dev);
1059 	if (idev)
1060 		mtu = idev->cnf.mtu6;
1061 	rcu_read_unlock();
1062 
1063 	return mtu;
1064 }
1065 
1066 static struct dst_entry *icmp6_dst_gc_list;
1067 static DEFINE_SPINLOCK(icmp6_dst_lock);
1068 
1069 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1070 				  struct neighbour *neigh,
1071 				  struct flowi6 *fl6)
1072 {
1073 	struct dst_entry *dst;
1074 	struct rt6_info *rt;
1075 	struct inet6_dev *idev = in6_dev_get(dev);
1076 	struct net *net = dev_net(dev);
1077 
1078 	if (unlikely(!idev))
1079 		return NULL;
1080 
1081 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1082 	if (unlikely(!rt)) {
1083 		in6_dev_put(idev);
1084 		dst = ERR_PTR(-ENOMEM);
1085 		goto out;
1086 	}
1087 
1088 	if (neigh)
1089 		neigh_hold(neigh);
1090 	else {
1091 		neigh = __neigh_lookup_errno(&nd_tbl, &fl6->daddr, dev);
1092 		if (IS_ERR(neigh)) {
1093 			dst_free(&rt->dst);
1094 			return ERR_CAST(neigh);
1095 		}
1096 	}
1097 
1098 	rt->dst.flags |= DST_HOST;
1099 	rt->dst.output  = ip6_output;
1100 	dst_set_neighbour(&rt->dst, neigh);
1101 	atomic_set(&rt->dst.__refcnt, 1);
1102 	rt->rt6i_dst.addr = fl6->daddr;
1103 	rt->rt6i_dst.plen = 128;
1104 	rt->rt6i_idev     = idev;
1105 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1106 
1107 	spin_lock_bh(&icmp6_dst_lock);
1108 	rt->dst.next = icmp6_dst_gc_list;
1109 	icmp6_dst_gc_list = &rt->dst;
1110 	spin_unlock_bh(&icmp6_dst_lock);
1111 
1112 	fib6_force_start_gc(net);
1113 
1114 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1115 
1116 out:
1117 	return dst;
1118 }
1119 
1120 int icmp6_dst_gc(void)
1121 {
1122 	struct dst_entry *dst, **pprev;
1123 	int more = 0;
1124 
1125 	spin_lock_bh(&icmp6_dst_lock);
1126 	pprev = &icmp6_dst_gc_list;
1127 
1128 	while ((dst = *pprev) != NULL) {
1129 		if (!atomic_read(&dst->__refcnt)) {
1130 			*pprev = dst->next;
1131 			dst_free(dst);
1132 		} else {
1133 			pprev = &dst->next;
1134 			++more;
1135 		}
1136 	}
1137 
1138 	spin_unlock_bh(&icmp6_dst_lock);
1139 
1140 	return more;
1141 }
1142 
1143 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1144 			    void *arg)
1145 {
1146 	struct dst_entry *dst, **pprev;
1147 
1148 	spin_lock_bh(&icmp6_dst_lock);
1149 	pprev = &icmp6_dst_gc_list;
1150 	while ((dst = *pprev) != NULL) {
1151 		struct rt6_info *rt = (struct rt6_info *) dst;
1152 		if (func(rt, arg)) {
1153 			*pprev = dst->next;
1154 			dst_free(dst);
1155 		} else {
1156 			pprev = &dst->next;
1157 		}
1158 	}
1159 	spin_unlock_bh(&icmp6_dst_lock);
1160 }
1161 
1162 static int ip6_dst_gc(struct dst_ops *ops)
1163 {
1164 	unsigned long now = jiffies;
1165 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1166 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1167 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1168 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1169 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1170 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1171 	int entries;
1172 
1173 	entries = dst_entries_get_fast(ops);
1174 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1175 	    entries <= rt_max_size)
1176 		goto out;
1177 
1178 	net->ipv6.ip6_rt_gc_expire++;
1179 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1180 	net->ipv6.ip6_rt_last_gc = now;
1181 	entries = dst_entries_get_slow(ops);
1182 	if (entries < ops->gc_thresh)
1183 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1184 out:
1185 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1186 	return entries > rt_max_size;
1187 }
1188 
1189 /* Clean host part of a prefix. Not necessary in radix tree,
1190    but results in cleaner routing tables.
1191 
1192    Remove it only when all the things will work!
1193  */
1194 
1195 int ip6_dst_hoplimit(struct dst_entry *dst)
1196 {
1197 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1198 	if (hoplimit == 0) {
1199 		struct net_device *dev = dst->dev;
1200 		struct inet6_dev *idev;
1201 
1202 		rcu_read_lock();
1203 		idev = __in6_dev_get(dev);
1204 		if (idev)
1205 			hoplimit = idev->cnf.hop_limit;
1206 		else
1207 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1208 		rcu_read_unlock();
1209 	}
1210 	return hoplimit;
1211 }
1212 EXPORT_SYMBOL(ip6_dst_hoplimit);
1213 
1214 /*
1215  *
1216  */
1217 
1218 int ip6_route_add(struct fib6_config *cfg)
1219 {
1220 	int err;
1221 	struct net *net = cfg->fc_nlinfo.nl_net;
1222 	struct rt6_info *rt = NULL;
1223 	struct net_device *dev = NULL;
1224 	struct inet6_dev *idev = NULL;
1225 	struct fib6_table *table;
1226 	int addr_type;
1227 
1228 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1229 		return -EINVAL;
1230 #ifndef CONFIG_IPV6_SUBTREES
1231 	if (cfg->fc_src_len)
1232 		return -EINVAL;
1233 #endif
1234 	if (cfg->fc_ifindex) {
1235 		err = -ENODEV;
1236 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1237 		if (!dev)
1238 			goto out;
1239 		idev = in6_dev_get(dev);
1240 		if (!idev)
1241 			goto out;
1242 	}
1243 
1244 	if (cfg->fc_metric == 0)
1245 		cfg->fc_metric = IP6_RT_PRIO_USER;
1246 
1247 	err = -ENOBUFS;
1248 	if (cfg->fc_nlinfo.nlh &&
1249 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1250 		table = fib6_get_table(net, cfg->fc_table);
1251 		if (!table) {
1252 			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1253 			table = fib6_new_table(net, cfg->fc_table);
1254 		}
1255 	} else {
1256 		table = fib6_new_table(net, cfg->fc_table);
1257 	}
1258 
1259 	if (!table)
1260 		goto out;
1261 
1262 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1263 
1264 	if (!rt) {
1265 		err = -ENOMEM;
1266 		goto out;
1267 	}
1268 
1269 	rt->dst.obsolete = -1;
1270 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1271 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1272 				0;
1273 
1274 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1275 		cfg->fc_protocol = RTPROT_BOOT;
1276 	rt->rt6i_protocol = cfg->fc_protocol;
1277 
1278 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1279 
1280 	if (addr_type & IPV6_ADDR_MULTICAST)
1281 		rt->dst.input = ip6_mc_input;
1282 	else if (cfg->fc_flags & RTF_LOCAL)
1283 		rt->dst.input = ip6_input;
1284 	else
1285 		rt->dst.input = ip6_forward;
1286 
1287 	rt->dst.output = ip6_output;
1288 
1289 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1290 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1291 	if (rt->rt6i_dst.plen == 128)
1292 	       rt->dst.flags |= DST_HOST;
1293 
1294 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1295 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1296 		if (!metrics) {
1297 			err = -ENOMEM;
1298 			goto out;
1299 		}
1300 		dst_init_metrics(&rt->dst, metrics, 0);
1301 	}
1302 #ifdef CONFIG_IPV6_SUBTREES
1303 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1304 	rt->rt6i_src.plen = cfg->fc_src_len;
1305 #endif
1306 
1307 	rt->rt6i_metric = cfg->fc_metric;
1308 
1309 	/* We cannot add true routes via loopback here,
1310 	   they would result in kernel looping; promote them to reject routes
1311 	 */
1312 	if ((cfg->fc_flags & RTF_REJECT) ||
1313 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1314 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1315 	     !(cfg->fc_flags & RTF_LOCAL))) {
1316 		/* hold loopback dev/idev if we haven't done so. */
1317 		if (dev != net->loopback_dev) {
1318 			if (dev) {
1319 				dev_put(dev);
1320 				in6_dev_put(idev);
1321 			}
1322 			dev = net->loopback_dev;
1323 			dev_hold(dev);
1324 			idev = in6_dev_get(dev);
1325 			if (!idev) {
1326 				err = -ENODEV;
1327 				goto out;
1328 			}
1329 		}
1330 		rt->dst.output = ip6_pkt_discard_out;
1331 		rt->dst.input = ip6_pkt_discard;
1332 		rt->dst.error = -ENETUNREACH;
1333 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1334 		goto install_route;
1335 	}
1336 
1337 	if (cfg->fc_flags & RTF_GATEWAY) {
1338 		const struct in6_addr *gw_addr;
1339 		int gwa_type;
1340 
1341 		gw_addr = &cfg->fc_gateway;
1342 		rt->rt6i_gateway = *gw_addr;
1343 		gwa_type = ipv6_addr_type(gw_addr);
1344 
1345 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1346 			struct rt6_info *grt;
1347 
1348 			/* IPv6 strictly inhibits using not link-local
1349 			   addresses as nexthop address.
1350 			   Otherwise, router will not able to send redirects.
1351 			   It is very good, but in some (rare!) circumstances
1352 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1353 			   some exceptions. --ANK
1354 			 */
1355 			err = -EINVAL;
1356 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1357 				goto out;
1358 
1359 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1360 
1361 			err = -EHOSTUNREACH;
1362 			if (!grt)
1363 				goto out;
1364 			if (dev) {
1365 				if (dev != grt->rt6i_dev) {
1366 					dst_release(&grt->dst);
1367 					goto out;
1368 				}
1369 			} else {
1370 				dev = grt->rt6i_dev;
1371 				idev = grt->rt6i_idev;
1372 				dev_hold(dev);
1373 				in6_dev_hold(grt->rt6i_idev);
1374 			}
1375 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1376 				err = 0;
1377 			dst_release(&grt->dst);
1378 
1379 			if (err)
1380 				goto out;
1381 		}
1382 		err = -EINVAL;
1383 		if (!dev || (dev->flags & IFF_LOOPBACK))
1384 			goto out;
1385 	}
1386 
1387 	err = -ENODEV;
1388 	if (!dev)
1389 		goto out;
1390 
1391 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1392 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1393 			err = -EINVAL;
1394 			goto out;
1395 		}
1396 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1397 		rt->rt6i_prefsrc.plen = 128;
1398 	} else
1399 		rt->rt6i_prefsrc.plen = 0;
1400 
1401 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1402 		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1403 		if (IS_ERR(n)) {
1404 			err = PTR_ERR(n);
1405 			goto out;
1406 		}
1407 		dst_set_neighbour(&rt->dst, n);
1408 	}
1409 
1410 	rt->rt6i_flags = cfg->fc_flags;
1411 
1412 install_route:
1413 	if (cfg->fc_mx) {
1414 		struct nlattr *nla;
1415 		int remaining;
1416 
1417 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1418 			int type = nla_type(nla);
1419 
1420 			if (type) {
1421 				if (type > RTAX_MAX) {
1422 					err = -EINVAL;
1423 					goto out;
1424 				}
1425 
1426 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1427 			}
1428 		}
1429 	}
1430 
1431 	rt->dst.dev = dev;
1432 	rt->rt6i_idev = idev;
1433 	rt->rt6i_table = table;
1434 
1435 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1436 
1437 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1438 
1439 out:
1440 	if (dev)
1441 		dev_put(dev);
1442 	if (idev)
1443 		in6_dev_put(idev);
1444 	if (rt)
1445 		dst_free(&rt->dst);
1446 	return err;
1447 }
1448 
1449 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1450 {
1451 	int err;
1452 	struct fib6_table *table;
1453 	struct net *net = dev_net(rt->rt6i_dev);
1454 
1455 	if (rt == net->ipv6.ip6_null_entry)
1456 		return -ENOENT;
1457 
1458 	table = rt->rt6i_table;
1459 	write_lock_bh(&table->tb6_lock);
1460 
1461 	err = fib6_del(rt, info);
1462 	dst_release(&rt->dst);
1463 
1464 	write_unlock_bh(&table->tb6_lock);
1465 
1466 	return err;
1467 }
1468 
1469 int ip6_del_rt(struct rt6_info *rt)
1470 {
1471 	struct nl_info info = {
1472 		.nl_net = dev_net(rt->rt6i_dev),
1473 	};
1474 	return __ip6_del_rt(rt, &info);
1475 }
1476 
1477 static int ip6_route_del(struct fib6_config *cfg)
1478 {
1479 	struct fib6_table *table;
1480 	struct fib6_node *fn;
1481 	struct rt6_info *rt;
1482 	int err = -ESRCH;
1483 
1484 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1485 	if (!table)
1486 		return err;
1487 
1488 	read_lock_bh(&table->tb6_lock);
1489 
1490 	fn = fib6_locate(&table->tb6_root,
1491 			 &cfg->fc_dst, cfg->fc_dst_len,
1492 			 &cfg->fc_src, cfg->fc_src_len);
1493 
1494 	if (fn) {
1495 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1496 			if (cfg->fc_ifindex &&
1497 			    (!rt->rt6i_dev ||
1498 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1499 				continue;
1500 			if (cfg->fc_flags & RTF_GATEWAY &&
1501 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1502 				continue;
1503 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1504 				continue;
1505 			dst_hold(&rt->dst);
1506 			read_unlock_bh(&table->tb6_lock);
1507 
1508 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1509 		}
1510 	}
1511 	read_unlock_bh(&table->tb6_lock);
1512 
1513 	return err;
1514 }
1515 
1516 /*
1517  *	Handle redirects
1518  */
/*
 * Flow key used for redirect lookups: a flowi6 followed by the gateway
 * address that the candidate route must match.
 */
1519 struct ip6rd_flowi {
	/* must stay first: __ip6_route_redirect() casts a flowi6 pointer back to this struct */
1520 	struct flowi6 fl6;
	/* compared against rt6i_gateway of each candidate route */
1521 	struct in6_addr gateway;
1522 };
1523 
1524 static struct rt6_info *__ip6_route_redirect(struct net *net,
1525 					     struct fib6_table *table,
1526 					     struct flowi6 *fl6,
1527 					     int flags)
1528 {
1529 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1530 	struct rt6_info *rt;
1531 	struct fib6_node *fn;
1532 
1533 	/*
1534 	 * Get the "current" route for this destination and
1535 	 * check if the redirect has come from approriate router.
1536 	 *
1537 	 * RFC 2461 specifies that redirects should only be
1538 	 * accepted if they come from the nexthop to the target.
1539 	 * Due to the way the routes are chosen, this notion
1540 	 * is a bit fuzzy and one might need to check all possible
1541 	 * routes.
1542 	 */
1543 
1544 	read_lock_bh(&table->tb6_lock);
1545 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1546 restart:
1547 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1548 		/*
1549 		 * Current route is on-link; redirect is always invalid.
1550 		 *
1551 		 * Seems, previous statement is not true. It could
1552 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1553 		 * But then router serving it might decide, that we should
1554 		 * know truth 8)8) --ANK (980726).
1555 		 */
1556 		if (rt6_check_expired(rt))
1557 			continue;
1558 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1559 			continue;
1560 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1561 			continue;
1562 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1563 			continue;
1564 		break;
1565 	}
1566 
1567 	if (!rt)
1568 		rt = net->ipv6.ip6_null_entry;
1569 	BACKTRACK(net, &fl6->saddr);
1570 out:
1571 	dst_hold(&rt->dst);
1572 
1573 	read_unlock_bh(&table->tb6_lock);
1574 
1575 	return rt;
1576 };
1577 
1578 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1579 					   const struct in6_addr *src,
1580 					   const struct in6_addr *gateway,
1581 					   struct net_device *dev)
1582 {
1583 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1584 	struct net *net = dev_net(dev);
1585 	struct ip6rd_flowi rdfl = {
1586 		.fl6 = {
1587 			.flowi6_oif = dev->ifindex,
1588 			.daddr = *dest,
1589 			.saddr = *src,
1590 		},
1591 	};
1592 
1593 	rdfl.gateway = *gateway;
1594 
1595 	if (rt6_need_strict(dest))
1596 		flags |= RT6_LOOKUP_F_IFACE;
1597 
1598 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1599 						   flags, __ip6_route_redirect);
1600 }
1601 
1602 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1603 		  const struct in6_addr *saddr,
1604 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1605 {
1606 	struct rt6_info *rt, *nrt = NULL;
1607 	struct netevent_redirect netevent;
1608 	struct net *net = dev_net(neigh->dev);
1609 
1610 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1611 
1612 	if (rt == net->ipv6.ip6_null_entry) {
1613 		if (net_ratelimit())
1614 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1615 			       "for redirect target\n");
1616 		goto out;
1617 	}
1618 
1619 	/*
1620 	 *	We have finally decided to accept it.
1621 	 */
1622 
1623 	neigh_update(neigh, lladdr, NUD_STALE,
1624 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1625 		     NEIGH_UPDATE_F_OVERRIDE|
1626 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1627 				     NEIGH_UPDATE_F_ISROUTER))
1628 		     );
1629 
1630 	/*
1631 	 * Redirect received -> path was valid.
1632 	 * Look, redirects are sent only in response to data packets,
1633 	 * so that this nexthop apparently is reachable. --ANK
1634 	 */
1635 	dst_confirm(&rt->dst);
1636 
1637 	/* Duplicate redirect: silently ignore. */
1638 	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1639 		goto out;
1640 
1641 	nrt = ip6_rt_copy(rt, dest);
1642 	if (!nrt)
1643 		goto out;
1644 
1645 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1646 	if (on_link)
1647 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1648 
1649 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1650 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1651 
1652 	if (ip6_ins_rt(nrt))
1653 		goto out;
1654 
1655 	netevent.old = &rt->dst;
1656 	netevent.new = &nrt->dst;
1657 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1658 
1659 	if (rt->rt6i_flags & RTF_CACHE) {
1660 		ip6_del_rt(rt);
1661 		return;
1662 	}
1663 
1664 out:
1665 	dst_release(&rt->dst);
1666 }
1667 
1668 /*
1669  *	Handle ICMP "packet too big" messages
1670  *	i.e. Path MTU discovery
1671  */
1672 
1673 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1674 			     struct net *net, u32 pmtu, int ifindex)
1675 {
1676 	struct rt6_info *rt, *nrt;
1677 	int allfrag = 0;
1678 again:
1679 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1680 	if (!rt)
1681 		return;
1682 
1683 	if (rt6_check_expired(rt)) {
1684 		ip6_del_rt(rt);
1685 		goto again;
1686 	}
1687 
1688 	if (pmtu >= dst_mtu(&rt->dst))
1689 		goto out;
1690 
1691 	if (pmtu < IPV6_MIN_MTU) {
1692 		/*
1693 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1694 		 * MTU (1280) and a fragment header should always be included
1695 		 * after a node receiving Too Big message reporting PMTU is
1696 		 * less than the IPv6 Minimum Link MTU.
1697 		 */
1698 		pmtu = IPV6_MIN_MTU;
1699 		allfrag = 1;
1700 	}
1701 
1702 	/* New mtu received -> path was valid.
1703 	   They are sent only in response to data packets,
1704 	   so that this nexthop apparently is reachable. --ANK
1705 	 */
1706 	dst_confirm(&rt->dst);
1707 
1708 	/* Host route. If it is static, it would be better
1709 	   not to override it, but add new one, so that
1710 	   when cache entry will expire old pmtu
1711 	   would return automatically.
1712 	 */
1713 	if (rt->rt6i_flags & RTF_CACHE) {
1714 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1715 		if (allfrag) {
1716 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1717 			features |= RTAX_FEATURE_ALLFRAG;
1718 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1719 		}
1720 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1721 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1722 		goto out;
1723 	}
1724 
1725 	/* Network route.
1726 	   Two cases are possible:
1727 	   1. It is connected route. Action: COW
1728 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1729 	 */
1730 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1731 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1732 	else
1733 		nrt = rt6_alloc_clone(rt, daddr);
1734 
1735 	if (nrt) {
1736 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1737 		if (allfrag) {
1738 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1739 			features |= RTAX_FEATURE_ALLFRAG;
1740 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1741 		}
1742 
1743 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1744 		 * happened within 5 mins, the recommended timer is 10 mins.
1745 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1746 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1747 		 * and detecting PMTU increase will be automatically happened.
1748 		 */
1749 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1750 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1751 
1752 		ip6_ins_rt(nrt);
1753 	}
1754 out:
1755 	dst_release(&rt->dst);
1756 }
1757 
1758 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1759 			struct net_device *dev, u32 pmtu)
1760 {
1761 	struct net *net = dev_net(dev);
1762 
1763 	/*
1764 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1765 	 * is sending along the path" that caused the Packet Too Big message.
1766 	 * Since it's not possible in the general case to determine which
1767 	 * interface was used to send the original packet, we update the MTU
1768 	 * on the interface that will be used to send future packets. We also
1769 	 * update the MTU on the interface that received the Packet Too Big in
1770 	 * case the original packet was forced out that interface with
1771 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1772 	 * correct behaviour, which would be to update the MTU on all
1773 	 * interfaces.
1774 	 */
1775 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1776 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1777 }
1778 
1779 /*
1780  *	Misc support functions
1781  */
1782 
1783 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1784 				    const struct in6_addr *dest)
1785 {
1786 	struct net *net = dev_net(ort->rt6i_dev);
1787 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1788 					    ort->dst.dev, 0);
1789 
1790 	if (rt) {
1791 		rt->dst.input = ort->dst.input;
1792 		rt->dst.output = ort->dst.output;
1793 		rt->dst.flags |= DST_HOST;
1794 
1795 		rt->rt6i_dst.addr = *dest;
1796 		rt->rt6i_dst.plen = 128;
1797 		dst_copy_metrics(&rt->dst, &ort->dst);
1798 		rt->dst.error = ort->dst.error;
1799 		rt->rt6i_idev = ort->rt6i_idev;
1800 		if (rt->rt6i_idev)
1801 			in6_dev_hold(rt->rt6i_idev);
1802 		rt->dst.lastuse = jiffies;
1803 		rt->rt6i_expires = 0;
1804 
1805 		rt->rt6i_gateway = ort->rt6i_gateway;
1806 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1807 		rt->rt6i_metric = 0;
1808 
1809 #ifdef CONFIG_IPV6_SUBTREES
1810 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1811 #endif
1812 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1813 		rt->rt6i_table = ort->rt6i_table;
1814 	}
1815 	return rt;
1816 }
1817 
1818 #ifdef CONFIG_IPV6_ROUTE_INFO
1819 static struct rt6_info *rt6_get_route_info(struct net *net,
1820 					   const struct in6_addr *prefix, int prefixlen,
1821 					   const struct in6_addr *gwaddr, int ifindex)
1822 {
1823 	struct fib6_node *fn;
1824 	struct rt6_info *rt = NULL;
1825 	struct fib6_table *table;
1826 
1827 	table = fib6_get_table(net, RT6_TABLE_INFO);
1828 	if (!table)
1829 		return NULL;
1830 
1831 	write_lock_bh(&table->tb6_lock);
1832 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1833 	if (!fn)
1834 		goto out;
1835 
1836 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1837 		if (rt->rt6i_dev->ifindex != ifindex)
1838 			continue;
1839 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1840 			continue;
1841 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1842 			continue;
1843 		dst_hold(&rt->dst);
1844 		break;
1845 	}
1846 out:
1847 	write_unlock_bh(&table->tb6_lock);
1848 	return rt;
1849 }
1850 
1851 static struct rt6_info *rt6_add_route_info(struct net *net,
1852 					   const struct in6_addr *prefix, int prefixlen,
1853 					   const struct in6_addr *gwaddr, int ifindex,
1854 					   unsigned pref)
1855 {
1856 	struct fib6_config cfg = {
1857 		.fc_table	= RT6_TABLE_INFO,
1858 		.fc_metric	= IP6_RT_PRIO_USER,
1859 		.fc_ifindex	= ifindex,
1860 		.fc_dst_len	= prefixlen,
1861 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1862 				  RTF_UP | RTF_PREF(pref),
1863 		.fc_nlinfo.pid = 0,
1864 		.fc_nlinfo.nlh = NULL,
1865 		.fc_nlinfo.nl_net = net,
1866 	};
1867 
1868 	cfg.fc_dst = *prefix;
1869 	cfg.fc_gateway = *gwaddr;
1870 
1871 	/* We should treat it as a default route if prefix length is 0. */
1872 	if (!prefixlen)
1873 		cfg.fc_flags |= RTF_DEFAULT;
1874 
1875 	ip6_route_add(&cfg);
1876 
1877 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1878 }
1879 #endif
1880 
1881 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1882 {
1883 	struct rt6_info *rt;
1884 	struct fib6_table *table;
1885 
1886 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1887 	if (!table)
1888 		return NULL;
1889 
1890 	write_lock_bh(&table->tb6_lock);
1891 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1892 		if (dev == rt->rt6i_dev &&
1893 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1894 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1895 			break;
1896 	}
1897 	if (rt)
1898 		dst_hold(&rt->dst);
1899 	write_unlock_bh(&table->tb6_lock);
1900 	return rt;
1901 }
1902 
1903 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1904 				     struct net_device *dev,
1905 				     unsigned int pref)
1906 {
1907 	struct fib6_config cfg = {
1908 		.fc_table	= RT6_TABLE_DFLT,
1909 		.fc_metric	= IP6_RT_PRIO_USER,
1910 		.fc_ifindex	= dev->ifindex,
1911 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1912 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1913 		.fc_nlinfo.pid = 0,
1914 		.fc_nlinfo.nlh = NULL,
1915 		.fc_nlinfo.nl_net = dev_net(dev),
1916 	};
1917 
1918 	cfg.fc_gateway = *gwaddr;
1919 
1920 	ip6_route_add(&cfg);
1921 
1922 	return rt6_get_dflt_router(gwaddr, dev);
1923 }
1924 
1925 void rt6_purge_dflt_routers(struct net *net)
1926 {
1927 	struct rt6_info *rt;
1928 	struct fib6_table *table;
1929 
1930 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1931 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1932 	if (!table)
1933 		return;
1934 
1935 restart:
1936 	read_lock_bh(&table->tb6_lock);
1937 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1938 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1939 			dst_hold(&rt->dst);
1940 			read_unlock_bh(&table->tb6_lock);
1941 			ip6_del_rt(rt);
1942 			goto restart;
1943 		}
1944 	}
1945 	read_unlock_bh(&table->tb6_lock);
1946 }
1947 
1948 static void rtmsg_to_fib6_config(struct net *net,
1949 				 struct in6_rtmsg *rtmsg,
1950 				 struct fib6_config *cfg)
1951 {
1952 	memset(cfg, 0, sizeof(*cfg));
1953 
1954 	cfg->fc_table = RT6_TABLE_MAIN;
1955 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1956 	cfg->fc_metric = rtmsg->rtmsg_metric;
1957 	cfg->fc_expires = rtmsg->rtmsg_info;
1958 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1959 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1960 	cfg->fc_flags = rtmsg->rtmsg_flags;
1961 
1962 	cfg->fc_nlinfo.nl_net = net;
1963 
1964 	cfg->fc_dst = rtmsg->rtmsg_dst;
1965 	cfg->fc_src = rtmsg->rtmsg_src;
1966 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1967 }
1968 
1969 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1970 {
1971 	struct fib6_config cfg;
1972 	struct in6_rtmsg rtmsg;
1973 	int err;
1974 
1975 	switch(cmd) {
1976 	case SIOCADDRT:		/* Add a route */
1977 	case SIOCDELRT:		/* Delete a route */
1978 		if (!capable(CAP_NET_ADMIN))
1979 			return -EPERM;
1980 		err = copy_from_user(&rtmsg, arg,
1981 				     sizeof(struct in6_rtmsg));
1982 		if (err)
1983 			return -EFAULT;
1984 
1985 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1986 
1987 		rtnl_lock();
1988 		switch (cmd) {
1989 		case SIOCADDRT:
1990 			err = ip6_route_add(&cfg);
1991 			break;
1992 		case SIOCDELRT:
1993 			err = ip6_route_del(&cfg);
1994 			break;
1995 		default:
1996 			err = -EINVAL;
1997 		}
1998 		rtnl_unlock();
1999 
2000 		return err;
2001 	}
2002 
2003 	return -EINVAL;
2004 }
2005 
2006 /*
2007  *	Drop the packet on the floor
2008  */
2009 
2010 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2011 {
2012 	int type;
2013 	struct dst_entry *dst = skb_dst(skb);
2014 	switch (ipstats_mib_noroutes) {
2015 	case IPSTATS_MIB_INNOROUTES:
2016 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2017 		if (type == IPV6_ADDR_ANY) {
2018 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2019 				      IPSTATS_MIB_INADDRERRORS);
2020 			break;
2021 		}
2022 		/* FALLTHROUGH */
2023 	case IPSTATS_MIB_OUTNOROUTES:
2024 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2025 			      ipstats_mib_noroutes);
2026 		break;
2027 	}
2028 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2029 	kfree_skb(skb);
2030 	return 0;
2031 }
2032 
2033 static int ip6_pkt_discard(struct sk_buff *skb)
2034 {
2035 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2036 }
2037 
2038 static int ip6_pkt_discard_out(struct sk_buff *skb)
2039 {
2040 	skb->dev = skb_dst(skb)->dev;
2041 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2042 }
2043 
2044 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2045 
2046 static int ip6_pkt_prohibit(struct sk_buff *skb)
2047 {
2048 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2049 }
2050 
2051 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2052 {
2053 	skb->dev = skb_dst(skb)->dev;
2054 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2055 }
2056 
2057 #endif
2058 
2059 /*
2060  *	Allocate a dst for local (unicast / anycast) address.
2061  */
2062 
2063 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2064 				    const struct in6_addr *addr,
2065 				    bool anycast)
2066 {
2067 	struct net *net = dev_net(idev->dev);
2068 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2069 					    net->loopback_dev, 0);
2070 	struct neighbour *neigh;
2071 
2072 	if (!rt) {
2073 		if (net_ratelimit())
2074 			pr_warning("IPv6:  Maximum number of routes reached,"
2075 				   " consider increasing route/max_size.\n");
2076 		return ERR_PTR(-ENOMEM);
2077 	}
2078 
2079 	in6_dev_hold(idev);
2080 
2081 	rt->dst.flags |= DST_HOST;
2082 	rt->dst.input = ip6_input;
2083 	rt->dst.output = ip6_output;
2084 	rt->rt6i_idev = idev;
2085 	rt->dst.obsolete = -1;
2086 
2087 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2088 	if (anycast)
2089 		rt->rt6i_flags |= RTF_ANYCAST;
2090 	else
2091 		rt->rt6i_flags |= RTF_LOCAL;
2092 	neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
2093 	if (IS_ERR(neigh)) {
2094 		dst_free(&rt->dst);
2095 
2096 		return ERR_CAST(neigh);
2097 	}
2098 	dst_set_neighbour(&rt->dst, neigh);
2099 
2100 	rt->rt6i_dst.addr = *addr;
2101 	rt->rt6i_dst.plen = 128;
2102 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2103 
2104 	atomic_set(&rt->dst.__refcnt, 1);
2105 
2106 	return rt;
2107 }
2108 
2109 int ip6_route_get_saddr(struct net *net,
2110 			struct rt6_info *rt,
2111 			const struct in6_addr *daddr,
2112 			unsigned int prefs,
2113 			struct in6_addr *saddr)
2114 {
2115 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2116 	int err = 0;
2117 	if (rt->rt6i_prefsrc.plen)
2118 		*saddr = rt->rt6i_prefsrc.addr;
2119 	else
2120 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2121 					 daddr, prefs, saddr);
2122 	return err;
2123 }
2124 
2125 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
2126 struct arg_dev_net_ip {
	/* only routes on this device are touched; NULL matches every device */
2127 	struct net_device *dev;
	/* namespace whose null entry must be left alone */
2128 	struct net *net;
	/* address compared against each route's preferred source */
2129 	struct in6_addr *addr;
2130 };
2131 
2132 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2133 {
2134 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2135 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2136 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2137 
2138 	if (((void *)rt->rt6i_dev == dev || !dev) &&
2139 	    rt != net->ipv6.ip6_null_entry &&
2140 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2141 		/* remove prefsrc entry */
2142 		rt->rt6i_prefsrc.plen = 0;
2143 	}
2144 	return 0;
2145 }
2146 
2147 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2148 {
2149 	struct net *net = dev_net(ifp->idev->dev);
2150 	struct arg_dev_net_ip adni = {
2151 		.dev = ifp->idev->dev,
2152 		.net = net,
2153 		.addr = &ifp->addr,
2154 	};
2155 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2156 }
2157 
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
2158 struct arg_dev_net {
	/* device going down; NULL matches every device */
2159 	struct net_device *dev;
	/* namespace whose null entry must never be selected */
2160 	struct net *net;
2161 };
2162 
2163 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2164 {
2165 	const struct arg_dev_net *adn = arg;
2166 	const struct net_device *dev = adn->dev;
2167 
2168 	if ((rt->rt6i_dev == dev || !dev) &&
2169 	    rt != adn->net->ipv6.ip6_null_entry) {
2170 		RT6_TRACE("deleted by ifdown %p\n", rt);
2171 		return -1;
2172 	}
2173 	return 0;
2174 }
2175 
2176 void rt6_ifdown(struct net *net, struct net_device *dev)
2177 {
2178 	struct arg_dev_net adn = {
2179 		.dev = dev,
2180 		.net = net,
2181 	};
2182 
2183 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2184 	icmp6_clean_all(fib6_ifdown, &adn);
2185 }
2186 
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
2187 struct rt6_mtu_change_arg
2188 {
	/* device whose MTU changed; only its routes are considered */
2189 	struct net_device *dev;
	/* the new device MTU */
2190 	unsigned mtu;
2191 };
2192 
2193 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2194 {
2195 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2196 	struct inet6_dev *idev;
2197 
2198 	/* In IPv6 pmtu discovery is not optional,
2199 	   so that RTAX_MTU lock cannot disable it.
2200 	   We still use this lock to block changes
2201 	   caused by addrconf/ndisc.
2202 	*/
2203 
2204 	idev = __in6_dev_get(arg->dev);
2205 	if (!idev)
2206 		return 0;
2207 
2208 	/* For administrative MTU increase, there is no way to discover
2209 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2210 	   Since RFC 1981 doesn't include administrative MTU increase
2211 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2212 	 */
2213 	/*
2214 	   If new MTU is less than route PMTU, this new MTU will be the
2215 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2216 	   decreases; if new MTU is greater than route PMTU, and the
2217 	   old MTU is the lowest MTU in the path, update the route PMTU
2218 	   to reflect the increase. In this case if the other nodes' MTU
2219 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2220 	   PMTU discouvery.
2221 	 */
2222 	if (rt->rt6i_dev == arg->dev &&
2223 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2224 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2225 	     (dst_mtu(&rt->dst) < arg->mtu &&
2226 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2227 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2228 	}
2229 	return 0;
2230 }
2231 
2232 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2233 {
2234 	struct rt6_mtu_change_arg arg = {
2235 		.dev = dev,
2236 		.mtu = mtu,
2237 	};
2238 
2239 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2240 }
2241 
/*
 * Netlink attribute policy passed to nlmsg_parse() by rtm_to_fib6_config().
 * Attributes without an entry here (e.g. RTA_DST, RTA_SRC, RTA_PREFSRC)
 * have a zero-initialised policy slot.
 */
2242 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2243 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2244 	[RTA_OIF]               = { .type = NLA_U32 },
2245 	[RTA_IIF]		= { .type = NLA_U32 },
2246 	[RTA_PRIORITY]          = { .type = NLA_U32 },
	/* only the nesting is checked; the nested metric attributes carry no policy */
2247 	[RTA_METRICS]           = { .type = NLA_NESTED },
2248 };
2249 
2250 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2251 			      struct fib6_config *cfg)
2252 {
2253 	struct rtmsg *rtm;
2254 	struct nlattr *tb[RTA_MAX+1];
2255 	int err;
2256 
2257 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2258 	if (err < 0)
2259 		goto errout;
2260 
2261 	err = -EINVAL;
2262 	rtm = nlmsg_data(nlh);
2263 	memset(cfg, 0, sizeof(*cfg));
2264 
2265 	cfg->fc_table = rtm->rtm_table;
2266 	cfg->fc_dst_len = rtm->rtm_dst_len;
2267 	cfg->fc_src_len = rtm->rtm_src_len;
2268 	cfg->fc_flags = RTF_UP;
2269 	cfg->fc_protocol = rtm->rtm_protocol;
2270 
2271 	if (rtm->rtm_type == RTN_UNREACHABLE)
2272 		cfg->fc_flags |= RTF_REJECT;
2273 
2274 	if (rtm->rtm_type == RTN_LOCAL)
2275 		cfg->fc_flags |= RTF_LOCAL;
2276 
2277 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2278 	cfg->fc_nlinfo.nlh = nlh;
2279 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2280 
2281 	if (tb[RTA_GATEWAY]) {
2282 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2283 		cfg->fc_flags |= RTF_GATEWAY;
2284 	}
2285 
2286 	if (tb[RTA_DST]) {
2287 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2288 
2289 		if (nla_len(tb[RTA_DST]) < plen)
2290 			goto errout;
2291 
2292 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2293 	}
2294 
2295 	if (tb[RTA_SRC]) {
2296 		int plen = (rtm->rtm_src_len + 7) >> 3;
2297 
2298 		if (nla_len(tb[RTA_SRC]) < plen)
2299 			goto errout;
2300 
2301 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2302 	}
2303 
2304 	if (tb[RTA_PREFSRC])
2305 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2306 
2307 	if (tb[RTA_OIF])
2308 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2309 
2310 	if (tb[RTA_PRIORITY])
2311 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2312 
2313 	if (tb[RTA_METRICS]) {
2314 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2315 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2316 	}
2317 
2318 	if (tb[RTA_TABLE])
2319 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2320 
2321 	err = 0;
2322 errout:
2323 	return err;
2324 }
2325 
2326 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2327 {
2328 	struct fib6_config cfg;
2329 	int err;
2330 
2331 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2332 	if (err < 0)
2333 		return err;
2334 
2335 	return ip6_route_del(&cfg);
2336 }
2337 
2338 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2339 {
2340 	struct fib6_config cfg;
2341 	int err;
2342 
2343 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2344 	if (err < 0)
2345 		return err;
2346 
2347 	return ip6_route_add(&cfg);
2348 }
2349 
2350 static inline size_t rt6_nlmsg_size(void)
2351 {
2352 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2353 	       + nla_total_size(16) /* RTA_SRC */
2354 	       + nla_total_size(16) /* RTA_DST */
2355 	       + nla_total_size(16) /* RTA_GATEWAY */
2356 	       + nla_total_size(16) /* RTA_PREFSRC */
2357 	       + nla_total_size(4) /* RTA_TABLE */
2358 	       + nla_total_size(4) /* RTA_IIF */
2359 	       + nla_total_size(4) /* RTA_OIF */
2360 	       + nla_total_size(4) /* RTA_PRIORITY */
2361 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2362 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2363 }
2364 
2365 static int rt6_fill_node(struct net *net,
2366 			 struct sk_buff *skb, struct rt6_info *rt,
2367 			 struct in6_addr *dst, struct in6_addr *src,
2368 			 int iif, int type, u32 pid, u32 seq,
2369 			 int prefix, int nowait, unsigned int flags)
2370 {
2371 	struct rtmsg *rtm;
2372 	struct nlmsghdr *nlh;
2373 	long expires;
2374 	u32 table;
2375 	struct neighbour *n;
2376 
2377 	if (prefix) {	/* user wants prefix routes only */
2378 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2379 			/* success since this is not a prefix route */
2380 			return 1;
2381 		}
2382 	}
2383 
2384 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2385 	if (!nlh)
2386 		return -EMSGSIZE;
2387 
2388 	rtm = nlmsg_data(nlh);
2389 	rtm->rtm_family = AF_INET6;
2390 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2391 	rtm->rtm_src_len = rt->rt6i_src.plen;
2392 	rtm->rtm_tos = 0;
2393 	if (rt->rt6i_table)
2394 		table = rt->rt6i_table->tb6_id;
2395 	else
2396 		table = RT6_TABLE_UNSPEC;
2397 	rtm->rtm_table = table;
2398 	NLA_PUT_U32(skb, RTA_TABLE, table);
2399 	if (rt->rt6i_flags & RTF_REJECT)
2400 		rtm->rtm_type = RTN_UNREACHABLE;
2401 	else if (rt->rt6i_flags & RTF_LOCAL)
2402 		rtm->rtm_type = RTN_LOCAL;
2403 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
2404 		rtm->rtm_type = RTN_LOCAL;
2405 	else
2406 		rtm->rtm_type = RTN_UNICAST;
2407 	rtm->rtm_flags = 0;
2408 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2409 	rtm->rtm_protocol = rt->rt6i_protocol;
2410 	if (rt->rt6i_flags & RTF_DYNAMIC)
2411 		rtm->rtm_protocol = RTPROT_REDIRECT;
2412 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2413 		rtm->rtm_protocol = RTPROT_KERNEL;
2414 	else if (rt->rt6i_flags & RTF_DEFAULT)
2415 		rtm->rtm_protocol = RTPROT_RA;
2416 
2417 	if (rt->rt6i_flags & RTF_CACHE)
2418 		rtm->rtm_flags |= RTM_F_CLONED;
2419 
2420 	if (dst) {
2421 		NLA_PUT(skb, RTA_DST, 16, dst);
2422 		rtm->rtm_dst_len = 128;
2423 	} else if (rtm->rtm_dst_len)
2424 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2425 #ifdef CONFIG_IPV6_SUBTREES
2426 	if (src) {
2427 		NLA_PUT(skb, RTA_SRC, 16, src);
2428 		rtm->rtm_src_len = 128;
2429 	} else if (rtm->rtm_src_len)
2430 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2431 #endif
2432 	if (iif) {
2433 #ifdef CONFIG_IPV6_MROUTE
2434 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2435 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2436 			if (err <= 0) {
2437 				if (!nowait) {
2438 					if (err == 0)
2439 						return 0;
2440 					goto nla_put_failure;
2441 				} else {
2442 					if (err == -EMSGSIZE)
2443 						goto nla_put_failure;
2444 				}
2445 			}
2446 		} else
2447 #endif
2448 			NLA_PUT_U32(skb, RTA_IIF, iif);
2449 	} else if (dst) {
2450 		struct in6_addr saddr_buf;
2451 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2452 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2453 	}
2454 
2455 	if (rt->rt6i_prefsrc.plen) {
2456 		struct in6_addr saddr_buf;
2457 		saddr_buf = rt->rt6i_prefsrc.addr;
2458 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2459 	}
2460 
2461 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2462 		goto nla_put_failure;
2463 
2464 	rcu_read_lock();
2465 	n = dst_get_neighbour_noref(&rt->dst);
2466 	if (n)
2467 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2468 	rcu_read_unlock();
2469 
2470 	if (rt->dst.dev)
2471 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2472 
2473 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2474 
2475 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2476 		expires = 0;
2477 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2478 		expires = rt->rt6i_expires - jiffies;
2479 	else
2480 		expires = INT_MAX;
2481 
2482 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2483 			       expires, rt->dst.error) < 0)
2484 		goto nla_put_failure;
2485 
2486 	return nlmsg_end(skb, nlh);
2487 
2488 nla_put_failure:
2489 	nlmsg_cancel(skb, nlh);
2490 	return -EMSGSIZE;
2491 }
2492 
2493 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2494 {
2495 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2496 	int prefix;
2497 
2498 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2499 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2500 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2501 	} else
2502 		prefix = 0;
2503 
2504 	return rt6_fill_node(arg->net,
2505 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2506 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2507 		     prefix, 0, NLM_F_MULTI);
2508 }
2509 
2510 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2511 {
2512 	struct net *net = sock_net(in_skb->sk);
2513 	struct nlattr *tb[RTA_MAX+1];
2514 	struct rt6_info *rt;
2515 	struct sk_buff *skb;
2516 	struct rtmsg *rtm;
2517 	struct flowi6 fl6;
2518 	int err, iif = 0;
2519 
2520 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2521 	if (err < 0)
2522 		goto errout;
2523 
2524 	err = -EINVAL;
2525 	memset(&fl6, 0, sizeof(fl6));
2526 
2527 	if (tb[RTA_SRC]) {
2528 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2529 			goto errout;
2530 
2531 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2532 	}
2533 
2534 	if (tb[RTA_DST]) {
2535 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2536 			goto errout;
2537 
2538 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2539 	}
2540 
2541 	if (tb[RTA_IIF])
2542 		iif = nla_get_u32(tb[RTA_IIF]);
2543 
2544 	if (tb[RTA_OIF])
2545 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2546 
2547 	if (iif) {
2548 		struct net_device *dev;
2549 		dev = __dev_get_by_index(net, iif);
2550 		if (!dev) {
2551 			err = -ENODEV;
2552 			goto errout;
2553 		}
2554 	}
2555 
2556 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2557 	if (!skb) {
2558 		err = -ENOBUFS;
2559 		goto errout;
2560 	}
2561 
2562 	/* Reserve room for dummy headers, this skb can pass
2563 	   through good chunk of routing engine.
2564 	 */
2565 	skb_reset_mac_header(skb);
2566 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2567 
2568 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2569 	skb_dst_set(skb, &rt->dst);
2570 
2571 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2572 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2573 			    nlh->nlmsg_seq, 0, 0, 0);
2574 	if (err < 0) {
2575 		kfree_skb(skb);
2576 		goto errout;
2577 	}
2578 
2579 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2580 errout:
2581 	return err;
2582 }
2583 
2584 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2585 {
2586 	struct sk_buff *skb;
2587 	struct net *net = info->nl_net;
2588 	u32 seq;
2589 	int err;
2590 
2591 	err = -ENOBUFS;
2592 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2593 
2594 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2595 	if (!skb)
2596 		goto errout;
2597 
2598 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2599 				event, info->pid, seq, 0, 0, 0);
2600 	if (err < 0) {
2601 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2602 		WARN_ON(err == -EMSGSIZE);
2603 		kfree_skb(skb);
2604 		goto errout;
2605 	}
2606 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2607 		    info->nlh, gfp_any());
2608 	return;
2609 errout:
2610 	if (err < 0)
2611 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2612 }
2613 
2614 static int ip6_route_dev_notify(struct notifier_block *this,
2615 				unsigned long event, void *data)
2616 {
2617 	struct net_device *dev = (struct net_device *)data;
2618 	struct net *net = dev_net(dev);
2619 
2620 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2621 		net->ipv6.ip6_null_entry->dst.dev = dev;
2622 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2623 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2625 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2626 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2627 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2628 #endif
2629 	}
2630 
2631 	return NOTIFY_OK;
2632 }
2633 
2634 /*
2635  *	/proc
2636  */
2637 
2638 #ifdef CONFIG_PROC_FS
2639 
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): nothing in the visible part of this file references
 * this structure; the /proc handlers below use seq_file instead.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2648 
2649 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2650 {
2651 	struct seq_file *m = p_arg;
2652 	struct neighbour *n;
2653 
2654 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2655 
2656 #ifdef CONFIG_IPV6_SUBTREES
2657 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2658 #else
2659 	seq_puts(m, "00000000000000000000000000000000 00 ");
2660 #endif
2661 	rcu_read_lock();
2662 	n = dst_get_neighbour_noref(&rt->dst);
2663 	if (n) {
2664 		seq_printf(m, "%pi6", n->primary_key);
2665 	} else {
2666 		seq_puts(m, "00000000000000000000000000000000");
2667 	}
2668 	rcu_read_unlock();
2669 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2670 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2671 		   rt->dst.__use, rt->rt6i_flags,
2672 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2673 	return 0;
2674 }
2675 
2676 static int ipv6_route_show(struct seq_file *m, void *v)
2677 {
2678 	struct net *net = (struct net *)m->private;
2679 	fib6_clean_all(net, rt6_info_route, 0, m);
2680 	return 0;
2681 }
2682 
2683 static int ipv6_route_open(struct inode *inode, struct file *file)
2684 {
2685 	return single_open_net(inode, file, ipv6_route_show);
2686 }
2687 
2688 static const struct file_operations ipv6_route_proc_fops = {
2689 	.owner		= THIS_MODULE,
2690 	.open		= ipv6_route_open,
2691 	.read		= seq_read,
2692 	.llseek		= seq_lseek,
2693 	.release	= single_release_net,
2694 };
2695 
2696 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2697 {
2698 	struct net *net = (struct net *)seq->private;
2699 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2700 		   net->ipv6.rt6_stats->fib_nodes,
2701 		   net->ipv6.rt6_stats->fib_route_nodes,
2702 		   net->ipv6.rt6_stats->fib_rt_alloc,
2703 		   net->ipv6.rt6_stats->fib_rt_entries,
2704 		   net->ipv6.rt6_stats->fib_rt_cache,
2705 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2706 		   net->ipv6.rt6_stats->fib_discarded_routes);
2707 
2708 	return 0;
2709 }
2710 
2711 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2712 {
2713 	return single_open_net(inode, file, rt6_stats_seq_show);
2714 }
2715 
2716 static const struct file_operations rt6_stats_seq_fops = {
2717 	.owner	 = THIS_MODULE,
2718 	.open	 = rt6_stats_seq_open,
2719 	.read	 = seq_read,
2720 	.llseek	 = seq_lseek,
2721 	.release = single_release_net,
2722 };
2723 #endif	/* CONFIG_PROC_FS */
2724 
2725 #ifdef CONFIG_SYSCTL
2726 
2727 static
2728 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2729 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2730 {
2731 	struct net *net;
2732 	int delay;
2733 	if (!write)
2734 		return -EINVAL;
2735 
2736 	net = (struct net *)ctl->extra1;
2737 	delay = net->ipv6.sysctl.flush_delay;
2738 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2739 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2740 	return 0;
2741 }
2742 
2743 ctl_table ipv6_route_table_template[] = {
2744 	{
2745 		.procname	=	"flush",
2746 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2747 		.maxlen		=	sizeof(int),
2748 		.mode		=	0200,
2749 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2750 	},
2751 	{
2752 		.procname	=	"gc_thresh",
2753 		.data		=	&ip6_dst_ops_template.gc_thresh,
2754 		.maxlen		=	sizeof(int),
2755 		.mode		=	0644,
2756 		.proc_handler	=	proc_dointvec,
2757 	},
2758 	{
2759 		.procname	=	"max_size",
2760 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2761 		.maxlen		=	sizeof(int),
2762 		.mode		=	0644,
2763 		.proc_handler	=	proc_dointvec,
2764 	},
2765 	{
2766 		.procname	=	"gc_min_interval",
2767 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2768 		.maxlen		=	sizeof(int),
2769 		.mode		=	0644,
2770 		.proc_handler	=	proc_dointvec_jiffies,
2771 	},
2772 	{
2773 		.procname	=	"gc_timeout",
2774 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2775 		.maxlen		=	sizeof(int),
2776 		.mode		=	0644,
2777 		.proc_handler	=	proc_dointvec_jiffies,
2778 	},
2779 	{
2780 		.procname	=	"gc_interval",
2781 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2782 		.maxlen		=	sizeof(int),
2783 		.mode		=	0644,
2784 		.proc_handler	=	proc_dointvec_jiffies,
2785 	},
2786 	{
2787 		.procname	=	"gc_elasticity",
2788 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2789 		.maxlen		=	sizeof(int),
2790 		.mode		=	0644,
2791 		.proc_handler	=	proc_dointvec,
2792 	},
2793 	{
2794 		.procname	=	"mtu_expires",
2795 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2796 		.maxlen		=	sizeof(int),
2797 		.mode		=	0644,
2798 		.proc_handler	=	proc_dointvec_jiffies,
2799 	},
2800 	{
2801 		.procname	=	"min_adv_mss",
2802 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2803 		.maxlen		=	sizeof(int),
2804 		.mode		=	0644,
2805 		.proc_handler	=	proc_dointvec,
2806 	},
2807 	{
2808 		.procname	=	"gc_min_interval_ms",
2809 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2810 		.maxlen		=	sizeof(int),
2811 		.mode		=	0644,
2812 		.proc_handler	=	proc_dointvec_ms_jiffies,
2813 	},
2814 	{ }
2815 };
2816 
2817 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2818 {
2819 	struct ctl_table *table;
2820 
2821 	table = kmemdup(ipv6_route_table_template,
2822 			sizeof(ipv6_route_table_template),
2823 			GFP_KERNEL);
2824 
2825 	if (table) {
2826 		table[0].data = &net->ipv6.sysctl.flush_delay;
2827 		table[0].extra1 = net;
2828 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2829 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2830 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2831 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2832 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2833 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2834 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2835 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2836 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2837 	}
2838 
2839 	return table;
2840 }
2841 #endif
2842 
2843 static int __net_init ip6_route_net_init(struct net *net)
2844 {
2845 	int ret = -ENOMEM;
2846 
2847 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2848 	       sizeof(net->ipv6.ip6_dst_ops));
2849 
2850 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2851 		goto out_ip6_dst_ops;
2852 
2853 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2854 					   sizeof(*net->ipv6.ip6_null_entry),
2855 					   GFP_KERNEL);
2856 	if (!net->ipv6.ip6_null_entry)
2857 		goto out_ip6_dst_entries;
2858 	net->ipv6.ip6_null_entry->dst.path =
2859 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2860 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2861 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2862 			 ip6_template_metrics, true);
2863 
2864 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2865 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2866 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2867 					       GFP_KERNEL);
2868 	if (!net->ipv6.ip6_prohibit_entry)
2869 		goto out_ip6_null_entry;
2870 	net->ipv6.ip6_prohibit_entry->dst.path =
2871 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2872 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2873 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2874 			 ip6_template_metrics, true);
2875 
2876 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2877 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2878 					       GFP_KERNEL);
2879 	if (!net->ipv6.ip6_blk_hole_entry)
2880 		goto out_ip6_prohibit_entry;
2881 	net->ipv6.ip6_blk_hole_entry->dst.path =
2882 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2883 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2884 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2885 			 ip6_template_metrics, true);
2886 #endif
2887 
2888 	net->ipv6.sysctl.flush_delay = 0;
2889 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2890 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2891 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2892 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2893 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2894 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2895 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2896 
2897 #ifdef CONFIG_PROC_FS
2898 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2899 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2900 #endif
2901 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2902 
2903 	ret = 0;
2904 out:
2905 	return ret;
2906 
2907 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2908 out_ip6_prohibit_entry:
2909 	kfree(net->ipv6.ip6_prohibit_entry);
2910 out_ip6_null_entry:
2911 	kfree(net->ipv6.ip6_null_entry);
2912 #endif
2913 out_ip6_dst_entries:
2914 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2915 out_ip6_dst_ops:
2916 	goto out;
2917 }
2918 
2919 static void __net_exit ip6_route_net_exit(struct net *net)
2920 {
2921 #ifdef CONFIG_PROC_FS
2922 	proc_net_remove(net, "ipv6_route");
2923 	proc_net_remove(net, "rt6_stats");
2924 #endif
2925 	kfree(net->ipv6.ip6_null_entry);
2926 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2927 	kfree(net->ipv6.ip6_prohibit_entry);
2928 	kfree(net->ipv6.ip6_blk_hole_entry);
2929 #endif
2930 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2931 }
2932 
2933 static struct pernet_operations ip6_route_net_ops = {
2934 	.init = ip6_route_net_init,
2935 	.exit = ip6_route_net_exit,
2936 };
2937 
2938 static struct notifier_block ip6_route_dev_notifier = {
2939 	.notifier_call = ip6_route_dev_notify,
2940 	.priority = 0,
2941 };
2942 
2943 int __init ip6_route_init(void)
2944 {
2945 	int ret;
2946 
2947 	ret = -ENOMEM;
2948 	ip6_dst_ops_template.kmem_cachep =
2949 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2950 				  SLAB_HWCACHE_ALIGN, NULL);
2951 	if (!ip6_dst_ops_template.kmem_cachep)
2952 		goto out;
2953 
2954 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2955 	if (ret)
2956 		goto out_kmem_cache;
2957 
2958 	ret = register_pernet_subsys(&ip6_route_net_ops);
2959 	if (ret)
2960 		goto out_dst_entries;
2961 
2962 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2963 
2964 	/* Registering of the loopback is done before this portion of code,
2965 	 * the loopback reference in rt6_info will not be taken, do it
2966 	 * manually for init_net */
2967 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2968 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2969   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2970 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2971 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2972 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2973 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2974   #endif
2975 	ret = fib6_init();
2976 	if (ret)
2977 		goto out_register_subsys;
2978 
2979 	ret = xfrm6_init();
2980 	if (ret)
2981 		goto out_fib6_init;
2982 
2983 	ret = fib6_rules_init();
2984 	if (ret)
2985 		goto xfrm6_init;
2986 
2987 	ret = -ENOBUFS;
2988 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2989 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2990 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2991 		goto fib6_rules_init;
2992 
2993 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2994 	if (ret)
2995 		goto fib6_rules_init;
2996 
2997 out:
2998 	return ret;
2999 
3000 fib6_rules_init:
3001 	fib6_rules_cleanup();
3002 xfrm6_init:
3003 	xfrm6_fini();
3004 out_fib6_init:
3005 	fib6_gc_cleanup();
3006 out_register_subsys:
3007 	unregister_pernet_subsys(&ip6_route_net_ops);
3008 out_dst_entries:
3009 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3010 out_kmem_cache:
3011 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3012 	goto out;
3013 }
3014 
3015 void ip6_route_cleanup(void)
3016 {
3017 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3018 	fib6_rules_cleanup();
3019 	xfrm6_fini();
3020 	fib6_gc_cleanup();
3021 	unregister_pernet_subsys(&ip6_route_net_ops);
3022 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3023 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3024 }
3025