xref: /linux/net/ipv6/route.c (revision 44b111b519160e33fdc41eadb39af86a24707edf)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* Debug build: both macros emit via printk. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Tracing disabled: macros compile to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
75 
76 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 				    const struct in6_addr *dest);
78 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
80 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
81 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82 static void		ip6_dst_destroy(struct dst_entry *);
83 static void		ip6_dst_ifdown(struct dst_entry *,
84 				       struct net_device *dev, int how);
85 static int		 ip6_dst_gc(struct dst_ops *ops);
86 
87 static int		ip6_pkt_discard(struct sk_buff *skb);
88 static int		ip6_pkt_discard_out(struct sk_buff *skb);
89 static void		ip6_link_failure(struct sk_buff *skb);
90 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91 
92 #ifdef CONFIG_IPV6_ROUTE_INFO
93 static struct rt6_info *rt6_add_route_info(struct net *net,
94 					   const struct in6_addr *prefix, int prefixlen,
95 					   const struct in6_addr *gwaddr, int ifindex,
96 					   unsigned pref);
97 static struct rt6_info *rt6_get_route_info(struct net *net,
98 					   const struct in6_addr *prefix, int prefixlen,
99 					   const struct in6_addr *gwaddr, int ifindex);
100 #endif
101 
/*
 * dst_ops->cow_metrics handler: give this route private, writable
 * metrics backed by its inet_peer entry.  Only host routes (DST_HOST)
 * get peer-backed metrics; all other routes keep the shared read-only
 * metrics and this returns NULL.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return NULL;

	/* Lazily attach (creating if needed) the peer for this destination. */
	if (!rt->rt6i_peer)
		rt6_bind_peer(rt, 1);

	peer = rt->rt6i_peer;
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First user of this peer's metrics: seed from the old set. */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the install race; use whatever won, unless it
			 * is still a read-only set. */
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
134 
135 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136 {
137 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138 }
139 
/*
 * dst_ops for ordinary IPv6 routes.  This is a template: each network
 * namespace receives its own copy (net->ipv6.ip6_dst_ops).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
157 
158 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
159 {
160 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161 
162 	return mtu ? : dst->dev->mtu;
163 }
164 
/* Blackhole routes never learn PMTU: intentionally a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
168 
/* Blackhole routes have no writable metrics; always refuse the COW. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
174 
/*
 * dst_ops for blackhole dsts created by ip6_blackhole_route(); these
 * discard traffic and never update PMTU or metrics.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
186 
/* Shared metrics for the reject/blackhole route templates: hop limit 255. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};
190 
/*
 * Catch-all "null" route, matched when nothing else fits: rejects the
 * packet with -ENETUNREACH.  Template copied per namespace; ~0 metric
 * keeps it sorted last among siblings.
 */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
205 
206 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
207 
208 static int ip6_pkt_prohibit(struct sk_buff *skb);
209 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
210 
/* Policy-routing "prohibit" action: reject traffic with -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Policy-routing "blackhole" action: silently discard (-EINVAL). */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
240 
241 #endif
242 
/*
 * Allocate a rt6_info with the given dst_ops and zero every rt6-specific
 * field after the embedded dst_entry (rt6i_table is the first such field;
 * dst_alloc() has already initialized the dst head).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
					     struct net_device *dev,
					     int flags)
{
	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);

	if (rt != NULL)
		memset(&rt->rt6i_table, 0,
			sizeof(*rt) - sizeof(struct dst_entry));

	return rt;
}
256 
/*
 * dst_ops->destroy: drop the references a dying rt6_info holds on its
 * inet6_dev and inet_peer, and free non-peer-backed metrics.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	/* Host routes' metrics live in the inet_peer, freed with the peer;
	 * everything else owns its metrics array. */
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}
275 
/* Generation counter for peer bindings; compared in ip6_dst_check() to
 * decide whether a cached route must re-bind its inet_peer. */
static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);

static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}
282 
/*
 * Attach the inet_peer entry for this route's destination address.
 * The cmpxchg makes concurrent binders safe: the loser drops the extra
 * peer reference, the winner records the current peer generation.
 *
 * @create: passed through to inet_getpeer_v6(); when 0, only an
 *          already-existing peer is attached.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt6i_peer_genid = rt6_peer_genid();
}
293 
/*
 * dst_ops->ifdown: the device backing this route is going away.
 * Re-parent the route's inet6_dev reference onto the namespace's
 * loopback device so the dying device can be released.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
311 
312 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313 {
314 	return (rt->rt6i_flags & RTF_EXPIRES) &&
315 		time_after(jiffies, rt->rt6i_expires);
316 }
317 
318 static inline int rt6_need_strict(const struct in6_addr *daddr)
319 {
320 	return ipv6_addr_type(daddr) &
321 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
322 }
323 
324 /*
325  *	Route lookup. Any table->tb6_lock is implied.
326  */
327 
/*
 * Walk the sibling routes of one fib6_node and pick the entry matching
 * the requested outgoing interface (oif) and/or source address.  Falls
 * back, in order, to the best loopback candidate, the null entry (when
 * RT6_LOOKUP_F_IFACE demands a strict match), or the list head.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* No device or source constraint at all: head of the list wins. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;	/* exact device match */
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route: acceptable stand-in only if
				 * its idev points at the requested interface,
				 * or nothing better was recorded yet. */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on ownership of the source address. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict interface match requested but nothing fit. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
374 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Kick off a neighbour solicitation towards a router whose reachability
 * is unknown, so a better next hop can be selected later.  Rate-limited
 * by rtr_probe_interval via neigh->updated.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	/* Nothing to probe if there is no neighbour or it is already valid. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp first so concurrent probers back off. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Solicit via the target's solicited-node multicast group. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without router preferences, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414 
415 /*
416  * Default Router Selection (RFC 2461 6.3.6)
417  */
418 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
419 {
420 	struct net_device *dev = rt->rt6i_dev;
421 	if (!oif || dev->ifindex == oif)
422 		return 2;
423 	if ((dev->flags & IFF_LOOPBACK) &&
424 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 		return 1;
426 	return 0;
427 }
428 
/*
 * Neighbour-reachability score for router selection:
 *   2 - neighbour state is valid,
 *   1 - no next hop needed, or state merely unknown,
 *   0 - no neighbour, or (with router prefs) a failed one.
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	rcu_read_lock();
	neigh = dst_get_neighbour(&rt->dst);
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	rcu_read_unlock();
	return m;
}
455 
/*
 * Combined score for default-router selection (RFC 2461 6.3.6):
 * device match in the low bits, router preference above them, with
 * -1 meaning "unusable" under the given strictness flags.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* Fold the RA-advertised router preference into the score. */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}
472 
/*
 * Compare @rt against the current best @match, updating *mpri.  While
 * reachability is required, also probe whichever of the two loses, so
 * its neighbour state can improve for later selections.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		/* New winner: probe the previous best instead. */
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
497 
/*
 * Score all sibling routes with the given metric, starting the walk at
 * the round-robin head @rr_head and wrapping around via fn->leaf, so
 * equal-cost routes take turns being preferred.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	/* First leg: from the round-robin head to the end of the metric run. */
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	/* Second leg: from the list head back up to the round-robin head. */
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
515 
/*
 * Select the best route from a fib6_node, honouring the per-node
 * round-robin pointer (fn->rr_ptr).  When nothing reachable matched,
 * advance the round-robin pointer so the next lookup tries another
 * equal-cost sibling.  Returns the null entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
548 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement
 * (RFC 4191): validate the option, then add, refresh, or delete the
 * corresponding RTF_ROUTEINFO route for the advertising router.
 *
 * @opt/@len: the raw option; @gwaddr: the advertising router's address.
 * Returns 0 on success, -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 2 option units */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* Clamp the advertised lifetime to a safe jiffies range. */
	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* Existing route: refresh the preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			/* NOTE(review): assumes addrconf_timeout_fixup() kept
			 * HZ * lifetime within range — confirm. */
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
622 
/*
 * Shared backtracking step for the fib6 lookup loops: when the lookup
 * produced the null entry, climb towards the tree root (descending into
 * source-address subtrees on the way) and retry through the caller's
 * "restart" label; give up through "out" once the root is reached.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
640 
/*
 * Simple (no-clone) route lookup in one table, used by rt6_lookup().
 * Holds tb6_lock for reading; BACKTRACK supplies the restart/out
 * labels used below.  Always returns a held route (possibly the null
 * entry).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
660 
661 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 			    const struct in6_addr *saddr, int oif, int strict)
663 {
664 	struct flowi6 fl6 = {
665 		.flowi6_oif = oif,
666 		.daddr = *daddr,
667 	};
668 	struct dst_entry *dst;
669 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
670 
671 	if (saddr) {
672 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
673 		flags |= RT6_LOOKUP_F_HAS_SADDR;
674 	}
675 
676 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
677 	if (dst->error == 0)
678 		return (struct rt6_info *) dst;
679 
680 	dst_release(dst);
681 
682 	return NULL;
683 }
684 
685 EXPORT_SYMBOL(rt6_lookup);
686 
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	/* fib6_add() takes ownership of rt; on failure rt is freed
	 * (see the comment above). */
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
705 
/* Insert a route with default netlink info (no notifier portid/seq). */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_ins_rt(rt, &info);
}
713 
/*
 * Clone @ort into a host (/128) RTF_CACHE route for @daddr and bind a
 * neighbour entry to it.  If the neighbour table is full, one forced
 * dst GC pass is attempted (only when not in softirq context) before
 * giving up.  Returns the new route or NULL on failure.
 */
static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		struct neighbour *neigh;
		/* GC retry is only safe outside softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			/* Cloning a non-host route to a host route for the
			 * route's own prefix address marks an anycast. */
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Temporarily make GC maximally aggressive,
				 * run it, then restore the sysctls. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		dst_set_neighbour(&rt->dst, neigh);

	}

	return rt;
}
780 
/*
 * Lighter-weight clone for routes that need no new neighbour binding:
 * copy @ort for @daddr, mark it RTF_CACHE, and share the original's
 * neighbour entry (cloned reference).
 */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
	}
	return rt;
}
792 
/*
 * Full route resolution in one table: select the best route, and if it
 * is not already a cached host route, clone it (COW or plain clone),
 * insert the clone, and retry on insertion races.  When forwarding is
 * off, a first pass requires reachable routers and a second pass drops
 * that requirement.  Always returns a held route.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Hosts prefer reachable routers; routers (forwarding on) don't. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	/* Null entry or an already-cached clone can be returned directly. */
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	/* Clone outside the lock: COW when a neighbour must be bound,
	 * plain clone when only narrowing to a host route. */
	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable matched: retry without that demand. */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
862 
/* fib-rules callback for input lookups: constrain by incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
868 
/*
 * Route an incoming packet: build a flow from its IPv6 header and
 * attach the looked-up dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		/* First 32 bits of the header minus version = flow info. */
		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	/* Scoped destinations need strict interface matching (except on
	 * PIM register pseudo-devices). */
	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
}
888 
/* fib-rules callback for output lookups: constrain by outgoing interface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
894 
/*
 * Route an outgoing flow.  Strict interface matching is forced for
 * bound sockets and scoped destinations; source-address preferences
 * from the socket are honoured when no source is given.
 */
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
912 
/*
 * Replace @dst_orig with a blackhole copy that discards all traffic
 * but preserves the original's metrics and addressing, e.g. for flows
 * whose real route must not be used.  Consumes the reference on
 * @dst_orig; returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
	if (rt) {
		/* Zero the rt6-specific tail, as ip6_dst_alloc() does. */
		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on an entry still holding a ref
		 * appears to hand it to the dst GC rather than freeing it
		 * immediately — confirm against dst_free() semantics. */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
952 
/*
 *	Destination cache support functions
 */

/*
 * dst_ops->check: a cached route is valid while its fib6_node's serial
 * number still matches the cookie handed out at lookup time.  A stale
 * peer generation triggers a peer re-bind but does not invalidate.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt->rt6i_peer)
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}
973 
/*
 * dst_ops->negative_advice: the caller suspects this route is bad.
 * Expired cache clones are deleted outright; non-cache routes are
 * simply released so a fresh lookup happens next time.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
991 
/*
 * dst_ops->link_failure: report unreachability to the sender and age
 * out the offending route — expire cache clones immediately, or bump
 * the fib node serial for default routes so cached lookups re-check.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			/* Invalidate every dst cached against this node. */
			rt->rt6i_node->fn_sernum = -1;
	}
}
1007 
/*
 * dst_ops->update_pmtu: record a smaller path MTU on a host route.
 * MTUs below the IPv6 minimum are clamped to IPV6_MIN_MTU with the
 * ALLFRAG feature set (RFC 2460: add fragment headers to everything).
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
	}
}
1023 
/*
 * dst_ops->default_advmss: advertised MSS derived from the path MTU,
 * bounded below by the ip6_rt_min_advmss sysctl and above by the
 * maximal non-jumbo payload.
 */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
1045 
/*
 * dst_ops->mtu: the explicitly-set route metric when present,
 * otherwise the device's IPv6 MTU, falling back to IPV6_MIN_MTU.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	struct inet6_dev *idev;
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = IPV6_MIN_MTU;

	/* idev lookup is RCU-protected; the device may be mid-teardown. */
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
1064 
1065 static struct dst_entry *icmp6_dst_gc_list;
1066 static DEFINE_SPINLOCK(icmp6_dst_lock);
1067 
1068 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1069 				  struct neighbour *neigh,
1070 				  const struct in6_addr *addr)
1071 {
1072 	struct rt6_info *rt;
1073 	struct inet6_dev *idev = in6_dev_get(dev);
1074 	struct net *net = dev_net(dev);
1075 
1076 	if (unlikely(idev == NULL))
1077 		return NULL;
1078 
1079 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1080 	if (unlikely(rt == NULL)) {
1081 		in6_dev_put(idev);
1082 		goto out;
1083 	}
1084 
1085 	if (neigh)
1086 		neigh_hold(neigh);
1087 	else {
1088 		neigh = ndisc_get_neigh(dev, addr);
1089 		if (IS_ERR(neigh))
1090 			neigh = NULL;
1091 	}
1092 
1093 	rt->dst.flags |= DST_HOST;
1094 	rt->dst.output  = ip6_output;
1095 	dst_set_neighbour(&rt->dst, neigh);
1096 	atomic_set(&rt->dst.__refcnt, 1);
1097 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1098 	rt->rt6i_dst.plen = 128;
1099 	rt->rt6i_idev     = idev;
1100 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1101 
1102 	spin_lock_bh(&icmp6_dst_lock);
1103 	rt->dst.next = icmp6_dst_gc_list;
1104 	icmp6_dst_gc_list = &rt->dst;
1105 	spin_unlock_bh(&icmp6_dst_lock);
1106 
1107 	fib6_force_start_gc(net);
1108 
1109 out:
1110 	return &rt->dst;
1111 }
1112 
/*
 * Reap unreferenced entries from icmp6_dst_gc_list.  Returns non-zero
 * while referenced entries remain (i.e. another pass is needed).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free this entry. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1135 
/*
 * Remove every entry on icmp6_dst_gc_list for which @func returns
 * true (e.g. entries bound to a disappearing device).
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1154 
/*
 * dst_ops->gc for IPv6 routes: run fib6 garbage collection when the
 * table is over its size limit and the minimum GC interval has passed.
 * ip6_rt_gc_expire grows on each pass and decays by the elasticity
 * sysctl, making successive passes progressively more aggressive.
 * Returns non-zero while the table is still over rt_max_size.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponential decay of GC aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1181 
1182 /* Clean host part of a prefix. Not necessary in radix tree,
1183    but results in cleaner routing tables.
1184 
1185    Remove it only when all the things will work!
1186  */
1187 
1188 int ip6_dst_hoplimit(struct dst_entry *dst)
1189 {
1190 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1191 	if (hoplimit == 0) {
1192 		struct net_device *dev = dst->dev;
1193 		struct inet6_dev *idev;
1194 
1195 		rcu_read_lock();
1196 		idev = __in6_dev_get(dev);
1197 		if (idev)
1198 			hoplimit = idev->cnf.hop_limit;
1199 		else
1200 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1201 		rcu_read_unlock();
1202 	}
1203 	return hoplimit;
1204 }
1205 EXPORT_SYMBOL(ip6_dst_hoplimit);
1206 
/*
 *	Add the route described by @cfg to the FIB.
 *
 *	On success the route is inserted via __ip6_ins_rt() and the
 *	device/idev/table references taken here are transferred to it.
 *	On any failure all references are dropped before returning a
 *	negative errno.
 */

int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-based routing requires subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->dst.flags |= DST_HOST;

	/* Non-host routes with metrics need their own metrics array
	 * (host routes keep the one set up by ip6_dst_alloc).
	 */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
					      && !(cfg->fc_flags&RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The non-link-local gateway must itself be
			 * reachable via an on-link (non-gateway) route.
			 */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	/* A requested preferred source must be an address configured
	 * on the egress device.
	 */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n)) {
			err = PTR_ERR(n);
			goto out;
		}
		dst_set_neighbour(&rt->dst, n);
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply user-supplied metrics (RTA_METRICS attributes). */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Error path: drop everything acquired above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1431 
1432 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1433 {
1434 	int err;
1435 	struct fib6_table *table;
1436 	struct net *net = dev_net(rt->rt6i_dev);
1437 
1438 	if (rt == net->ipv6.ip6_null_entry)
1439 		return -ENOENT;
1440 
1441 	table = rt->rt6i_table;
1442 	write_lock_bh(&table->tb6_lock);
1443 
1444 	err = fib6_del(rt, info);
1445 	dst_release(&rt->dst);
1446 
1447 	write_unlock_bh(&table->tb6_lock);
1448 
1449 	return err;
1450 }
1451 
1452 int ip6_del_rt(struct rt6_info *rt)
1453 {
1454 	struct nl_info info = {
1455 		.nl_net = dev_net(rt->rt6i_dev),
1456 	};
1457 	return __ip6_del_rt(rt, &info);
1458 }
1459 
1460 static int ip6_route_del(struct fib6_config *cfg)
1461 {
1462 	struct fib6_table *table;
1463 	struct fib6_node *fn;
1464 	struct rt6_info *rt;
1465 	int err = -ESRCH;
1466 
1467 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1468 	if (table == NULL)
1469 		return err;
1470 
1471 	read_lock_bh(&table->tb6_lock);
1472 
1473 	fn = fib6_locate(&table->tb6_root,
1474 			 &cfg->fc_dst, cfg->fc_dst_len,
1475 			 &cfg->fc_src, cfg->fc_src_len);
1476 
1477 	if (fn) {
1478 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1479 			if (cfg->fc_ifindex &&
1480 			    (rt->rt6i_dev == NULL ||
1481 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1482 				continue;
1483 			if (cfg->fc_flags & RTF_GATEWAY &&
1484 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1485 				continue;
1486 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1487 				continue;
1488 			dst_hold(&rt->dst);
1489 			read_unlock_bh(&table->tb6_lock);
1490 
1491 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1492 		}
1493 	}
1494 	read_unlock_bh(&table->tb6_lock);
1495 
1496 	return err;
1497 }
1498 
/*
 *	Handle redirects
 */

/* Extended flow key for redirect route lookups: the standard flowi6
 * plus the address of the router that sent the redirect.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1506 
/* Table-lookup callback used by ip6_route_redirect(): find the route
 * currently used for the redirect's destination whose next hop is the
 * router that sent the redirect.  Falls back to the null entry when
 * no candidate matches.  Returns the route with a reference held.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* NOTE(review): BACKTRACK (defined elsewhere in this file) may
	 * branch back to the restart label to try a less specific node.
	 */
	BACKTRACK(net, &fl6->saddr);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1560 
/* Look up the route a redirect applies to, keyed by destination,
 * source, the redirecting router (@gateway) and the ingress device.
 */
static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
					   const struct in6_addr *src,
					   const struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl6 = {
			.flowi6_oif = dev->ifindex,
			.daddr = *dest,
			.saddr = *src,
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	/* Destinations needing strict scope matching must also match
	 * on the interface.
	 */
	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
						   flags, __ip6_route_redirect);
}
1584 
/* Process a validated ICMPv6 redirect: update the neighbour cache for
 * the new next hop and install a cloned RTF_CACHE route for @dest via
 * @neigh.  @on_link indicates the target is directly reachable.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == dst_get_neighbour_raw(&rt->dst))
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cached route is no longer valid: remove it.
	 * ip6_del_rt() consumes our reference, so return directly.
	 */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1650 
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */

/* Apply a reported path MTU @pmtu for (daddr, saddr), optionally
 * scoped to @ifindex (0 = any interface): update the cached host
 * route, or create one carrying the new MTU.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	/* Drop an expired route and retry so we operate on a live one. */
	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only an MTU decrease is meaningful here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1740 
/* Entry point for ICMPv6 Packet Too Big handling: apply @pmtu for the
 * flow both unscoped and scoped to the receiving device.
 */
void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1761 
1762 /*
1763  *	Misc support functions
1764  */
1765 
1766 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1767 				    const struct in6_addr *dest)
1768 {
1769 	struct net *net = dev_net(ort->rt6i_dev);
1770 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1771 					    ort->dst.dev, 0);
1772 
1773 	if (rt) {
1774 		rt->dst.input = ort->dst.input;
1775 		rt->dst.output = ort->dst.output;
1776 		rt->dst.flags |= DST_HOST;
1777 
1778 		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1779 		rt->rt6i_dst.plen = 128;
1780 		dst_copy_metrics(&rt->dst, &ort->dst);
1781 		rt->dst.error = ort->dst.error;
1782 		rt->rt6i_idev = ort->rt6i_idev;
1783 		if (rt->rt6i_idev)
1784 			in6_dev_hold(rt->rt6i_idev);
1785 		rt->dst.lastuse = jiffies;
1786 		rt->rt6i_expires = 0;
1787 
1788 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1789 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1790 		rt->rt6i_metric = 0;
1791 
1792 #ifdef CONFIG_IPV6_SUBTREES
1793 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1794 #endif
1795 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1796 		rt->rt6i_table = ort->rt6i_table;
1797 	}
1798 	return rt;
1799 }
1800 
1801 #ifdef CONFIG_IPV6_ROUTE_INFO
1802 static struct rt6_info *rt6_get_route_info(struct net *net,
1803 					   const struct in6_addr *prefix, int prefixlen,
1804 					   const struct in6_addr *gwaddr, int ifindex)
1805 {
1806 	struct fib6_node *fn;
1807 	struct rt6_info *rt = NULL;
1808 	struct fib6_table *table;
1809 
1810 	table = fib6_get_table(net, RT6_TABLE_INFO);
1811 	if (table == NULL)
1812 		return NULL;
1813 
1814 	write_lock_bh(&table->tb6_lock);
1815 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1816 	if (!fn)
1817 		goto out;
1818 
1819 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1820 		if (rt->rt6i_dev->ifindex != ifindex)
1821 			continue;
1822 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1823 			continue;
1824 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1825 			continue;
1826 		dst_hold(&rt->dst);
1827 		break;
1828 	}
1829 out:
1830 	write_unlock_bh(&table->tb6_lock);
1831 	return rt;
1832 }
1833 
1834 static struct rt6_info *rt6_add_route_info(struct net *net,
1835 					   const struct in6_addr *prefix, int prefixlen,
1836 					   const struct in6_addr *gwaddr, int ifindex,
1837 					   unsigned pref)
1838 {
1839 	struct fib6_config cfg = {
1840 		.fc_table	= RT6_TABLE_INFO,
1841 		.fc_metric	= IP6_RT_PRIO_USER,
1842 		.fc_ifindex	= ifindex,
1843 		.fc_dst_len	= prefixlen,
1844 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1845 				  RTF_UP | RTF_PREF(pref),
1846 		.fc_nlinfo.pid = 0,
1847 		.fc_nlinfo.nlh = NULL,
1848 		.fc_nlinfo.nl_net = net,
1849 	};
1850 
1851 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1852 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1853 
1854 	/* We should treat it as a default route if prefix length is 0. */
1855 	if (!prefixlen)
1856 		cfg.fc_flags |= RTF_DEFAULT;
1857 
1858 	ip6_route_add(&cfg);
1859 
1860 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1861 }
1862 #endif
1863 
/* Find the autoconfigured default route via gateway @addr on @dev in
 * the DFLT table.  Returns the route with a reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		/* Must match device, gateway, and carry both
		 * RTF_ADDRCONF and RTF_DEFAULT.
		 */
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1885 
/* Install a default route via @gwaddr on @dev learned from a router
 * advertisement, and return it (looked up again, with a reference
 * held), or NULL.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* Insertion may fail; report whatever is in the table. */
	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1907 
/* Remove every route carrying RTF_DEFAULT or RTF_ADDRCONF from the
 * DFLT table.  The table lock must be dropped around each deletion,
 * so the scan restarts from the head after every ip6_del_rt().
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1930 
/* Translate a legacy ioctl in6_rtmsg into a fib6_config.  Routes
 * created this way always target the main table.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
1951 
1952 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1953 {
1954 	struct fib6_config cfg;
1955 	struct in6_rtmsg rtmsg;
1956 	int err;
1957 
1958 	switch(cmd) {
1959 	case SIOCADDRT:		/* Add a route */
1960 	case SIOCDELRT:		/* Delete a route */
1961 		if (!capable(CAP_NET_ADMIN))
1962 			return -EPERM;
1963 		err = copy_from_user(&rtmsg, arg,
1964 				     sizeof(struct in6_rtmsg));
1965 		if (err)
1966 			return -EFAULT;
1967 
1968 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1969 
1970 		rtnl_lock();
1971 		switch (cmd) {
1972 		case SIOCADDRT:
1973 			err = ip6_route_add(&cfg);
1974 			break;
1975 		case SIOCDELRT:
1976 			err = ip6_route_del(&cfg);
1977 			break;
1978 		default:
1979 			err = -EINVAL;
1980 		}
1981 		rtnl_unlock();
1982 
1983 		return err;
1984 	}
1985 
1986 	return -EINVAL;
1987 }
1988 
1989 /*
1990  *	Drop the packet on the floor
1991  */
1992 
/* Shared drop path for the "null" routes: bump the appropriate SNMP
 * counter, send an ICMPv6 destination-unreachable with @code, and
 * free the skb.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		/* An unspecified destination counts as an address error,
		 * not a routing failure.
		 */
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2015 
/* dst.input handler for blackhole routes (receive side). */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2020 
/* dst.output handler for blackhole routes (transmit side). */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2026 
2027 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2028 
/* dst.input handler for prohibit routes (receive side). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2033 
/* dst.output handler for prohibit routes (transmit side). */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2039 
2040 #endif
2041 
2042 /*
2043  *	Allocate a dst for local (unicast / anycast) address.
2044  */
2045 
/* Allocate the host route used for a local unicast or anycast address
 * (@anycast nonzero selects RTF_ANYCAST).  The route is bound to the
 * loopback device and returned with one reference; on failure an
 * ERR_PTR is returned.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    net->loopback_dev, 0);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6:  Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		/* dst_free() tears down everything set up above. */
		dst_free(&rt->dst);

		return ERR_CAST(neigh);
	}
	dst_set_neighbour(&rt->dst, neigh);

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2091 
2092 int ip6_route_get_saddr(struct net *net,
2093 			struct rt6_info *rt,
2094 			const struct in6_addr *daddr,
2095 			unsigned int prefs,
2096 			struct in6_addr *saddr)
2097 {
2098 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2099 	int err = 0;
2100 	if (rt->rt6i_prefsrc.plen)
2101 		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2102 	else
2103 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2104 					 daddr, prefs, saddr);
2105 	return err;
2106 }
2107 
/* Remove a deleted address from prefsrc entries.
 *
 * Walker argument for fib6_remove_prefsrc(): clear the preferred
 * source on routes bound to @dev (all devices when NULL) whose
 * prefsrc equals @addr.
 */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2114 
2115 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2116 {
2117 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2118 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2119 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2120 
2121 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2122 	    rt != net->ipv6.ip6_null_entry &&
2123 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2124 		/* remove prefsrc entry */
2125 		rt->rt6i_prefsrc.plen = 0;
2126 	}
2127 	return 0;
2128 }
2129 
/* Called when address @ifp is removed: scrub it from the prefsrc of
 * every route on its device.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
}
2140 
/* Walker argument for fib6_ifdown(): the device going down (NULL
 * matches every device) and its network namespace.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2145 
2146 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2147 {
2148 	const struct arg_dev_net *adn = arg;
2149 	const struct net_device *dev = adn->dev;
2150 
2151 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2152 	    rt != adn->net->ipv6.ip6_null_entry) {
2153 		RT6_TRACE("deleted by ifdown %p\n", rt);
2154 		return -1;
2155 	}
2156 	return 0;
2157 }
2158 
2159 void rt6_ifdown(struct net *net, struct net_device *dev)
2160 {
2161 	struct arg_dev_net adn = {
2162 		.dev = dev,
2163 		.net = net,
2164 	};
2165 
2166 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2167 	icmp6_clean_all(fib6_ifdown, &adn);
2168 }
2169 
/* Walker argument for rt6_mtu_change_route(): the device whose MTU
 * changed and its new MTU.
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2175 
/* Per-route callback for rt6_mtu_change(): propagate a device MTU
 * change into the route's RTAX_MTU metric.  Always returns 0 so the
 * tree walk continues.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2214 
2215 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2216 {
2217 	struct rt6_mtu_change_arg arg = {
2218 		.dev = dev,
2219 		.mtu = mtu,
2220 	};
2221 
2222 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2223 }
2224 
/* Netlink attribute validation policy for IPv6 RTM_{NEW,DEL,GET}ROUTE. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2232 
/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib6_config consumed by ip6_route_add()/ip6_route_del().
 *
 * Returns 0 on success or a negative errno (attribute parse failure,
 * or -EINVAL when RTA_DST/RTA_SRC are shorter than the prefix length
 * announced in the rtmsg header).
 *
 * NOTE: cfg->fc_mx points into the skb's RTA_METRICS payload; the
 * caller must finish using cfg before the skb is released.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	/* default error for the manual length checks below */
	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	/* Remember who asked, so the change can be echoed back via netlink */
	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		/* length already validated by rtm_ipv6_policy */
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* only rtm_dst_len bits are meaningful; require that many bytes */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE (32-bit) overrides the 8-bit rtm_table field */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2308 
2309 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2310 {
2311 	struct fib6_config cfg;
2312 	int err;
2313 
2314 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2315 	if (err < 0)
2316 		return err;
2317 
2318 	return ip6_route_del(&cfg);
2319 }
2320 
2321 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2322 {
2323 	struct fib6_config cfg;
2324 	int err;
2325 
2326 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2327 	if (err < 0)
2328 		return err;
2329 
2330 	return ip6_route_add(&cfg);
2331 }
2332 
2333 static inline size_t rt6_nlmsg_size(void)
2334 {
2335 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2336 	       + nla_total_size(16) /* RTA_SRC */
2337 	       + nla_total_size(16) /* RTA_DST */
2338 	       + nla_total_size(16) /* RTA_GATEWAY */
2339 	       + nla_total_size(16) /* RTA_PREFSRC */
2340 	       + nla_total_size(4) /* RTA_TABLE */
2341 	       + nla_total_size(4) /* RTA_IIF */
2342 	       + nla_total_size(4) /* RTA_OIF */
2343 	       + nla_total_size(4) /* RTA_PRIORITY */
2344 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2345 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2346 }
2347 
2348 static int rt6_fill_node(struct net *net,
2349 			 struct sk_buff *skb, struct rt6_info *rt,
2350 			 struct in6_addr *dst, struct in6_addr *src,
2351 			 int iif, int type, u32 pid, u32 seq,
2352 			 int prefix, int nowait, unsigned int flags)
2353 {
2354 	struct rtmsg *rtm;
2355 	struct nlmsghdr *nlh;
2356 	long expires;
2357 	u32 table;
2358 	struct neighbour *n;
2359 
2360 	if (prefix) {	/* user wants prefix routes only */
2361 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2362 			/* success since this is not a prefix route */
2363 			return 1;
2364 		}
2365 	}
2366 
2367 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2368 	if (nlh == NULL)
2369 		return -EMSGSIZE;
2370 
2371 	rtm = nlmsg_data(nlh);
2372 	rtm->rtm_family = AF_INET6;
2373 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2374 	rtm->rtm_src_len = rt->rt6i_src.plen;
2375 	rtm->rtm_tos = 0;
2376 	if (rt->rt6i_table)
2377 		table = rt->rt6i_table->tb6_id;
2378 	else
2379 		table = RT6_TABLE_UNSPEC;
2380 	rtm->rtm_table = table;
2381 	NLA_PUT_U32(skb, RTA_TABLE, table);
2382 	if (rt->rt6i_flags&RTF_REJECT)
2383 		rtm->rtm_type = RTN_UNREACHABLE;
2384 	else if (rt->rt6i_flags&RTF_LOCAL)
2385 		rtm->rtm_type = RTN_LOCAL;
2386 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2387 		rtm->rtm_type = RTN_LOCAL;
2388 	else
2389 		rtm->rtm_type = RTN_UNICAST;
2390 	rtm->rtm_flags = 0;
2391 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2392 	rtm->rtm_protocol = rt->rt6i_protocol;
2393 	if (rt->rt6i_flags&RTF_DYNAMIC)
2394 		rtm->rtm_protocol = RTPROT_REDIRECT;
2395 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2396 		rtm->rtm_protocol = RTPROT_KERNEL;
2397 	else if (rt->rt6i_flags&RTF_DEFAULT)
2398 		rtm->rtm_protocol = RTPROT_RA;
2399 
2400 	if (rt->rt6i_flags&RTF_CACHE)
2401 		rtm->rtm_flags |= RTM_F_CLONED;
2402 
2403 	if (dst) {
2404 		NLA_PUT(skb, RTA_DST, 16, dst);
2405 		rtm->rtm_dst_len = 128;
2406 	} else if (rtm->rtm_dst_len)
2407 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2408 #ifdef CONFIG_IPV6_SUBTREES
2409 	if (src) {
2410 		NLA_PUT(skb, RTA_SRC, 16, src);
2411 		rtm->rtm_src_len = 128;
2412 	} else if (rtm->rtm_src_len)
2413 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2414 #endif
2415 	if (iif) {
2416 #ifdef CONFIG_IPV6_MROUTE
2417 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2418 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2419 			if (err <= 0) {
2420 				if (!nowait) {
2421 					if (err == 0)
2422 						return 0;
2423 					goto nla_put_failure;
2424 				} else {
2425 					if (err == -EMSGSIZE)
2426 						goto nla_put_failure;
2427 				}
2428 			}
2429 		} else
2430 #endif
2431 			NLA_PUT_U32(skb, RTA_IIF, iif);
2432 	} else if (dst) {
2433 		struct in6_addr saddr_buf;
2434 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2435 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2436 	}
2437 
2438 	if (rt->rt6i_prefsrc.plen) {
2439 		struct in6_addr saddr_buf;
2440 		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2441 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2442 	}
2443 
2444 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2445 		goto nla_put_failure;
2446 
2447 	rcu_read_lock();
2448 	n = dst_get_neighbour(&rt->dst);
2449 	if (n)
2450 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2451 	rcu_read_unlock();
2452 
2453 	if (rt->dst.dev)
2454 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2455 
2456 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2457 
2458 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2459 		expires = 0;
2460 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2461 		expires = rt->rt6i_expires - jiffies;
2462 	else
2463 		expires = INT_MAX;
2464 
2465 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2466 			       expires, rt->dst.error) < 0)
2467 		goto nla_put_failure;
2468 
2469 	return nlmsg_end(skb, nlh);
2470 
2471 nla_put_failure:
2472 	nlmsg_cancel(skb, nlh);
2473 	return -EMSGSIZE;
2474 }
2475 
2476 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2477 {
2478 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2479 	int prefix;
2480 
2481 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2482 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2483 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2484 	} else
2485 		prefix = 0;
2486 
2487 	return rt6_fill_node(arg->net,
2488 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2489 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2490 		     prefix, 0, NLM_F_MULTI);
2491 }
2492 
/*
 * RTM_GETROUTE handler: resolve a single route for the given
 * source/destination and unicast the result back to the requester.
 *
 * NOTE(review): RTA_IIF is only validated for device existence; the
 * lookup below is still an *output* route lookup via ip6_route_output()
 * (fl6.flowi6_iif is never set), so iif does not actually influence the
 * result beyond being echoed in the reply -- confirm this is intended.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	/* RTA_SRC/RTA_DST lengths are not covered by the policy table;
	 * require full 128-bit addresses here. */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* NOTE(review): the dst is not checked for error; presumably
	 * ip6_route_output() never returns NULL here (it hands back
	 * ip6_null_entry on failure) -- confirm before relying on it. */
	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
	/* attach to skb so the dst reference is dropped with the skb */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2566 
2567 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2568 {
2569 	struct sk_buff *skb;
2570 	struct net *net = info->nl_net;
2571 	u32 seq;
2572 	int err;
2573 
2574 	err = -ENOBUFS;
2575 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2576 
2577 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2578 	if (skb == NULL)
2579 		goto errout;
2580 
2581 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2582 				event, info->pid, seq, 0, 0, 0);
2583 	if (err < 0) {
2584 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2585 		WARN_ON(err == -EMSGSIZE);
2586 		kfree_skb(skb);
2587 		goto errout;
2588 	}
2589 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2590 		    info->nlh, gfp_any());
2591 	return;
2592 errout:
2593 	if (err < 0)
2594 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2595 }
2596 
2597 static int ip6_route_dev_notify(struct notifier_block *this,
2598 				unsigned long event, void *data)
2599 {
2600 	struct net_device *dev = (struct net_device *)data;
2601 	struct net *net = dev_net(dev);
2602 
2603 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2604 		net->ipv6.ip6_null_entry->dst.dev = dev;
2605 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2606 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2607 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2608 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2609 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2610 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2611 #endif
2612 	}
2613 
2614 	return NOTIFY_OK;
2615 }
2616 
2617 /*
2618  *	/proc
2619  */
2620 
2621 #ifdef CONFIG_PROC_FS
2622 
/* NOTE(review): apparently a leftover from the pre-seq_file /proc
 * implementation; nothing in the visible code references it --
 * candidate for removal after a tree-wide grep. */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2631 
2632 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2633 {
2634 	struct seq_file *m = p_arg;
2635 	struct neighbour *n;
2636 
2637 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2638 
2639 #ifdef CONFIG_IPV6_SUBTREES
2640 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2641 #else
2642 	seq_puts(m, "00000000000000000000000000000000 00 ");
2643 #endif
2644 	rcu_read_lock();
2645 	n = dst_get_neighbour(&rt->dst);
2646 	if (n) {
2647 		seq_printf(m, "%pi6", n->primary_key);
2648 	} else {
2649 		seq_puts(m, "00000000000000000000000000000000");
2650 	}
2651 	rcu_read_unlock();
2652 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2653 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2654 		   rt->dst.__use, rt->rt6i_flags,
2655 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2656 	return 0;
2657 }
2658 
/* seq_file show: walk all FIB entries and print one line per route. */
static int ipv6_route_show(struct seq_file *m, void *v)
{
	struct net *net = (struct net *)m->private;
	fib6_clean_all(net, rt6_info_route, 0, m);
	return 0;
}
2665 
/* open() for /proc/net/ipv6_route; per-netns single-shot seq_file. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2670 
/* File operations for /proc/net/ipv6_route. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2678 
/* seq_file show for /proc/net/rt6_stats: one line of FIB counters
 * plus the current dst entry count. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2693 
/* open() for /proc/net/rt6_stats; per-netns single-shot seq_file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2698 
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2706 #endif	/* CONFIG_PROC_FS */
2707 
2708 #ifdef CONFIG_SYSCTL
2709 
2710 static
2711 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2712 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2713 {
2714 	struct net *net;
2715 	int delay;
2716 	if (!write)
2717 		return -EINVAL;
2718 
2719 	net = (struct net *)ctl->extra1;
2720 	delay = net->ipv6.sysctl.flush_delay;
2721 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2722 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2723 	return 0;
2724 }
2725 
/* Template for the per-netns net.ipv6.route sysctl table.
 * NOTE: entry order is significant -- ipv6_route_sysctl_init() rewrites
 * the .data pointers by index (table[0]..table[9]); keep both in sync.
 */
ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,	/* write-only trigger */
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2799 
/*
 * Clone ipv6_route_table_template for @net and retarget each entry's
 * .data pointer at the per-namespace variables.  Returns the new table
 * (caller frees) or NULL on allocation failure.
 *
 * NOTE: the numeric indices below are tied to the template's entry
 * order; adding or reordering template entries requires updating this.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* extra1 carries the netns for ipv6_sysctl_rtcache_flush() */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2824 #endif
2825 
/*
 * Per-netns setup: clone the dst_ops and the special route entries
 * (null, and with multiple tables also prohibit/blackhole) from their
 * templates, seed the sysctl defaults and register the /proc files.
 * Unwinds via the goto chain in reverse order on failure.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* dst.path of an uncloned route points back at itself */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; overridable via net.ipv6.route.* */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* error unwind: free in reverse order of allocation */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2901 
/* Per-netns teardown: mirror of ip6_route_net_init(); remove the /proc
 * files, free the special route entries and the dst entry counter. */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2915 
/* Per-network-namespace init/exit hooks for IPv6 routing. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2920 
/* Netdevice event notifier; see ip6_route_dev_notify(). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2925 
/*
 * Module init for the IPv6 routing subsystem: create the rt6_info slab,
 * register the pernet ops, fix up init_net's special routes (loopback
 * registered before us), then bring up fib6, xfrm6, fib6 rules, the
 * rtnetlink handlers and the netdevice notifier.  Unwinds through the
 * goto chain on failure.
 *
 * NOTE(review): the labels "fib6_rules_init" and "xfrm6_init" shadow
 * the function names of the steps they undo -- confusing but
 * intentional in this file's style.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* error unwind, in reverse order of setup */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2997 
/* Module exit: tear everything down in the reverse order of
 * ip6_route_init(). */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3008