xref: /linux/net/ipv6/route.c (revision 9429ec96c2718c0d1e3317cf60a87a0405223814)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 
61 #include <asm/uaccess.h>
62 
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66 
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 				    const struct in6_addr *dest);
69 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void		ip6_dst_destroy(struct dst_entry *);
74 static void		ip6_dst_ifdown(struct dst_entry *,
75 				       struct net_device *dev, int how);
76 static int		 ip6_dst_gc(struct dst_ops *ops);
77 
78 static int		ip6_pkt_discard(struct sk_buff *skb);
79 static int		ip6_pkt_discard_out(struct sk_buff *skb);
80 static void		ip6_link_failure(struct sk_buff *skb);
81 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 					   struct sk_buff *skb, u32 mtu);
83 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 					struct sk_buff *skb);
85 
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88 					   const struct in6_addr *prefix, int prefixlen,
89 					   const struct in6_addr *gwaddr, int ifindex,
90 					   unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92 					   const struct in6_addr *prefix, int prefixlen,
93 					   const struct in6_addr *gwaddr, int ifindex);
94 #endif
95 
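/* Copy-on-write of dst metrics for host routes: writable metrics live in
 * the route's inet_peer entry, so create/fetch the peer, copy the old
 * (read-only) values into it on first use, and switch dst->_metrics over
 * with cmpxchg.  Returns NULL for non-host routes or if another writer
 * already installed a read-only metrics pointer.
 */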
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 	struct rt6_info *rt = (struct rt6_info *) dst;
99 	struct inet_peer *peer;
100 	u32 *p = NULL;
101 
102 	if (!(rt->dst.flags & DST_HOST))
103 		return NULL;
104 
105 	peer = rt6_get_peer_create(rt);
106 	if (peer) {
107 		u32 *old_p = __DST_METRICS_PTR(old);
108 		unsigned long prev, new;
109 
110 		p = peer->metrics;
111 		if (inet_metrics_new(peer))
112 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113 
114 		new = (unsigned long) p;
115 		prev = cmpxchg(&dst->_metrics, old, new);
116 
117 		if (prev != old) {
118 			p = __DST_METRICS_PTR(prev);
119 			if (prev & DST_METRICS_READ_ONLY)
120 				p = NULL;
121 		}
122 	}
123 	return p;
124 }
125 
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 					     struct sk_buff *skb,
128 					     const void *daddr)
129 {
130 	struct in6_addr *p = &rt->rt6i_gateway;
131 
132 	if (!ipv6_addr_any(p))
133 		return (const void *) p;
134 	else if (skb)
135 		return &ipv6_hdr(skb)->daddr;
136 	return daddr;
137 }
138 
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 					  struct sk_buff *skb,
141 					  const void *daddr)
142 {
143 	struct rt6_info *rt = (struct rt6_info *) dst;
144 	struct neighbour *n;
145 
146 	daddr = choose_neigh_daddr(rt, skb, daddr);
147 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 	if (n)
149 		return n;
150 	return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152 
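/* Look up, or create, the neighbour cache entry for the route's gateway
 * on @dev and store it in rt->n.  Returns 0 on success or the
 * neigh_create() error.
 */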
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 	if (!n) {
157 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 		if (IS_ERR(n))
159 			return PTR_ERR(n);
160 	}
161 	rt->n = n;
162 
163 	return 0;
164 }
165 
166 static struct dst_ops ip6_dst_ops_template = {
167 	.family			=	AF_INET6,
168 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
169 	.gc			=	ip6_dst_gc,
170 	.gc_thresh		=	1024,
171 	.check			=	ip6_dst_check,
172 	.default_advmss		=	ip6_default_advmss,
173 	.mtu			=	ip6_mtu,
174 	.cow_metrics		=	ipv6_cow_metrics,
175 	.destroy		=	ip6_dst_destroy,
176 	.ifdown			=	ip6_dst_ifdown,
177 	.negative_advice	=	ip6_negative_advice,
178 	.link_failure		=	ip6_link_failure,
179 	.update_pmtu		=	ip6_rt_update_pmtu,
180 	.redirect		=	rt6_do_redirect,
181 	.local_out		=	__ip6_local_out,
182 	.neigh_lookup		=	ip6_neigh_lookup,
183 };
184 
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188 
189 	return mtu ? : dst->dev->mtu;
190 }
191 
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 					 struct sk_buff *skb, u32 mtu)
194 {
195 }
196 
197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 				      struct sk_buff *skb)
199 {
200 }
201 
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 					 unsigned long old)
204 {
205 	return NULL;
206 }
207 
208 static struct dst_ops ip6_dst_blackhole_ops = {
209 	.family			=	AF_INET6,
210 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
211 	.destroy		=	ip6_dst_destroy,
212 	.check			=	ip6_dst_check,
213 	.mtu			=	ip6_blackhole_mtu,
214 	.default_advmss		=	ip6_default_advmss,
215 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
216 	.redirect		=	ip6_rt_blackhole_redirect,
217 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
218 	.neigh_lookup		=	ip6_neigh_lookup,
219 };
220 
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222 	[RTAX_HOPLIMIT - 1] = 255,
223 };
224 
225 static struct rt6_info ip6_null_entry_template = {
226 	.dst = {
227 		.__refcnt	= ATOMIC_INIT(1),
228 		.__use		= 1,
229 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
230 		.error		= -ENETUNREACH,
231 		.input		= ip6_pkt_discard,
232 		.output		= ip6_pkt_discard_out,
233 	},
234 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
235 	.rt6i_protocol  = RTPROT_KERNEL,
236 	.rt6i_metric	= ~(u32) 0,
237 	.rt6i_ref	= ATOMIC_INIT(1),
238 };
239 
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241 
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244 
245 static struct rt6_info ip6_prohibit_entry_template = {
246 	.dst = {
247 		.__refcnt	= ATOMIC_INIT(1),
248 		.__use		= 1,
249 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
250 		.error		= -EACCES,
251 		.input		= ip6_pkt_prohibit,
252 		.output		= ip6_pkt_prohibit_out,
253 	},
254 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
255 	.rt6i_protocol  = RTPROT_KERNEL,
256 	.rt6i_metric	= ~(u32) 0,
257 	.rt6i_ref	= ATOMIC_INIT(1),
258 };
259 
260 static struct rt6_info ip6_blk_hole_entry_template = {
261 	.dst = {
262 		.__refcnt	= ATOMIC_INIT(1),
263 		.__use		= 1,
264 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
265 		.error		= -EINVAL,
266 		.input		= dst_discard,
267 		.output		= dst_discard,
268 	},
269 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
270 	.rt6i_protocol  = RTPROT_KERNEL,
271 	.rt6i_metric	= ~(u32) 0,
272 	.rt6i_ref	= ATOMIC_INIT(1),
273 };
274 
275 #endif
276 
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 					     struct net_device *dev,
280 					     int flags,
281 					     struct fib6_table *table)
282 {
283 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 					0, DST_OBSOLETE_FORCE_CHK, flags);
285 
286 	if (rt) {
287 		struct dst_entry *dst = &rt->dst;
288 
289 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 		rt->rt6i_genid = rt_genid(net);
292 	}
293 	return rt;
294 }
295 
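/* Final teardown of a rt6_info: drop the cached neighbour, any privately
 * allocated metrics, the inet6_dev reference, the "from" route whose
 * expiry this entry borrows, and the inet_peer entry.
 */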
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298 	struct rt6_info *rt = (struct rt6_info *)dst;
299 	struct inet6_dev *idev = rt->rt6i_idev;
300 
301 	if (rt->n)
302 		neigh_release(rt->n);
303 
304 	if (!(rt->dst.flags & DST_HOST))
305 		dst_destroy_metrics_generic(dst);
306 
307 	if (idev) {
308 		rt->rt6i_idev = NULL;
309 		in6_dev_put(idev);
310 	}
311 
312 	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 		dst_release(dst->from);
314 
315 	if (rt6_has_peer(rt)) {
316 		struct inet_peer *peer = rt6_peer_ptr(rt);
317 		inet_putpeer(peer);
318 	}
319 }
320 
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322 
323 static u32 rt6_peer_genid(void)
324 {
325 	return atomic_read(&__rt6_peer_genid);
326 }
327 

328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330 	struct inet_peer_base *base;
331 	struct inet_peer *peer;
332 
333 	base = inetpeer_base_ptr(rt->_rt6i_peer);
334 	if (!base)
335 		return;
336 
337 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338 	if (peer) {
339 		if (!rt6_set_peer(rt, peer))
340 			inet_putpeer(peer);
341 		else
342 			rt->rt6i_peer_genid = rt6_peer_genid();
343 	}
344 }
345 
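/* The underlying device is going away: repoint the route's inet6_dev
 * (and the cached neighbour, if it lives on that device) at the loopback
 * device so the dst itself can outlive the device.
 */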
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 			   int how)
348 {
349 	struct rt6_info *rt = (struct rt6_info *)dst;
350 	struct inet6_dev *idev = rt->rt6i_idev;
351 	struct net_device *loopback_dev =
352 		dev_net(dev)->loopback_dev;
353 
354 	if (dev != loopback_dev) {
355 		if (idev && idev->dev == dev) {
356 			struct inet6_dev *loopback_idev =
357 				in6_dev_get(loopback_dev);
358 			if (loopback_idev) {
359 				rt->rt6i_idev = loopback_idev;
360 				in6_dev_put(idev);
361 			}
362 		}
363 		if (rt->n && rt->n->dev == dev) {
364 			rt->n->dev = loopback_dev;
365 			dev_hold(loopback_dev);
366 			dev_put(dev);
367 		}
368 	}
369 }
370 
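/* A route is considered expired when its own RTF_EXPIRES timer has
 * passed or, for entries without RTF_EXPIRES, when the route they were
 * cloned from (dst.from) has expired.
 */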
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373 	struct rt6_info *ort = NULL;
374 
375 	if (rt->rt6i_flags & RTF_EXPIRES) {
376 		if (time_after(jiffies, rt->dst.expires))
377 			return true;
378 	} else if (rt->dst.from) {
379 		ort = (struct rt6_info *) rt->dst.from;
380 		return (ort->rt6i_flags & RTF_EXPIRES) &&
381 			time_after(jiffies, ort->dst.expires);
382 	}
383 	return false;
384 }
385 
386 static bool rt6_need_strict(const struct in6_addr *daddr)
387 {
388 	return ipv6_addr_type(daddr) &
389 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
390 }
391 
392 /*
393  *	Route lookup. Any table->tb6_lock is implied.
394  */
395 
396 static inline struct rt6_info *rt6_device_match(struct net *net,
397 						    struct rt6_info *rt,
398 						    const struct in6_addr *saddr,
399 						    int oif,
400 						    int flags)
401 {
402 	struct rt6_info *local = NULL;
403 	struct rt6_info *sprt;
404 
405 	if (!oif && ipv6_addr_any(saddr))
406 		goto out;
407 
408 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
409 		struct net_device *dev = sprt->dst.dev;
410 
411 		if (oif) {
412 			if (dev->ifindex == oif)
413 				return sprt;
414 			if (dev->flags & IFF_LOOPBACK) {
415 				if (!sprt->rt6i_idev ||
416 				    sprt->rt6i_idev->dev->ifindex != oif) {
417 					if (flags & RT6_LOOKUP_F_IFACE && oif)
418 						continue;
419 					if (local && (!oif ||
420 						      local->rt6i_idev->dev->ifindex == oif))
421 						continue;
422 				}
423 				local = sprt;
424 			}
425 		} else {
426 			if (ipv6_chk_addr(net, saddr, dev,
427 					  flags & RT6_LOOKUP_F_IFACE))
428 				return sprt;
429 		}
430 	}
431 
432 	if (oif) {
433 		if (local)
434 			return local;
435 
436 		if (flags & RT6_LOOKUP_F_IFACE)
437 			return net->ipv6.ip6_null_entry;
438 	}
439 out:
440 	return rt;
441 }
442 
443 #ifdef CONFIG_IPV6_ROUTER_PREF
444 static void rt6_probe(struct rt6_info *rt)
445 {
446 	struct neighbour *neigh;
447 	/*
448 	 * Okay, this does not seem to be appropriate
449 	 * for now, however, we need to check if it
450 	 * is really so; aka Router Reachability Probing.
451 	 *
452 	 * Router Reachability Probe MUST be rate-limited
453 	 * to no more than one per minute.
454 	 */
455 	rcu_read_lock();
456 	neigh = rt ? rt->n : NULL;
457 	if (!neigh || (neigh->nud_state & NUD_VALID))
458 		goto out;
459 	read_lock_bh(&neigh->lock);
460 	if (!(neigh->nud_state & NUD_VALID) &&
461 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
462 		struct in6_addr mcaddr;
463 		struct in6_addr *target;
464 
465 		neigh->updated = jiffies;
466 		read_unlock_bh(&neigh->lock);
467 
468 		target = (struct in6_addr *)&neigh->primary_key;
469 		addrconf_addr_solict_mult(target, &mcaddr);
470 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
471 	} else {
472 		read_unlock_bh(&neigh->lock);
473 	}
474 out:
475 	rcu_read_unlock();
476 }
477 #else
478 static inline void rt6_probe(struct rt6_info *rt)
479 {
480 }
481 #endif
482 
483 /*
484  * Default Router Selection (RFC 2461 6.3.6)
485  */
486 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
487 {
488 	struct net_device *dev = rt->dst.dev;
489 	if (!oif || dev->ifindex == oif)
490 		return 2;
491 	if ((dev->flags & IFF_LOOPBACK) &&
492 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
493 		return 1;
494 	return 0;
495 }
496 
497 static inline int rt6_check_neigh(struct rt6_info *rt)
498 {
499 	struct neighbour *neigh;
500 	int m;
501 
502 	rcu_read_lock();
503 	neigh = rt->n;
504 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
505 	    !(rt->rt6i_flags & RTF_GATEWAY))
506 		m = 1;
507 	else if (neigh) {
508 		read_lock_bh(&neigh->lock);
509 		if (neigh->nud_state & NUD_VALID)
510 			m = 2;
511 #ifdef CONFIG_IPV6_ROUTER_PREF
512 		else if (neigh->nud_state & NUD_FAILED)
513 			m = 0;
514 #endif
515 		else
516 			m = 1;
517 		read_unlock_bh(&neigh->lock);
518 	} else
519 		m = 0;
520 	rcu_read_unlock();
521 	return m;
522 }
523 
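/* Combine the selection criteria into one score: the interface match
 * from rt6_check_dev() in the low bits, the RA router preference (with
 * CONFIG_IPV6_ROUTER_PREF) shifted above them, and -1 when a strict
 * interface or reachability requirement cannot be met.
 */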
524 static int rt6_score_route(struct rt6_info *rt, int oif,
525 			   int strict)
526 {
527 	int m, n;
528 
529 	m = rt6_check_dev(rt, oif);
530 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
531 		return -1;
532 #ifdef CONFIG_IPV6_ROUTER_PREF
533 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
534 #endif
535 	n = rt6_check_neigh(rt);
536 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
537 		return -1;
538 	return m;
539 }
540 
541 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
542 				   int *mpri, struct rt6_info *match)
543 {
544 	int m;
545 
546 	if (rt6_check_expired(rt))
547 		goto out;
548 
549 	m = rt6_score_route(rt, oif, strict);
550 	if (m < 0)
551 		goto out;
552 
553 	if (m > *mpri) {
554 		if (strict & RT6_LOOKUP_F_REACHABLE)
555 			rt6_probe(match);
556 		*mpri = m;
557 		match = rt;
558 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
559 		rt6_probe(rt);
560 	}
561 
562 out:
563 	return match;
564 }
565 
566 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
567 				     struct rt6_info *rr_head,
568 				     u32 metric, int oif, int strict)
569 {
570 	struct rt6_info *rt, *match;
571 	int mpri = -1;
572 
573 	match = NULL;
574 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
575 	     rt = rt->dst.rt6_next)
576 		match = find_match(rt, oif, strict, &mpri, match);
577 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
578 	     rt = rt->dst.rt6_next)
579 		match = find_match(rt, oif, strict, &mpri, match);
580 
581 	return match;
582 }
583 
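/* Default router selection with round-robin fallback: score all routes
 * of the same metric starting at fn->rr_ptr and, when nothing is
 * reachable under RT6_LOOKUP_F_REACHABLE, advance rr_ptr so the next
 * lookup starts from the following router.
 */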
584 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
585 {
586 	struct rt6_info *match, *rt0;
587 	struct net *net;
588 
589 	rt0 = fn->rr_ptr;
590 	if (!rt0)
591 		fn->rr_ptr = rt0 = fn->leaf;
592 
593 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
594 
595 	if (!match &&
596 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
597 		struct rt6_info *next = rt0->dst.rt6_next;
598 
599 		/* no entries matched; do round-robin */
600 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
601 			next = fn->leaf;
602 
603 		if (next != rt0)
604 			fn->rr_ptr = next;
605 	}
606 
607 	net = dev_net(rt0->dst.dev);
608 	return match ? match : net->ipv6.ip6_null_entry;
609 }
610 
611 #ifdef CONFIG_IPV6_ROUTE_INFO
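/* Handle a Route Information option received in a Router Advertisement
 * (RFC 4191): validate its length, prefix length and preference, then
 * add, refresh or remove the matching RTF_ROUTEINFO route according to
 * the advertised lifetime.
 */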
612 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
613 		  const struct in6_addr *gwaddr)
614 {
615 	struct net *net = dev_net(dev);
616 	struct route_info *rinfo = (struct route_info *) opt;
617 	struct in6_addr prefix_buf, *prefix;
618 	unsigned int pref;
619 	unsigned long lifetime;
620 	struct rt6_info *rt;
621 
622 	if (len < sizeof(struct route_info)) {
623 		return -EINVAL;
624 	}
625 
626 	/* Sanity check for prefix_len and length */
627 	if (rinfo->length > 3) {
628 		return -EINVAL;
629 	} else if (rinfo->prefix_len > 128) {
630 		return -EINVAL;
631 	} else if (rinfo->prefix_len > 64) {
632 		if (rinfo->length < 2) {
633 			return -EINVAL;
634 		}
635 	} else if (rinfo->prefix_len > 0) {
636 		if (rinfo->length < 1) {
637 			return -EINVAL;
638 		}
639 	}
640 
641 	pref = rinfo->route_pref;
642 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
643 		return -EINVAL;
644 
645 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
646 
647 	if (rinfo->length == 3)
648 		prefix = (struct in6_addr *)rinfo->prefix;
649 	else {
650 		/* this function is safe */
651 		ipv6_addr_prefix(&prefix_buf,
652 				 (struct in6_addr *)rinfo->prefix,
653 				 rinfo->prefix_len);
654 		prefix = &prefix_buf;
655 	}
656 
657 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
658 				dev->ifindex);
659 
660 	if (rt && !lifetime) {
661 		ip6_del_rt(rt);
662 		rt = NULL;
663 	}
664 
665 	if (!rt && lifetime)
666 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
667 					pref);
668 	else if (rt)
669 		rt->rt6i_flags = RTF_ROUTEINFO |
670 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
671 
672 	if (rt) {
673 		if (!addrconf_finite_timeout(lifetime))
674 			rt6_clean_expires(rt);
675 		else
676 			rt6_set_expires(rt, jiffies + HZ * lifetime);
677 
678 		dst_release(&rt->dst);
679 	}
680 	return 0;
681 }
682 #endif
683 
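/* If the lookup ended up at the null entry, walk back up the fib6 tree
 * (descending into source subtrees where they exist) until a node with
 * routing info is found, then jump back to the caller's "restart" label;
 * give up at the tree root.
 */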
684 #define BACKTRACK(__net, saddr)			\
685 do { \
686 	if (rt == __net->ipv6.ip6_null_entry) {	\
687 		struct fib6_node *pn; \
688 		while (1) { \
689 			if (fn->fn_flags & RTN_TL_ROOT) \
690 				goto out; \
691 			pn = fn->parent; \
692 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
693 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
694 			else \
695 				fn = pn; \
696 			if (fn->fn_flags & RTN_RTINFO) \
697 				goto restart; \
698 		} \
699 	} \
700 } while (0)
701 
702 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
703 					     struct fib6_table *table,
704 					     struct flowi6 *fl6, int flags)
705 {
706 	struct fib6_node *fn;
707 	struct rt6_info *rt;
708 
709 	read_lock_bh(&table->tb6_lock);
710 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
711 restart:
712 	rt = fn->leaf;
713 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
714 	BACKTRACK(net, &fl6->saddr);
715 out:
716 	dst_use(&rt->dst, jiffies);
717 	read_unlock_bh(&table->tb6_lock);
718 	return rt;
719 
720 }
721 
722 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
723 				    int flags)
724 {
725 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
726 }
727 EXPORT_SYMBOL_GPL(ip6_route_lookup);
728 
729 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
730 			    const struct in6_addr *saddr, int oif, int strict)
731 {
732 	struct flowi6 fl6 = {
733 		.flowi6_oif = oif,
734 		.daddr = *daddr,
735 	};
736 	struct dst_entry *dst;
737 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
738 
739 	if (saddr) {
740 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
741 		flags |= RT6_LOOKUP_F_HAS_SADDR;
742 	}
743 
744 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
745 	if (dst->error == 0)
746 		return (struct rt6_info *) dst;
747 
748 	dst_release(dst);
749 
750 	return NULL;
751 }
752 
753 EXPORT_SYMBOL(rt6_lookup);
754 
755 /* ip6_ins_rt is called WITHOUT holding table->tb6_lock.
756    It takes a new route entry; if the addition fails for any reason,
757    the route is freed. In any case, if the caller does not hold a
758    reference, the route may be destroyed.
759  */
760 
761 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
762 {
763 	int err;
764 	struct fib6_table *table;
765 
766 	table = rt->rt6i_table;
767 	write_lock_bh(&table->tb6_lock);
768 	err = fib6_add(&table->tb6_root, rt, info);
769 	write_unlock_bh(&table->tb6_lock);
770 
771 	return err;
772 }
773 
774 int ip6_ins_rt(struct rt6_info *rt)
775 {
776 	struct nl_info info = {
777 		.nl_net = dev_net(rt->dst.dev),
778 	};
779 	return __ip6_ins_rt(rt, &info);
780 }
781 
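/* Create an RTF_CACHE clone of @ort for one destination (and, with
 * subtrees, one source).  If binding the gateway neighbour fails, force
 * a garbage collection run with temporarily relaxed sysctl limits and
 * retry once, but only when not in softirq context.
 */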
782 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
783 				      const struct in6_addr *daddr,
784 				      const struct in6_addr *saddr)
785 {
786 	struct rt6_info *rt;
787 
788 	/*
789 	 *	Clone the route.
790 	 */
791 
792 	rt = ip6_rt_copy(ort, daddr);
793 
794 	if (rt) {
795 		int attempts = !in_softirq();
796 
797 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
798 			if (ort->rt6i_dst.plen != 128 &&
799 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
800 				rt->rt6i_flags |= RTF_ANYCAST;
801 			rt->rt6i_gateway = *daddr;
802 		}
803 
804 		rt->rt6i_flags |= RTF_CACHE;
805 
806 #ifdef CONFIG_IPV6_SUBTREES
807 		if (rt->rt6i_src.plen && saddr) {
808 			rt->rt6i_src.addr = *saddr;
809 			rt->rt6i_src.plen = 128;
810 		}
811 #endif
812 
813 	retry:
814 		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
815 			struct net *net = dev_net(rt->dst.dev);
816 			int saved_rt_min_interval =
817 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
818 			int saved_rt_elasticity =
819 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
820 
821 			if (attempts-- > 0) {
822 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
823 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
824 
825 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
826 
827 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
828 					saved_rt_elasticity;
829 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
830 					saved_rt_min_interval;
831 				goto retry;
832 			}
833 
834 			net_warn_ratelimited("Neighbour table overflow\n");
835 			dst_free(&rt->dst);
836 			return NULL;
837 		}
838 	}
839 
840 	return rt;
841 }
842 
843 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
844 					const struct in6_addr *daddr)
845 {
846 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
847 
848 	if (rt) {
849 		rt->rt6i_flags |= RTF_CACHE;
850 		rt->n = neigh_clone(ort->n);
851 	}
852 	return rt;
853 }
854 
855 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
856 				      struct flowi6 *fl6, int flags)
857 {
858 	struct fib6_node *fn;
859 	struct rt6_info *rt, *nrt;
860 	int strict = 0;
861 	int attempts = 3;
862 	int err;
863 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
864 
865 	strict |= flags & RT6_LOOKUP_F_IFACE;
866 
867 relookup:
868 	read_lock_bh(&table->tb6_lock);
869 
870 restart_2:
871 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
872 
873 restart:
874 	rt = rt6_select(fn, oif, strict | reachable);
875 
876 	BACKTRACK(net, &fl6->saddr);
877 	if (rt == net->ipv6.ip6_null_entry ||
878 	    rt->rt6i_flags & RTF_CACHE)
879 		goto out;
880 
881 	dst_hold(&rt->dst);
882 	read_unlock_bh(&table->tb6_lock);
883 
884 	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
885 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
886 	else if (!(rt->dst.flags & DST_HOST))
887 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
888 	else
889 		goto out2;
890 
891 	dst_release(&rt->dst);
892 	rt = nrt ? : net->ipv6.ip6_null_entry;
893 
894 	dst_hold(&rt->dst);
895 	if (nrt) {
896 		err = ip6_ins_rt(nrt);
897 		if (!err)
898 			goto out2;
899 	}
900 
901 	if (--attempts <= 0)
902 		goto out2;
903 
904 	/*
905 	 * Race condition! In the gap, when table->tb6_lock was
906 	 * released someone could insert this route.  Relookup.
907 	 */
908 	dst_release(&rt->dst);
909 	goto relookup;
910 
911 out:
912 	if (reachable) {
913 		reachable = 0;
914 		goto restart_2;
915 	}
916 	dst_hold(&rt->dst);
917 	read_unlock_bh(&table->tb6_lock);
918 out2:
919 	rt->dst.lastuse = jiffies;
920 	rt->dst.__use++;
921 
922 	return rt;
923 }
924 
925 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
926 					    struct flowi6 *fl6, int flags)
927 {
928 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
929 }
930 
931 static struct dst_entry *ip6_route_input_lookup(struct net *net,
932 						struct net_device *dev,
933 						struct flowi6 *fl6, int flags)
934 {
935 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
936 		flags |= RT6_LOOKUP_F_IFACE;
937 
938 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
939 }
940 
941 void ip6_route_input(struct sk_buff *skb)
942 {
943 	const struct ipv6hdr *iph = ipv6_hdr(skb);
944 	struct net *net = dev_net(skb->dev);
945 	int flags = RT6_LOOKUP_F_HAS_SADDR;
946 	struct flowi6 fl6 = {
947 		.flowi6_iif = skb->dev->ifindex,
948 		.daddr = iph->daddr,
949 		.saddr = iph->saddr,
950 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
951 		.flowi6_mark = skb->mark,
952 		.flowi6_proto = iph->nexthdr,
953 	};
954 
955 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
956 }
957 
958 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
959 					     struct flowi6 *fl6, int flags)
960 {
961 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
962 }
963 
964 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
965 				    struct flowi6 *fl6)
966 {
967 	int flags = 0;
968 
969 	fl6->flowi6_iif = net->loopback_dev->ifindex;
970 
971 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
972 		flags |= RT6_LOOKUP_F_IFACE;
973 
974 	if (!ipv6_addr_any(&fl6->saddr))
975 		flags |= RT6_LOOKUP_F_HAS_SADDR;
976 	else if (sk)
977 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
978 
979 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
980 }
981 
982 EXPORT_SYMBOL(ip6_route_output);
983 
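/* Make a copy of @dst_orig on ip6_dst_blackhole_ops: it keeps the
 * original addressing, metrics and device references, but its input and
 * output handlers simply discard packets.
 */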
984 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
985 {
986 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
987 	struct dst_entry *new = NULL;
988 
989 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
990 	if (rt) {
991 		new = &rt->dst;
992 
993 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
994 		rt6_init_peer(rt, net->ipv6.peers);
995 
996 		new->__use = 1;
997 		new->input = dst_discard;
998 		new->output = dst_discard;
999 
1000 		if (dst_metrics_read_only(&ort->dst))
1001 			new->_metrics = ort->dst._metrics;
1002 		else
1003 			dst_copy_metrics(new, &ort->dst);
1004 		rt->rt6i_idev = ort->rt6i_idev;
1005 		if (rt->rt6i_idev)
1006 			in6_dev_hold(rt->rt6i_idev);
1007 
1008 		rt->rt6i_gateway = ort->rt6i_gateway;
1009 		rt->rt6i_flags = ort->rt6i_flags;
1010 		rt6_clean_expires(rt);
1011 		rt->rt6i_metric = 0;
1012 
1013 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014 #ifdef CONFIG_IPV6_SUBTREES
1015 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016 #endif
1017 
1018 		dst_free(new);
1019 	}
1020 
1021 	dst_release(dst_orig);
1022 	return new ? new : ERR_PTR(-ENOMEM);
1023 }
1024 
1025 /*
1026  *	Destination cache support functions
1027  */
1028 
1029 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030 {
1031 	struct rt6_info *rt;
1032 
1033 	rt = (struct rt6_info *) dst;
1034 
1035 	/* All IPV6 dsts are created with ->obsolete set to the value
1036 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037 	 * into this function always.
1038 	 */
1039 	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040 		return NULL;
1041 
1042 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1044 			if (!rt6_has_peer(rt))
1045 				rt6_bind_peer(rt, 0);
1046 			rt->rt6i_peer_genid = rt6_peer_genid();
1047 		}
1048 		return dst;
1049 	}
1050 	return NULL;
1051 }
1052 
1053 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054 {
1055 	struct rt6_info *rt = (struct rt6_info *) dst;
1056 
1057 	if (rt) {
1058 		if (rt->rt6i_flags & RTF_CACHE) {
1059 			if (rt6_check_expired(rt)) {
1060 				ip6_del_rt(rt);
1061 				dst = NULL;
1062 			}
1063 		} else {
1064 			dst_release(dst);
1065 			dst = NULL;
1066 		}
1067 	}
1068 	return dst;
1069 }
1070 
1071 static void ip6_link_failure(struct sk_buff *skb)
1072 {
1073 	struct rt6_info *rt;
1074 
1075 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1076 
1077 	rt = (struct rt6_info *) skb_dst(skb);
1078 	if (rt) {
1079 		if (rt->rt6i_flags & RTF_CACHE)
1080 			rt6_update_expires(rt, 0);
1081 		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1082 			rt->rt6i_node->fn_sernum = -1;
1083 	}
1084 }
1085 
1086 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087 			       struct sk_buff *skb, u32 mtu)
1088 {
1089 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1090 
1091 	dst_confirm(dst);
1092 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1093 		struct net *net = dev_net(dst->dev);
1094 
1095 		rt6->rt6i_flags |= RTF_MODIFIED;
1096 		if (mtu < IPV6_MIN_MTU) {
1097 			u32 features = dst_metric(dst, RTAX_FEATURES);
1098 			mtu = IPV6_MIN_MTU;
1099 			features |= RTAX_FEATURE_ALLFRAG;
1100 			dst_metric_set(dst, RTAX_FEATURES, features);
1101 		}
1102 		dst_metric_set(dst, RTAX_MTU, mtu);
1103 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1104 	}
1105 }
1106 
1107 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108 		     int oif, u32 mark)
1109 {
1110 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111 	struct dst_entry *dst;
1112 	struct flowi6 fl6;
1113 
1114 	memset(&fl6, 0, sizeof(fl6));
1115 	fl6.flowi6_oif = oif;
1116 	fl6.flowi6_mark = mark;
1117 	fl6.flowi6_flags = 0;
1118 	fl6.daddr = iph->daddr;
1119 	fl6.saddr = iph->saddr;
1120 	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121 
1122 	dst = ip6_route_output(net, NULL, &fl6);
1123 	if (!dst->error)
1124 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1125 	dst_release(dst);
1126 }
1127 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128 
1129 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130 {
1131 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1132 			sk->sk_bound_dev_if, sk->sk_mark);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135 
1136 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137 {
1138 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139 	struct dst_entry *dst;
1140 	struct flowi6 fl6;
1141 
1142 	memset(&fl6, 0, sizeof(fl6));
1143 	fl6.flowi6_oif = oif;
1144 	fl6.flowi6_mark = mark;
1145 	fl6.flowi6_flags = 0;
1146 	fl6.daddr = iph->daddr;
1147 	fl6.saddr = iph->saddr;
1148 	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149 
1150 	dst = ip6_route_output(net, NULL, &fl6);
1151 	if (!dst->error)
1152 		rt6_do_redirect(dst, NULL, skb);
1153 	dst_release(dst);
1154 }
1155 EXPORT_SYMBOL_GPL(ip6_redirect);
1156 
1157 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158 {
1159 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160 }
1161 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162 
1163 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1164 {
1165 	struct net_device *dev = dst->dev;
1166 	unsigned int mtu = dst_mtu(dst);
1167 	struct net *net = dev_net(dev);
1168 
1169 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170 
1171 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1173 
1174 	/*
1175 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1176 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1177 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1178 	 * rely only on pmtu discovery"
1179 	 */
1180 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181 		mtu = IPV6_MAXPLEN;
1182 	return mtu;
1183 }
1184 
1185 static unsigned int ip6_mtu(const struct dst_entry *dst)
1186 {
1187 	struct inet6_dev *idev;
1188 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189 
1190 	if (mtu)
1191 		return mtu;
1192 
1193 	mtu = IPV6_MIN_MTU;
1194 
1195 	rcu_read_lock();
1196 	idev = __in6_dev_get(dst->dev);
1197 	if (idev)
1198 		mtu = idev->cnf.mtu6;
1199 	rcu_read_unlock();
1200 
1201 	return mtu;
1202 }
1203 
1204 static struct dst_entry *icmp6_dst_gc_list;
1205 static DEFINE_SPINLOCK(icmp6_dst_lock);
1206 
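/* Allocate a standalone dst for an outgoing ICMPv6/ND packet.  These
 * entries are never inserted into the FIB; they are chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once unreferenced.
 */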
1207 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1208 				  struct neighbour *neigh,
1209 				  struct flowi6 *fl6)
1210 {
1211 	struct dst_entry *dst;
1212 	struct rt6_info *rt;
1213 	struct inet6_dev *idev = in6_dev_get(dev);
1214 	struct net *net = dev_net(dev);
1215 
1216 	if (unlikely(!idev))
1217 		return ERR_PTR(-ENODEV);
1218 
1219 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1220 	if (unlikely(!rt)) {
1221 		in6_dev_put(idev);
1222 		dst = ERR_PTR(-ENOMEM);
1223 		goto out;
1224 	}
1225 
1226 	if (neigh)
1227 		neigh_hold(neigh);
1228 	else {
1229 		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1230 		if (IS_ERR(neigh)) {
1231 			in6_dev_put(idev);
1232 			dst_free(&rt->dst);
1233 			return ERR_CAST(neigh);
1234 		}
1235 	}
1236 
1237 	rt->dst.flags |= DST_HOST;
1238 	rt->dst.output  = ip6_output;
1239 	rt->n = neigh;
1240 	atomic_set(&rt->dst.__refcnt, 1);
1241 	rt->rt6i_dst.addr = fl6->daddr;
1242 	rt->rt6i_dst.plen = 128;
1243 	rt->rt6i_idev     = idev;
1244 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1245 
1246 	spin_lock_bh(&icmp6_dst_lock);
1247 	rt->dst.next = icmp6_dst_gc_list;
1248 	icmp6_dst_gc_list = &rt->dst;
1249 	spin_unlock_bh(&icmp6_dst_lock);
1250 
1251 	fib6_force_start_gc(net);
1252 
1253 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254 
1255 out:
1256 	return dst;
1257 }
1258 
1259 int icmp6_dst_gc(void)
1260 {
1261 	struct dst_entry *dst, **pprev;
1262 	int more = 0;
1263 
1264 	spin_lock_bh(&icmp6_dst_lock);
1265 	pprev = &icmp6_dst_gc_list;
1266 
1267 	while ((dst = *pprev) != NULL) {
1268 		if (!atomic_read(&dst->__refcnt)) {
1269 			*pprev = dst->next;
1270 			dst_free(dst);
1271 		} else {
1272 			pprev = &dst->next;
1273 			++more;
1274 		}
1275 	}
1276 
1277 	spin_unlock_bh(&icmp6_dst_lock);
1278 
1279 	return more;
1280 }
1281 
1282 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283 			    void *arg)
1284 {
1285 	struct dst_entry *dst, **pprev;
1286 
1287 	spin_lock_bh(&icmp6_dst_lock);
1288 	pprev = &icmp6_dst_gc_list;
1289 	while ((dst = *pprev) != NULL) {
1290 		struct rt6_info *rt = (struct rt6_info *) dst;
1291 		if (func(rt, arg)) {
1292 			*pprev = dst->next;
1293 			dst_free(dst);
1294 		} else {
1295 			pprev = &dst->next;
1296 		}
1297 	}
1298 	spin_unlock_bh(&icmp6_dst_lock);
1299 }
1300 
1301 static int ip6_dst_gc(struct dst_ops *ops)
1302 {
1303 	unsigned long now = jiffies;
1304 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1305 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1310 	int entries;
1311 
1312 	entries = dst_entries_get_fast(ops);
1313 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1314 	    entries <= rt_max_size)
1315 		goto out;
1316 
1317 	net->ipv6.ip6_rt_gc_expire++;
1318 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319 	net->ipv6.ip6_rt_last_gc = now;
1320 	entries = dst_entries_get_slow(ops);
1321 	if (entries < ops->gc_thresh)
1322 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1323 out:
1324 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1325 	return entries > rt_max_size;
1326 }
1327 
1328 /* Clean the host part of a prefix. Not necessary in a radix tree,
1329    but it results in cleaner routing tables.
1330 
1331    Remove it only once everything is known to work!
1332  */
1333 
1334 int ip6_dst_hoplimit(struct dst_entry *dst)
1335 {
1336 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1337 	if (hoplimit == 0) {
1338 		struct net_device *dev = dst->dev;
1339 		struct inet6_dev *idev;
1340 
1341 		rcu_read_lock();
1342 		idev = __in6_dev_get(dev);
1343 		if (idev)
1344 			hoplimit = idev->cnf.hop_limit;
1345 		else
1346 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1347 		rcu_read_unlock();
1348 	}
1349 	return hoplimit;
1350 }
1351 EXPORT_SYMBOL(ip6_dst_hoplimit);
1352 
1353 /*
1354  *
1355  */
1356 
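/* Install the route described by @cfg (from netlink or ioctl): pick or
 * create the fib6 table, allocate and fill the rt6_info, validate that a
 * configured gateway is link-local or reachable on-link, bind a
 * neighbour entry when one is needed, apply metrics and insert it.
 */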
1357 int ip6_route_add(struct fib6_config *cfg)
1358 {
1359 	int err;
1360 	struct net *net = cfg->fc_nlinfo.nl_net;
1361 	struct rt6_info *rt = NULL;
1362 	struct net_device *dev = NULL;
1363 	struct inet6_dev *idev = NULL;
1364 	struct fib6_table *table;
1365 	int addr_type;
1366 
1367 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1368 		return -EINVAL;
1369 #ifndef CONFIG_IPV6_SUBTREES
1370 	if (cfg->fc_src_len)
1371 		return -EINVAL;
1372 #endif
1373 	if (cfg->fc_ifindex) {
1374 		err = -ENODEV;
1375 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1376 		if (!dev)
1377 			goto out;
1378 		idev = in6_dev_get(dev);
1379 		if (!idev)
1380 			goto out;
1381 	}
1382 
1383 	if (cfg->fc_metric == 0)
1384 		cfg->fc_metric = IP6_RT_PRIO_USER;
1385 
1386 	err = -ENOBUFS;
1387 	if (cfg->fc_nlinfo.nlh &&
1388 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1389 		table = fib6_get_table(net, cfg->fc_table);
1390 		if (!table) {
1391 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1392 			table = fib6_new_table(net, cfg->fc_table);
1393 		}
1394 	} else {
1395 		table = fib6_new_table(net, cfg->fc_table);
1396 	}
1397 
1398 	if (!table)
1399 		goto out;
1400 
1401 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1402 
1403 	if (!rt) {
1404 		err = -ENOMEM;
1405 		goto out;
1406 	}
1407 
1408 	if (cfg->fc_flags & RTF_EXPIRES)
1409 		rt6_set_expires(rt, jiffies +
1410 				clock_t_to_jiffies(cfg->fc_expires));
1411 	else
1412 		rt6_clean_expires(rt);
1413 
1414 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1415 		cfg->fc_protocol = RTPROT_BOOT;
1416 	rt->rt6i_protocol = cfg->fc_protocol;
1417 
1418 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1419 
1420 	if (addr_type & IPV6_ADDR_MULTICAST)
1421 		rt->dst.input = ip6_mc_input;
1422 	else if (cfg->fc_flags & RTF_LOCAL)
1423 		rt->dst.input = ip6_input;
1424 	else
1425 		rt->dst.input = ip6_forward;
1426 
1427 	rt->dst.output = ip6_output;
1428 
1429 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1431 	if (rt->rt6i_dst.plen == 128)
1432 	       rt->dst.flags |= DST_HOST;
1433 
1434 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436 		if (!metrics) {
1437 			err = -ENOMEM;
1438 			goto out;
1439 		}
1440 		dst_init_metrics(&rt->dst, metrics, 0);
1441 	}
1442 #ifdef CONFIG_IPV6_SUBTREES
1443 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444 	rt->rt6i_src.plen = cfg->fc_src_len;
1445 #endif
1446 
1447 	rt->rt6i_metric = cfg->fc_metric;
1448 
1449 	/* We cannot add true routes via loopback here;
1450 	   they would result in kernel looping.  Promote them to reject routes.
1451 	 */
1452 	if ((cfg->fc_flags & RTF_REJECT) ||
1453 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1454 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455 	     !(cfg->fc_flags & RTF_LOCAL))) {
1456 		/* hold loopback dev/idev if we haven't done so. */
1457 		if (dev != net->loopback_dev) {
1458 			if (dev) {
1459 				dev_put(dev);
1460 				in6_dev_put(idev);
1461 			}
1462 			dev = net->loopback_dev;
1463 			dev_hold(dev);
1464 			idev = in6_dev_get(dev);
1465 			if (!idev) {
1466 				err = -ENODEV;
1467 				goto out;
1468 			}
1469 		}
1470 		rt->dst.output = ip6_pkt_discard_out;
1471 		rt->dst.input = ip6_pkt_discard;
1472 		rt->dst.error = -ENETUNREACH;
1473 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474 		goto install_route;
1475 	}
1476 
1477 	if (cfg->fc_flags & RTF_GATEWAY) {
1478 		const struct in6_addr *gw_addr;
1479 		int gwa_type;
1480 
1481 		gw_addr = &cfg->fc_gateway;
1482 		rt->rt6i_gateway = *gw_addr;
1483 		gwa_type = ipv6_addr_type(gw_addr);
1484 
1485 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486 			struct rt6_info *grt;
1487 
1488 			/* IPv6 strictly prohibits using non-link-local
1489 			   addresses as the nexthop address.
1490 			   Otherwise, the router would not be able to send redirects.
1491 			   That is a good rule, but in some (rare!) circumstances
1492 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1493 			   some exceptions. --ANK
1494 			 */
1495 			err = -EINVAL;
1496 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1497 				goto out;
1498 
1499 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1500 
1501 			err = -EHOSTUNREACH;
1502 			if (!grt)
1503 				goto out;
1504 			if (dev) {
1505 				if (dev != grt->dst.dev) {
1506 					dst_release(&grt->dst);
1507 					goto out;
1508 				}
1509 			} else {
1510 				dev = grt->dst.dev;
1511 				idev = grt->rt6i_idev;
1512 				dev_hold(dev);
1513 				in6_dev_hold(grt->rt6i_idev);
1514 			}
1515 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1516 				err = 0;
1517 			dst_release(&grt->dst);
1518 
1519 			if (err)
1520 				goto out;
1521 		}
1522 		err = -EINVAL;
1523 		if (!dev || (dev->flags & IFF_LOOPBACK))
1524 			goto out;
1525 	}
1526 
1527 	err = -ENODEV;
1528 	if (!dev)
1529 		goto out;
1530 
1531 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533 			err = -EINVAL;
1534 			goto out;
1535 		}
1536 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1537 		rt->rt6i_prefsrc.plen = 128;
1538 	} else
1539 		rt->rt6i_prefsrc.plen = 0;
1540 
1541 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1542 		err = rt6_bind_neighbour(rt, dev);
1543 		if (err)
1544 			goto out;
1545 	}
1546 
1547 	rt->rt6i_flags = cfg->fc_flags;
1548 
1549 install_route:
1550 	if (cfg->fc_mx) {
1551 		struct nlattr *nla;
1552 		int remaining;
1553 
1554 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1555 			int type = nla_type(nla);
1556 
1557 			if (type) {
1558 				if (type > RTAX_MAX) {
1559 					err = -EINVAL;
1560 					goto out;
1561 				}
1562 
1563 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1564 			}
1565 		}
1566 	}
1567 
1568 	rt->dst.dev = dev;
1569 	rt->rt6i_idev = idev;
1570 	rt->rt6i_table = table;
1571 
1572 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1573 
1574 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1575 
1576 out:
1577 	if (dev)
1578 		dev_put(dev);
1579 	if (idev)
1580 		in6_dev_put(idev);
1581 	if (rt)
1582 		dst_free(&rt->dst);
1583 	return err;
1584 }
1585 
1586 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1587 {
1588 	int err;
1589 	struct fib6_table *table;
1590 	struct net *net = dev_net(rt->dst.dev);
1591 
1592 	if (rt == net->ipv6.ip6_null_entry)
1593 		return -ENOENT;
1594 
1595 	table = rt->rt6i_table;
1596 	write_lock_bh(&table->tb6_lock);
1597 
1598 	err = fib6_del(rt, info);
1599 	dst_release(&rt->dst);
1600 
1601 	write_unlock_bh(&table->tb6_lock);
1602 
1603 	return err;
1604 }
1605 
1606 int ip6_del_rt(struct rt6_info *rt)
1607 {
1608 	struct nl_info info = {
1609 		.nl_net = dev_net(rt->dst.dev),
1610 	};
1611 	return __ip6_del_rt(rt, &info);
1612 }
1613 
1614 static int ip6_route_del(struct fib6_config *cfg)
1615 {
1616 	struct fib6_table *table;
1617 	struct fib6_node *fn;
1618 	struct rt6_info *rt;
1619 	int err = -ESRCH;
1620 
1621 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1622 	if (!table)
1623 		return err;
1624 
1625 	read_lock_bh(&table->tb6_lock);
1626 
1627 	fn = fib6_locate(&table->tb6_root,
1628 			 &cfg->fc_dst, cfg->fc_dst_len,
1629 			 &cfg->fc_src, cfg->fc_src_len);
1630 
1631 	if (fn) {
1632 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1633 			if (cfg->fc_ifindex &&
1634 			    (!rt->dst.dev ||
1635 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1636 				continue;
1637 			if (cfg->fc_flags & RTF_GATEWAY &&
1638 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1639 				continue;
1640 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1641 				continue;
1642 			dst_hold(&rt->dst);
1643 			read_unlock_bh(&table->tb6_lock);
1644 
1645 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1646 		}
1647 	}
1648 	read_unlock_bh(&table->tb6_lock);
1649 
1650 	return err;
1651 }
1652 
1653 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
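/* Process a received ND Redirect for the flow @dst was looked up for:
 * validate the ICMPv6 payload and ND options, update the neighbour cache
 * for the new first hop, and install an RTF_CACHE clone pointing at it,
 * removing the old cached entry if there was one.
 */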
1654 {
1655 	struct net *net = dev_net(skb->dev);
1656 	struct netevent_redirect netevent;
1657 	struct rt6_info *rt, *nrt = NULL;
1658 	const struct in6_addr *target;
1659 	struct ndisc_options ndopts;
1660 	const struct in6_addr *dest;
1661 	struct neighbour *old_neigh;
1662 	struct inet6_dev *in6_dev;
1663 	struct neighbour *neigh;
1664 	struct icmp6hdr *icmph;
1665 	int optlen, on_link;
1666 	u8 *lladdr;
1667 
1668 	optlen = skb->tail - skb->transport_header;
1669 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1670 
1671 	if (optlen < 0) {
1672 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1673 		return;
1674 	}
1675 
1676 	icmph = icmp6_hdr(skb);
1677 	target = (const struct in6_addr *) (icmph + 1);
1678 	dest = target + 1;
1679 
1680 	if (ipv6_addr_is_multicast(dest)) {
1681 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1682 		return;
1683 	}
1684 
1685 	on_link = 0;
1686 	if (ipv6_addr_equal(dest, target)) {
1687 		on_link = 1;
1688 	} else if (ipv6_addr_type(target) !=
1689 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1690 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1691 		return;
1692 	}
1693 
1694 	in6_dev = __in6_dev_get(skb->dev);
1695 	if (!in6_dev)
1696 		return;
1697 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1698 		return;
1699 
1700 	/* RFC2461 8.1:
1701 	 *	The IP source address of the Redirect MUST be the same as the current
1702 	 *	first-hop router for the specified ICMP Destination Address.
1703 	 */
1704 
1705 	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1706 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1707 		return;
1708 	}
1709 
1710 	lladdr = NULL;
1711 	if (ndopts.nd_opts_tgt_lladdr) {
1712 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1713 					     skb->dev);
1714 		if (!lladdr) {
1715 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1716 			return;
1717 		}
1718 	}
1719 
1720 	rt = (struct rt6_info *) dst;
1721 	if (rt == net->ipv6.ip6_null_entry) {
1722 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1723 		return;
1724 	}
1725 
1726 	/* Redirect received -> path was valid.
1727 	 * Look, redirects are sent only in response to data packets,
1728 	 * so that this nexthop apparently is reachable. --ANK
1729 	 */
1730 	dst_confirm(&rt->dst);
1731 
1732 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1733 	if (!neigh)
1734 		return;
1735 
1736 	/* Duplicate redirect: silently ignore. */
1737 	old_neigh = rt->n;
1738 	if (neigh == old_neigh)
1739 		goto out;
1740 
1741 	/*
1742 	 *	We have finally decided to accept it.
1743 	 */
1744 
1745 	neigh_update(neigh, lladdr, NUD_STALE,
1746 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1747 		     NEIGH_UPDATE_F_OVERRIDE|
1748 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1749 				     NEIGH_UPDATE_F_ISROUTER))
1750 		     );
1751 
1752 	nrt = ip6_rt_copy(rt, dest);
1753 	if (!nrt)
1754 		goto out;
1755 
1756 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1757 	if (on_link)
1758 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1759 
1760 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1761 	nrt->n = neigh_clone(neigh);
1762 
1763 	if (ip6_ins_rt(nrt))
1764 		goto out;
1765 
1766 	netevent.old = &rt->dst;
1767 	netevent.old_neigh = old_neigh;
1768 	netevent.new = &nrt->dst;
1769 	netevent.new_neigh = neigh;
1770 	netevent.daddr = dest;
1771 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1772 
1773 	if (rt->rt6i_flags & RTF_CACHE) {
1774 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1775 		ip6_del_rt(rt);
1776 	}
1777 
1778 out:
1779 	neigh_release(neigh);
1780 }
1781 
1782 /*
1783  *	Misc support functions
1784  */
1785 
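/* Copy an existing route into a fresh host route (plen 128) towards
 * @dest, sharing idev, metrics and flags with the original; this is the
 * basis for the RTF_CACHE clones created by the lookup and redirect
 * paths.
 */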
1786 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1787 				    const struct in6_addr *dest)
1788 {
1789 	struct net *net = dev_net(ort->dst.dev);
1790 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1791 					    ort->rt6i_table);
1792 
1793 	if (rt) {
1794 		rt->dst.input = ort->dst.input;
1795 		rt->dst.output = ort->dst.output;
1796 		rt->dst.flags |= DST_HOST;
1797 
1798 		rt->rt6i_dst.addr = *dest;
1799 		rt->rt6i_dst.plen = 128;
1800 		dst_copy_metrics(&rt->dst, &ort->dst);
1801 		rt->dst.error = ort->dst.error;
1802 		rt->rt6i_idev = ort->rt6i_idev;
1803 		if (rt->rt6i_idev)
1804 			in6_dev_hold(rt->rt6i_idev);
1805 		rt->dst.lastuse = jiffies;
1806 
1807 		rt->rt6i_gateway = ort->rt6i_gateway;
1808 		rt->rt6i_flags = ort->rt6i_flags;
1809 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1810 		    (RTF_DEFAULT | RTF_ADDRCONF))
1811 			rt6_set_from(rt, ort);
1812 		else
1813 			rt6_clean_expires(rt);
1814 		rt->rt6i_metric = 0;
1815 
1816 #ifdef CONFIG_IPV6_SUBTREES
1817 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1818 #endif
1819 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1820 		rt->rt6i_table = ort->rt6i_table;
1821 	}
1822 	return rt;
1823 }
1824 
1825 #ifdef CONFIG_IPV6_ROUTE_INFO
1826 static struct rt6_info *rt6_get_route_info(struct net *net,
1827 					   const struct in6_addr *prefix, int prefixlen,
1828 					   const struct in6_addr *gwaddr, int ifindex)
1829 {
1830 	struct fib6_node *fn;
1831 	struct rt6_info *rt = NULL;
1832 	struct fib6_table *table;
1833 
1834 	table = fib6_get_table(net, RT6_TABLE_INFO);
1835 	if (!table)
1836 		return NULL;
1837 
1838 	write_lock_bh(&table->tb6_lock);
1839 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1840 	if (!fn)
1841 		goto out;
1842 
1843 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1844 		if (rt->dst.dev->ifindex != ifindex)
1845 			continue;
1846 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1847 			continue;
1848 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1849 			continue;
1850 		dst_hold(&rt->dst);
1851 		break;
1852 	}
1853 out:
1854 	write_unlock_bh(&table->tb6_lock);
1855 	return rt;
1856 }
1857 
1858 static struct rt6_info *rt6_add_route_info(struct net *net,
1859 					   const struct in6_addr *prefix, int prefixlen,
1860 					   const struct in6_addr *gwaddr, int ifindex,
1861 					   unsigned int pref)
1862 {
1863 	struct fib6_config cfg = {
1864 		.fc_table	= RT6_TABLE_INFO,
1865 		.fc_metric	= IP6_RT_PRIO_USER,
1866 		.fc_ifindex	= ifindex,
1867 		.fc_dst_len	= prefixlen,
1868 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869 				  RTF_UP | RTF_PREF(pref),
1870 		.fc_nlinfo.pid = 0,
1871 		.fc_nlinfo.nlh = NULL,
1872 		.fc_nlinfo.nl_net = net,
1873 	};
1874 
1875 	cfg.fc_dst = *prefix;
1876 	cfg.fc_gateway = *gwaddr;
1877 
1878 	/* We should treat it as a default route if prefix length is 0. */
1879 	if (!prefixlen)
1880 		cfg.fc_flags |= RTF_DEFAULT;
1881 
1882 	ip6_route_add(&cfg);
1883 
1884 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1885 }
1886 #endif
1887 
1888 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1889 {
1890 	struct rt6_info *rt;
1891 	struct fib6_table *table;
1892 
1893 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1894 	if (!table)
1895 		return NULL;
1896 
1897 	write_lock_bh(&table->tb6_lock);
1898 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1899 		if (dev == rt->dst.dev &&
1900 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1901 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1902 			break;
1903 	}
1904 	if (rt)
1905 		dst_hold(&rt->dst);
1906 	write_unlock_bh(&table->tb6_lock);
1907 	return rt;
1908 }
1909 
1910 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1911 				     struct net_device *dev,
1912 				     unsigned int pref)
1913 {
1914 	struct fib6_config cfg = {
1915 		.fc_table	= RT6_TABLE_DFLT,
1916 		.fc_metric	= IP6_RT_PRIO_USER,
1917 		.fc_ifindex	= dev->ifindex,
1918 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1920 		.fc_nlinfo.pid = 0,
1921 		.fc_nlinfo.nlh = NULL,
1922 		.fc_nlinfo.nl_net = dev_net(dev),
1923 	};
1924 
1925 	cfg.fc_gateway = *gwaddr;
1926 
1927 	ip6_route_add(&cfg);
1928 
1929 	return rt6_get_dflt_router(gwaddr, dev);
1930 }
1931 
1932 void rt6_purge_dflt_routers(struct net *net)
1933 {
1934 	struct rt6_info *rt;
1935 	struct fib6_table *table;
1936 
1937 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1938 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1939 	if (!table)
1940 		return;
1941 
1942 restart:
1943 	read_lock_bh(&table->tb6_lock);
1944 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1945 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1946 			dst_hold(&rt->dst);
1947 			read_unlock_bh(&table->tb6_lock);
1948 			ip6_del_rt(rt);
1949 			goto restart;
1950 		}
1951 	}
1952 	read_unlock_bh(&table->tb6_lock);
1953 }
1954 
1955 static void rtmsg_to_fib6_config(struct net *net,
1956 				 struct in6_rtmsg *rtmsg,
1957 				 struct fib6_config *cfg)
1958 {
1959 	memset(cfg, 0, sizeof(*cfg));
1960 
1961 	cfg->fc_table = RT6_TABLE_MAIN;
1962 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963 	cfg->fc_metric = rtmsg->rtmsg_metric;
1964 	cfg->fc_expires = rtmsg->rtmsg_info;
1965 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967 	cfg->fc_flags = rtmsg->rtmsg_flags;
1968 
1969 	cfg->fc_nlinfo.nl_net = net;
1970 
1971 	cfg->fc_dst = rtmsg->rtmsg_dst;
1972 	cfg->fc_src = rtmsg->rtmsg_src;
1973 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1974 }
1975 
1976 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1977 {
1978 	struct fib6_config cfg;
1979 	struct in6_rtmsg rtmsg;
1980 	int err;
1981 
1982 	switch(cmd) {
1983 	case SIOCADDRT:		/* Add a route */
1984 	case SIOCDELRT:		/* Delete a route */
1985 		if (!capable(CAP_NET_ADMIN))
1986 			return -EPERM;
1987 		err = copy_from_user(&rtmsg, arg,
1988 				     sizeof(struct in6_rtmsg));
1989 		if (err)
1990 			return -EFAULT;
1991 
1992 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1993 
1994 		rtnl_lock();
1995 		switch (cmd) {
1996 		case SIOCADDRT:
1997 			err = ip6_route_add(&cfg);
1998 			break;
1999 		case SIOCDELRT:
2000 			err = ip6_route_del(&cfg);
2001 			break;
2002 		default:
2003 			err = -EINVAL;
2004 		}
2005 		rtnl_unlock();
2006 
2007 		return err;
2008 	}
2009 
2010 	return -EINVAL;
2011 }
2012 
2013 /*
2014  *	Drop the packet on the floor
2015  */
2016 
2017 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2018 {
2019 	int type;
2020 	struct dst_entry *dst = skb_dst(skb);
2021 	switch (ipstats_mib_noroutes) {
2022 	case IPSTATS_MIB_INNOROUTES:
2023 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2024 		if (type == IPV6_ADDR_ANY) {
2025 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026 				      IPSTATS_MIB_INADDRERRORS);
2027 			break;
2028 		}
2029 		/* FALLTHROUGH */
2030 	case IPSTATS_MIB_OUTNOROUTES:
2031 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032 			      ipstats_mib_noroutes);
2033 		break;
2034 	}
2035 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2036 	kfree_skb(skb);
2037 	return 0;
2038 }
2039 
2040 static int ip6_pkt_discard(struct sk_buff *skb)
2041 {
2042 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2043 }
2044 
2045 static int ip6_pkt_discard_out(struct sk_buff *skb)
2046 {
2047 	skb->dev = skb_dst(skb)->dev;
2048 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2049 }
2050 
2051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052 
2053 static int ip6_pkt_prohibit(struct sk_buff *skb)
2054 {
2055 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2056 }
2057 
2058 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059 {
2060 	skb->dev = skb_dst(skb)->dev;
2061 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2062 }
2063 
2064 #endif
2065 
2066 /*
2067  *	Allocate a dst for local (unicast / anycast) address.
2068  */
2069 
2070 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071 				    const struct in6_addr *addr,
2072 				    bool anycast)
2073 {
2074 	struct net *net = dev_net(idev->dev);
2075 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2076 	int err;
2077 
2078 	if (!rt) {
2079 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2080 		return ERR_PTR(-ENOMEM);
2081 	}
2082 
2083 	in6_dev_hold(idev);
2084 
2085 	rt->dst.flags |= DST_HOST;
2086 	rt->dst.input = ip6_input;
2087 	rt->dst.output = ip6_output;
2088 	rt->rt6i_idev = idev;
2089 
2090 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2091 	if (anycast)
2092 		rt->rt6i_flags |= RTF_ANYCAST;
2093 	else
2094 		rt->rt6i_flags |= RTF_LOCAL;
2095 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2096 	if (err) {
2097 		dst_free(&rt->dst);
2098 		return ERR_PTR(err);
2099 	}
2100 
2101 	rt->rt6i_dst.addr = *addr;
2102 	rt->rt6i_dst.plen = 128;
2103 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2104 
2105 	atomic_set(&rt->dst.__refcnt, 1);
2106 
2107 	return rt;
2108 }
2109 
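/*
 *	Pick a source address for @daddr: prefer the route's configured
 *	preferred source (prefsrc); otherwise fall back to standard source
 *	address selection on the route's interface.
 */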
2110 int ip6_route_get_saddr(struct net *net,
2111 			struct rt6_info *rt,
2112 			const struct in6_addr *daddr,
2113 			unsigned int prefs,
2114 			struct in6_addr *saddr)
2115 {
2116 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2117 	int err = 0;
2118 	if (rt->rt6i_prefsrc.plen)
2119 		*saddr = rt->rt6i_prefsrc.addr;
2120 	else
2121 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2122 					 daddr, prefs, saddr);
2123 	return err;
2124 }
2125 
2126 /* remove deleted ip from prefsrc entries */
2127 struct arg_dev_net_ip {
2128 	struct net_device *dev;
2129 	struct net *net;
2130 	struct in6_addr *addr;
2131 };
2132 
2133 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2134 {
2135 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2136 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2137 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2138 
2139 	if (((void *)rt->dst.dev == dev || !dev) &&
2140 	    rt != net->ipv6.ip6_null_entry &&
2141 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2142 		/* remove prefsrc entry */
2143 		rt->rt6i_prefsrc.plen = 0;
2144 	}
2145 	return 0;
2146 }
2147 
2148 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2149 {
2150 	struct net *net = dev_net(ifp->idev->dev);
2151 	struct arg_dev_net_ip adni = {
2152 		.dev = ifp->idev->dev,
2153 		.net = net,
2154 		.addr = &ifp->addr,
2155 	};
2156 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2157 }
2158 
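/*
 *	Device-down handling: rt6_ifdown() removes every route whose output
 *	device is @dev (or all routes except the null entry when @dev is NULL)
 *	from the FIB and from the ICMPv6 socket dst caches.
 */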
2159 struct arg_dev_net {
2160 	struct net_device *dev;
2161 	struct net *net;
2162 };
2163 
2164 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2165 {
2166 	const struct arg_dev_net *adn = arg;
2167 	const struct net_device *dev = adn->dev;
2168 
2169 	if ((rt->dst.dev == dev || !dev) &&
2170 	    rt != adn->net->ipv6.ip6_null_entry)
2171 		return -1;
2172 
2173 	return 0;
2174 }
2175 
2176 void rt6_ifdown(struct net *net, struct net_device *dev)
2177 {
2178 	struct arg_dev_net adn = {
2179 		.dev = dev,
2180 		.net = net,
2181 	};
2182 
2183 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2184 	icmp6_clean_all(fib6_ifdown, &adn);
2185 }
2186 
2187 struct rt6_mtu_change_arg {
2188 	struct net_device *dev;
2189 	unsigned int mtu;
2190 };
2191 
2192 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2193 {
2194 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2195 	struct inet6_dev *idev;
2196 
2197 	/* In IPv6, PMTU discovery is not optional,
2198 	   so the RTAX_MTU lock cannot disable it.
2199 	   We still use this lock to block changes
2200 	   caused by addrconf/ndisc.
2201 	*/
2202 
2203 	idev = __in6_dev_get(arg->dev);
2204 	if (!idev)
2205 		return 0;
2206 
2207 	/* For an administrative MTU increase, there is no way to discover
2208 	   an IPv6 PMTU increase, so the PMTU increase must be applied here.
2209 	   Since RFC 1981 doesn't cover updating the PMTU after an
2210 	   administrative MTU increase, doing so here is a MUST (e.g. jumbo frames).
2211 	 */
2212 	/*
2213 	   If the new MTU is less than the route PMTU, the new MTU will be the
2214 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
2215 	   decrease.  If the new MTU is greater than the route PMTU, and the
2216 	   old MTU was the lowest MTU in the path, update the route PMTU to
2217 	   reflect the increase.  In that case, if another node on the path
2218 	   still has a lower MTU, a Packet Too Big message will trigger PMTU
2219 	   discovery again.
2220 	 */
2221 	if (rt->dst.dev == arg->dev &&
2222 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2223 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2224 	     (dst_mtu(&rt->dst) < arg->mtu &&
2225 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2226 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2227 	}
2228 	return 0;
2229 }
2230 
2231 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2232 {
2233 	struct rt6_mtu_change_arg arg = {
2234 		.dev = dev,
2235 		.mtu = mtu,
2236 	};
2237 
2238 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2239 }
2240 
2241 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2242 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2243 	[RTA_OIF]               = { .type = NLA_U32 },
2244 	[RTA_IIF]		= { .type = NLA_U32 },
2245 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2246 	[RTA_METRICS]           = { .type = NLA_NESTED },
2247 };
2248 
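/*
 *	Parse an RTM_NEWROUTE/RTM_DELROUTE request into a struct fib6_config,
 *	translating the route type and RTA_* attributes into RTF_* flags and
 *	config fields.
 */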
2249 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2250 			      struct fib6_config *cfg)
2251 {
2252 	struct rtmsg *rtm;
2253 	struct nlattr *tb[RTA_MAX+1];
2254 	int err;
2255 
2256 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2257 	if (err < 0)
2258 		goto errout;
2259 
2260 	err = -EINVAL;
2261 	rtm = nlmsg_data(nlh);
2262 	memset(cfg, 0, sizeof(*cfg));
2263 
2264 	cfg->fc_table = rtm->rtm_table;
2265 	cfg->fc_dst_len = rtm->rtm_dst_len;
2266 	cfg->fc_src_len = rtm->rtm_src_len;
2267 	cfg->fc_flags = RTF_UP;
2268 	cfg->fc_protocol = rtm->rtm_protocol;
2269 
2270 	if (rtm->rtm_type == RTN_UNREACHABLE)
2271 		cfg->fc_flags |= RTF_REJECT;
2272 
2273 	if (rtm->rtm_type == RTN_LOCAL)
2274 		cfg->fc_flags |= RTF_LOCAL;
2275 
2276 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2277 	cfg->fc_nlinfo.nlh = nlh;
2278 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2279 
2280 	if (tb[RTA_GATEWAY]) {
2281 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2282 		cfg->fc_flags |= RTF_GATEWAY;
2283 	}
2284 
2285 	if (tb[RTA_DST]) {
2286 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2287 
2288 		if (nla_len(tb[RTA_DST]) < plen)
2289 			goto errout;
2290 
2291 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2292 	}
2293 
2294 	if (tb[RTA_SRC]) {
2295 		int plen = (rtm->rtm_src_len + 7) >> 3;
2296 
2297 		if (nla_len(tb[RTA_SRC]) < plen)
2298 			goto errout;
2299 
2300 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2301 	}
2302 
2303 	if (tb[RTA_PREFSRC])
2304 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2305 
2306 	if (tb[RTA_OIF])
2307 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2308 
2309 	if (tb[RTA_PRIORITY])
2310 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2311 
2312 	if (tb[RTA_METRICS]) {
2313 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2314 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2315 	}
2316 
2317 	if (tb[RTA_TABLE])
2318 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2319 
2320 	err = 0;
2321 errout:
2322 	return err;
2323 }
2324 
2325 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2326 {
2327 	struct fib6_config cfg;
2328 	int err;
2329 
2330 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2331 	if (err < 0)
2332 		return err;
2333 
2334 	return ip6_route_del(&cfg);
2335 }
2336 
2337 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2338 {
2339 	struct fib6_config cfg;
2340 	int err;
2341 
2342 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2343 	if (err < 0)
2344 		return err;
2345 
2346 	return ip6_route_add(&cfg);
2347 }
2348 
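/*
 *	Upper bound on the size of a single route message; used by
 *	inet6_rt_notify() to size the notification skb.
 */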
2349 static inline size_t rt6_nlmsg_size(void)
2350 {
2351 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2352 	       + nla_total_size(16) /* RTA_SRC */
2353 	       + nla_total_size(16) /* RTA_DST */
2354 	       + nla_total_size(16) /* RTA_GATEWAY */
2355 	       + nla_total_size(16) /* RTA_PREFSRC */
2356 	       + nla_total_size(4) /* RTA_TABLE */
2357 	       + nla_total_size(4) /* RTA_IIF */
2358 	       + nla_total_size(4) /* RTA_OIF */
2359 	       + nla_total_size(4) /* RTA_PRIORITY */
2360 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2361 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2362 }
2363 
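/*
 *	Fill one route message for @rt into @skb.  Returns 1 (with nothing
 *	emitted) when a prefix-only dump meets a non-prefix route, and
 *	-EMSGSIZE when the message does not fit.
 */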
2364 static int rt6_fill_node(struct net *net,
2365 			 struct sk_buff *skb, struct rt6_info *rt,
2366 			 struct in6_addr *dst, struct in6_addr *src,
2367 			 int iif, int type, u32 pid, u32 seq,
2368 			 int prefix, int nowait, unsigned int flags)
2369 {
2370 	struct rtmsg *rtm;
2371 	struct nlmsghdr *nlh;
2372 	long expires;
2373 	u32 table;
2374 	struct neighbour *n;
2375 
2376 	if (prefix) {	/* user wants prefix routes only */
2377 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2378 			/* success since this is not a prefix route */
2379 			return 1;
2380 		}
2381 	}
2382 
2383 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2384 	if (!nlh)
2385 		return -EMSGSIZE;
2386 
2387 	rtm = nlmsg_data(nlh);
2388 	rtm->rtm_family = AF_INET6;
2389 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2390 	rtm->rtm_src_len = rt->rt6i_src.plen;
2391 	rtm->rtm_tos = 0;
2392 	if (rt->rt6i_table)
2393 		table = rt->rt6i_table->tb6_id;
2394 	else
2395 		table = RT6_TABLE_UNSPEC;
2396 	rtm->rtm_table = table;
2397 	if (nla_put_u32(skb, RTA_TABLE, table))
2398 		goto nla_put_failure;
2399 	if (rt->rt6i_flags & RTF_REJECT)
2400 		rtm->rtm_type = RTN_UNREACHABLE;
2401 	else if (rt->rt6i_flags & RTF_LOCAL)
2402 		rtm->rtm_type = RTN_LOCAL;
2403 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2404 		rtm->rtm_type = RTN_LOCAL;
2405 	else
2406 		rtm->rtm_type = RTN_UNICAST;
2407 	rtm->rtm_flags = 0;
2408 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2409 	rtm->rtm_protocol = rt->rt6i_protocol;
2410 	if (rt->rt6i_flags & RTF_DYNAMIC)
2411 		rtm->rtm_protocol = RTPROT_REDIRECT;
2412 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2413 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2414 			rtm->rtm_protocol = RTPROT_RA;
2415 		else
2416 			rtm->rtm_protocol = RTPROT_KERNEL;
2417 	}
2418 
2419 	if (rt->rt6i_flags & RTF_CACHE)
2420 		rtm->rtm_flags |= RTM_F_CLONED;
2421 
2422 	if (dst) {
2423 		if (nla_put(skb, RTA_DST, 16, dst))
2424 			goto nla_put_failure;
2425 		rtm->rtm_dst_len = 128;
2426 	} else if (rtm->rtm_dst_len)
2427 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2428 			goto nla_put_failure;
2429 #ifdef CONFIG_IPV6_SUBTREES
2430 	if (src) {
2431 		if (nla_put(skb, RTA_SRC, 16, src))
2432 			goto nla_put_failure;
2433 		rtm->rtm_src_len = 128;
2434 	} else if (rtm->rtm_src_len &&
2435 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2436 		goto nla_put_failure;
2437 #endif
2438 	if (iif) {
2439 #ifdef CONFIG_IPV6_MROUTE
2440 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2441 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2442 			if (err <= 0) {
2443 				if (!nowait) {
2444 					if (err == 0)
2445 						return 0;
2446 					goto nla_put_failure;
2447 				} else {
2448 					if (err == -EMSGSIZE)
2449 						goto nla_put_failure;
2450 				}
2451 			}
2452 		} else
2453 #endif
2454 			if (nla_put_u32(skb, RTA_IIF, iif))
2455 				goto nla_put_failure;
2456 	} else if (dst) {
2457 		struct in6_addr saddr_buf;
2458 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2459 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2460 			goto nla_put_failure;
2461 	}
2462 
2463 	if (rt->rt6i_prefsrc.plen) {
2464 		struct in6_addr saddr_buf;
2465 		saddr_buf = rt->rt6i_prefsrc.addr;
2466 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2467 			goto nla_put_failure;
2468 	}
2469 
2470 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2471 		goto nla_put_failure;
2472 
2473 	rcu_read_lock();
2474 	n = rt->n;
2475 	if (n) {
2476 		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2477 			rcu_read_unlock();
2478 			goto nla_put_failure;
2479 		}
2480 	}
2481 	rcu_read_unlock();
2482 
2483 	if (rt->dst.dev &&
2484 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2485 		goto nla_put_failure;
2486 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2487 		goto nla_put_failure;
2488 
2489 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2490 
2491 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2492 		goto nla_put_failure;
2493 
2494 	return nlmsg_end(skb, nlh);
2495 
2496 nla_put_failure:
2497 	nlmsg_cancel(skb, nlh);
2498 	return -EMSGSIZE;
2499 }
2500 
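/*
 *	Per-route callback for route dumps.  If the request sets RTM_F_PREFIX,
 *	only prefix routes (RTF_PREFIX_RT) are reported.
 */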
2501 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2502 {
2503 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2504 	int prefix;
2505 
2506 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2507 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2508 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2509 	} else
2510 		prefix = 0;
2511 
2512 	return rt6_fill_node(arg->net,
2513 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2514 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2515 		     prefix, 0, NLM_F_MULTI);
2516 }
2517 
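/*
 *	RTM_GETROUTE handler: build a flow from the RTA_SRC/RTA_DST/RTA_IIF/
 *	RTA_OIF attributes, resolve it as an input route when an input
 *	interface is given (as an output route otherwise) and unicast the
 *	result back to the requester.
 */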
2518 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2519 {
2520 	struct net *net = sock_net(in_skb->sk);
2521 	struct nlattr *tb[RTA_MAX+1];
2522 	struct rt6_info *rt;
2523 	struct sk_buff *skb;
2524 	struct rtmsg *rtm;
2525 	struct flowi6 fl6;
2526 	int err, iif = 0, oif = 0;
2527 
2528 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2529 	if (err < 0)
2530 		goto errout;
2531 
2532 	err = -EINVAL;
2533 	memset(&fl6, 0, sizeof(fl6));
2534 
2535 	if (tb[RTA_SRC]) {
2536 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2537 			goto errout;
2538 
2539 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2540 	}
2541 
2542 	if (tb[RTA_DST]) {
2543 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2544 			goto errout;
2545 
2546 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2547 	}
2548 
2549 	if (tb[RTA_IIF])
2550 		iif = nla_get_u32(tb[RTA_IIF]);
2551 
2552 	if (tb[RTA_OIF])
2553 		oif = nla_get_u32(tb[RTA_OIF]);
2554 
2555 	if (iif) {
2556 		struct net_device *dev;
2557 		int flags = 0;
2558 
2559 		dev = __dev_get_by_index(net, iif);
2560 		if (!dev) {
2561 			err = -ENODEV;
2562 			goto errout;
2563 		}
2564 
2565 		fl6.flowi6_iif = iif;
2566 
2567 		if (!ipv6_addr_any(&fl6.saddr))
2568 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2569 
2570 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2571 							       flags);
2572 	} else {
2573 		fl6.flowi6_oif = oif;
2574 
2575 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2576 	}
2577 
2578 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2579 	if (!skb) {
2580 		dst_release(&rt->dst);
2581 		err = -ENOBUFS;
2582 		goto errout;
2583 	}
2584 
2585 	/* Reserve room for dummy headers; this skb can pass
2586 	   through a good chunk of the routing engine.
2587 	 */
2588 	skb_reset_mac_header(skb);
2589 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2590 
2591 	skb_dst_set(skb, &rt->dst);
2592 
2593 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2594 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2595 			    nlh->nlmsg_seq, 0, 0, 0);
2596 	if (err < 0) {
2597 		kfree_skb(skb);
2598 		goto errout;
2599 	}
2600 
2601 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2602 errout:
2603 	return err;
2604 }
2605 
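/*
 *	Send a route change notification for @rt to the RTNLGRP_IPV6_ROUTE
 *	multicast group.
 */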
2606 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2607 {
2608 	struct sk_buff *skb;
2609 	struct net *net = info->nl_net;
2610 	u32 seq;
2611 	int err;
2612 
2613 	err = -ENOBUFS;
2614 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2615 
2616 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2617 	if (!skb)
2618 		goto errout;
2619 
2620 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2621 				event, info->pid, seq, 0, 0, 0);
2622 	if (err < 0) {
2623 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624 		WARN_ON(err == -EMSGSIZE);
2625 		kfree_skb(skb);
2626 		goto errout;
2627 	}
2628 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629 		    info->nlh, gfp_any());
2630 	return;
2631 errout:
2632 	if (err < 0)
2633 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2634 }
2635 
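/*
 *	When the loopback device registers, attach it to the special null
 *	(and, with multiple tables, prohibit/blackhole) routes so they always
 *	have a valid output device.
 */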
2636 static int ip6_route_dev_notify(struct notifier_block *this,
2637 				unsigned long event, void *data)
2638 {
2639 	struct net_device *dev = (struct net_device *)data;
2640 	struct net *net = dev_net(dev);
2641 
2642 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2643 		net->ipv6.ip6_null_entry->dst.dev = dev;
2644 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2646 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2647 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2648 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2649 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650 #endif
2651 	}
2652 
2653 	return NOTIFY_OK;
2654 }
2655 
2656 /*
2657  *	/proc
2658  */
2659 
2660 #ifdef CONFIG_PROC_FS
2661 
2662 struct rt6_proc_arg {
2664 	char *buffer;
2665 	int offset;
2666 	int length;
2667 	int skip;
2668 	int len;
2669 };
2670 
2671 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672 {
2673 	struct seq_file *m = p_arg;
2674 	struct neighbour *n;
2675 
2676 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2677 
2678 #ifdef CONFIG_IPV6_SUBTREES
2679 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2680 #else
2681 	seq_puts(m, "00000000000000000000000000000000 00 ");
2682 #endif
2683 	rcu_read_lock();
2684 	n = rt->n;
2685 	if (n) {
2686 		seq_printf(m, "%pi6", n->primary_key);
2687 	} else {
2688 		seq_puts(m, "00000000000000000000000000000000");
2689 	}
2690 	rcu_read_unlock();
2691 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2692 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693 		   rt->dst.__use, rt->rt6i_flags,
2694 		   rt->dst.dev ? rt->dst.dev->name : "");
2695 	return 0;
2696 }
2697 
2698 static int ipv6_route_show(struct seq_file *m, void *v)
2699 {
2700 	struct net *net = (struct net *)m->private;
2701 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2702 	return 0;
2703 }
2704 
2705 static int ipv6_route_open(struct inode *inode, struct file *file)
2706 {
2707 	return single_open_net(inode, file, ipv6_route_show);
2708 }
2709 
2710 static const struct file_operations ipv6_route_proc_fops = {
2711 	.owner		= THIS_MODULE,
2712 	.open		= ipv6_route_open,
2713 	.read		= seq_read,
2714 	.llseek		= seq_lseek,
2715 	.release	= single_release_net,
2716 };
2717 
2718 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719 {
2720 	struct net *net = (struct net *)seq->private;
2721 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2722 		   net->ipv6.rt6_stats->fib_nodes,
2723 		   net->ipv6.rt6_stats->fib_route_nodes,
2724 		   net->ipv6.rt6_stats->fib_rt_alloc,
2725 		   net->ipv6.rt6_stats->fib_rt_entries,
2726 		   net->ipv6.rt6_stats->fib_rt_cache,
2727 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2728 		   net->ipv6.rt6_stats->fib_discarded_routes);
2729 
2730 	return 0;
2731 }
2732 
2733 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734 {
2735 	return single_open_net(inode, file, rt6_stats_seq_show);
2736 }
2737 
2738 static const struct file_operations rt6_stats_seq_fops = {
2739 	.owner	 = THIS_MODULE,
2740 	.open	 = rt6_stats_seq_open,
2741 	.read	 = seq_read,
2742 	.llseek	 = seq_lseek,
2743 	.release = single_release_net,
2744 };
2745 #endif	/* CONFIG_PROC_FS */
2746 
2747 #ifdef CONFIG_SYSCTL
2748 
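/*
 *	Handler for net.ipv6.route.flush: writing to it triggers a garbage
 *	collection pass over the FIB via fib6_run_gc().  Reads are rejected.
 */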
2749 static
2750 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2751 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2752 {
2753 	struct net *net;
2754 	int delay;
2755 	if (!write)
2756 		return -EINVAL;
2757 
2758 	net = (struct net *)ctl->extra1;
2759 	delay = net->ipv6.sysctl.flush_delay;
2760 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2761 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762 	return 0;
2763 }
2764 
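/*
 *	Template for the per-namespace sysctls under /proc/sys/net/ipv6/route/.
 *	ipv6_route_sysctl_init() below duplicates this table and repoints each
 *	.data pointer at the namespace's own fields.  Illustrative example:
 *	"sysctl -w net.ipv6.route.max_size=16384" adjusts ip6_rt_max_size for
 *	the writer's namespace.
 */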
2765 ctl_table ipv6_route_table_template[] = {
2766 	{
2767 		.procname	=	"flush",
2768 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2769 		.maxlen		=	sizeof(int),
2770 		.mode		=	0200,
2771 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2772 	},
2773 	{
2774 		.procname	=	"gc_thresh",
2775 		.data		=	&ip6_dst_ops_template.gc_thresh,
2776 		.maxlen		=	sizeof(int),
2777 		.mode		=	0644,
2778 		.proc_handler	=	proc_dointvec,
2779 	},
2780 	{
2781 		.procname	=	"max_size",
2782 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2783 		.maxlen		=	sizeof(int),
2784 		.mode		=	0644,
2785 		.proc_handler	=	proc_dointvec,
2786 	},
2787 	{
2788 		.procname	=	"gc_min_interval",
2789 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2790 		.maxlen		=	sizeof(int),
2791 		.mode		=	0644,
2792 		.proc_handler	=	proc_dointvec_jiffies,
2793 	},
2794 	{
2795 		.procname	=	"gc_timeout",
2796 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2797 		.maxlen		=	sizeof(int),
2798 		.mode		=	0644,
2799 		.proc_handler	=	proc_dointvec_jiffies,
2800 	},
2801 	{
2802 		.procname	=	"gc_interval",
2803 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2804 		.maxlen		=	sizeof(int),
2805 		.mode		=	0644,
2806 		.proc_handler	=	proc_dointvec_jiffies,
2807 	},
2808 	{
2809 		.procname	=	"gc_elasticity",
2810 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2811 		.maxlen		=	sizeof(int),
2812 		.mode		=	0644,
2813 		.proc_handler	=	proc_dointvec,
2814 	},
2815 	{
2816 		.procname	=	"mtu_expires",
2817 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2818 		.maxlen		=	sizeof(int),
2819 		.mode		=	0644,
2820 		.proc_handler	=	proc_dointvec_jiffies,
2821 	},
2822 	{
2823 		.procname	=	"min_adv_mss",
2824 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2825 		.maxlen		=	sizeof(int),
2826 		.mode		=	0644,
2827 		.proc_handler	=	proc_dointvec,
2828 	},
2829 	{
2830 		.procname	=	"gc_min_interval_ms",
2831 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832 		.maxlen		=	sizeof(int),
2833 		.mode		=	0644,
2834 		.proc_handler	=	proc_dointvec_ms_jiffies,
2835 	},
2836 	{ }
2837 };
2838 
2839 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2840 {
2841 	struct ctl_table *table;
2842 
2843 	table = kmemdup(ipv6_route_table_template,
2844 			sizeof(ipv6_route_table_template),
2845 			GFP_KERNEL);
2846 
2847 	if (table) {
2848 		table[0].data = &net->ipv6.sysctl.flush_delay;
2849 		table[0].extra1 = net;
2850 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2851 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2858 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2859 	}
2860 
2861 	return table;
2862 }
2863 #endif
2864 
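/*
 *	Per-namespace setup: copy the dst_ops template, allocate this
 *	namespace's null (and, with multiple tables, prohibit/blackhole)
 *	routes from the templates and set the default route sysctl values.
 */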
2865 static int __net_init ip6_route_net_init(struct net *net)
2866 {
2867 	int ret = -ENOMEM;
2868 
2869 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870 	       sizeof(net->ipv6.ip6_dst_ops));
2871 
2872 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873 		goto out_ip6_dst_ops;
2874 
2875 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876 					   sizeof(*net->ipv6.ip6_null_entry),
2877 					   GFP_KERNEL);
2878 	if (!net->ipv6.ip6_null_entry)
2879 		goto out_ip6_dst_entries;
2880 	net->ipv6.ip6_null_entry->dst.path =
2881 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2882 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2883 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884 			 ip6_template_metrics, true);
2885 
2886 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2889 					       GFP_KERNEL);
2890 	if (!net->ipv6.ip6_prohibit_entry)
2891 		goto out_ip6_null_entry;
2892 	net->ipv6.ip6_prohibit_entry->dst.path =
2893 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2894 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2895 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896 			 ip6_template_metrics, true);
2897 
2898 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2900 					       GFP_KERNEL);
2901 	if (!net->ipv6.ip6_blk_hole_entry)
2902 		goto out_ip6_prohibit_entry;
2903 	net->ipv6.ip6_blk_hole_entry->dst.path =
2904 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2905 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2906 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907 			 ip6_template_metrics, true);
2908 #endif
2909 
2910 	net->ipv6.sysctl.flush_delay = 0;
2911 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918 
2919 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2920 
2921 	ret = 0;
2922 out:
2923 	return ret;
2924 
2925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926 out_ip6_prohibit_entry:
2927 	kfree(net->ipv6.ip6_prohibit_entry);
2928 out_ip6_null_entry:
2929 	kfree(net->ipv6.ip6_null_entry);
2930 #endif
2931 out_ip6_dst_entries:
2932 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2933 out_ip6_dst_ops:
2934 	goto out;
2935 }
2936 
2937 static void __net_exit ip6_route_net_exit(struct net *net)
2938 {
2939 	kfree(net->ipv6.ip6_null_entry);
2940 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941 	kfree(net->ipv6.ip6_prohibit_entry);
2942 	kfree(net->ipv6.ip6_blk_hole_entry);
2943 #endif
2944 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2945 }
2946 
2947 static int __net_init ip6_route_net_init_late(struct net *net)
2948 {
2949 #ifdef CONFIG_PROC_FS
2950 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2951 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2952 #endif
2953 	return 0;
2954 }
2955 
2956 static void __net_exit ip6_route_net_exit_late(struct net *net)
2957 {
2958 #ifdef CONFIG_PROC_FS
2959 	proc_net_remove(net, "ipv6_route");
2960 	proc_net_remove(net, "rt6_stats");
2961 #endif
2962 }
2963 
2964 static struct pernet_operations ip6_route_net_ops = {
2965 	.init = ip6_route_net_init,
2966 	.exit = ip6_route_net_exit,
2967 };
2968 
2969 static int __net_init ipv6_inetpeer_init(struct net *net)
2970 {
2971 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2972 
2973 	if (!bp)
2974 		return -ENOMEM;
2975 	inet_peer_base_init(bp);
2976 	net->ipv6.peers = bp;
2977 	return 0;
2978 }
2979 
2980 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2981 {
2982 	struct inet_peer_base *bp = net->ipv6.peers;
2983 
2984 	net->ipv6.peers = NULL;
2985 	inetpeer_invalidate_tree(bp);
2986 	kfree(bp);
2987 }
2988 
2989 static struct pernet_operations ipv6_inetpeer_ops = {
2990 	.init	=	ipv6_inetpeer_init,
2991 	.exit	=	ipv6_inetpeer_exit,
2992 };
2993 
2994 static struct pernet_operations ip6_route_net_late_ops = {
2995 	.init = ip6_route_net_init_late,
2996 	.exit = ip6_route_net_exit_late,
2997 };
2998 
2999 static struct notifier_block ip6_route_dev_notifier = {
3000 	.notifier_call = ip6_route_dev_notify,
3001 	.priority = 0,
3002 };
3003 
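/*
 *	Subsystem init: create the rt6_info slab cache, register the
 *	per-namespace operations, hook the init_net special routes to the
 *	loopback device, bring up the FIB, xfrm6 and fib6-rules layers, and
 *	finally register the rtnetlink handlers and the netdevice notifier.
 *	Errors unwind in reverse order.
 */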
3004 int __init ip6_route_init(void)
3005 {
3006 	int ret;
3007 
3008 	ret = -ENOMEM;
3009 	ip6_dst_ops_template.kmem_cachep =
3010 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3011 				  SLAB_HWCACHE_ALIGN, NULL);
3012 	if (!ip6_dst_ops_template.kmem_cachep)
3013 		goto out;
3014 
3015 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3016 	if (ret)
3017 		goto out_kmem_cache;
3018 
3019 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3020 	if (ret)
3021 		goto out_dst_entries;
3022 
3023 	ret = register_pernet_subsys(&ip6_route_net_ops);
3024 	if (ret)
3025 		goto out_register_inetpeer;
3026 
3027 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3028 
3029 	/* The loopback device is registered before this code runs, so the
3030 	 * loopback reference in rt6_info has not been taken yet; take it
3031 	 * manually for init_net. */
3032 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3033 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3034 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3035 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3036 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3037 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3038 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3039 #endif
3040 	ret = fib6_init();
3041 	if (ret)
3042 		goto out_register_subsys;
3043 
3044 	ret = xfrm6_init();
3045 	if (ret)
3046 		goto out_fib6_init;
3047 
3048 	ret = fib6_rules_init();
3049 	if (ret)
3050 		goto xfrm6_init;
3051 
3052 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3053 	if (ret)
3054 		goto fib6_rules_init;
3055 
3056 	ret = -ENOBUFS;
3057 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3058 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3059 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3060 		goto out_register_late_subsys;
3061 
3062 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3063 	if (ret)
3064 		goto out_register_late_subsys;
3065 
3066 out:
3067 	return ret;
3068 
3069 out_register_late_subsys:
3070 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3071 fib6_rules_init:
3072 	fib6_rules_cleanup();
3073 xfrm6_init:
3074 	xfrm6_fini();
3075 out_fib6_init:
3076 	fib6_gc_cleanup();
3077 out_register_subsys:
3078 	unregister_pernet_subsys(&ip6_route_net_ops);
3079 out_register_inetpeer:
3080 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3081 out_dst_entries:
3082 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3083 out_kmem_cache:
3084 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3085 	goto out;
3086 }
3087 
3088 void ip6_route_cleanup(void)
3089 {
3090 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3091 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3092 	fib6_rules_cleanup();
3093 	xfrm6_fini();
3094 	fib6_gc_cleanup();
3095 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3096 	unregister_pernet_subsys(&ip6_route_net_ops);
3097 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3098 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3099 }
3100