xref: /linux/net/ipv6/route.c (revision f49f4ab95c301dbccad0efe85296d908b8ae7ad4)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 
61 #include <asm/uaccess.h>
62 
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66 
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 				    const struct in6_addr *dest);
69 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void		ip6_dst_destroy(struct dst_entry *);
74 static void		ip6_dst_ifdown(struct dst_entry *,
75 				       struct net_device *dev, int how);
76 static int		 ip6_dst_gc(struct dst_ops *ops);
77 
78 static int		ip6_pkt_discard(struct sk_buff *skb);
79 static int		ip6_pkt_discard_out(struct sk_buff *skb);
80 static void		ip6_link_failure(struct sk_buff *skb);
81 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 					   struct sk_buff *skb, u32 mtu);
83 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 					struct sk_buff *skb);
85 
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88 					   const struct in6_addr *prefix, int prefixlen,
89 					   const struct in6_addr *gwaddr, int ifindex,
90 					   unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92 					   const struct in6_addr *prefix, int prefixlen,
93 					   const struct in6_addr *gwaddr, int ifindex);
94 #endif
95 
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 	struct rt6_info *rt = (struct rt6_info *) dst;
99 	struct inet_peer *peer;
100 	u32 *p = NULL;
101 
102 	if (!(rt->dst.flags & DST_HOST))
103 		return NULL;
104 
105 	peer = rt6_get_peer_create(rt);
106 	if (peer) {
107 		u32 *old_p = __DST_METRICS_PTR(old);
108 		unsigned long prev, new;
109 
110 		p = peer->metrics;
111 		if (inet_metrics_new(peer))
112 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113 
114 		new = (unsigned long) p;
115 		prev = cmpxchg(&dst->_metrics, old, new);
116 
117 		if (prev != old) {
118 			p = __DST_METRICS_PTR(prev);
119 			if (prev & DST_METRICS_READ_ONLY)
120 				p = NULL;
121 		}
122 	}
123 	return p;
124 }
125 
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 					     struct sk_buff *skb,
128 					     const void *daddr)
129 {
130 	struct in6_addr *p = &rt->rt6i_gateway;
131 
132 	if (!ipv6_addr_any(p))
133 		return (const void *) p;
134 	else if (skb)
135 		return &ipv6_hdr(skb)->daddr;
136 	return daddr;
137 }
138 
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 					  struct sk_buff *skb,
141 					  const void *daddr)
142 {
143 	struct rt6_info *rt = (struct rt6_info *) dst;
144 	struct neighbour *n;
145 
146 	daddr = choose_neigh_daddr(rt, skb, daddr);
147 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 	if (n)
149 		return n;
150 	return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152 
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 	if (!n) {
157 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 		if (IS_ERR(n))
159 			return PTR_ERR(n);
160 	}
161 	rt->n = n;
162 
163 	return 0;
164 }
165 
166 static struct dst_ops ip6_dst_ops_template = {
167 	.family			=	AF_INET6,
168 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
169 	.gc			=	ip6_dst_gc,
170 	.gc_thresh		=	1024,
171 	.check			=	ip6_dst_check,
172 	.default_advmss		=	ip6_default_advmss,
173 	.mtu			=	ip6_mtu,
174 	.cow_metrics		=	ipv6_cow_metrics,
175 	.destroy		=	ip6_dst_destroy,
176 	.ifdown			=	ip6_dst_ifdown,
177 	.negative_advice	=	ip6_negative_advice,
178 	.link_failure		=	ip6_link_failure,
179 	.update_pmtu		=	ip6_rt_update_pmtu,
180 	.redirect		=	rt6_do_redirect,
181 	.local_out		=	__ip6_local_out,
182 	.neigh_lookup		=	ip6_neigh_lookup,
183 };
184 
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188 
189 	return mtu ? : dst->dev->mtu;
190 }
191 
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 					 struct sk_buff *skb, u32 mtu)
194 {
195 }
196 
/* dst_ops->redirect for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201 
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 					 unsigned long old)
204 {
205 	return NULL;
206 }
207 
208 static struct dst_ops ip6_dst_blackhole_ops = {
209 	.family			=	AF_INET6,
210 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
211 	.destroy		=	ip6_dst_destroy,
212 	.check			=	ip6_dst_check,
213 	.mtu			=	ip6_blackhole_mtu,
214 	.default_advmss		=	ip6_default_advmss,
215 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
216 	.redirect		=	ip6_rt_blackhole_redirect,
217 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
218 	.neigh_lookup		=	ip6_neigh_lookup,
219 };
220 
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222 	[RTAX_HOPLIMIT - 1] = 0,
223 };
224 
225 static const struct rt6_info ip6_null_entry_template = {
226 	.dst = {
227 		.__refcnt	= ATOMIC_INIT(1),
228 		.__use		= 1,
229 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
230 		.error		= -ENETUNREACH,
231 		.input		= ip6_pkt_discard,
232 		.output		= ip6_pkt_discard_out,
233 	},
234 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
235 	.rt6i_protocol  = RTPROT_KERNEL,
236 	.rt6i_metric	= ~(u32) 0,
237 	.rt6i_ref	= ATOMIC_INIT(1),
238 };
239 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst carries
 * -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst carries
 * -EINVAL and hands packets to dst_discard() in both directions.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
276 
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 					     struct net_device *dev,
280 					     int flags,
281 					     struct fib6_table *table)
282 {
283 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 					0, DST_OBSOLETE_FORCE_CHK, flags);
285 
286 	if (rt) {
287 		struct dst_entry *dst = &rt->dst;
288 
289 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 		rt->rt6i_genid = rt_genid(net);
292 	}
293 	return rt;
294 }
295 
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298 	struct rt6_info *rt = (struct rt6_info *)dst;
299 	struct inet6_dev *idev = rt->rt6i_idev;
300 
301 	if (rt->n)
302 		neigh_release(rt->n);
303 
304 	if (!(rt->dst.flags & DST_HOST))
305 		dst_destroy_metrics_generic(dst);
306 
307 	if (idev) {
308 		rt->rt6i_idev = NULL;
309 		in6_dev_put(idev);
310 	}
311 
312 	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 		dst_release(dst->from);
314 
315 	if (rt6_has_peer(rt)) {
316 		struct inet_peer *peer = rt6_peer_ptr(rt);
317 		inet_putpeer(peer);
318 	}
319 }
320 
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322 
323 static u32 rt6_peer_genid(void)
324 {
325 	return atomic_read(&__rt6_peer_genid);
326 }
327 
328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330 	struct inet_peer_base *base;
331 	struct inet_peer *peer;
332 
333 	base = inetpeer_base_ptr(rt->_rt6i_peer);
334 	if (!base)
335 		return;
336 
337 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338 	if (peer) {
339 		if (!rt6_set_peer(rt, peer))
340 			inet_putpeer(peer);
341 		else
342 			rt->rt6i_peer_genid = rt6_peer_genid();
343 	}
344 }
345 
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 			   int how)
348 {
349 	struct rt6_info *rt = (struct rt6_info *)dst;
350 	struct inet6_dev *idev = rt->rt6i_idev;
351 	struct net_device *loopback_dev =
352 		dev_net(dev)->loopback_dev;
353 
354 	if (dev != loopback_dev) {
355 		if (idev && idev->dev == dev) {
356 			struct inet6_dev *loopback_idev =
357 				in6_dev_get(loopback_dev);
358 			if (loopback_idev) {
359 				rt->rt6i_idev = loopback_idev;
360 				in6_dev_put(idev);
361 			}
362 		}
363 		if (rt->n && rt->n->dev == dev) {
364 			rt->n->dev = loopback_dev;
365 			dev_hold(loopback_dev);
366 			dev_put(dev);
367 		}
368 	}
369 }
370 
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373 	if (rt->rt6i_flags & RTF_EXPIRES) {
374 		if (time_after(jiffies, rt->dst.expires))
375 			return true;
376 	} else if (rt->dst.from) {
377 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
378 	}
379 	return false;
380 }
381 
382 static bool rt6_need_strict(const struct in6_addr *daddr)
383 {
384 	return ipv6_addr_type(daddr) &
385 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
386 }
387 
388 /*
389  *	Route lookup. Any table->tb6_lock is implied.
390  */
391 
392 static inline struct rt6_info *rt6_device_match(struct net *net,
393 						    struct rt6_info *rt,
394 						    const struct in6_addr *saddr,
395 						    int oif,
396 						    int flags)
397 {
398 	struct rt6_info *local = NULL;
399 	struct rt6_info *sprt;
400 
401 	if (!oif && ipv6_addr_any(saddr))
402 		goto out;
403 
404 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
405 		struct net_device *dev = sprt->dst.dev;
406 
407 		if (oif) {
408 			if (dev->ifindex == oif)
409 				return sprt;
410 			if (dev->flags & IFF_LOOPBACK) {
411 				if (!sprt->rt6i_idev ||
412 				    sprt->rt6i_idev->dev->ifindex != oif) {
413 					if (flags & RT6_LOOKUP_F_IFACE && oif)
414 						continue;
415 					if (local && (!oif ||
416 						      local->rt6i_idev->dev->ifindex == oif))
417 						continue;
418 				}
419 				local = sprt;
420 			}
421 		} else {
422 			if (ipv6_chk_addr(net, saddr, dev,
423 					  flags & RT6_LOOKUP_F_IFACE))
424 				return sprt;
425 		}
426 	}
427 
428 	if (oif) {
429 		if (local)
430 			return local;
431 
432 		if (flags & RT6_LOOKUP_F_IFACE)
433 			return net->ipv6.ip6_null_entry;
434 	}
435 out:
436 	return rt;
437 }
438 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour bound to @rt is not in
 * a NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the neighbour's
 * solicited-node multicast address.
 *
 * A NULL @rt or a route without a neighbour is ignored.
 *
 * neigh->updated is modified here, so neigh->lock is taken for
 * writing; holding only the read side would let two CPUs pass the
 * interval check together and race on the store.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	/* Unlocked fast path; the state is re-checked under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp before unlocking so the rate limit holds. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
475 
476 /*
477  * Default Router Selection (RFC 2461 6.3.6)
478  */
479 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
480 {
481 	struct net_device *dev = rt->dst.dev;
482 	if (!oif || dev->ifindex == oif)
483 		return 2;
484 	if ((dev->flags & IFF_LOOPBACK) &&
485 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486 		return 1;
487 	return 0;
488 }
489 
490 static inline int rt6_check_neigh(struct rt6_info *rt)
491 {
492 	struct neighbour *neigh;
493 	int m;
494 
495 	neigh = rt->n;
496 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
497 	    !(rt->rt6i_flags & RTF_GATEWAY))
498 		m = 1;
499 	else if (neigh) {
500 		read_lock_bh(&neigh->lock);
501 		if (neigh->nud_state & NUD_VALID)
502 			m = 2;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504 		else if (neigh->nud_state & NUD_FAILED)
505 			m = 0;
506 #endif
507 		else
508 			m = 1;
509 		read_unlock_bh(&neigh->lock);
510 	} else
511 		m = 0;
512 	return m;
513 }
514 
515 static int rt6_score_route(struct rt6_info *rt, int oif,
516 			   int strict)
517 {
518 	int m, n;
519 
520 	m = rt6_check_dev(rt, oif);
521 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
522 		return -1;
523 #ifdef CONFIG_IPV6_ROUTER_PREF
524 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525 #endif
526 	n = rt6_check_neigh(rt);
527 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
528 		return -1;
529 	return m;
530 }
531 
532 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533 				   int *mpri, struct rt6_info *match)
534 {
535 	int m;
536 
537 	if (rt6_check_expired(rt))
538 		goto out;
539 
540 	m = rt6_score_route(rt, oif, strict);
541 	if (m < 0)
542 		goto out;
543 
544 	if (m > *mpri) {
545 		if (strict & RT6_LOOKUP_F_REACHABLE)
546 			rt6_probe(match);
547 		*mpri = m;
548 		match = rt;
549 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
550 		rt6_probe(rt);
551 	}
552 
553 out:
554 	return match;
555 }
556 
557 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558 				     struct rt6_info *rr_head,
559 				     u32 metric, int oif, int strict)
560 {
561 	struct rt6_info *rt, *match;
562 	int mpri = -1;
563 
564 	match = NULL;
565 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
566 	     rt = rt->dst.rt6_next)
567 		match = find_match(rt, oif, strict, &mpri, match);
568 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
569 	     rt = rt->dst.rt6_next)
570 		match = find_match(rt, oif, strict, &mpri, match);
571 
572 	return match;
573 }
574 
575 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576 {
577 	struct rt6_info *match, *rt0;
578 	struct net *net;
579 
580 	rt0 = fn->rr_ptr;
581 	if (!rt0)
582 		fn->rr_ptr = rt0 = fn->leaf;
583 
584 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585 
586 	if (!match &&
587 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
588 		struct rt6_info *next = rt0->dst.rt6_next;
589 
590 		/* no entries matched; do round-robin */
591 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
592 			next = fn->leaf;
593 
594 		if (next != rt0)
595 			fn->rr_ptr = next;
596 	}
597 
598 	net = dev_net(rt0->dst.dev);
599 	return match ? match : net->ipv6.ip6_null_entry;
600 }
601 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt/@len describe the raw option, @gwaddr is the advertising
 * router.  The option is rejected with -EINVAL when it is shorter than
 * struct route_info, when its length field is above 3 or too small for
 * the prefix length, when the prefix length exceeds 128, or when the
 * preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * A matching route is deleted when the lifetime is zero, created when
 * none exists and the lifetime is non-zero, and otherwise has its
 * preference flags refreshed.  The expiry is then set from the
 * lifetime (or cleared for an infinite one).
 *
 * Returns 0 on success, -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;
	int min_length;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;

	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
674 
/*
 * BACKTRACK - climb the FIB tree after a lookup ended on the null entry.
 *
 * The macro is not self-contained.  It uses the caller's local
 * variables `rt` (current result) and `fn` (current node), and jumps
 * to the caller's labels `out` and `restart`.
 *
 * When `rt` is __net's null entry, it repeatedly moves `fn` to its
 * parent -- or, when the parent has a subtree that is not `fn` itself,
 * to the result of a fib6_lookup() of `saddr` in that subtree.  It
 * stops with `goto restart` at the first node flagged RTN_RTINFO, or
 * with `goto out` once a node flagged RTN_TL_ROOT is reached.
 *
 * When `rt` is anything else the macro does nothing.
 */
675 #define BACKTRACK(__net, saddr)			\
676 do { \
677 	if (rt == __net->ipv6.ip6_null_entry) {	\
678 		struct fib6_node *pn; \
679 		while (1) { \
680 			if (fn->fn_flags & RTN_TL_ROOT) \
681 				goto out; \
682 			pn = fn->parent; \
683 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
684 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
685 			else \
686 				fn = pn; \
687 			if (fn->fn_flags & RTN_RTINFO) \
688 				goto restart; \
689 		} \
690 	} \
691 } while (0)
692 
/*
 * Look up fl6 in @table without cloning the result.
 *
 * The table's read lock is held for the duration.  The node found by
 * fib6_lookup() is narrowed with rt6_device_match(); if that yields
 * the null entry, BACKTRACK() walks up the tree and retries from the
 * `restart` label.  The chosen route is passed to dst_use() before the
 * lock is dropped.
 *
 * The names `fn`, `rt`, `restart` and `out` are required by BACKTRACK().
 */
693 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694 					     struct fib6_table *table,
695 					     struct flowi6 *fl6, int flags)
696 {
697 	struct fib6_node *fn;
698 	struct rt6_info *rt;
699 
700 	read_lock_bh(&table->tb6_lock);
701 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
702 restart:
703 	rt = fn->leaf;
704 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* May jump to restart (new fn) or out (tree root reached). */
705 	BACKTRACK(net, &fl6->saddr);
706 out:
707 	dst_use(&rt->dst, jiffies);
708 	read_unlock_bh(&table->tb6_lock);
709 	return rt;
710 
711 }
712 
713 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 				    int flags)
715 {
716 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717 }
718 EXPORT_SYMBOL_GPL(ip6_route_lookup);
719 
720 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721 			    const struct in6_addr *saddr, int oif, int strict)
722 {
723 	struct flowi6 fl6 = {
724 		.flowi6_oif = oif,
725 		.daddr = *daddr,
726 	};
727 	struct dst_entry *dst;
728 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729 
730 	if (saddr) {
731 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
732 		flags |= RT6_LOOKUP_F_HAS_SADDR;
733 	}
734 
735 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
736 	if (dst->error == 0)
737 		return (struct rt6_info *) dst;
738 
739 	dst_release(dst);
740 
741 	return NULL;
742 }
743 
744 EXPORT_SYMBOL(rt6_lookup);
745 
746 /* ip6_ins_rt is called with table->tb6_lock NOT held.
747    It takes a new route entry; if the addition fails for any reason,
748    the route is freed. In any case, if the caller does not hold a
749    reference to the route, it may be destroyed.
750  */
751 
752 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753 {
754 	int err;
755 	struct fib6_table *table;
756 
757 	table = rt->rt6i_table;
758 	write_lock_bh(&table->tb6_lock);
759 	err = fib6_add(&table->tb6_root, rt, info);
760 	write_unlock_bh(&table->tb6_lock);
761 
762 	return err;
763 }
764 
765 int ip6_ins_rt(struct rt6_info *rt)
766 {
767 	struct nl_info info = {
768 		.nl_net = dev_net(rt->dst.dev),
769 	};
770 	return __ip6_ins_rt(rt, &info);
771 }
772 
773 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
774 				      const struct in6_addr *daddr,
775 				      const struct in6_addr *saddr)
776 {
777 	struct rt6_info *rt;
778 
779 	/*
780 	 *	Clone the route.
781 	 */
782 
783 	rt = ip6_rt_copy(ort, daddr);
784 
785 	if (rt) {
786 		int attempts = !in_softirq();
787 
788 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
789 			if (ort->rt6i_dst.plen != 128 &&
790 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
791 				rt->rt6i_flags |= RTF_ANYCAST;
792 			rt->rt6i_gateway = *daddr;
793 		}
794 
795 		rt->rt6i_flags |= RTF_CACHE;
796 
797 #ifdef CONFIG_IPV6_SUBTREES
798 		if (rt->rt6i_src.plen && saddr) {
799 			rt->rt6i_src.addr = *saddr;
800 			rt->rt6i_src.plen = 128;
801 		}
802 #endif
803 
804 	retry:
805 		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
806 			struct net *net = dev_net(rt->dst.dev);
807 			int saved_rt_min_interval =
808 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
809 			int saved_rt_elasticity =
810 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
811 
812 			if (attempts-- > 0) {
813 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815 
816 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
817 
818 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
819 					saved_rt_elasticity;
820 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
821 					saved_rt_min_interval;
822 				goto retry;
823 			}
824 
825 			net_warn_ratelimited("Neighbour table overflow\n");
826 			dst_free(&rt->dst);
827 			return NULL;
828 		}
829 	}
830 
831 	return rt;
832 }
833 
834 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835 					const struct in6_addr *daddr)
836 {
837 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838 
839 	if (rt) {
840 		rt->rt6i_flags |= RTF_CACHE;
841 		rt->n = neigh_clone(ort->n);
842 	}
843 	return rt;
844 }
845 
/*
 * Core per-table route lookup used by the input and output paths.
 *
 * Selects a route for fl6 in @table with rt6_select(), backtracking
 * through the tree with BACKTRACK() when the null entry is hit.  A
 * result that is neither the null entry nor an RTF_CACHE entry is
 * cloned (rt6_alloc_cow() or rt6_alloc_clone()) and the clone is
 * inserted with ip6_ins_rt().  Because the table lock is dropped for
 * the clone/insert, a failed insert triggers a fresh lookup, bounded
 * by `attempts`.
 *
 * The returned route carries a reference taken with dst_hold(), and
 * its lastuse/__use fields are updated.
 *
 * The names `fn`, `rt`, `restart` and `out` are required by BACKTRACK().
 */
846 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
847 				      struct flowi6 *fl6, int flags)
848 {
849 	struct fib6_node *fn;
850 	struct rt6_info *rt, *nrt;
851 	int strict = 0;
852 	int attempts = 3;
853 	int err;
	/* Reachability is only required while forwarding is disabled. */
854 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
855 
856 	strict |= flags & RT6_LOOKUP_F_IFACE;
857 
858 relookup:
859 	read_lock_bh(&table->tb6_lock);
860 
861 restart_2:
862 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863 
864 restart:
865 	rt = rt6_select(fn, oif, strict | reachable);
866 
	/* May jump to restart (new fn) or out (tree root reached). */
867 	BACKTRACK(net, &fl6->saddr);
868 	if (rt == net->ipv6.ip6_null_entry ||
869 	    rt->rt6i_flags & RTF_CACHE)
870 		goto out;
871 
	/* Pin the route before the table lock is dropped for cloning. */
872 	dst_hold(&rt->dst);
873 	read_unlock_bh(&table->tb6_lock);
874 
	/*
	 * No neighbour and a next hop expected: copy and bind a neighbour.
	 * Otherwise clone non-host routes; host routes are used as they are.
	 */
875 	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
876 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
877 	else if (!(rt->dst.flags & DST_HOST))
878 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
879 	else
880 		goto out2;
881 
	/* Swap the reference from the original to the clone (or null entry). */
882 	dst_release(&rt->dst);
883 	rt = nrt ? : net->ipv6.ip6_null_entry;
884 
885 	dst_hold(&rt->dst);
886 	if (nrt) {
887 		err = ip6_ins_rt(nrt);
888 		if (!err)
889 			goto out2;
890 	}
891 
892 	if (--attempts <= 0)
893 		goto out2;
894 
895 	/*
896 	 * Race condition! In the gap, when table->tb6_lock was
897 	 * released someone could insert this route.  Relookup.
898 	 */
899 	dst_release(&rt->dst);
900 	goto relookup;
901 
902 out:
	/*
	 * Reached with the table lock held.  While `reachable` is still
	 * set, the lookup is repeated once with the requirement cleared.
	 */
903 	if (reachable) {
904 		reachable = 0;
905 		goto restart_2;
906 	}
907 	dst_hold(&rt->dst);
908 	read_unlock_bh(&table->tb6_lock);
909 out2:
910 	rt->dst.lastuse = jiffies;
911 	rt->dst.__use++;
912 
913 	return rt;
914 }
915 
916 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
917 					    struct flowi6 *fl6, int flags)
918 {
919 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
920 }
921 
922 static struct dst_entry *ip6_route_input_lookup(struct net *net,
923 						struct net_device *dev,
924 						struct flowi6 *fl6, int flags)
925 {
926 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927 		flags |= RT6_LOOKUP_F_IFACE;
928 
929 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930 }
931 
932 void ip6_route_input(struct sk_buff *skb)
933 {
934 	const struct ipv6hdr *iph = ipv6_hdr(skb);
935 	struct net *net = dev_net(skb->dev);
936 	int flags = RT6_LOOKUP_F_HAS_SADDR;
937 	struct flowi6 fl6 = {
938 		.flowi6_iif = skb->dev->ifindex,
939 		.daddr = iph->daddr,
940 		.saddr = iph->saddr,
941 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
942 		.flowi6_mark = skb->mark,
943 		.flowi6_proto = iph->nexthdr,
944 	};
945 
946 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
947 }
948 
949 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
950 					     struct flowi6 *fl6, int flags)
951 {
952 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
953 }
954 
955 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
956 				    struct flowi6 *fl6)
957 {
958 	int flags = 0;
959 
960 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
961 
962 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
963 		flags |= RT6_LOOKUP_F_IFACE;
964 
965 	if (!ipv6_addr_any(&fl6->saddr))
966 		flags |= RT6_LOOKUP_F_HAS_SADDR;
967 	else if (sk)
968 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
969 
970 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971 }
972 
973 EXPORT_SYMBOL(ip6_route_output);
974 
975 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
976 {
977 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
978 	struct dst_entry *new = NULL;
979 
980 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981 	if (rt) {
982 		new = &rt->dst;
983 
984 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985 		rt6_init_peer(rt, net->ipv6.peers);
986 
987 		new->__use = 1;
988 		new->input = dst_discard;
989 		new->output = dst_discard;
990 
991 		if (dst_metrics_read_only(&ort->dst))
992 			new->_metrics = ort->dst._metrics;
993 		else
994 			dst_copy_metrics(new, &ort->dst);
995 		rt->rt6i_idev = ort->rt6i_idev;
996 		if (rt->rt6i_idev)
997 			in6_dev_hold(rt->rt6i_idev);
998 
999 		rt->rt6i_gateway = ort->rt6i_gateway;
1000 		rt->rt6i_flags = ort->rt6i_flags;
1001 		rt6_clean_expires(rt);
1002 		rt->rt6i_metric = 0;
1003 
1004 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005 #ifdef CONFIG_IPV6_SUBTREES
1006 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007 #endif
1008 
1009 		dst_free(new);
1010 	}
1011 
1012 	dst_release(dst_orig);
1013 	return new ? new : ERR_PTR(-ENOMEM);
1014 }
1015 
1016 /*
1017  *	Destination cache support functions
1018  */
1019 
1020 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021 {
1022 	struct rt6_info *rt;
1023 
1024 	rt = (struct rt6_info *) dst;
1025 
1026 	/* All IPV6 dsts are created with ->obsolete set to the value
1027 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028 	 * into this function always.
1029 	 */
1030 	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 		return NULL;
1032 
1033 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035 			if (!rt6_has_peer(rt))
1036 				rt6_bind_peer(rt, 0);
1037 			rt->rt6i_peer_genid = rt6_peer_genid();
1038 		}
1039 		return dst;
1040 	}
1041 	return NULL;
1042 }
1043 
1044 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045 {
1046 	struct rt6_info *rt = (struct rt6_info *) dst;
1047 
1048 	if (rt) {
1049 		if (rt->rt6i_flags & RTF_CACHE) {
1050 			if (rt6_check_expired(rt)) {
1051 				ip6_del_rt(rt);
1052 				dst = NULL;
1053 			}
1054 		} else {
1055 			dst_release(dst);
1056 			dst = NULL;
1057 		}
1058 	}
1059 	return dst;
1060 }
1061 
1062 static void ip6_link_failure(struct sk_buff *skb)
1063 {
1064 	struct rt6_info *rt;
1065 
1066 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067 
1068 	rt = (struct rt6_info *) skb_dst(skb);
1069 	if (rt) {
1070 		if (rt->rt6i_flags & RTF_CACHE)
1071 			rt6_update_expires(rt, 0);
1072 		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073 			rt->rt6i_node->fn_sernum = -1;
1074 	}
1075 }
1076 
1077 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078 			       struct sk_buff *skb, u32 mtu)
1079 {
1080 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1081 
1082 	dst_confirm(dst);
1083 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084 		struct net *net = dev_net(dst->dev);
1085 
1086 		rt6->rt6i_flags |= RTF_MODIFIED;
1087 		if (mtu < IPV6_MIN_MTU) {
1088 			u32 features = dst_metric(dst, RTAX_FEATURES);
1089 			mtu = IPV6_MIN_MTU;
1090 			features |= RTAX_FEATURE_ALLFRAG;
1091 			dst_metric_set(dst, RTAX_FEATURES, features);
1092 		}
1093 		dst_metric_set(dst, RTAX_MTU, mtu);
1094 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095 	}
1096 }
1097 
1098 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099 		     int oif, u32 mark)
1100 {
1101 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102 	struct dst_entry *dst;
1103 	struct flowi6 fl6;
1104 
1105 	memset(&fl6, 0, sizeof(fl6));
1106 	fl6.flowi6_oif = oif;
1107 	fl6.flowi6_mark = mark;
1108 	fl6.flowi6_flags = 0;
1109 	fl6.daddr = iph->daddr;
1110 	fl6.saddr = iph->saddr;
1111 	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112 
1113 	dst = ip6_route_output(net, NULL, &fl6);
1114 	if (!dst->error)
1115 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116 	dst_release(dst);
1117 }
1118 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119 
1120 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121 {
1122 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1123 			sk->sk_bound_dev_if, sk->sk_mark);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126 
1127 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128 {
1129 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130 	struct dst_entry *dst;
1131 	struct flowi6 fl6;
1132 
1133 	memset(&fl6, 0, sizeof(fl6));
1134 	fl6.flowi6_oif = oif;
1135 	fl6.flowi6_mark = mark;
1136 	fl6.flowi6_flags = 0;
1137 	fl6.daddr = iph->daddr;
1138 	fl6.saddr = iph->saddr;
1139 	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140 
1141 	dst = ip6_route_output(net, NULL, &fl6);
1142 	if (!dst->error)
1143 		rt6_do_redirect(dst, NULL, skb);
1144 	dst_release(dst);
1145 }
1146 EXPORT_SYMBOL_GPL(ip6_redirect);
1147 
1148 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149 {
1150 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151 }
1152 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153 
1154 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155 {
1156 	struct net_device *dev = dst->dev;
1157 	unsigned int mtu = dst_mtu(dst);
1158 	struct net *net = dev_net(dev);
1159 
1160 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161 
1162 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164 
1165 	/*
1166 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1169 	 * rely only on pmtu discovery"
1170 	 */
1171 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172 		mtu = IPV6_MAXPLEN;
1173 	return mtu;
1174 }
1175 
1176 static unsigned int ip6_mtu(const struct dst_entry *dst)
1177 {
1178 	struct inet6_dev *idev;
1179 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180 
1181 	if (mtu)
1182 		return mtu;
1183 
1184 	mtu = IPV6_MIN_MTU;
1185 
1186 	rcu_read_lock();
1187 	idev = __in6_dev_get(dst->dev);
1188 	if (idev)
1189 		mtu = idev->cnf.mtu6;
1190 	rcu_read_unlock();
1191 
1192 	return mtu;
1193 }
1194 
1195 static struct dst_entry *icmp6_dst_gc_list;
1196 static DEFINE_SPINLOCK(icmp6_dst_lock);
1197 
1198 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1199 				  struct neighbour *neigh,
1200 				  struct flowi6 *fl6)
1201 {
1202 	struct dst_entry *dst;
1203 	struct rt6_info *rt;
1204 	struct inet6_dev *idev = in6_dev_get(dev);
1205 	struct net *net = dev_net(dev);
1206 
1207 	if (unlikely(!idev))
1208 		return ERR_PTR(-ENODEV);
1209 
1210 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1211 	if (unlikely(!rt)) {
1212 		in6_dev_put(idev);
1213 		dst = ERR_PTR(-ENOMEM);
1214 		goto out;
1215 	}
1216 
1217 	if (neigh)
1218 		neigh_hold(neigh);
1219 	else {
1220 		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221 		if (IS_ERR(neigh)) {
1222 			in6_dev_put(idev);
1223 			dst_free(&rt->dst);
1224 			return ERR_CAST(neigh);
1225 		}
1226 	}
1227 
1228 	rt->dst.flags |= DST_HOST;
1229 	rt->dst.output  = ip6_output;
1230 	rt->n = neigh;
1231 	atomic_set(&rt->dst.__refcnt, 1);
1232 	rt->rt6i_dst.addr = fl6->daddr;
1233 	rt->rt6i_dst.plen = 128;
1234 	rt->rt6i_idev     = idev;
1235 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1236 
1237 	spin_lock_bh(&icmp6_dst_lock);
1238 	rt->dst.next = icmp6_dst_gc_list;
1239 	icmp6_dst_gc_list = &rt->dst;
1240 	spin_unlock_bh(&icmp6_dst_lock);
1241 
1242 	fib6_force_start_gc(net);
1243 
1244 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245 
1246 out:
1247 	return dst;
1248 }
1249 
1250 int icmp6_dst_gc(void)
1251 {
1252 	struct dst_entry *dst, **pprev;
1253 	int more = 0;
1254 
1255 	spin_lock_bh(&icmp6_dst_lock);
1256 	pprev = &icmp6_dst_gc_list;
1257 
1258 	while ((dst = *pprev) != NULL) {
1259 		if (!atomic_read(&dst->__refcnt)) {
1260 			*pprev = dst->next;
1261 			dst_free(dst);
1262 		} else {
1263 			pprev = &dst->next;
1264 			++more;
1265 		}
1266 	}
1267 
1268 	spin_unlock_bh(&icmp6_dst_lock);
1269 
1270 	return more;
1271 }
1272 
1273 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274 			    void *arg)
1275 {
1276 	struct dst_entry *dst, **pprev;
1277 
1278 	spin_lock_bh(&icmp6_dst_lock);
1279 	pprev = &icmp6_dst_gc_list;
1280 	while ((dst = *pprev) != NULL) {
1281 		struct rt6_info *rt = (struct rt6_info *) dst;
1282 		if (func(rt, arg)) {
1283 			*pprev = dst->next;
1284 			dst_free(dst);
1285 		} else {
1286 			pprev = &dst->next;
1287 		}
1288 	}
1289 	spin_unlock_bh(&icmp6_dst_lock);
1290 }
1291 
1292 static int ip6_dst_gc(struct dst_ops *ops)
1293 {
1294 	unsigned long now = jiffies;
1295 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301 	int entries;
1302 
1303 	entries = dst_entries_get_fast(ops);
1304 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1305 	    entries <= rt_max_size)
1306 		goto out;
1307 
1308 	net->ipv6.ip6_rt_gc_expire++;
1309 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310 	net->ipv6.ip6_rt_last_gc = now;
1311 	entries = dst_entries_get_slow(ops);
1312 	if (entries < ops->gc_thresh)
1313 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314 out:
1315 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316 	return entries > rt_max_size;
1317 }
1318 
1319 /* Clean host part of a prefix. Not necessary in radix tree,
1320    but results in cleaner routing tables.
1321 
1322    Remove it only when all the things will work!
1323  */
1324 
1325 int ip6_dst_hoplimit(struct dst_entry *dst)
1326 {
1327 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328 	if (hoplimit == 0) {
1329 		struct net_device *dev = dst->dev;
1330 		struct inet6_dev *idev;
1331 
1332 		rcu_read_lock();
1333 		idev = __in6_dev_get(dev);
1334 		if (idev)
1335 			hoplimit = idev->cnf.hop_limit;
1336 		else
1337 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338 		rcu_read_unlock();
1339 	}
1340 	return hoplimit;
1341 }
1342 EXPORT_SYMBOL(ip6_dst_hoplimit);
1343 
1344 /*
1345  *
1346  */
1347 
1348 int ip6_route_add(struct fib6_config *cfg)
1349 {
1350 	int err;
1351 	struct net *net = cfg->fc_nlinfo.nl_net;
1352 	struct rt6_info *rt = NULL;
1353 	struct net_device *dev = NULL;
1354 	struct inet6_dev *idev = NULL;
1355 	struct fib6_table *table;
1356 	int addr_type;
1357 
1358 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359 		return -EINVAL;
1360 #ifndef CONFIG_IPV6_SUBTREES
1361 	if (cfg->fc_src_len)
1362 		return -EINVAL;
1363 #endif
1364 	if (cfg->fc_ifindex) {
1365 		err = -ENODEV;
1366 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1367 		if (!dev)
1368 			goto out;
1369 		idev = in6_dev_get(dev);
1370 		if (!idev)
1371 			goto out;
1372 	}
1373 
1374 	if (cfg->fc_metric == 0)
1375 		cfg->fc_metric = IP6_RT_PRIO_USER;
1376 
1377 	err = -ENOBUFS;
1378 	if (cfg->fc_nlinfo.nlh &&
1379 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380 		table = fib6_get_table(net, cfg->fc_table);
1381 		if (!table) {
1382 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383 			table = fib6_new_table(net, cfg->fc_table);
1384 		}
1385 	} else {
1386 		table = fib6_new_table(net, cfg->fc_table);
1387 	}
1388 
1389 	if (!table)
1390 		goto out;
1391 
1392 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393 
1394 	if (!rt) {
1395 		err = -ENOMEM;
1396 		goto out;
1397 	}
1398 
1399 	if (cfg->fc_flags & RTF_EXPIRES)
1400 		rt6_set_expires(rt, jiffies +
1401 				clock_t_to_jiffies(cfg->fc_expires));
1402 	else
1403 		rt6_clean_expires(rt);
1404 
1405 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1406 		cfg->fc_protocol = RTPROT_BOOT;
1407 	rt->rt6i_protocol = cfg->fc_protocol;
1408 
1409 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1410 
1411 	if (addr_type & IPV6_ADDR_MULTICAST)
1412 		rt->dst.input = ip6_mc_input;
1413 	else if (cfg->fc_flags & RTF_LOCAL)
1414 		rt->dst.input = ip6_input;
1415 	else
1416 		rt->dst.input = ip6_forward;
1417 
1418 	rt->dst.output = ip6_output;
1419 
1420 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1422 	if (rt->rt6i_dst.plen == 128)
1423 	       rt->dst.flags |= DST_HOST;
1424 
1425 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427 		if (!metrics) {
1428 			err = -ENOMEM;
1429 			goto out;
1430 		}
1431 		dst_init_metrics(&rt->dst, metrics, 0);
1432 	}
1433 #ifdef CONFIG_IPV6_SUBTREES
1434 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435 	rt->rt6i_src.plen = cfg->fc_src_len;
1436 #endif
1437 
1438 	rt->rt6i_metric = cfg->fc_metric;
1439 
1440 	/* We cannot add true routes via loopback here,
1441 	   they would result in kernel looping; promote them to reject routes
1442 	 */
1443 	if ((cfg->fc_flags & RTF_REJECT) ||
1444 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1445 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446 	     !(cfg->fc_flags & RTF_LOCAL))) {
1447 		/* hold loopback dev/idev if we haven't done so. */
1448 		if (dev != net->loopback_dev) {
1449 			if (dev) {
1450 				dev_put(dev);
1451 				in6_dev_put(idev);
1452 			}
1453 			dev = net->loopback_dev;
1454 			dev_hold(dev);
1455 			idev = in6_dev_get(dev);
1456 			if (!idev) {
1457 				err = -ENODEV;
1458 				goto out;
1459 			}
1460 		}
1461 		rt->dst.output = ip6_pkt_discard_out;
1462 		rt->dst.input = ip6_pkt_discard;
1463 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464 		switch (cfg->fc_type) {
1465 		case RTN_BLACKHOLE:
1466 			rt->dst.error = -EINVAL;
1467 			break;
1468 		case RTN_PROHIBIT:
1469 			rt->dst.error = -EACCES;
1470 			break;
1471 		case RTN_THROW:
1472 			rt->dst.error = -EAGAIN;
1473 			break;
1474 		default:
1475 			rt->dst.error = -ENETUNREACH;
1476 			break;
1477 		}
1478 		goto install_route;
1479 	}
1480 
1481 	if (cfg->fc_flags & RTF_GATEWAY) {
1482 		const struct in6_addr *gw_addr;
1483 		int gwa_type;
1484 
1485 		gw_addr = &cfg->fc_gateway;
1486 		rt->rt6i_gateway = *gw_addr;
1487 		gwa_type = ipv6_addr_type(gw_addr);
1488 
1489 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490 			struct rt6_info *grt;
1491 
1492 			/* IPv6 strictly inhibits using not link-local
1493 			   addresses as nexthop address.
1494 			   Otherwise, router will not able to send redirects.
1495 			   It is very good, but in some (rare!) circumstances
1496 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1497 			   some exceptions. --ANK
1498 			 */
1499 			err = -EINVAL;
1500 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1501 				goto out;
1502 
1503 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504 
1505 			err = -EHOSTUNREACH;
1506 			if (!grt)
1507 				goto out;
1508 			if (dev) {
1509 				if (dev != grt->dst.dev) {
1510 					dst_release(&grt->dst);
1511 					goto out;
1512 				}
1513 			} else {
1514 				dev = grt->dst.dev;
1515 				idev = grt->rt6i_idev;
1516 				dev_hold(dev);
1517 				in6_dev_hold(grt->rt6i_idev);
1518 			}
1519 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1520 				err = 0;
1521 			dst_release(&grt->dst);
1522 
1523 			if (err)
1524 				goto out;
1525 		}
1526 		err = -EINVAL;
1527 		if (!dev || (dev->flags & IFF_LOOPBACK))
1528 			goto out;
1529 	}
1530 
1531 	err = -ENODEV;
1532 	if (!dev)
1533 		goto out;
1534 
1535 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537 			err = -EINVAL;
1538 			goto out;
1539 		}
1540 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541 		rt->rt6i_prefsrc.plen = 128;
1542 	} else
1543 		rt->rt6i_prefsrc.plen = 0;
1544 
1545 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546 		err = rt6_bind_neighbour(rt, dev);
1547 		if (err)
1548 			goto out;
1549 	}
1550 
1551 	rt->rt6i_flags = cfg->fc_flags;
1552 
1553 install_route:
1554 	if (cfg->fc_mx) {
1555 		struct nlattr *nla;
1556 		int remaining;
1557 
1558 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559 			int type = nla_type(nla);
1560 
1561 			if (type) {
1562 				if (type > RTAX_MAX) {
1563 					err = -EINVAL;
1564 					goto out;
1565 				}
1566 
1567 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568 			}
1569 		}
1570 	}
1571 
1572 	rt->dst.dev = dev;
1573 	rt->rt6i_idev = idev;
1574 	rt->rt6i_table = table;
1575 
1576 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1577 
1578 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579 
1580 out:
1581 	if (dev)
1582 		dev_put(dev);
1583 	if (idev)
1584 		in6_dev_put(idev);
1585 	if (rt)
1586 		dst_free(&rt->dst);
1587 	return err;
1588 }
1589 
1590 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591 {
1592 	int err;
1593 	struct fib6_table *table;
1594 	struct net *net = dev_net(rt->dst.dev);
1595 
1596 	if (rt == net->ipv6.ip6_null_entry) {
1597 		err = -ENOENT;
1598 		goto out;
1599 	}
1600 
1601 	table = rt->rt6i_table;
1602 	write_lock_bh(&table->tb6_lock);
1603 	err = fib6_del(rt, info);
1604 	write_unlock_bh(&table->tb6_lock);
1605 
1606 out:
1607 	dst_release(&rt->dst);
1608 	return err;
1609 }
1610 
1611 int ip6_del_rt(struct rt6_info *rt)
1612 {
1613 	struct nl_info info = {
1614 		.nl_net = dev_net(rt->dst.dev),
1615 	};
1616 	return __ip6_del_rt(rt, &info);
1617 }
1618 
1619 static int ip6_route_del(struct fib6_config *cfg)
1620 {
1621 	struct fib6_table *table;
1622 	struct fib6_node *fn;
1623 	struct rt6_info *rt;
1624 	int err = -ESRCH;
1625 
1626 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1627 	if (!table)
1628 		return err;
1629 
1630 	read_lock_bh(&table->tb6_lock);
1631 
1632 	fn = fib6_locate(&table->tb6_root,
1633 			 &cfg->fc_dst, cfg->fc_dst_len,
1634 			 &cfg->fc_src, cfg->fc_src_len);
1635 
1636 	if (fn) {
1637 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1638 			if (cfg->fc_ifindex &&
1639 			    (!rt->dst.dev ||
1640 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1641 				continue;
1642 			if (cfg->fc_flags & RTF_GATEWAY &&
1643 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1644 				continue;
1645 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1646 				continue;
1647 			dst_hold(&rt->dst);
1648 			read_unlock_bh(&table->tb6_lock);
1649 
1650 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1651 		}
1652 	}
1653 	read_unlock_bh(&table->tb6_lock);
1654 
1655 	return err;
1656 }
1657 
1658 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1659 {
1660 	struct net *net = dev_net(skb->dev);
1661 	struct netevent_redirect netevent;
1662 	struct rt6_info *rt, *nrt = NULL;
1663 	const struct in6_addr *target;
1664 	struct ndisc_options ndopts;
1665 	const struct in6_addr *dest;
1666 	struct neighbour *old_neigh;
1667 	struct inet6_dev *in6_dev;
1668 	struct neighbour *neigh;
1669 	struct icmp6hdr *icmph;
1670 	int optlen, on_link;
1671 	u8 *lladdr;
1672 
1673 	optlen = skb->tail - skb->transport_header;
1674 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675 
1676 	if (optlen < 0) {
1677 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1678 		return;
1679 	}
1680 
1681 	icmph = icmp6_hdr(skb);
1682 	target = (const struct in6_addr *) (icmph + 1);
1683 	dest = target + 1;
1684 
1685 	if (ipv6_addr_is_multicast(dest)) {
1686 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1687 		return;
1688 	}
1689 
1690 	on_link = 0;
1691 	if (ipv6_addr_equal(dest, target)) {
1692 		on_link = 1;
1693 	} else if (ipv6_addr_type(target) !=
1694 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1695 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1696 		return;
1697 	}
1698 
1699 	in6_dev = __in6_dev_get(skb->dev);
1700 	if (!in6_dev)
1701 		return;
1702 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703 		return;
1704 
1705 	/* RFC2461 8.1:
1706 	 *	The IP source address of the Redirect MUST be the same as the current
1707 	 *	first-hop router for the specified ICMP Destination Address.
1708 	 */
1709 
1710 	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712 		return;
1713 	}
1714 
1715 	lladdr = NULL;
1716 	if (ndopts.nd_opts_tgt_lladdr) {
1717 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718 					     skb->dev);
1719 		if (!lladdr) {
1720 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721 			return;
1722 		}
1723 	}
1724 
1725 	rt = (struct rt6_info *) dst;
1726 	if (rt == net->ipv6.ip6_null_entry) {
1727 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1728 		return;
1729 	}
1730 
1731 	/* Redirect received -> path was valid.
1732 	 * Look, redirects are sent only in response to data packets,
1733 	 * so that this nexthop apparently is reachable. --ANK
1734 	 */
1735 	dst_confirm(&rt->dst);
1736 
1737 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738 	if (!neigh)
1739 		return;
1740 
1741 	/* Duplicate redirect: silently ignore. */
1742 	old_neigh = rt->n;
1743 	if (neigh == old_neigh)
1744 		goto out;
1745 
1746 	/*
1747 	 *	We have finally decided to accept it.
1748 	 */
1749 
1750 	neigh_update(neigh, lladdr, NUD_STALE,
1751 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1752 		     NEIGH_UPDATE_F_OVERRIDE|
1753 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1754 				     NEIGH_UPDATE_F_ISROUTER))
1755 		     );
1756 
1757 	nrt = ip6_rt_copy(rt, dest);
1758 	if (!nrt)
1759 		goto out;
1760 
1761 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1762 	if (on_link)
1763 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1764 
1765 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1766 	nrt->n = neigh_clone(neigh);
1767 
1768 	if (ip6_ins_rt(nrt))
1769 		goto out;
1770 
1771 	netevent.old = &rt->dst;
1772 	netevent.old_neigh = old_neigh;
1773 	netevent.new = &nrt->dst;
1774 	netevent.new_neigh = neigh;
1775 	netevent.daddr = dest;
1776 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1777 
1778 	if (rt->rt6i_flags & RTF_CACHE) {
1779 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1780 		ip6_del_rt(rt);
1781 	}
1782 
1783 out:
1784 	neigh_release(neigh);
1785 }
1786 
1787 /*
1788  *	Misc support functions
1789  */
1790 
1791 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1792 				    const struct in6_addr *dest)
1793 {
1794 	struct net *net = dev_net(ort->dst.dev);
1795 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796 					    ort->rt6i_table);
1797 
1798 	if (rt) {
1799 		rt->dst.input = ort->dst.input;
1800 		rt->dst.output = ort->dst.output;
1801 		rt->dst.flags |= DST_HOST;
1802 
1803 		rt->rt6i_dst.addr = *dest;
1804 		rt->rt6i_dst.plen = 128;
1805 		dst_copy_metrics(&rt->dst, &ort->dst);
1806 		rt->dst.error = ort->dst.error;
1807 		rt->rt6i_idev = ort->rt6i_idev;
1808 		if (rt->rt6i_idev)
1809 			in6_dev_hold(rt->rt6i_idev);
1810 		rt->dst.lastuse = jiffies;
1811 
1812 		rt->rt6i_gateway = ort->rt6i_gateway;
1813 		rt->rt6i_flags = ort->rt6i_flags;
1814 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1815 		    (RTF_DEFAULT | RTF_ADDRCONF))
1816 			rt6_set_from(rt, ort);
1817 		else
1818 			rt6_clean_expires(rt);
1819 		rt->rt6i_metric = 0;
1820 
1821 #ifdef CONFIG_IPV6_SUBTREES
1822 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823 #endif
1824 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825 		rt->rt6i_table = ort->rt6i_table;
1826 	}
1827 	return rt;
1828 }
1829 
1830 #ifdef CONFIG_IPV6_ROUTE_INFO
1831 static struct rt6_info *rt6_get_route_info(struct net *net,
1832 					   const struct in6_addr *prefix, int prefixlen,
1833 					   const struct in6_addr *gwaddr, int ifindex)
1834 {
1835 	struct fib6_node *fn;
1836 	struct rt6_info *rt = NULL;
1837 	struct fib6_table *table;
1838 
1839 	table = fib6_get_table(net, RT6_TABLE_INFO);
1840 	if (!table)
1841 		return NULL;
1842 
1843 	read_lock_bh(&table->tb6_lock);
1844 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845 	if (!fn)
1846 		goto out;
1847 
1848 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849 		if (rt->dst.dev->ifindex != ifindex)
1850 			continue;
1851 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 			continue;
1853 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 			continue;
1855 		dst_hold(&rt->dst);
1856 		break;
1857 	}
1858 out:
1859 	read_unlock_bh(&table->tb6_lock);
1860 	return rt;
1861 }
1862 
1863 static struct rt6_info *rt6_add_route_info(struct net *net,
1864 					   const struct in6_addr *prefix, int prefixlen,
1865 					   const struct in6_addr *gwaddr, int ifindex,
1866 					   unsigned int pref)
1867 {
1868 	struct fib6_config cfg = {
1869 		.fc_table	= RT6_TABLE_INFO,
1870 		.fc_metric	= IP6_RT_PRIO_USER,
1871 		.fc_ifindex	= ifindex,
1872 		.fc_dst_len	= prefixlen,
1873 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 				  RTF_UP | RTF_PREF(pref),
1875 		.fc_nlinfo.portid = 0,
1876 		.fc_nlinfo.nlh = NULL,
1877 		.fc_nlinfo.nl_net = net,
1878 	};
1879 
1880 	cfg.fc_dst = *prefix;
1881 	cfg.fc_gateway = *gwaddr;
1882 
1883 	/* We should treat it as a default route if prefix length is 0. */
1884 	if (!prefixlen)
1885 		cfg.fc_flags |= RTF_DEFAULT;
1886 
1887 	ip6_route_add(&cfg);
1888 
1889 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890 }
1891 #endif
1892 
1893 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894 {
1895 	struct rt6_info *rt;
1896 	struct fib6_table *table;
1897 
1898 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899 	if (!table)
1900 		return NULL;
1901 
1902 	read_lock_bh(&table->tb6_lock);
1903 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904 		if (dev == rt->dst.dev &&
1905 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1906 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 			break;
1908 	}
1909 	if (rt)
1910 		dst_hold(&rt->dst);
1911 	read_unlock_bh(&table->tb6_lock);
1912 	return rt;
1913 }
1914 
1915 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916 				     struct net_device *dev,
1917 				     unsigned int pref)
1918 {
1919 	struct fib6_config cfg = {
1920 		.fc_table	= RT6_TABLE_DFLT,
1921 		.fc_metric	= IP6_RT_PRIO_USER,
1922 		.fc_ifindex	= dev->ifindex,
1923 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925 		.fc_nlinfo.portid = 0,
1926 		.fc_nlinfo.nlh = NULL,
1927 		.fc_nlinfo.nl_net = dev_net(dev),
1928 	};
1929 
1930 	cfg.fc_gateway = *gwaddr;
1931 
1932 	ip6_route_add(&cfg);
1933 
1934 	return rt6_get_dflt_router(gwaddr, dev);
1935 }
1936 
1937 void rt6_purge_dflt_routers(struct net *net)
1938 {
1939 	struct rt6_info *rt;
1940 	struct fib6_table *table;
1941 
1942 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1943 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1944 	if (!table)
1945 		return;
1946 
1947 restart:
1948 	read_lock_bh(&table->tb6_lock);
1949 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951 			dst_hold(&rt->dst);
1952 			read_unlock_bh(&table->tb6_lock);
1953 			ip6_del_rt(rt);
1954 			goto restart;
1955 		}
1956 	}
1957 	read_unlock_bh(&table->tb6_lock);
1958 }
1959 
1960 static void rtmsg_to_fib6_config(struct net *net,
1961 				 struct in6_rtmsg *rtmsg,
1962 				 struct fib6_config *cfg)
1963 {
1964 	memset(cfg, 0, sizeof(*cfg));
1965 
1966 	cfg->fc_table = RT6_TABLE_MAIN;
1967 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 	cfg->fc_metric = rtmsg->rtmsg_metric;
1969 	cfg->fc_expires = rtmsg->rtmsg_info;
1970 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 	cfg->fc_flags = rtmsg->rtmsg_flags;
1973 
1974 	cfg->fc_nlinfo.nl_net = net;
1975 
1976 	cfg->fc_dst = rtmsg->rtmsg_dst;
1977 	cfg->fc_src = rtmsg->rtmsg_src;
1978 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979 }
1980 
1981 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982 {
1983 	struct fib6_config cfg;
1984 	struct in6_rtmsg rtmsg;
1985 	int err;
1986 
1987 	switch(cmd) {
1988 	case SIOCADDRT:		/* Add a route */
1989 	case SIOCDELRT:		/* Delete a route */
1990 		if (!capable(CAP_NET_ADMIN))
1991 			return -EPERM;
1992 		err = copy_from_user(&rtmsg, arg,
1993 				     sizeof(struct in6_rtmsg));
1994 		if (err)
1995 			return -EFAULT;
1996 
1997 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998 
1999 		rtnl_lock();
2000 		switch (cmd) {
2001 		case SIOCADDRT:
2002 			err = ip6_route_add(&cfg);
2003 			break;
2004 		case SIOCDELRT:
2005 			err = ip6_route_del(&cfg);
2006 			break;
2007 		default:
2008 			err = -EINVAL;
2009 		}
2010 		rtnl_unlock();
2011 
2012 		return err;
2013 	}
2014 
2015 	return -EINVAL;
2016 }
2017 
2018 /*
2019  *	Drop the packet on the floor
2020  */
2021 
2022 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023 {
2024 	int type;
2025 	struct dst_entry *dst = skb_dst(skb);
2026 	switch (ipstats_mib_noroutes) {
2027 	case IPSTATS_MIB_INNOROUTES:
2028 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029 		if (type == IPV6_ADDR_ANY) {
2030 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 				      IPSTATS_MIB_INADDRERRORS);
2032 			break;
2033 		}
2034 		/* FALLTHROUGH */
2035 	case IPSTATS_MIB_OUTNOROUTES:
2036 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 			      ipstats_mib_noroutes);
2038 		break;
2039 	}
2040 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041 	kfree_skb(skb);
2042 	return 0;
2043 }
2044 
2045 static int ip6_pkt_discard(struct sk_buff *skb)
2046 {
2047 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048 }
2049 
2050 static int ip6_pkt_discard_out(struct sk_buff *skb)
2051 {
2052 	skb->dev = skb_dst(skb)->dev;
2053 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055 
2056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057 
2058 static int ip6_pkt_prohibit(struct sk_buff *skb)
2059 {
2060 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061 }
2062 
2063 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064 {
2065 	skb->dev = skb_dst(skb)->dev;
2066 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067 }
2068 
2069 #endif
2070 
2071 /*
2072  *	Allocate a dst for local (unicast / anycast) address.
2073  */
2074 
2075 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 				    const struct in6_addr *addr,
2077 				    bool anycast)
2078 {
2079 	struct net *net = dev_net(idev->dev);
2080 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2081 	int err;
2082 
2083 	if (!rt) {
2084 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2085 		return ERR_PTR(-ENOMEM);
2086 	}
2087 
2088 	in6_dev_hold(idev);
2089 
2090 	rt->dst.flags |= DST_HOST;
2091 	rt->dst.input = ip6_input;
2092 	rt->dst.output = ip6_output;
2093 	rt->rt6i_idev = idev;
2094 
2095 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2096 	if (anycast)
2097 		rt->rt6i_flags |= RTF_ANYCAST;
2098 	else
2099 		rt->rt6i_flags |= RTF_LOCAL;
2100 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2101 	if (err) {
2102 		dst_free(&rt->dst);
2103 		return ERR_PTR(err);
2104 	}
2105 
2106 	rt->rt6i_dst.addr = *addr;
2107 	rt->rt6i_dst.plen = 128;
2108 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2109 
2110 	atomic_set(&rt->dst.__refcnt, 1);
2111 
2112 	return rt;
2113 }
2114 
2115 int ip6_route_get_saddr(struct net *net,
2116 			struct rt6_info *rt,
2117 			const struct in6_addr *daddr,
2118 			unsigned int prefs,
2119 			struct in6_addr *saddr)
2120 {
2121 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122 	int err = 0;
2123 	if (rt->rt6i_prefsrc.plen)
2124 		*saddr = rt->rt6i_prefsrc.addr;
2125 	else
2126 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127 					 daddr, prefs, saddr);
2128 	return err;
2129 }
2130 
/*
 * Argument bundle for fib6_remove_prefsrc(), used to strip a deleted
 * address from the prefsrc of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2137 
2138 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139 {
2140 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143 
2144 	if (((void *)rt->dst.dev == dev || !dev) &&
2145 	    rt != net->ipv6.ip6_null_entry &&
2146 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147 		/* remove prefsrc entry */
2148 		rt->rt6i_prefsrc.plen = 0;
2149 	}
2150 	return 0;
2151 }
2152 
2153 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2154 {
2155 	struct net *net = dev_net(ifp->idev->dev);
2156 	struct arg_dev_net_ip adni = {
2157 		.dev = ifp->idev->dev,
2158 		.net = net,
2159 		.addr = &ifp->addr,
2160 	};
2161 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162 }
2163 
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2168 
2169 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170 {
2171 	const struct arg_dev_net *adn = arg;
2172 	const struct net_device *dev = adn->dev;
2173 
2174 	if ((rt->dst.dev == dev || !dev) &&
2175 	    rt != adn->net->ipv6.ip6_null_entry)
2176 		return -1;
2177 
2178 	return 0;
2179 }
2180 
2181 void rt6_ifdown(struct net *net, struct net_device *dev)
2182 {
2183 	struct arg_dev_net adn = {
2184 		.dev = dev,
2185 		.net = net,
2186 	};
2187 
2188 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2189 	icmp6_clean_all(fib6_ifdown, &adn);
2190 }
2191 
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* its new MTU */
};
2196 
2197 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198 {
2199 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200 	struct inet6_dev *idev;
2201 
2202 	/* In IPv6 pmtu discovery is not optional,
2203 	   so that RTAX_MTU lock cannot disable it.
2204 	   We still use this lock to block changes
2205 	   caused by addrconf/ndisc.
2206 	*/
2207 
2208 	idev = __in6_dev_get(arg->dev);
2209 	if (!idev)
2210 		return 0;
2211 
2212 	/* For administrative MTU increase, there is no way to discover
2213 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2214 	   Since RFC 1981 doesn't include administrative MTU increase
2215 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2216 	 */
2217 	/*
2218 	   If new MTU is less than route PMTU, this new MTU will be the
2219 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2220 	   decreases; if new MTU is greater than route PMTU, and the
2221 	   old MTU is the lowest MTU in the path, update the route PMTU
2222 	   to reflect the increase. In this case if the other nodes' MTU
2223 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2224 	   PMTU discouvery.
2225 	 */
2226 	if (rt->dst.dev == arg->dev &&
2227 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2229 	     (dst_mtu(&rt->dst) < arg->mtu &&
2230 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2231 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2232 	}
2233 	return 0;
2234 }
2235 
2236 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2237 {
2238 	struct rt6_mtu_change_arg arg = {
2239 		.dev = dev,
2240 		.mtu = mtu,
2241 	};
2242 
2243 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2244 }
2245 
2246 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2247 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2248 	[RTA_OIF]               = { .type = NLA_U32 },
2249 	[RTA_IIF]		= { .type = NLA_U32 },
2250 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2251 	[RTA_METRICS]           = { .type = NLA_NESTED },
2252 };
2253 
2254 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255 			      struct fib6_config *cfg)
2256 {
2257 	struct rtmsg *rtm;
2258 	struct nlattr *tb[RTA_MAX+1];
2259 	int err;
2260 
2261 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262 	if (err < 0)
2263 		goto errout;
2264 
2265 	err = -EINVAL;
2266 	rtm = nlmsg_data(nlh);
2267 	memset(cfg, 0, sizeof(*cfg));
2268 
2269 	cfg->fc_table = rtm->rtm_table;
2270 	cfg->fc_dst_len = rtm->rtm_dst_len;
2271 	cfg->fc_src_len = rtm->rtm_src_len;
2272 	cfg->fc_flags = RTF_UP;
2273 	cfg->fc_protocol = rtm->rtm_protocol;
2274 	cfg->fc_type = rtm->rtm_type;
2275 
2276 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2277 	    rtm->rtm_type == RTN_BLACKHOLE ||
2278 	    rtm->rtm_type == RTN_PROHIBIT ||
2279 	    rtm->rtm_type == RTN_THROW)
2280 		cfg->fc_flags |= RTF_REJECT;
2281 
2282 	if (rtm->rtm_type == RTN_LOCAL)
2283 		cfg->fc_flags |= RTF_LOCAL;
2284 
2285 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2286 	cfg->fc_nlinfo.nlh = nlh;
2287 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2288 
2289 	if (tb[RTA_GATEWAY]) {
2290 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291 		cfg->fc_flags |= RTF_GATEWAY;
2292 	}
2293 
2294 	if (tb[RTA_DST]) {
2295 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2296 
2297 		if (nla_len(tb[RTA_DST]) < plen)
2298 			goto errout;
2299 
2300 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2301 	}
2302 
2303 	if (tb[RTA_SRC]) {
2304 		int plen = (rtm->rtm_src_len + 7) >> 3;
2305 
2306 		if (nla_len(tb[RTA_SRC]) < plen)
2307 			goto errout;
2308 
2309 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2310 	}
2311 
2312 	if (tb[RTA_PREFSRC])
2313 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314 
2315 	if (tb[RTA_OIF])
2316 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317 
2318 	if (tb[RTA_PRIORITY])
2319 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320 
2321 	if (tb[RTA_METRICS]) {
2322 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2324 	}
2325 
2326 	if (tb[RTA_TABLE])
2327 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328 
2329 	err = 0;
2330 errout:
2331 	return err;
2332 }
2333 
2334 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335 {
2336 	struct fib6_config cfg;
2337 	int err;
2338 
2339 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 	if (err < 0)
2341 		return err;
2342 
2343 	return ip6_route_del(&cfg);
2344 }
2345 
2346 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2347 {
2348 	struct fib6_config cfg;
2349 	int err;
2350 
2351 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2352 	if (err < 0)
2353 		return err;
2354 
2355 	return ip6_route_add(&cfg);
2356 }
2357 
2358 static inline size_t rt6_nlmsg_size(void)
2359 {
2360 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2361 	       + nla_total_size(16) /* RTA_SRC */
2362 	       + nla_total_size(16) /* RTA_DST */
2363 	       + nla_total_size(16) /* RTA_GATEWAY */
2364 	       + nla_total_size(16) /* RTA_PREFSRC */
2365 	       + nla_total_size(4) /* RTA_TABLE */
2366 	       + nla_total_size(4) /* RTA_IIF */
2367 	       + nla_total_size(4) /* RTA_OIF */
2368 	       + nla_total_size(4) /* RTA_PRIORITY */
2369 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2370 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2371 }
2372 
2373 static int rt6_fill_node(struct net *net,
2374 			 struct sk_buff *skb, struct rt6_info *rt,
2375 			 struct in6_addr *dst, struct in6_addr *src,
2376 			 int iif, int type, u32 portid, u32 seq,
2377 			 int prefix, int nowait, unsigned int flags)
2378 {
2379 	struct rtmsg *rtm;
2380 	struct nlmsghdr *nlh;
2381 	long expires;
2382 	u32 table;
2383 	struct neighbour *n;
2384 
2385 	if (prefix) {	/* user wants prefix routes only */
2386 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387 			/* success since this is not a prefix route */
2388 			return 1;
2389 		}
2390 	}
2391 
2392 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2393 	if (!nlh)
2394 		return -EMSGSIZE;
2395 
2396 	rtm = nlmsg_data(nlh);
2397 	rtm->rtm_family = AF_INET6;
2398 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399 	rtm->rtm_src_len = rt->rt6i_src.plen;
2400 	rtm->rtm_tos = 0;
2401 	if (rt->rt6i_table)
2402 		table = rt->rt6i_table->tb6_id;
2403 	else
2404 		table = RT6_TABLE_UNSPEC;
2405 	rtm->rtm_table = table;
2406 	if (nla_put_u32(skb, RTA_TABLE, table))
2407 		goto nla_put_failure;
2408 	if (rt->rt6i_flags & RTF_REJECT) {
2409 		switch (rt->dst.error) {
2410 		case -EINVAL:
2411 			rtm->rtm_type = RTN_BLACKHOLE;
2412 			break;
2413 		case -EACCES:
2414 			rtm->rtm_type = RTN_PROHIBIT;
2415 			break;
2416 		case -EAGAIN:
2417 			rtm->rtm_type = RTN_THROW;
2418 			break;
2419 		default:
2420 			rtm->rtm_type = RTN_UNREACHABLE;
2421 			break;
2422 		}
2423 	}
2424 	else if (rt->rt6i_flags & RTF_LOCAL)
2425 		rtm->rtm_type = RTN_LOCAL;
2426 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2427 		rtm->rtm_type = RTN_LOCAL;
2428 	else
2429 		rtm->rtm_type = RTN_UNICAST;
2430 	rtm->rtm_flags = 0;
2431 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2432 	rtm->rtm_protocol = rt->rt6i_protocol;
2433 	if (rt->rt6i_flags & RTF_DYNAMIC)
2434 		rtm->rtm_protocol = RTPROT_REDIRECT;
2435 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437 			rtm->rtm_protocol = RTPROT_RA;
2438 		else
2439 			rtm->rtm_protocol = RTPROT_KERNEL;
2440 	}
2441 
2442 	if (rt->rt6i_flags & RTF_CACHE)
2443 		rtm->rtm_flags |= RTM_F_CLONED;
2444 
2445 	if (dst) {
2446 		if (nla_put(skb, RTA_DST, 16, dst))
2447 			goto nla_put_failure;
2448 		rtm->rtm_dst_len = 128;
2449 	} else if (rtm->rtm_dst_len)
2450 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451 			goto nla_put_failure;
2452 #ifdef CONFIG_IPV6_SUBTREES
2453 	if (src) {
2454 		if (nla_put(skb, RTA_SRC, 16, src))
2455 			goto nla_put_failure;
2456 		rtm->rtm_src_len = 128;
2457 	} else if (rtm->rtm_src_len &&
2458 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459 		goto nla_put_failure;
2460 #endif
2461 	if (iif) {
2462 #ifdef CONFIG_IPV6_MROUTE
2463 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2464 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2465 			if (err <= 0) {
2466 				if (!nowait) {
2467 					if (err == 0)
2468 						return 0;
2469 					goto nla_put_failure;
2470 				} else {
2471 					if (err == -EMSGSIZE)
2472 						goto nla_put_failure;
2473 				}
2474 			}
2475 		} else
2476 #endif
2477 			if (nla_put_u32(skb, RTA_IIF, iif))
2478 				goto nla_put_failure;
2479 	} else if (dst) {
2480 		struct in6_addr saddr_buf;
2481 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483 			goto nla_put_failure;
2484 	}
2485 
2486 	if (rt->rt6i_prefsrc.plen) {
2487 		struct in6_addr saddr_buf;
2488 		saddr_buf = rt->rt6i_prefsrc.addr;
2489 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490 			goto nla_put_failure;
2491 	}
2492 
2493 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2494 		goto nla_put_failure;
2495 
2496 	n = rt->n;
2497 	if (n) {
2498 		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2499 			goto nla_put_failure;
2500 	}
2501 
2502 	if (rt->dst.dev &&
2503 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504 		goto nla_put_failure;
2505 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506 		goto nla_put_failure;
2507 
2508 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2509 
2510 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2511 		goto nla_put_failure;
2512 
2513 	return nlmsg_end(skb, nlh);
2514 
2515 nla_put_failure:
2516 	nlmsg_cancel(skb, nlh);
2517 	return -EMSGSIZE;
2518 }
2519 
2520 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2521 {
2522 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523 	int prefix;
2524 
2525 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2527 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528 	} else
2529 		prefix = 0;
2530 
2531 	return rt6_fill_node(arg->net,
2532 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2533 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2534 		     prefix, 0, NLM_F_MULTI);
2535 }
2536 
2537 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538 {
2539 	struct net *net = sock_net(in_skb->sk);
2540 	struct nlattr *tb[RTA_MAX+1];
2541 	struct rt6_info *rt;
2542 	struct sk_buff *skb;
2543 	struct rtmsg *rtm;
2544 	struct flowi6 fl6;
2545 	int err, iif = 0, oif = 0;
2546 
2547 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548 	if (err < 0)
2549 		goto errout;
2550 
2551 	err = -EINVAL;
2552 	memset(&fl6, 0, sizeof(fl6));
2553 
2554 	if (tb[RTA_SRC]) {
2555 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556 			goto errout;
2557 
2558 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2559 	}
2560 
2561 	if (tb[RTA_DST]) {
2562 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563 			goto errout;
2564 
2565 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2566 	}
2567 
2568 	if (tb[RTA_IIF])
2569 		iif = nla_get_u32(tb[RTA_IIF]);
2570 
2571 	if (tb[RTA_OIF])
2572 		oif = nla_get_u32(tb[RTA_OIF]);
2573 
2574 	if (iif) {
2575 		struct net_device *dev;
2576 		int flags = 0;
2577 
2578 		dev = __dev_get_by_index(net, iif);
2579 		if (!dev) {
2580 			err = -ENODEV;
2581 			goto errout;
2582 		}
2583 
2584 		fl6.flowi6_iif = iif;
2585 
2586 		if (!ipv6_addr_any(&fl6.saddr))
2587 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2588 
2589 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590 							       flags);
2591 	} else {
2592 		fl6.flowi6_oif = oif;
2593 
2594 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2595 	}
2596 
2597 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598 	if (!skb) {
2599 		dst_release(&rt->dst);
2600 		err = -ENOBUFS;
2601 		goto errout;
2602 	}
2603 
2604 	/* Reserve room for dummy headers, this skb can pass
2605 	   through good chunk of routing engine.
2606 	 */
2607 	skb_reset_mac_header(skb);
2608 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2609 
2610 	skb_dst_set(skb, &rt->dst);
2611 
2612 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2613 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2614 			    nlh->nlmsg_seq, 0, 0, 0);
2615 	if (err < 0) {
2616 		kfree_skb(skb);
2617 		goto errout;
2618 	}
2619 
2620 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2621 errout:
2622 	return err;
2623 }
2624 
2625 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2626 {
2627 	struct sk_buff *skb;
2628 	struct net *net = info->nl_net;
2629 	u32 seq;
2630 	int err;
2631 
2632 	err = -ENOBUFS;
2633 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2634 
2635 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2636 	if (!skb)
2637 		goto errout;
2638 
2639 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2640 				event, info->portid, seq, 0, 0, 0);
2641 	if (err < 0) {
2642 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2643 		WARN_ON(err == -EMSGSIZE);
2644 		kfree_skb(skb);
2645 		goto errout;
2646 	}
2647 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2648 		    info->nlh, gfp_any());
2649 	return;
2650 errout:
2651 	if (err < 0)
2652 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2653 }
2654 
2655 static int ip6_route_dev_notify(struct notifier_block *this,
2656 				unsigned long event, void *data)
2657 {
2658 	struct net_device *dev = (struct net_device *)data;
2659 	struct net *net = dev_net(dev);
2660 
2661 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2662 		net->ipv6.ip6_null_entry->dst.dev = dev;
2663 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2665 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2666 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2667 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2668 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669 #endif
2670 	}
2671 
2672 	return NOTIFY_OK;
2673 }
2674 
2675 /*
2676  *	/proc
2677  */
2678 
2679 #ifdef CONFIG_PROC_FS
2680 
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in this part of the file references this
 * structure -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2689 
2690 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691 {
2692 	struct seq_file *m = p_arg;
2693 	struct neighbour *n;
2694 
2695 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2696 
2697 #ifdef CONFIG_IPV6_SUBTREES
2698 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2699 #else
2700 	seq_puts(m, "00000000000000000000000000000000 00 ");
2701 #endif
2702 	n = rt->n;
2703 	if (n) {
2704 		seq_printf(m, "%pi6", n->primary_key);
2705 	} else {
2706 		seq_puts(m, "00000000000000000000000000000000");
2707 	}
2708 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2709 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710 		   rt->dst.__use, rt->rt6i_flags,
2711 		   rt->dst.dev ? rt->dst.dev->name : "");
2712 	return 0;
2713 }
2714 
2715 static int ipv6_route_show(struct seq_file *m, void *v)
2716 {
2717 	struct net *net = (struct net *)m->private;
2718 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2719 	return 0;
2720 }
2721 
/* open() for /proc/net/ipv6_route: binds ipv6_route_show() via single_open_net(). */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2726 
2727 static const struct file_operations ipv6_route_proc_fops = {
2728 	.owner		= THIS_MODULE,
2729 	.open		= ipv6_route_open,
2730 	.read		= seq_read,
2731 	.llseek		= seq_lseek,
2732 	.release	= single_release_net,
2733 };
2734 
2735 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736 {
2737 	struct net *net = (struct net *)seq->private;
2738 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2739 		   net->ipv6.rt6_stats->fib_nodes,
2740 		   net->ipv6.rt6_stats->fib_route_nodes,
2741 		   net->ipv6.rt6_stats->fib_rt_alloc,
2742 		   net->ipv6.rt6_stats->fib_rt_entries,
2743 		   net->ipv6.rt6_stats->fib_rt_cache,
2744 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2745 		   net->ipv6.rt6_stats->fib_discarded_routes);
2746 
2747 	return 0;
2748 }
2749 
/* open() for /proc/net/rt6_stats: binds rt6_stats_seq_show() via single_open_net(). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2754 
2755 static const struct file_operations rt6_stats_seq_fops = {
2756 	.owner	 = THIS_MODULE,
2757 	.open	 = rt6_stats_seq_open,
2758 	.read	 = seq_read,
2759 	.llseek	 = seq_lseek,
2760 	.release = single_release_net,
2761 };
2762 #endif	/* CONFIG_PROC_FS */
2763 
2764 #ifdef CONFIG_SYSCTL
2765 
2766 static
2767 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2768 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2769 {
2770 	struct net *net;
2771 	int delay;
2772 	if (!write)
2773 		return -EINVAL;
2774 
2775 	net = (struct net *)ctl->extra1;
2776 	delay = net->ipv6.sysctl.flush_delay;
2777 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2778 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779 	return 0;
2780 }
2781 
2782 ctl_table ipv6_route_table_template[] = {
2783 	{
2784 		.procname	=	"flush",
2785 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2786 		.maxlen		=	sizeof(int),
2787 		.mode		=	0200,
2788 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2789 	},
2790 	{
2791 		.procname	=	"gc_thresh",
2792 		.data		=	&ip6_dst_ops_template.gc_thresh,
2793 		.maxlen		=	sizeof(int),
2794 		.mode		=	0644,
2795 		.proc_handler	=	proc_dointvec,
2796 	},
2797 	{
2798 		.procname	=	"max_size",
2799 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2800 		.maxlen		=	sizeof(int),
2801 		.mode		=	0644,
2802 		.proc_handler	=	proc_dointvec,
2803 	},
2804 	{
2805 		.procname	=	"gc_min_interval",
2806 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2807 		.maxlen		=	sizeof(int),
2808 		.mode		=	0644,
2809 		.proc_handler	=	proc_dointvec_jiffies,
2810 	},
2811 	{
2812 		.procname	=	"gc_timeout",
2813 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2814 		.maxlen		=	sizeof(int),
2815 		.mode		=	0644,
2816 		.proc_handler	=	proc_dointvec_jiffies,
2817 	},
2818 	{
2819 		.procname	=	"gc_interval",
2820 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2821 		.maxlen		=	sizeof(int),
2822 		.mode		=	0644,
2823 		.proc_handler	=	proc_dointvec_jiffies,
2824 	},
2825 	{
2826 		.procname	=	"gc_elasticity",
2827 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2828 		.maxlen		=	sizeof(int),
2829 		.mode		=	0644,
2830 		.proc_handler	=	proc_dointvec,
2831 	},
2832 	{
2833 		.procname	=	"mtu_expires",
2834 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2835 		.maxlen		=	sizeof(int),
2836 		.mode		=	0644,
2837 		.proc_handler	=	proc_dointvec_jiffies,
2838 	},
2839 	{
2840 		.procname	=	"min_adv_mss",
2841 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2842 		.maxlen		=	sizeof(int),
2843 		.mode		=	0644,
2844 		.proc_handler	=	proc_dointvec,
2845 	},
2846 	{
2847 		.procname	=	"gc_min_interval_ms",
2848 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2849 		.maxlen		=	sizeof(int),
2850 		.mode		=	0644,
2851 		.proc_handler	=	proc_dointvec_ms_jiffies,
2852 	},
2853 	{ }
2854 };
2855 
2856 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2857 {
2858 	struct ctl_table *table;
2859 
2860 	table = kmemdup(ipv6_route_table_template,
2861 			sizeof(ipv6_route_table_template),
2862 			GFP_KERNEL);
2863 
2864 	if (table) {
2865 		table[0].data = &net->ipv6.sysctl.flush_delay;
2866 		table[0].extra1 = net;
2867 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2868 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2875 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2876 	}
2877 
2878 	return table;
2879 }
2880 #endif
2881 
2882 static int __net_init ip6_route_net_init(struct net *net)
2883 {
2884 	int ret = -ENOMEM;
2885 
2886 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887 	       sizeof(net->ipv6.ip6_dst_ops));
2888 
2889 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890 		goto out_ip6_dst_ops;
2891 
2892 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893 					   sizeof(*net->ipv6.ip6_null_entry),
2894 					   GFP_KERNEL);
2895 	if (!net->ipv6.ip6_null_entry)
2896 		goto out_ip6_dst_entries;
2897 	net->ipv6.ip6_null_entry->dst.path =
2898 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2899 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2900 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901 			 ip6_template_metrics, true);
2902 
2903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2906 					       GFP_KERNEL);
2907 	if (!net->ipv6.ip6_prohibit_entry)
2908 		goto out_ip6_null_entry;
2909 	net->ipv6.ip6_prohibit_entry->dst.path =
2910 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2911 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913 			 ip6_template_metrics, true);
2914 
2915 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2917 					       GFP_KERNEL);
2918 	if (!net->ipv6.ip6_blk_hole_entry)
2919 		goto out_ip6_prohibit_entry;
2920 	net->ipv6.ip6_blk_hole_entry->dst.path =
2921 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2922 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2923 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924 			 ip6_template_metrics, true);
2925 #endif
2926 
2927 	net->ipv6.sysctl.flush_delay = 0;
2928 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935 
2936 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937 
2938 	ret = 0;
2939 out:
2940 	return ret;
2941 
2942 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943 out_ip6_prohibit_entry:
2944 	kfree(net->ipv6.ip6_prohibit_entry);
2945 out_ip6_null_entry:
2946 	kfree(net->ipv6.ip6_null_entry);
2947 #endif
2948 out_ip6_dst_entries:
2949 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2950 out_ip6_dst_ops:
2951 	goto out;
2952 }
2953 
2954 static void __net_exit ip6_route_net_exit(struct net *net)
2955 {
2956 	kfree(net->ipv6.ip6_null_entry);
2957 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958 	kfree(net->ipv6.ip6_prohibit_entry);
2959 	kfree(net->ipv6.ip6_blk_hole_entry);
2960 #endif
2961 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2962 }
2963 
/*
 * Late per-namespace init: create the "ipv6_route" and "rt6_stats" proc
 * entries when CONFIG_PROC_FS is enabled.  The results of the creation
 * calls are not checked; the function always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	return 0;
}
2972 
/*
 * Late per-namespace exit: remove the proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}
2980 
2981 static struct pernet_operations ip6_route_net_ops = {
2982 	.init = ip6_route_net_init,
2983 	.exit = ip6_route_net_exit,
2984 };
2985 
2986 static int __net_init ipv6_inetpeer_init(struct net *net)
2987 {
2988 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989 
2990 	if (!bp)
2991 		return -ENOMEM;
2992 	inet_peer_base_init(bp);
2993 	net->ipv6.peers = bp;
2994 	return 0;
2995 }
2996 
2997 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998 {
2999 	struct inet_peer_base *bp = net->ipv6.peers;
3000 
3001 	net->ipv6.peers = NULL;
3002 	inetpeer_invalidate_tree(bp);
3003 	kfree(bp);
3004 }
3005 
3006 static struct pernet_operations ipv6_inetpeer_ops = {
3007 	.init	=	ipv6_inetpeer_init,
3008 	.exit	=	ipv6_inetpeer_exit,
3009 };
3010 
3011 static struct pernet_operations ip6_route_net_late_ops = {
3012 	.init = ip6_route_net_init_late,
3013 	.exit = ip6_route_net_exit_late,
3014 };
3015 
3016 static struct notifier_block ip6_route_dev_notifier = {
3017 	.notifier_call = ip6_route_dev_notify,
3018 	.priority = 0,
3019 };
3020 
3021 int __init ip6_route_init(void)
3022 {
3023 	int ret;
3024 
3025 	ret = -ENOMEM;
3026 	ip6_dst_ops_template.kmem_cachep =
3027 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3028 				  SLAB_HWCACHE_ALIGN, NULL);
3029 	if (!ip6_dst_ops_template.kmem_cachep)
3030 		goto out;
3031 
3032 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3033 	if (ret)
3034 		goto out_kmem_cache;
3035 
3036 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037 	if (ret)
3038 		goto out_dst_entries;
3039 
3040 	ret = register_pernet_subsys(&ip6_route_net_ops);
3041 	if (ret)
3042 		goto out_register_inetpeer;
3043 
3044 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045 
3046 	/* Registering of the loopback is done before this portion of code,
3047 	 * the loopback reference in rt6_info will not be taken, do it
3048 	 * manually for init_net */
3049 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056   #endif
3057 	ret = fib6_init();
3058 	if (ret)
3059 		goto out_register_subsys;
3060 
3061 	ret = xfrm6_init();
3062 	if (ret)
3063 		goto out_fib6_init;
3064 
3065 	ret = fib6_rules_init();
3066 	if (ret)
3067 		goto xfrm6_init;
3068 
3069 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070 	if (ret)
3071 		goto fib6_rules_init;
3072 
3073 	ret = -ENOBUFS;
3074 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077 		goto out_register_late_subsys;
3078 
3079 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080 	if (ret)
3081 		goto out_register_late_subsys;
3082 
3083 out:
3084 	return ret;
3085 
3086 out_register_late_subsys:
3087 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3088 fib6_rules_init:
3089 	fib6_rules_cleanup();
3090 xfrm6_init:
3091 	xfrm6_fini();
3092 out_fib6_init:
3093 	fib6_gc_cleanup();
3094 out_register_subsys:
3095 	unregister_pernet_subsys(&ip6_route_net_ops);
3096 out_register_inetpeer:
3097 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098 out_dst_entries:
3099 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3100 out_kmem_cache:
3101 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3102 	goto out;
3103 }
3104 
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late per-namespace ops, policy rules, xfrm6, the FIB, the remaining
 * per-namespace ops, the blackhole dst counter and finally the dst
 * cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* The cache goes last, after everything that allocated from it. */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3117