xref: /linux/net/ipv6/route.c (revision fea88a0c02822fbb91a0b8301bf9af04377876a3)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable; otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
65 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 				    const struct in6_addr *dest);
67 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void		ip6_dst_destroy(struct dst_entry *);
72 static void		ip6_dst_ifdown(struct dst_entry *,
73 				       struct net_device *dev, int how);
74 static int		 ip6_dst_gc(struct dst_ops *ops);
75 
76 static int		ip6_pkt_discard(struct sk_buff *skb);
77 static int		ip6_pkt_discard_out(struct sk_buff *skb);
78 static void		ip6_link_failure(struct sk_buff *skb);
79 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80 
81 #ifdef CONFIG_IPV6_ROUTE_INFO
82 static struct rt6_info *rt6_add_route_info(struct net *net,
83 					   const struct in6_addr *prefix, int prefixlen,
84 					   const struct in6_addr *gwaddr, int ifindex,
85 					   unsigned pref);
86 static struct rt6_info *rt6_get_route_info(struct net *net,
87 					   const struct in6_addr *prefix, int prefixlen,
88 					   const struct in6_addr *gwaddr, int ifindex);
89 #endif
90 
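/*
 * Copy-on-write for route metrics: host routes typically start out
 * pointing at a shared, read-only metrics array.  On the first write the
 * route is bound to its inet_peer entry, the old metrics are copied
 * there, and dst->_metrics is switched over with cmpxchg() so concurrent
 * writers end up agreeing on a single private copy.
 */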
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 	struct rt6_info *rt = (struct rt6_info *) dst;
94 	struct inet_peer *peer;
95 	u32 *p = NULL;
96 
97 	if (!(rt->dst.flags & DST_HOST))
98 		return NULL;
99 
100 	if (!rt->rt6i_peer)
101 		rt6_bind_peer(rt, 1);
102 
103 	peer = rt->rt6i_peer;
104 	if (peer) {
105 		u32 *old_p = __DST_METRICS_PTR(old);
106 		unsigned long prev, new;
107 
108 		p = peer->metrics;
109 		if (inet_metrics_new(peer))
110 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111 
112 		new = (unsigned long) p;
113 		prev = cmpxchg(&dst->_metrics, old, new);
114 
115 		if (prev != old) {
116 			p = __DST_METRICS_PTR(prev);
117 			if (prev & DST_METRICS_READ_ONLY)
118 				p = NULL;
119 		}
120 	}
121 	return p;
122 }
123 
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125 {
126 	struct in6_addr *p = &rt->rt6i_gateway;
127 
128 	if (!ipv6_addr_any(p))
129 		return (const void *) p;
130 	return daddr;
131 }
132 
133 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134 {
135 	struct rt6_info *rt = (struct rt6_info *) dst;
136 	struct neighbour *n;
137 
138 	daddr = choose_neigh_daddr(rt, daddr);
139 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 	if (n)
141 		return n;
142 	return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144 
145 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146 {
147 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 	if (!n) {
149 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 		if (IS_ERR(n))
151 			return PTR_ERR(n);
152 	}
153 	dst_set_neighbour(&rt->dst, n);
154 
155 	return 0;
156 }
157 
158 static struct dst_ops ip6_dst_ops_template = {
159 	.family			=	AF_INET6,
160 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
161 	.gc			=	ip6_dst_gc,
162 	.gc_thresh		=	1024,
163 	.check			=	ip6_dst_check,
164 	.default_advmss		=	ip6_default_advmss,
165 	.mtu			=	ip6_mtu,
166 	.cow_metrics		=	ipv6_cow_metrics,
167 	.destroy		=	ip6_dst_destroy,
168 	.ifdown			=	ip6_dst_ifdown,
169 	.negative_advice	=	ip6_negative_advice,
170 	.link_failure		=	ip6_link_failure,
171 	.update_pmtu		=	ip6_rt_update_pmtu,
172 	.local_out		=	__ip6_local_out,
173 	.neigh_lookup		=	ip6_neigh_lookup,
174 };
175 
176 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177 {
178 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179 
180 	return mtu ? : dst->dev->mtu;
181 }
182 
183 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184 {
185 }
186 
187 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 					 unsigned long old)
189 {
190 	return NULL;
191 }
192 
193 static struct dst_ops ip6_dst_blackhole_ops = {
194 	.family			=	AF_INET6,
195 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
196 	.destroy		=	ip6_dst_destroy,
197 	.check			=	ip6_dst_check,
198 	.mtu			=	ip6_blackhole_mtu,
199 	.default_advmss		=	ip6_default_advmss,
200 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
201 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
202 	.neigh_lookup		=	ip6_neigh_lookup,
203 };
204 
205 static const u32 ip6_template_metrics[RTAX_MAX] = {
206 	[RTAX_HOPLIMIT - 1] = 255,
207 };
208 
209 static struct rt6_info ip6_null_entry_template = {
210 	.dst = {
211 		.__refcnt	= ATOMIC_INIT(1),
212 		.__use		= 1,
213 		.obsolete	= -1,
214 		.error		= -ENETUNREACH,
215 		.input		= ip6_pkt_discard,
216 		.output		= ip6_pkt_discard_out,
217 	},
218 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
219 	.rt6i_protocol  = RTPROT_KERNEL,
220 	.rt6i_metric	= ~(u32) 0,
221 	.rt6i_ref	= ATOMIC_INIT(1),
222 };
223 
224 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
225 
226 static int ip6_pkt_prohibit(struct sk_buff *skb);
227 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
228 
229 static struct rt6_info ip6_prohibit_entry_template = {
230 	.dst = {
231 		.__refcnt	= ATOMIC_INIT(1),
232 		.__use		= 1,
233 		.obsolete	= -1,
234 		.error		= -EACCES,
235 		.input		= ip6_pkt_prohibit,
236 		.output		= ip6_pkt_prohibit_out,
237 	},
238 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
239 	.rt6i_protocol  = RTPROT_KERNEL,
240 	.rt6i_metric	= ~(u32) 0,
241 	.rt6i_ref	= ATOMIC_INIT(1),
242 };
243 
244 static struct rt6_info ip6_blk_hole_entry_template = {
245 	.dst = {
246 		.__refcnt	= ATOMIC_INIT(1),
247 		.__use		= 1,
248 		.obsolete	= -1,
249 		.error		= -EINVAL,
250 		.input		= dst_discard,
251 		.output		= dst_discard,
252 	},
253 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
254 	.rt6i_protocol  = RTPROT_KERNEL,
255 	.rt6i_metric	= ~(u32) 0,
256 	.rt6i_ref	= ATOMIC_INIT(1),
257 };
258 
259 #endif
260 
261 /* allocate dst with ip6_dst_ops */
262 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
263 					     struct net_device *dev,
264 					     int flags)
265 {
266 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
267 
268 	if (rt)
269 		memset(&rt->rt6i_table, 0,
270 		       sizeof(*rt) - sizeof(struct dst_entry));
271 
272 	return rt;
273 }
274 
275 static void ip6_dst_destroy(struct dst_entry *dst)
276 {
277 	struct rt6_info *rt = (struct rt6_info *)dst;
278 	struct inet6_dev *idev = rt->rt6i_idev;
279 	struct inet_peer *peer = rt->rt6i_peer;
280 
281 	if (!(rt->dst.flags & DST_HOST))
282 		dst_destroy_metrics_generic(dst);
283 
284 	if (idev) {
285 		rt->rt6i_idev = NULL;
286 		in6_dev_put(idev);
287 	}
288 	if (peer) {
289 		rt->rt6i_peer = NULL;
290 		inet_putpeer(peer);
291 	}
292 }
293 
294 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295 
296 static u32 rt6_peer_genid(void)
297 {
298 	return atomic_read(&__rt6_peer_genid);
299 }
300 
301 void rt6_bind_peer(struct rt6_info *rt, int create)
302 {
303 	struct inet_peer *peer;
304 
305 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 		inet_putpeer(peer);
308 	else
309 		rt->rt6i_peer_genid = rt6_peer_genid();
310 }
311 
312 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 			   int how)
314 {
315 	struct rt6_info *rt = (struct rt6_info *)dst;
316 	struct inet6_dev *idev = rt->rt6i_idev;
317 	struct net_device *loopback_dev =
318 		dev_net(dev)->loopback_dev;
319 
320 	if (dev != loopback_dev && idev && idev->dev == dev) {
321 		struct inet6_dev *loopback_idev =
322 			in6_dev_get(loopback_dev);
323 		if (loopback_idev) {
324 			rt->rt6i_idev = loopback_idev;
325 			in6_dev_put(idev);
326 		}
327 	}
328 }
329 
330 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331 {
332 	return (rt->rt6i_flags & RTF_EXPIRES) &&
333 		time_after(jiffies, rt->dst.expires);
334 }
335 
336 static inline int rt6_need_strict(const struct in6_addr *daddr)
337 {
338 	return ipv6_addr_type(daddr) &
339 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
340 }
341 
342 /*
343  *	Route lookup. Any table->tb6_lock is implied.
344  */
345 
346 static inline struct rt6_info *rt6_device_match(struct net *net,
347 						    struct rt6_info *rt,
348 						    const struct in6_addr *saddr,
349 						    int oif,
350 						    int flags)
351 {
352 	struct rt6_info *local = NULL;
353 	struct rt6_info *sprt;
354 
355 	if (!oif && ipv6_addr_any(saddr))
356 		goto out;
357 
358 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
359 		struct net_device *dev = sprt->dst.dev;
360 
361 		if (oif) {
362 			if (dev->ifindex == oif)
363 				return sprt;
364 			if (dev->flags & IFF_LOOPBACK) {
365 				if (!sprt->rt6i_idev ||
366 				    sprt->rt6i_idev->dev->ifindex != oif) {
367 					if (flags & RT6_LOOKUP_F_IFACE && oif)
368 						continue;
369 					if (local && (!oif ||
370 						      local->rt6i_idev->dev->ifindex == oif))
371 						continue;
372 				}
373 				local = sprt;
374 			}
375 		} else {
376 			if (ipv6_chk_addr(net, saddr, dev,
377 					  flags & RT6_LOOKUP_F_IFACE))
378 				return sprt;
379 		}
380 	}
381 
382 	if (oif) {
383 		if (local)
384 			return local;
385 
386 		if (flags & RT6_LOOKUP_F_IFACE)
387 			return net->ipv6.ip6_null_entry;
388 	}
389 out:
390 	return rt;
391 }
392 
393 #ifdef CONFIG_IPV6_ROUTER_PREF
394 static void rt6_probe(struct rt6_info *rt)
395 {
396 	struct neighbour *neigh;
397 	/*
398 	 * Okay, this does not seem to be appropriate
399 	 * for now; however, we need to check whether it
400 	 * really is so, aka Router Reachability Probing.
401 	 *
402 	 * Router Reachability Probe MUST be rate-limited
403 	 * to no more than one per minute.
404 	 */
405 	rcu_read_lock();
406 	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
407 	if (!neigh || (neigh->nud_state & NUD_VALID))
408 		goto out;
409 	read_lock_bh(&neigh->lock);
410 	if (!(neigh->nud_state & NUD_VALID) &&
411 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
412 		struct in6_addr mcaddr;
413 		struct in6_addr *target;
414 
415 		neigh->updated = jiffies;
416 		read_unlock_bh(&neigh->lock);
417 
418 		target = (struct in6_addr *)&neigh->primary_key;
419 		addrconf_addr_solict_mult(target, &mcaddr);
420 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
421 	} else {
422 		read_unlock_bh(&neigh->lock);
423 	}
424 out:
425 	rcu_read_unlock();
426 }
427 #else
428 static inline void rt6_probe(struct rt6_info *rt)
429 {
430 }
431 #endif
432 
433 /*
434  * Default Router Selection (RFC 2461 6.3.6)
435  */
436 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
437 {
438 	struct net_device *dev = rt->dst.dev;
439 	if (!oif || dev->ifindex == oif)
440 		return 2;
441 	if ((dev->flags & IFF_LOOPBACK) &&
442 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 		return 1;
444 	return 0;
445 }
446 
447 static inline int rt6_check_neigh(struct rt6_info *rt)
448 {
449 	struct neighbour *neigh;
450 	int m;
451 
452 	rcu_read_lock();
453 	neigh = dst_get_neighbour_noref(&rt->dst);
454 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 	    !(rt->rt6i_flags & RTF_GATEWAY))
456 		m = 1;
457 	else if (neigh) {
458 		read_lock_bh(&neigh->lock);
459 		if (neigh->nud_state & NUD_VALID)
460 			m = 2;
461 #ifdef CONFIG_IPV6_ROUTER_PREF
462 		else if (neigh->nud_state & NUD_FAILED)
463 			m = 0;
464 #endif
465 		else
466 			m = 1;
467 		read_unlock_bh(&neigh->lock);
468 	} else
469 		m = 0;
470 	rcu_read_unlock();
471 	return m;
472 }
473 
474 static int rt6_score_route(struct rt6_info *rt, int oif,
475 			   int strict)
476 {
477 	int m, n;
478 
479 	m = rt6_check_dev(rt, oif);
480 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
481 		return -1;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484 #endif
485 	n = rt6_check_neigh(rt);
486 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
487 		return -1;
488 	return m;
489 }
490 
491 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 				   int *mpri, struct rt6_info *match)
493 {
494 	int m;
495 
496 	if (rt6_check_expired(rt))
497 		goto out;
498 
499 	m = rt6_score_route(rt, oif, strict);
500 	if (m < 0)
501 		goto out;
502 
503 	if (m > *mpri) {
504 		if (strict & RT6_LOOKUP_F_REACHABLE)
505 			rt6_probe(match);
506 		*mpri = m;
507 		match = rt;
508 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 		rt6_probe(rt);
510 	}
511 
512 out:
513 	return match;
514 }
515 
516 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 				     struct rt6_info *rr_head,
518 				     u32 metric, int oif, int strict)
519 {
520 	struct rt6_info *rt, *match;
521 	int mpri = -1;
522 
523 	match = NULL;
524 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
525 	     rt = rt->dst.rt6_next)
526 		match = find_match(rt, oif, strict, &mpri, match);
527 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
528 	     rt = rt->dst.rt6_next)
529 		match = find_match(rt, oif, strict, &mpri, match);
530 
531 	return match;
532 }
533 
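/*
 * Pick the best route among the entries sharing rr_ptr's metric, scoring
 * each candidate on outgoing-interface match, router preference and
 * neighbour reachability (see rt6_score_route).  If nothing is
 * (probably) reachable, fn->rr_ptr is advanced so that the next lookup
 * round-robins over the remaining routers.
 */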
534 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535 {
536 	struct rt6_info *match, *rt0;
537 	struct net *net;
538 
539 	rt0 = fn->rr_ptr;
540 	if (!rt0)
541 		fn->rr_ptr = rt0 = fn->leaf;
542 
543 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
544 
545 	if (!match &&
546 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
547 		struct rt6_info *next = rt0->dst.rt6_next;
548 
549 		/* no entries matched; do round-robin */
550 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 			next = fn->leaf;
552 
553 		if (next != rt0)
554 			fn->rr_ptr = next;
555 	}
556 
557 	net = dev_net(rt0->dst.dev);
558 	return match ? match : net->ipv6.ip6_null_entry;
559 }
560 
561 #ifdef CONFIG_IPV6_ROUTE_INFO
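/*
 * Process a Route Information option received in a Router Advertisement
 * (RFC 4191): validate the length/prefix-length combination, then add,
 * refresh or delete the corresponding RTF_ROUTEINFO route depending on
 * the advertised lifetime.
 */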
562 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
563 		  const struct in6_addr *gwaddr)
564 {
565 	struct net *net = dev_net(dev);
566 	struct route_info *rinfo = (struct route_info *) opt;
567 	struct in6_addr prefix_buf, *prefix;
568 	unsigned int pref;
569 	unsigned long lifetime;
570 	struct rt6_info *rt;
571 
572 	if (len < sizeof(struct route_info)) {
573 		return -EINVAL;
574 	}
575 
576 	/* Sanity check for prefix_len and length */
577 	if (rinfo->length > 3) {
578 		return -EINVAL;
579 	} else if (rinfo->prefix_len > 128) {
580 		return -EINVAL;
581 	} else if (rinfo->prefix_len > 64) {
582 		if (rinfo->length < 2) {
583 			return -EINVAL;
584 		}
585 	} else if (rinfo->prefix_len > 0) {
586 		if (rinfo->length < 1) {
587 			return -EINVAL;
588 		}
589 	}
590 
591 	pref = rinfo->route_pref;
592 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
593 		return -EINVAL;
594 
595 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
596 
597 	if (rinfo->length == 3)
598 		prefix = (struct in6_addr *)rinfo->prefix;
599 	else {
600 		/* this function is safe */
601 		ipv6_addr_prefix(&prefix_buf,
602 				 (struct in6_addr *)rinfo->prefix,
603 				 rinfo->prefix_len);
604 		prefix = &prefix_buf;
605 	}
606 
607 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
608 				dev->ifindex);
609 
610 	if (rt && !lifetime) {
611 		ip6_del_rt(rt);
612 		rt = NULL;
613 	}
614 
615 	if (!rt && lifetime)
616 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
617 					pref);
618 	else if (rt)
619 		rt->rt6i_flags = RTF_ROUTEINFO |
620 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
621 
622 	if (rt) {
623 		if (!addrconf_finite_timeout(lifetime)) {
624 			rt->rt6i_flags &= ~RTF_EXPIRES;
625 		} else {
626 			rt->dst.expires = jiffies + HZ * lifetime;
627 			rt->rt6i_flags |= RTF_EXPIRES;
628 		}
629 		dst_release(&rt->dst);
630 	}
631 	return 0;
632 }
633 #endif
634 
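/*
 * BACKTRACK is used by the lookup routines below: when a lookup ends up
 * at the null entry, walk back up the fib6 tree (descending into source
 * subtrees where present) until a node carrying route info is found,
 * then restart the search from there.
 */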
635 #define BACKTRACK(__net, saddr)			\
636 do { \
637 	if (rt == __net->ipv6.ip6_null_entry) {	\
638 		struct fib6_node *pn; \
639 		while (1) { \
640 			if (fn->fn_flags & RTN_TL_ROOT) \
641 				goto out; \
642 			pn = fn->parent; \
643 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
644 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
645 			else \
646 				fn = pn; \
647 			if (fn->fn_flags & RTN_RTINFO) \
648 				goto restart; \
649 		} \
650 	} \
651 } while (0)
652 
653 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 					     struct fib6_table *table,
655 					     struct flowi6 *fl6, int flags)
656 {
657 	struct fib6_node *fn;
658 	struct rt6_info *rt;
659 
660 	read_lock_bh(&table->tb6_lock);
661 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
662 restart:
663 	rt = fn->leaf;
664 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 	BACKTRACK(net, &fl6->saddr);
666 out:
667 	dst_use(&rt->dst, jiffies);
668 	read_unlock_bh(&table->tb6_lock);
669 	return rt;
670 
671 }
672 
673 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 				    int flags)
675 {
676 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677 }
678 EXPORT_SYMBOL_GPL(ip6_route_lookup);
679 
680 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 			    const struct in6_addr *saddr, int oif, int strict)
682 {
683 	struct flowi6 fl6 = {
684 		.flowi6_oif = oif,
685 		.daddr = *daddr,
686 	};
687 	struct dst_entry *dst;
688 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
689 
690 	if (saddr) {
691 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
692 		flags |= RT6_LOOKUP_F_HAS_SADDR;
693 	}
694 
695 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
696 	if (dst->error == 0)
697 		return (struct rt6_info *) dst;
698 
699 	dst_release(dst);
700 
701 	return NULL;
702 }
703 
704 EXPORT_SYMBOL(rt6_lookup);
705 
706 /* ip6_ins_rt is called with FREE table->tb6_lock.
707    It takes a new route entry; if the addition fails for any reason the
708    route is freed. In any case, if the caller does not hold a reference,
709    it may be destroyed.
710  */
711 
712 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
713 {
714 	int err;
715 	struct fib6_table *table;
716 
717 	table = rt->rt6i_table;
718 	write_lock_bh(&table->tb6_lock);
719 	err = fib6_add(&table->tb6_root, rt, info);
720 	write_unlock_bh(&table->tb6_lock);
721 
722 	return err;
723 }
724 
725 int ip6_ins_rt(struct rt6_info *rt)
726 {
727 	struct nl_info info = {
728 		.nl_net = dev_net(rt->dst.dev),
729 	};
730 	return __ip6_ins_rt(rt, &info);
731 }
732 
733 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 				      const struct in6_addr *daddr,
735 				      const struct in6_addr *saddr)
736 {
737 	struct rt6_info *rt;
738 
739 	/*
740 	 *	Clone the route.
741 	 */
742 
743 	rt = ip6_rt_copy(ort, daddr);
744 
745 	if (rt) {
746 		int attempts = !in_softirq();
747 
748 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
749 			if (ort->rt6i_dst.plen != 128 &&
750 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
751 				rt->rt6i_flags |= RTF_ANYCAST;
752 			rt->rt6i_gateway = *daddr;
753 		}
754 
755 		rt->rt6i_flags |= RTF_CACHE;
756 
757 #ifdef CONFIG_IPV6_SUBTREES
758 		if (rt->rt6i_src.plen && saddr) {
759 			rt->rt6i_src.addr = *saddr;
760 			rt->rt6i_src.plen = 128;
761 		}
762 #endif
763 
764 	retry:
765 		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
766 			struct net *net = dev_net(rt->dst.dev);
767 			int saved_rt_min_interval =
768 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 			int saved_rt_elasticity =
770 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
771 
772 			if (attempts-- > 0) {
773 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775 
776 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
777 
778 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 					saved_rt_elasticity;
780 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 					saved_rt_min_interval;
782 				goto retry;
783 			}
784 
785 			if (net_ratelimit())
786 				printk(KERN_WARNING
787 				       "ipv6: Neighbour table overflow.\n");
788 			dst_free(&rt->dst);
789 			return NULL;
790 		}
791 	}
792 
793 	return rt;
794 }
795 
796 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 					const struct in6_addr *daddr)
798 {
799 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800 
801 	if (rt) {
802 		rt->rt6i_flags |= RTF_CACHE;
803 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
804 	}
805 	return rt;
806 }
807 
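/*
 * Core policy-routing lookup used for both input and output: select a
 * route under tb6_lock and, unless it is already an RTF_CACHE entry,
 * create a per-destination clone (rt6_alloc_cow/rt6_alloc_clone) and
 * insert it into the table.  If the insert races with another CPU, the
 * lookup is simply retried.
 */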
808 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
809 				      struct flowi6 *fl6, int flags)
810 {
811 	struct fib6_node *fn;
812 	struct rt6_info *rt, *nrt;
813 	int strict = 0;
814 	int attempts = 3;
815 	int err;
816 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
817 
818 	strict |= flags & RT6_LOOKUP_F_IFACE;
819 
820 relookup:
821 	read_lock_bh(&table->tb6_lock);
822 
823 restart_2:
824 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
825 
826 restart:
827 	rt = rt6_select(fn, oif, strict | reachable);
828 
829 	BACKTRACK(net, &fl6->saddr);
830 	if (rt == net->ipv6.ip6_null_entry ||
831 	    rt->rt6i_flags & RTF_CACHE)
832 		goto out;
833 
834 	dst_hold(&rt->dst);
835 	read_unlock_bh(&table->tb6_lock);
836 
837 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
838 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
839 	else if (!(rt->dst.flags & DST_HOST))
840 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
841 	else
842 		goto out2;
843 
844 	dst_release(&rt->dst);
845 	rt = nrt ? : net->ipv6.ip6_null_entry;
846 
847 	dst_hold(&rt->dst);
848 	if (nrt) {
849 		err = ip6_ins_rt(nrt);
850 		if (!err)
851 			goto out2;
852 	}
853 
854 	if (--attempts <= 0)
855 		goto out2;
856 
857 	/*
858 	 * Race condition! In the gap when table->tb6_lock was
859 	 * released, someone could have inserted this route.  Relookup.
860 	 */
861 	dst_release(&rt->dst);
862 	goto relookup;
863 
864 out:
865 	if (reachable) {
866 		reachable = 0;
867 		goto restart_2;
868 	}
869 	dst_hold(&rt->dst);
870 	read_unlock_bh(&table->tb6_lock);
871 out2:
872 	rt->dst.lastuse = jiffies;
873 	rt->dst.__use++;
874 
875 	return rt;
876 }
877 
878 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
879 					    struct flowi6 *fl6, int flags)
880 {
881 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
882 }
883 
884 static struct dst_entry *ip6_route_input_lookup(struct net *net,
885 						struct net_device *dev,
886 						struct flowi6 *fl6, int flags)
887 {
888 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
889 		flags |= RT6_LOOKUP_F_IFACE;
890 
891 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
892 }
893 
894 void ip6_route_input(struct sk_buff *skb)
895 {
896 	const struct ipv6hdr *iph = ipv6_hdr(skb);
897 	struct net *net = dev_net(skb->dev);
898 	int flags = RT6_LOOKUP_F_HAS_SADDR;
899 	struct flowi6 fl6 = {
900 		.flowi6_iif = skb->dev->ifindex,
901 		.daddr = iph->daddr,
902 		.saddr = iph->saddr,
903 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
904 		.flowi6_mark = skb->mark,
905 		.flowi6_proto = iph->nexthdr,
906 	};
907 
908 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
909 }
910 
911 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
912 					     struct flowi6 *fl6, int flags)
913 {
914 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
915 }
916 
917 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
918 				    struct flowi6 *fl6)
919 {
920 	int flags = 0;
921 
922 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
923 		flags |= RT6_LOOKUP_F_IFACE;
924 
925 	if (!ipv6_addr_any(&fl6->saddr))
926 		flags |= RT6_LOOKUP_F_HAS_SADDR;
927 	else if (sk)
928 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
929 
930 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
931 }
932 
933 EXPORT_SYMBOL(ip6_route_output);
934 
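/*
 * Build a standalone copy of dst_orig whose input/output handlers
 * silently discard packets.  The copy uses ip6_dst_blackhole_ops and is
 * never inserted into a fib6 table, so it lives only as long as its
 * reference count.
 */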
935 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
936 {
937 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
938 	struct dst_entry *new = NULL;
939 
940 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
941 	if (rt) {
942 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
943 
944 		new = &rt->dst;
945 
946 		new->__use = 1;
947 		new->input = dst_discard;
948 		new->output = dst_discard;
949 
950 		if (dst_metrics_read_only(&ort->dst))
951 			new->_metrics = ort->dst._metrics;
952 		else
953 			dst_copy_metrics(new, &ort->dst);
954 		rt->rt6i_idev = ort->rt6i_idev;
955 		if (rt->rt6i_idev)
956 			in6_dev_hold(rt->rt6i_idev);
957 		rt->dst.expires = 0;
958 
959 		rt->rt6i_gateway = ort->rt6i_gateway;
960 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
961 		rt->rt6i_metric = 0;
962 
963 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
964 #ifdef CONFIG_IPV6_SUBTREES
965 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
966 #endif
967 
968 		dst_free(new);
969 	}
970 
971 	dst_release(dst_orig);
972 	return new ? new : ERR_PTR(-ENOMEM);
973 }
974 
975 /*
976  *	Destination cache support functions
977  */
978 
979 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
980 {
981 	struct rt6_info *rt;
982 
983 	rt = (struct rt6_info *) dst;
984 
985 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
986 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
987 			if (!rt->rt6i_peer)
988 				rt6_bind_peer(rt, 0);
989 			rt->rt6i_peer_genid = rt6_peer_genid();
990 		}
991 		return dst;
992 	}
993 	return NULL;
994 }
995 
996 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
997 {
998 	struct rt6_info *rt = (struct rt6_info *) dst;
999 
1000 	if (rt) {
1001 		if (rt->rt6i_flags & RTF_CACHE) {
1002 			if (rt6_check_expired(rt)) {
1003 				ip6_del_rt(rt);
1004 				dst = NULL;
1005 			}
1006 		} else {
1007 			dst_release(dst);
1008 			dst = NULL;
1009 		}
1010 	}
1011 	return dst;
1012 }
1013 
1014 static void ip6_link_failure(struct sk_buff *skb)
1015 {
1016 	struct rt6_info *rt;
1017 
1018 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1019 
1020 	rt = (struct rt6_info *) skb_dst(skb);
1021 	if (rt) {
1022 		if (rt->rt6i_flags & RTF_CACHE) {
1023 			dst_set_expires(&rt->dst, 0);
1024 			rt->rt6i_flags |= RTF_EXPIRES;
1025 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1026 			rt->rt6i_node->fn_sernum = -1;
1027 	}
1028 }
1029 
1030 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1031 {
1032 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1033 
1034 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1035 		rt6->rt6i_flags |= RTF_MODIFIED;
1036 		if (mtu < IPV6_MIN_MTU) {
1037 			u32 features = dst_metric(dst, RTAX_FEATURES);
1038 			mtu = IPV6_MIN_MTU;
1039 			features |= RTAX_FEATURE_ALLFRAG;
1040 			dst_metric_set(dst, RTAX_FEATURES, features);
1041 		}
1042 		dst_metric_set(dst, RTAX_MTU, mtu);
1043 	}
1044 }
1045 
1046 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1047 {
1048 	struct net_device *dev = dst->dev;
1049 	unsigned int mtu = dst_mtu(dst);
1050 	struct net *net = dev_net(dev);
1051 
1052 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1053 
1054 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1055 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1056 
1057 	/*
1058 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1059 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1060 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1061 	 * rely only on pmtu discovery"
1062 	 */
1063 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1064 		mtu = IPV6_MAXPLEN;
1065 	return mtu;
1066 }
1067 
1068 static unsigned int ip6_mtu(const struct dst_entry *dst)
1069 {
1070 	struct inet6_dev *idev;
1071 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1072 
1073 	if (mtu)
1074 		return mtu;
1075 
1076 	mtu = IPV6_MIN_MTU;
1077 
1078 	rcu_read_lock();
1079 	idev = __in6_dev_get(dst->dev);
1080 	if (idev)
1081 		mtu = idev->cnf.mtu6;
1082 	rcu_read_unlock();
1083 
1084 	return mtu;
1085 }
1086 
1087 static struct dst_entry *icmp6_dst_gc_list;
1088 static DEFINE_SPINLOCK(icmp6_dst_lock);
1089 
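/*
 * dst entries allocated here are not placed in the FIB; they are chained
 * on icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once their
 * reference count drops to zero.
 */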
1090 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1091 				  struct neighbour *neigh,
1092 				  struct flowi6 *fl6)
1093 {
1094 	struct dst_entry *dst;
1095 	struct rt6_info *rt;
1096 	struct inet6_dev *idev = in6_dev_get(dev);
1097 	struct net *net = dev_net(dev);
1098 
1099 	if (unlikely(!idev))
1100 		return ERR_PTR(-ENODEV);
1101 
1102 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1103 	if (unlikely(!rt)) {
1104 		in6_dev_put(idev);
1105 		dst = ERR_PTR(-ENOMEM);
1106 		goto out;
1107 	}
1108 
1109 	if (neigh)
1110 		neigh_hold(neigh);
1111 	else {
1112 		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1113 		if (IS_ERR(neigh)) {
1114 			in6_dev_put(idev);
1115 			dst_free(&rt->dst);
1116 			return ERR_CAST(neigh);
1117 		}
1118 	}
1119 
1120 	rt->dst.flags |= DST_HOST;
1121 	rt->dst.output  = ip6_output;
1122 	dst_set_neighbour(&rt->dst, neigh);
1123 	atomic_set(&rt->dst.__refcnt, 1);
1124 	rt->rt6i_dst.addr = fl6->daddr;
1125 	rt->rt6i_dst.plen = 128;
1126 	rt->rt6i_idev     = idev;
1127 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1128 
1129 	spin_lock_bh(&icmp6_dst_lock);
1130 	rt->dst.next = icmp6_dst_gc_list;
1131 	icmp6_dst_gc_list = &rt->dst;
1132 	spin_unlock_bh(&icmp6_dst_lock);
1133 
1134 	fib6_force_start_gc(net);
1135 
1136 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1137 
1138 out:
1139 	return dst;
1140 }
1141 
1142 int icmp6_dst_gc(void)
1143 {
1144 	struct dst_entry *dst, **pprev;
1145 	int more = 0;
1146 
1147 	spin_lock_bh(&icmp6_dst_lock);
1148 	pprev = &icmp6_dst_gc_list;
1149 
1150 	while ((dst = *pprev) != NULL) {
1151 		if (!atomic_read(&dst->__refcnt)) {
1152 			*pprev = dst->next;
1153 			dst_free(dst);
1154 		} else {
1155 			pprev = &dst->next;
1156 			++more;
1157 		}
1158 	}
1159 
1160 	spin_unlock_bh(&icmp6_dst_lock);
1161 
1162 	return more;
1163 }
1164 
1165 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1166 			    void *arg)
1167 {
1168 	struct dst_entry *dst, **pprev;
1169 
1170 	spin_lock_bh(&icmp6_dst_lock);
1171 	pprev = &icmp6_dst_gc_list;
1172 	while ((dst = *pprev) != NULL) {
1173 		struct rt6_info *rt = (struct rt6_info *) dst;
1174 		if (func(rt, arg)) {
1175 			*pprev = dst->next;
1176 			dst_free(dst);
1177 		} else {
1178 			pprev = &dst->next;
1179 		}
1180 	}
1181 	spin_unlock_bh(&icmp6_dst_lock);
1182 }
1183 
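/*
 * Garbage-collect cached routes.  The effective aggressiveness is driven
 * by ip6_rt_gc_expire, which is bumped on every forced run and decays by
 * a 1/2^elasticity fraction afterwards, so GC pressure rises while the
 * table stays above ip6_rt_max_size and relaxes once it shrinks.
 */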
1184 static int ip6_dst_gc(struct dst_ops *ops)
1185 {
1186 	unsigned long now = jiffies;
1187 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1188 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1189 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1190 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1191 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1192 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1193 	int entries;
1194 
1195 	entries = dst_entries_get_fast(ops);
1196 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1197 	    entries <= rt_max_size)
1198 		goto out;
1199 
1200 	net->ipv6.ip6_rt_gc_expire++;
1201 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1202 	net->ipv6.ip6_rt_last_gc = now;
1203 	entries = dst_entries_get_slow(ops);
1204 	if (entries < ops->gc_thresh)
1205 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1206 out:
1207 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1208 	return entries > rt_max_size;
1209 }
1210 
1211 /* Clean host part of a prefix. Not necessary in radix tree,
1212    but results in cleaner routing tables.
1213 
1214    Remove it only once everything is known to work!
1215  */
1216 
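/*
 * Resolve the hop limit for a destination: prefer an explicit
 * RTAX_HOPLIMIT route metric, then the per-interface cnf.hop_limit,
 * and finally the namespace-wide devconf_all default.
 */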
1217 int ip6_dst_hoplimit(struct dst_entry *dst)
1218 {
1219 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1220 	if (hoplimit == 0) {
1221 		struct net_device *dev = dst->dev;
1222 		struct inet6_dev *idev;
1223 
1224 		rcu_read_lock();
1225 		idev = __in6_dev_get(dev);
1226 		if (idev)
1227 			hoplimit = idev->cnf.hop_limit;
1228 		else
1229 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1230 		rcu_read_unlock();
1231 	}
1232 	return hoplimit;
1233 }
1234 EXPORT_SYMBOL(ip6_dst_hoplimit);
1235 
1236 /*
1237  *
1238  */
1239 
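/*
 * Add a route described by a fib6_config (e.g. from ipv6_route_ioctl()
 * below).  Prefix lengths and the gateway are validated, the output
 * device and table are resolved, loopback "true" routes are promoted to
 * reject routes, any supplied metrics are applied, and the entry is
 * finally inserted with __ip6_ins_rt().
 */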
1240 int ip6_route_add(struct fib6_config *cfg)
1241 {
1242 	int err;
1243 	struct net *net = cfg->fc_nlinfo.nl_net;
1244 	struct rt6_info *rt = NULL;
1245 	struct net_device *dev = NULL;
1246 	struct inet6_dev *idev = NULL;
1247 	struct fib6_table *table;
1248 	int addr_type;
1249 
1250 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1251 		return -EINVAL;
1252 #ifndef CONFIG_IPV6_SUBTREES
1253 	if (cfg->fc_src_len)
1254 		return -EINVAL;
1255 #endif
1256 	if (cfg->fc_ifindex) {
1257 		err = -ENODEV;
1258 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1259 		if (!dev)
1260 			goto out;
1261 		idev = in6_dev_get(dev);
1262 		if (!idev)
1263 			goto out;
1264 	}
1265 
1266 	if (cfg->fc_metric == 0)
1267 		cfg->fc_metric = IP6_RT_PRIO_USER;
1268 
1269 	err = -ENOBUFS;
1270 	if (cfg->fc_nlinfo.nlh &&
1271 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1272 		table = fib6_get_table(net, cfg->fc_table);
1273 		if (!table) {
1274 			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1275 			table = fib6_new_table(net, cfg->fc_table);
1276 		}
1277 	} else {
1278 		table = fib6_new_table(net, cfg->fc_table);
1279 	}
1280 
1281 	if (!table)
1282 		goto out;
1283 
1284 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1285 
1286 	if (!rt) {
1287 		err = -ENOMEM;
1288 		goto out;
1289 	}
1290 
1291 	rt->dst.obsolete = -1;
1292 	rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
1293 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1294 				0;
1295 
1296 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1297 		cfg->fc_protocol = RTPROT_BOOT;
1298 	rt->rt6i_protocol = cfg->fc_protocol;
1299 
1300 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1301 
1302 	if (addr_type & IPV6_ADDR_MULTICAST)
1303 		rt->dst.input = ip6_mc_input;
1304 	else if (cfg->fc_flags & RTF_LOCAL)
1305 		rt->dst.input = ip6_input;
1306 	else
1307 		rt->dst.input = ip6_forward;
1308 
1309 	rt->dst.output = ip6_output;
1310 
1311 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1312 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1313 	if (rt->rt6i_dst.plen == 128)
1314 	       rt->dst.flags |= DST_HOST;
1315 
1316 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1317 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1318 		if (!metrics) {
1319 			err = -ENOMEM;
1320 			goto out;
1321 		}
1322 		dst_init_metrics(&rt->dst, metrics, 0);
1323 	}
1324 #ifdef CONFIG_IPV6_SUBTREES
1325 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1326 	rt->rt6i_src.plen = cfg->fc_src_len;
1327 #endif
1328 
1329 	rt->rt6i_metric = cfg->fc_metric;
1330 
1331 	/* We cannot add true routes via loopback here;
1332 	   they would result in kernel looping. Promote them to reject routes.
1333 	 */
1334 	if ((cfg->fc_flags & RTF_REJECT) ||
1335 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1336 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1337 	     !(cfg->fc_flags & RTF_LOCAL))) {
1338 		/* hold loopback dev/idev if we haven't done so. */
1339 		if (dev != net->loopback_dev) {
1340 			if (dev) {
1341 				dev_put(dev);
1342 				in6_dev_put(idev);
1343 			}
1344 			dev = net->loopback_dev;
1345 			dev_hold(dev);
1346 			idev = in6_dev_get(dev);
1347 			if (!idev) {
1348 				err = -ENODEV;
1349 				goto out;
1350 			}
1351 		}
1352 		rt->dst.output = ip6_pkt_discard_out;
1353 		rt->dst.input = ip6_pkt_discard;
1354 		rt->dst.error = -ENETUNREACH;
1355 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1356 		goto install_route;
1357 	}
1358 
1359 	if (cfg->fc_flags & RTF_GATEWAY) {
1360 		const struct in6_addr *gw_addr;
1361 		int gwa_type;
1362 
1363 		gw_addr = &cfg->fc_gateway;
1364 		rt->rt6i_gateway = *gw_addr;
1365 		gwa_type = ipv6_addr_type(gw_addr);
1366 
1367 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1368 			struct rt6_info *grt;
1369 
1370 			/* IPv6 strictly inhibits using non-link-local
1371 			   addresses as the nexthop address.
1372 			   Otherwise, the router will not be able to send redirects.
1373 			   It is very good, but in some (rare!) circumstances
1374 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1375 			   some exceptions. --ANK
1376 			 */
1377 			err = -EINVAL;
1378 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1379 				goto out;
1380 
1381 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1382 
1383 			err = -EHOSTUNREACH;
1384 			if (!grt)
1385 				goto out;
1386 			if (dev) {
1387 				if (dev != grt->dst.dev) {
1388 					dst_release(&grt->dst);
1389 					goto out;
1390 				}
1391 			} else {
1392 				dev = grt->dst.dev;
1393 				idev = grt->rt6i_idev;
1394 				dev_hold(dev);
1395 				in6_dev_hold(grt->rt6i_idev);
1396 			}
1397 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1398 				err = 0;
1399 			dst_release(&grt->dst);
1400 
1401 			if (err)
1402 				goto out;
1403 		}
1404 		err = -EINVAL;
1405 		if (!dev || (dev->flags & IFF_LOOPBACK))
1406 			goto out;
1407 	}
1408 
1409 	err = -ENODEV;
1410 	if (!dev)
1411 		goto out;
1412 
1413 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1414 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1415 			err = -EINVAL;
1416 			goto out;
1417 		}
1418 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1419 		rt->rt6i_prefsrc.plen = 128;
1420 	} else
1421 		rt->rt6i_prefsrc.plen = 0;
1422 
1423 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1424 		err = rt6_bind_neighbour(rt, dev);
1425 		if (err)
1426 			goto out;
1427 	}
1428 
1429 	rt->rt6i_flags = cfg->fc_flags;
1430 
1431 install_route:
1432 	if (cfg->fc_mx) {
1433 		struct nlattr *nla;
1434 		int remaining;
1435 
1436 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1437 			int type = nla_type(nla);
1438 
1439 			if (type) {
1440 				if (type > RTAX_MAX) {
1441 					err = -EINVAL;
1442 					goto out;
1443 				}
1444 
1445 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1446 			}
1447 		}
1448 	}
1449 
1450 	rt->dst.dev = dev;
1451 	rt->rt6i_idev = idev;
1452 	rt->rt6i_table = table;
1453 
1454 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1455 
1456 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1457 
1458 out:
1459 	if (dev)
1460 		dev_put(dev);
1461 	if (idev)
1462 		in6_dev_put(idev);
1463 	if (rt)
1464 		dst_free(&rt->dst);
1465 	return err;
1466 }
1467 
1468 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1469 {
1470 	int err;
1471 	struct fib6_table *table;
1472 	struct net *net = dev_net(rt->dst.dev);
1473 
1474 	if (rt == net->ipv6.ip6_null_entry)
1475 		return -ENOENT;
1476 
1477 	table = rt->rt6i_table;
1478 	write_lock_bh(&table->tb6_lock);
1479 
1480 	err = fib6_del(rt, info);
1481 	dst_release(&rt->dst);
1482 
1483 	write_unlock_bh(&table->tb6_lock);
1484 
1485 	return err;
1486 }
1487 
1488 int ip6_del_rt(struct rt6_info *rt)
1489 {
1490 	struct nl_info info = {
1491 		.nl_net = dev_net(rt->dst.dev),
1492 	};
1493 	return __ip6_del_rt(rt, &info);
1494 }
1495 
1496 static int ip6_route_del(struct fib6_config *cfg)
1497 {
1498 	struct fib6_table *table;
1499 	struct fib6_node *fn;
1500 	struct rt6_info *rt;
1501 	int err = -ESRCH;
1502 
1503 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1504 	if (!table)
1505 		return err;
1506 
1507 	read_lock_bh(&table->tb6_lock);
1508 
1509 	fn = fib6_locate(&table->tb6_root,
1510 			 &cfg->fc_dst, cfg->fc_dst_len,
1511 			 &cfg->fc_src, cfg->fc_src_len);
1512 
1513 	if (fn) {
1514 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1515 			if (cfg->fc_ifindex &&
1516 			    (!rt->dst.dev ||
1517 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1518 				continue;
1519 			if (cfg->fc_flags & RTF_GATEWAY &&
1520 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1521 				continue;
1522 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1523 				continue;
1524 			dst_hold(&rt->dst);
1525 			read_unlock_bh(&table->tb6_lock);
1526 
1527 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1528 		}
1529 	}
1530 	read_unlock_bh(&table->tb6_lock);
1531 
1532 	return err;
1533 }
1534 
1535 /*
1536  *	Handle redirects
1537  */
1538 struct ip6rd_flowi {
1539 	struct flowi6 fl6;
1540 	struct in6_addr gateway;
1541 };
1542 
1543 static struct rt6_info *__ip6_route_redirect(struct net *net,
1544 					     struct fib6_table *table,
1545 					     struct flowi6 *fl6,
1546 					     int flags)
1547 {
1548 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1549 	struct rt6_info *rt;
1550 	struct fib6_node *fn;
1551 
1552 	/*
1553 	 * Get the "current" route for this destination and
1554 	 * check if the redirect has come from the appropriate router.
1555 	 *
1556 	 * RFC 2461 specifies that redirects should only be
1557 	 * accepted if they come from the nexthop to the target.
1558 	 * Due to the way the routes are chosen, this notion
1559 	 * is a bit fuzzy and one might need to check all possible
1560 	 * routes.
1561 	 */
1562 
1563 	read_lock_bh(&table->tb6_lock);
1564 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1565 restart:
1566 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1567 		/*
1568 		 * Current route is on-link; redirect is always invalid.
1569 		 *
1570 		 * It seems the previous statement is not true: the target could
1571 		 * be a node that regards us as on-link (e.g. via proxy ndisc),
1572 		 * but the router serving it might then decide that we should
1573 		 * know the truth 8)8) --ANK (980726).
1574 		 */
1575 		if (rt6_check_expired(rt))
1576 			continue;
1577 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1578 			continue;
1579 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1580 			continue;
1581 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1582 			continue;
1583 		break;
1584 	}
1585 
1586 	if (!rt)
1587 		rt = net->ipv6.ip6_null_entry;
1588 	BACKTRACK(net, &fl6->saddr);
1589 out:
1590 	dst_hold(&rt->dst);
1591 
1592 	read_unlock_bh(&table->tb6_lock);
1593 
1594 	return rt;
1595 };
1596 
1597 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1598 					   const struct in6_addr *src,
1599 					   const struct in6_addr *gateway,
1600 					   struct net_device *dev)
1601 {
1602 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1603 	struct net *net = dev_net(dev);
1604 	struct ip6rd_flowi rdfl = {
1605 		.fl6 = {
1606 			.flowi6_oif = dev->ifindex,
1607 			.daddr = *dest,
1608 			.saddr = *src,
1609 		},
1610 	};
1611 
1612 	rdfl.gateway = *gateway;
1613 
1614 	if (rt6_need_strict(dest))
1615 		flags |= RT6_LOOKUP_F_IFACE;
1616 
1617 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1618 						   flags, __ip6_route_redirect);
1619 }
1620 
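/*
 * Apply an ndisc redirect: update the neighbour entry for the new first
 * hop, then install an RTF_DYNAMIC|RTF_CACHE clone of the current route
 * pointing at that neighbour and announce the change via
 * NETEVENT_REDIRECT.
 */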
1621 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1622 		  const struct in6_addr *saddr,
1623 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1624 {
1625 	struct rt6_info *rt, *nrt = NULL;
1626 	struct netevent_redirect netevent;
1627 	struct net *net = dev_net(neigh->dev);
1628 
1629 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1630 
1631 	if (rt == net->ipv6.ip6_null_entry) {
1632 		if (net_ratelimit())
1633 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1634 			       "for redirect target\n");
1635 		goto out;
1636 	}
1637 
1638 	/*
1639 	 *	We have finally decided to accept it.
1640 	 */
1641 
1642 	neigh_update(neigh, lladdr, NUD_STALE,
1643 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1644 		     NEIGH_UPDATE_F_OVERRIDE|
1645 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1646 				     NEIGH_UPDATE_F_ISROUTER))
1647 		     );
1648 
1649 	/*
1650 	 * Redirect received -> path was valid.
1651 	 * Look, redirects are sent only in response to data packets,
1652 	 * so that this nexthop apparently is reachable. --ANK
1653 	 */
1654 	dst_confirm(&rt->dst);
1655 
1656 	/* Duplicate redirect: silently ignore. */
1657 	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1658 		goto out;
1659 
1660 	nrt = ip6_rt_copy(rt, dest);
1661 	if (!nrt)
1662 		goto out;
1663 
1664 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1665 	if (on_link)
1666 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1667 
1668 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1669 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1670 
1671 	if (ip6_ins_rt(nrt))
1672 		goto out;
1673 
1674 	netevent.old = &rt->dst;
1675 	netevent.new = &nrt->dst;
1676 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1677 
1678 	if (rt->rt6i_flags & RTF_CACHE) {
1679 		ip6_del_rt(rt);
1680 		return;
1681 	}
1682 
1683 out:
1684 	dst_release(&rt->dst);
1685 }
1686 
1687 /*
1688  *	Handle ICMP "packet too big" messages
1689  *	i.e. Path MTU discovery
1690  */
1691 
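/*
 * Lower the cached PMTU towards a destination.  RTF_CACHE host routes
 * are updated in place; otherwise a host clone of the matching network
 * route is created.  Either way the new MTU expires after
 * ip6_rt_mtu_expires so that a larger PMTU can be rediscovered later.
 */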
1692 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1693 			     struct net *net, u32 pmtu, int ifindex)
1694 {
1695 	struct rt6_info *rt, *nrt;
1696 	int allfrag = 0;
1697 again:
1698 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1699 	if (!rt)
1700 		return;
1701 
1702 	if (rt6_check_expired(rt)) {
1703 		ip6_del_rt(rt);
1704 		goto again;
1705 	}
1706 
1707 	if (pmtu >= dst_mtu(&rt->dst))
1708 		goto out;
1709 
1710 	if (pmtu < IPV6_MIN_MTU) {
1711 		/*
1712 		 * According to RFC 2460, PMTU is set to the IPv6 Minimum Link
1713 		 * MTU (1280) and a fragment header should always be included
1714 		 * after a node receives a Packet Too Big message reporting a
1715 		 * PMTU less than the IPv6 Minimum Link MTU.
1716 		 */
1717 		pmtu = IPV6_MIN_MTU;
1718 		allfrag = 1;
1719 	}
1720 
1721 	/* New mtu received -> path was valid.
1722 	   Packet Too Big messages are sent only in response to data packets,
1723 	   so this nexthop apparently is reachable. --ANK
1724 	 */
1725 	dst_confirm(&rt->dst);
1726 
1727 	/* Host route. If it is static, it would be better
1728 	   not to override it but to add a new one, so that
1729 	   when the cache entry expires the old pmtu
1730 	   is restored automatically.
1731 	 */
1732 	if (rt->rt6i_flags & RTF_CACHE) {
1733 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1734 		if (allfrag) {
1735 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1736 			features |= RTAX_FEATURE_ALLFRAG;
1737 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1738 		}
1739 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1740 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1741 		goto out;
1742 	}
1743 
1744 	/* Network route.
1745 	   Two cases are possible:
1746 	   1. It is a connected route. Action: COW it.
1747 	   2. It is a gatewayed route or a NONEXTHOP route. Action: clone it.
1748 	 */
1749 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1750 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1751 	else
1752 		nrt = rt6_alloc_clone(rt, daddr);
1753 
1754 	if (nrt) {
1755 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1756 		if (allfrag) {
1757 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1758 			features |= RTAX_FEATURE_ALLFRAG;
1759 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1760 		}
1761 
1762 		/* According to RFC 1981, detection of a PMTU increase shouldn't
1763 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1764 		 * Here this route's expiration time is set to ip6_rt_mtu_expires,
1765 		 * which defaults to 10 minutes. After that the decreased pmtu
1766 		 * expires and a PMTU increase can be detected automatically.
1767 		 */
1768 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1769 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1770 
1771 		ip6_ins_rt(nrt);
1772 	}
1773 out:
1774 	dst_release(&rt->dst);
1775 }
1776 
1777 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1778 			struct net_device *dev, u32 pmtu)
1779 {
1780 	struct net *net = dev_net(dev);
1781 
1782 	/*
1783 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1784 	 * is sending along the path" that caused the Packet Too Big message.
1785 	 * Since it's not possible in the general case to determine which
1786 	 * interface was used to send the original packet, we update the MTU
1787 	 * on the interface that will be used to send future packets. We also
1788 	 * update the MTU on the interface that received the Packet Too Big in
1789 	 * case the original packet was forced out that interface with
1790 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1791 	 * correct behaviour, which would be to update the MTU on all
1792 	 * interfaces.
1793 	 */
1794 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1795 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1796 }
1797 
1798 /*
1799  *	Misc support functions
1800  */
1801 
1802 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1803 				    const struct in6_addr *dest)
1804 {
1805 	struct net *net = dev_net(ort->dst.dev);
1806 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1807 					    ort->dst.dev, 0);
1808 
1809 	if (rt) {
1810 		rt->dst.input = ort->dst.input;
1811 		rt->dst.output = ort->dst.output;
1812 		rt->dst.flags |= DST_HOST;
1813 
1814 		rt->rt6i_dst.addr = *dest;
1815 		rt->rt6i_dst.plen = 128;
1816 		dst_copy_metrics(&rt->dst, &ort->dst);
1817 		rt->dst.error = ort->dst.error;
1818 		rt->rt6i_idev = ort->rt6i_idev;
1819 		if (rt->rt6i_idev)
1820 			in6_dev_hold(rt->rt6i_idev);
1821 		rt->dst.lastuse = jiffies;
1822 		rt->dst.expires = 0;
1823 
1824 		rt->rt6i_gateway = ort->rt6i_gateway;
1825 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1826 		rt->rt6i_metric = 0;
1827 
1828 #ifdef CONFIG_IPV6_SUBTREES
1829 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1830 #endif
1831 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1832 		rt->rt6i_table = ort->rt6i_table;
1833 	}
1834 	return rt;
1835 }
1836 
1837 #ifdef CONFIG_IPV6_ROUTE_INFO
1838 static struct rt6_info *rt6_get_route_info(struct net *net,
1839 					   const struct in6_addr *prefix, int prefixlen,
1840 					   const struct in6_addr *gwaddr, int ifindex)
1841 {
1842 	struct fib6_node *fn;
1843 	struct rt6_info *rt = NULL;
1844 	struct fib6_table *table;
1845 
1846 	table = fib6_get_table(net, RT6_TABLE_INFO);
1847 	if (!table)
1848 		return NULL;
1849 
1850 	write_lock_bh(&table->tb6_lock);
1851 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1852 	if (!fn)
1853 		goto out;
1854 
1855 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1856 		if (rt->dst.dev->ifindex != ifindex)
1857 			continue;
1858 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1859 			continue;
1860 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1861 			continue;
1862 		dst_hold(&rt->dst);
1863 		break;
1864 	}
1865 out:
1866 	write_unlock_bh(&table->tb6_lock);
1867 	return rt;
1868 }
1869 
1870 static struct rt6_info *rt6_add_route_info(struct net *net,
1871 					   const struct in6_addr *prefix, int prefixlen,
1872 					   const struct in6_addr *gwaddr, int ifindex,
1873 					   unsigned pref)
1874 {
1875 	struct fib6_config cfg = {
1876 		.fc_table	= RT6_TABLE_INFO,
1877 		.fc_metric	= IP6_RT_PRIO_USER,
1878 		.fc_ifindex	= ifindex,
1879 		.fc_dst_len	= prefixlen,
1880 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1881 				  RTF_UP | RTF_PREF(pref),
1882 		.fc_nlinfo.pid = 0,
1883 		.fc_nlinfo.nlh = NULL,
1884 		.fc_nlinfo.nl_net = net,
1885 	};
1886 
1887 	cfg.fc_dst = *prefix;
1888 	cfg.fc_gateway = *gwaddr;
1889 
1890 	/* We should treat it as a default route if prefix length is 0. */
1891 	if (!prefixlen)
1892 		cfg.fc_flags |= RTF_DEFAULT;
1893 
1894 	ip6_route_add(&cfg);
1895 
1896 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1897 }
1898 #endif
1899 
1900 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1901 {
1902 	struct rt6_info *rt;
1903 	struct fib6_table *table;
1904 
1905 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1906 	if (!table)
1907 		return NULL;
1908 
1909 	write_lock_bh(&table->tb6_lock);
1910 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1911 		if (dev == rt->dst.dev &&
1912 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1913 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1914 			break;
1915 	}
1916 	if (rt)
1917 		dst_hold(&rt->dst);
1918 	write_unlock_bh(&table->tb6_lock);
1919 	return rt;
1920 }
1921 
1922 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1923 				     struct net_device *dev,
1924 				     unsigned int pref)
1925 {
1926 	struct fib6_config cfg = {
1927 		.fc_table	= RT6_TABLE_DFLT,
1928 		.fc_metric	= IP6_RT_PRIO_USER,
1929 		.fc_ifindex	= dev->ifindex,
1930 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1931 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1932 		.fc_nlinfo.pid = 0,
1933 		.fc_nlinfo.nlh = NULL,
1934 		.fc_nlinfo.nl_net = dev_net(dev),
1935 	};
1936 
1937 	cfg.fc_gateway = *gwaddr;
1938 
1939 	ip6_route_add(&cfg);
1940 
1941 	return rt6_get_dflt_router(gwaddr, dev);
1942 }
1943 
1944 void rt6_purge_dflt_routers(struct net *net)
1945 {
1946 	struct rt6_info *rt;
1947 	struct fib6_table *table;
1948 
1949 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1950 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1951 	if (!table)
1952 		return;
1953 
1954 restart:
1955 	read_lock_bh(&table->tb6_lock);
1956 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1957 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1958 			dst_hold(&rt->dst);
1959 			read_unlock_bh(&table->tb6_lock);
1960 			ip6_del_rt(rt);
1961 			goto restart;
1962 		}
1963 	}
1964 	read_unlock_bh(&table->tb6_lock);
1965 }
1966 
1967 static void rtmsg_to_fib6_config(struct net *net,
1968 				 struct in6_rtmsg *rtmsg,
1969 				 struct fib6_config *cfg)
1970 {
1971 	memset(cfg, 0, sizeof(*cfg));
1972 
1973 	cfg->fc_table = RT6_TABLE_MAIN;
1974 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1975 	cfg->fc_metric = rtmsg->rtmsg_metric;
1976 	cfg->fc_expires = rtmsg->rtmsg_info;
1977 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1978 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1979 	cfg->fc_flags = rtmsg->rtmsg_flags;
1980 
1981 	cfg->fc_nlinfo.nl_net = net;
1982 
1983 	cfg->fc_dst = rtmsg->rtmsg_dst;
1984 	cfg->fc_src = rtmsg->rtmsg_src;
1985 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1986 }
1987 
1988 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1989 {
1990 	struct fib6_config cfg;
1991 	struct in6_rtmsg rtmsg;
1992 	int err;
1993 
1994 	switch (cmd) {
1995 	case SIOCADDRT:		/* Add a route */
1996 	case SIOCDELRT:		/* Delete a route */
1997 		if (!capable(CAP_NET_ADMIN))
1998 			return -EPERM;
1999 		err = copy_from_user(&rtmsg, arg,
2000 				     sizeof(struct in6_rtmsg));
2001 		if (err)
2002 			return -EFAULT;
2003 
2004 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2005 
2006 		rtnl_lock();
2007 		switch (cmd) {
2008 		case SIOCADDRT:
2009 			err = ip6_route_add(&cfg);
2010 			break;
2011 		case SIOCDELRT:
2012 			err = ip6_route_del(&cfg);
2013 			break;
2014 		default:
2015 			err = -EINVAL;
2016 		}
2017 		rtnl_unlock();
2018 
2019 		return err;
2020 	}
2021 
2022 	return -EINVAL;
2023 }
2024 
2025 /*
2026  *	Drop the packet on the floor
2027  */
2028 
2029 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2030 {
2031 	int type;
2032 	struct dst_entry *dst = skb_dst(skb);
2033 	switch (ipstats_mib_noroutes) {
2034 	case IPSTATS_MIB_INNOROUTES:
2035 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2036 		if (type == IPV6_ADDR_ANY) {
2037 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 				      IPSTATS_MIB_INADDRERRORS);
2039 			break;
2040 		}
2041 		/* FALLTHROUGH */
2042 	case IPSTATS_MIB_OUTNOROUTES:
2043 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 			      ipstats_mib_noroutes);
2045 		break;
2046 	}
2047 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2048 	kfree_skb(skb);
2049 	return 0;
2050 }
2051 
2052 static int ip6_pkt_discard(struct sk_buff *skb)
2053 {
2054 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2055 }
2056 
2057 static int ip6_pkt_discard_out(struct sk_buff *skb)
2058 {
2059 	skb->dev = skb_dst(skb)->dev;
2060 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2061 }
2062 
2063 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2064 
2065 static int ip6_pkt_prohibit(struct sk_buff *skb)
2066 {
2067 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2068 }
2069 
2070 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2071 {
2072 	skb->dev = skb_dst(skb)->dev;
2073 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2074 }
2075 
2076 #endif
2077 
2078 /*
2079  *	Allocate a dst for local (unicast / anycast) address.
2080  */
2081 
2082 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2083 				    const struct in6_addr *addr,
2084 				    bool anycast)
2085 {
2086 	struct net *net = dev_net(idev->dev);
2087 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2088 					    net->loopback_dev, 0);
2089 	int err;
2090 
2091 	if (!rt) {
2092 		if (net_ratelimit())
2093 			pr_warning("IPv6:  Maximum number of routes reached,"
2094 				   " consider increasing route/max_size.\n");
2095 		return ERR_PTR(-ENOMEM);
2096 	}
2097 
2098 	in6_dev_hold(idev);
2099 
2100 	rt->dst.flags |= DST_HOST;
2101 	rt->dst.input = ip6_input;
2102 	rt->dst.output = ip6_output;
2103 	rt->rt6i_idev = idev;
2104 	rt->dst.obsolete = -1;
2105 
2106 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2107 	if (anycast)
2108 		rt->rt6i_flags |= RTF_ANYCAST;
2109 	else
2110 		rt->rt6i_flags |= RTF_LOCAL;
2111 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2112 	if (err) {
2113 		dst_free(&rt->dst);
2114 		return ERR_PTR(err);
2115 	}
2116 
2117 	rt->rt6i_dst.addr = *addr;
2118 	rt->rt6i_dst.plen = 128;
2119 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2120 
2121 	atomic_set(&rt->dst.__refcnt, 1);
2122 
2123 	return rt;
2124 }
2125 
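/*
 * Choose a source address for @daddr: prefer the route's configured
 * prefsrc if one is set, otherwise fall back to normal source address
 * selection on the route's device.
 */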
2126 int ip6_route_get_saddr(struct net *net,
2127 			struct rt6_info *rt,
2128 			const struct in6_addr *daddr,
2129 			unsigned int prefs,
2130 			struct in6_addr *saddr)
2131 {
2132 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2133 	int err = 0;
2134 	if (rt->rt6i_prefsrc.plen)
2135 		*saddr = rt->rt6i_prefsrc.addr;
2136 	else
2137 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138 					 daddr, prefs, saddr);
2139 	return err;
2140 }
2141 
2142 /* Remove a deleted address from any route's prefsrc entries. */
2143 struct arg_dev_net_ip {
2144 	struct net_device *dev;
2145 	struct net *net;
2146 	struct in6_addr *addr;
2147 };
2148 
2149 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150 {
2151 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154 
2155 	if (((void *)rt->dst.dev == dev || !dev) &&
2156 	    rt != net->ipv6.ip6_null_entry &&
2157 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158 		/* remove prefsrc entry */
2159 		rt->rt6i_prefsrc.plen = 0;
2160 	}
2161 	return 0;
2162 }
2163 
2164 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165 {
2166 	struct net *net = dev_net(ifp->idev->dev);
2167 	struct arg_dev_net_ip adni = {
2168 		.dev = ifp->idev->dev,
2169 		.net = net,
2170 		.addr = &ifp->addr,
2171 	};
2172 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173 }
2174 
2175 struct arg_dev_net {
2176 	struct net_device *dev;
2177 	struct net *net;
2178 };
2179 
2180 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181 {
2182 	const struct arg_dev_net *adn = arg;
2183 	const struct net_device *dev = adn->dev;
2184 
2185 	if ((rt->dst.dev == dev || !dev) &&
2186 	    rt != adn->net->ipv6.ip6_null_entry)
2187 		return -1;
2188 
2189 	return 0;
2190 }
2191 
2192 void rt6_ifdown(struct net *net, struct net_device *dev)
2193 {
2194 	struct arg_dev_net adn = {
2195 		.dev = dev,
2196 		.net = net,
2197 	};
2198 
2199 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2200 	icmp6_clean_all(fib6_ifdown, &adn);
2201 }
2202 
2203 struct rt6_mtu_change_arg
2204 {
2205 	struct net_device *dev;
2206 	unsigned mtu;
2207 };
2208 
2209 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2210 {
2211 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2212 	struct inet6_dev *idev;
2213 
2214 	/* In IPv6, PMTU discovery is not optional,
2215 	   so the RTAX_MTU lock cannot disable it.
2216 	   We still use this lock to block changes
2217 	   caused by addrconf/ndisc.
2218 	*/
2219 
2220 	idev = __in6_dev_get(arg->dev);
2221 	if (!idev)
2222 		return 0;
2223 
2224 	/* For an administrative MTU increase, there is no way to discover
2225 	   an IPv6 PMTU increase, so the PMTU increase must be updated here.
2226 	   Since RFC 1981 doesn't cover administrative MTU increases,
2227 	   updating PMTU on increase is a MUST (e.g. jumbo frames).
2228 	 */
2229 	/*
2230 	   If the new MTU is less than the route PMTU, the new MTU will be the
2231 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
2232 	   decrease. If the new MTU is greater than the route PMTU, and the
2233 	   old MTU was the lowest MTU in the path, update the route PMTU
2234 	   to reflect the increase. In this case, if another node's MTU is now
2235 	   the lowest in the path, a Packet Too Big message will trigger
2236 	   PMTU discovery.
2237 	 */
2238 	if (rt->dst.dev == arg->dev &&
2239 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2240 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2241 	     (dst_mtu(&rt->dst) < arg->mtu &&
2242 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2243 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2244 	}
2245 	return 0;
2246 }
2247 
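/*
 * Called on a device MTU change: walk the FIB and let
 * rt6_mtu_change_route() adjust the cached route MTUs.
 */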
2248 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2249 {
2250 	struct rt6_mtu_change_arg arg = {
2251 		.dev = dev,
2252 		.mtu = mtu,
2253 	};
2254 
2255 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2256 }
2257 
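/* Netlink attribute policy for RTM_NEWROUTE/RTM_DELROUTE/RTM_GETROUTE. */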
2258 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2259 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2260 	[RTA_OIF]               = { .type = NLA_U32 },
2261 	[RTA_IIF]		= { .type = NLA_U32 },
2262 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2263 	[RTA_METRICS]           = { .type = NLA_NESTED },
2264 };
2265 
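/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a fib6_config. */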
2266 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267 			      struct fib6_config *cfg)
2268 {
2269 	struct rtmsg *rtm;
2270 	struct nlattr *tb[RTA_MAX+1];
2271 	int err;
2272 
2273 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274 	if (err < 0)
2275 		goto errout;
2276 
2277 	err = -EINVAL;
2278 	rtm = nlmsg_data(nlh);
2279 	memset(cfg, 0, sizeof(*cfg));
2280 
2281 	cfg->fc_table = rtm->rtm_table;
2282 	cfg->fc_dst_len = rtm->rtm_dst_len;
2283 	cfg->fc_src_len = rtm->rtm_src_len;
2284 	cfg->fc_flags = RTF_UP;
2285 	cfg->fc_protocol = rtm->rtm_protocol;
2286 
2287 	if (rtm->rtm_type == RTN_UNREACHABLE)
2288 		cfg->fc_flags |= RTF_REJECT;
2289 
2290 	if (rtm->rtm_type == RTN_LOCAL)
2291 		cfg->fc_flags |= RTF_LOCAL;
2292 
2293 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2294 	cfg->fc_nlinfo.nlh = nlh;
2295 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2296 
2297 	if (tb[RTA_GATEWAY]) {
2298 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2299 		cfg->fc_flags |= RTF_GATEWAY;
2300 	}
2301 
2302 	if (tb[RTA_DST]) {
2303 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2304 
2305 		if (nla_len(tb[RTA_DST]) < plen)
2306 			goto errout;
2307 
2308 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2309 	}
2310 
2311 	if (tb[RTA_SRC]) {
2312 		int plen = (rtm->rtm_src_len + 7) >> 3;
2313 
2314 		if (nla_len(tb[RTA_SRC]) < plen)
2315 			goto errout;
2316 
2317 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2318 	}
2319 
2320 	if (tb[RTA_PREFSRC])
2321 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2322 
2323 	if (tb[RTA_OIF])
2324 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2325 
2326 	if (tb[RTA_PRIORITY])
2327 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2328 
2329 	if (tb[RTA_METRICS]) {
2330 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2331 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2332 	}
2333 
2334 	if (tb[RTA_TABLE])
2335 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2336 
2337 	err = 0;
2338 errout:
2339 	return err;
2340 }
2341 
2342 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2343 {
2344 	struct fib6_config cfg;
2345 	int err;
2346 
2347 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2348 	if (err < 0)
2349 		return err;
2350 
2351 	return ip6_route_del(&cfg);
2352 }
2353 
2354 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2355 {
2356 	struct fib6_config cfg;
2357 	int err;
2358 
2359 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 	if (err < 0)
2361 		return err;
2362 
2363 	return ip6_route_add(&cfg);
2364 }
2365 
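/*
 * Worst-case size of a single route notification; must stay in sync with
 * the attributes emitted by rt6_fill_node() below.
 */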
2366 static inline size_t rt6_nlmsg_size(void)
2367 {
2368 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2369 	       + nla_total_size(16) /* RTA_SRC */
2370 	       + nla_total_size(16) /* RTA_DST */
2371 	       + nla_total_size(16) /* RTA_GATEWAY */
2372 	       + nla_total_size(16) /* RTA_PREFSRC */
2373 	       + nla_total_size(4) /* RTA_TABLE */
2374 	       + nla_total_size(4) /* RTA_IIF */
2375 	       + nla_total_size(4) /* RTA_OIF */
2376 	       + nla_total_size(4) /* RTA_PRIORITY */
2377 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2378 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2379 }
2380 
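/*
 * Fill one route message for @rt.  When @prefix is set (RTM_F_PREFIX
 * dumps), non-prefix routes are skipped by returning 1 so the dump
 * simply continues.
 */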
2381 static int rt6_fill_node(struct net *net,
2382 			 struct sk_buff *skb, struct rt6_info *rt,
2383 			 struct in6_addr *dst, struct in6_addr *src,
2384 			 int iif, int type, u32 pid, u32 seq,
2385 			 int prefix, int nowait, unsigned int flags)
2386 {
2387 	const struct inet_peer *peer;
2388 	struct rtmsg *rtm;
2389 	struct nlmsghdr *nlh;
2390 	long expires;
2391 	u32 table;
2392 	struct neighbour *n;
2393 	u32 ts, tsage;
2394 
2395 	if (prefix) {	/* user wants prefix routes only */
2396 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2397 			/* success since this is not a prefix route */
2398 			return 1;
2399 		}
2400 	}
2401 
2402 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2403 	if (!nlh)
2404 		return -EMSGSIZE;
2405 
2406 	rtm = nlmsg_data(nlh);
2407 	rtm->rtm_family = AF_INET6;
2408 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2409 	rtm->rtm_src_len = rt->rt6i_src.plen;
2410 	rtm->rtm_tos = 0;
2411 	if (rt->rt6i_table)
2412 		table = rt->rt6i_table->tb6_id;
2413 	else
2414 		table = RT6_TABLE_UNSPEC;
2415 	rtm->rtm_table = table;
2416 	NLA_PUT_U32(skb, RTA_TABLE, table);
2417 	if (rt->rt6i_flags & RTF_REJECT)
2418 		rtm->rtm_type = RTN_UNREACHABLE;
2419 	else if (rt->rt6i_flags & RTF_LOCAL)
2420 		rtm->rtm_type = RTN_LOCAL;
2421 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2422 		rtm->rtm_type = RTN_LOCAL;
2423 	else
2424 		rtm->rtm_type = RTN_UNICAST;
2425 	rtm->rtm_flags = 0;
2426 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2427 	rtm->rtm_protocol = rt->rt6i_protocol;
2428 	if (rt->rt6i_flags & RTF_DYNAMIC)
2429 		rtm->rtm_protocol = RTPROT_REDIRECT;
2430 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2431 		rtm->rtm_protocol = RTPROT_KERNEL;
2432 	else if (rt->rt6i_flags & RTF_DEFAULT)
2433 		rtm->rtm_protocol = RTPROT_RA;
2434 
2435 	if (rt->rt6i_flags & RTF_CACHE)
2436 		rtm->rtm_flags |= RTM_F_CLONED;
2437 
2438 	if (dst) {
2439 		NLA_PUT(skb, RTA_DST, 16, dst);
2440 		rtm->rtm_dst_len = 128;
2441 	} else if (rtm->rtm_dst_len)
2442 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2443 #ifdef CONFIG_IPV6_SUBTREES
2444 	if (src) {
2445 		NLA_PUT(skb, RTA_SRC, 16, src);
2446 		rtm->rtm_src_len = 128;
2447 	} else if (rtm->rtm_src_len)
2448 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2449 #endif
2450 	if (iif) {
2451 #ifdef CONFIG_IPV6_MROUTE
2452 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2453 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2454 			if (err <= 0) {
2455 				if (!nowait) {
2456 					if (err == 0)
2457 						return 0;
2458 					goto nla_put_failure;
2459 				} else {
2460 					if (err == -EMSGSIZE)
2461 						goto nla_put_failure;
2462 				}
2463 			}
2464 		} else
2465 #endif
2466 			NLA_PUT_U32(skb, RTA_IIF, iif);
2467 	} else if (dst) {
2468 		struct in6_addr saddr_buf;
2469 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2470 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2471 	}
2472 
2473 	if (rt->rt6i_prefsrc.plen) {
2474 		struct in6_addr saddr_buf;
2475 		saddr_buf = rt->rt6i_prefsrc.addr;
2476 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2477 	}
2478 
2479 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2480 		goto nla_put_failure;
2481 
2482 	rcu_read_lock();
2483 	n = dst_get_neighbour_noref(&rt->dst);
2484 	if (n) {
2485 		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2486 			rcu_read_unlock();
2487 			goto nla_put_failure;
2488 		}
2489 	}
2490 	rcu_read_unlock();
2491 
2492 	if (rt->dst.dev)
2493 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2494 
2495 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2496 
2497 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2498 		expires = 0;
2499 	else if (rt->dst.expires - jiffies < INT_MAX)
2500 		expires = rt->dst.expires - jiffies;
2501 	else
2502 		expires = INT_MAX;
2503 
2504 	peer = rt->rt6i_peer;
2505 	ts = tsage = 0;
2506 	if (peer && peer->tcp_ts_stamp) {
2507 		ts = peer->tcp_ts;
2508 		tsage = get_seconds() - peer->tcp_ts_stamp;
2509 	}
2510 
2511 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2512 			       expires, rt->dst.error) < 0)
2513 		goto nla_put_failure;
2514 
2515 	return nlmsg_end(skb, nlh);
2516 
2517 nla_put_failure:
2518 	nlmsg_cancel(skb, nlh);
2519 	return -EMSGSIZE;
2520 }
2521 
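/*
 * Per-route dump callback: honour the RTM_F_PREFIX filter from the request
 * and emit a multipart RTM_NEWROUTE message for this route.
 */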
2522 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2523 {
2524 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2525 	int prefix;
2526 
2527 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2528 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2529 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2530 	} else
2531 		prefix = 0;
2532 
2533 	return rt6_fill_node(arg->net,
2534 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2535 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2536 		     prefix, 0, NLM_F_MULTI);
2537 }
2538 
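/*
 * RTM_GETROUTE handler (what e.g. "ip -6 route get" issues): perform a
 * route lookup for the requested src/dst/iif/oif and return the result
 * as a single RTM_NEWROUTE message to the requester.
 */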
2539 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2540 {
2541 	struct net *net = sock_net(in_skb->sk);
2542 	struct nlattr *tb[RTA_MAX+1];
2543 	struct rt6_info *rt;
2544 	struct sk_buff *skb;
2545 	struct rtmsg *rtm;
2546 	struct flowi6 fl6;
2547 	int err, iif = 0, oif = 0;
2548 
2549 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2550 	if (err < 0)
2551 		goto errout;
2552 
2553 	err = -EINVAL;
2554 	memset(&fl6, 0, sizeof(fl6));
2555 
2556 	if (tb[RTA_SRC]) {
2557 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2558 			goto errout;
2559 
2560 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2561 	}
2562 
2563 	if (tb[RTA_DST]) {
2564 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2565 			goto errout;
2566 
2567 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2568 	}
2569 
2570 	if (tb[RTA_IIF])
2571 		iif = nla_get_u32(tb[RTA_IIF]);
2572 
2573 	if (tb[RTA_OIF])
2574 		oif = nla_get_u32(tb[RTA_OIF]);
2575 
2576 	if (iif) {
2577 		struct net_device *dev;
2578 		int flags = 0;
2579 
2580 		dev = __dev_get_by_index(net, iif);
2581 		if (!dev) {
2582 			err = -ENODEV;
2583 			goto errout;
2584 		}
2585 
2586 		fl6.flowi6_iif = iif;
2587 
2588 		if (!ipv6_addr_any(&fl6.saddr))
2589 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2590 
2591 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2592 							       flags);
2593 	} else {
2594 		fl6.flowi6_oif = oif;
2595 
2596 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2597 	}
2598 
2599 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2600 	if (!skb) {
2601 		err = -ENOBUFS;
2602 		goto errout;
2603 	}
2604 
2605 	/* Reserve room for dummy headers; this skb can pass
2606 	   through a good chunk of the routing engine.
2607 	 */
2608 	skb_reset_mac_header(skb);
2609 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2610 
2611 	skb_dst_set(skb, &rt->dst);
2612 
2613 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2614 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2615 			    nlh->nlmsg_seq, 0, 0, 0);
2616 	if (err < 0) {
2617 		kfree_skb(skb);
2618 		goto errout;
2619 	}
2620 
2621 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2622 errout:
2623 	return err;
2624 }
2625 
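/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change. */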
2626 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2627 {
2628 	struct sk_buff *skb;
2629 	struct net *net = info->nl_net;
2630 	u32 seq;
2631 	int err;
2632 
2633 	err = -ENOBUFS;
2634 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2635 
2636 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2637 	if (!skb)
2638 		goto errout;
2639 
2640 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2641 				event, info->pid, seq, 0, 0, 0);
2642 	if (err < 0) {
2643 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2644 		WARN_ON(err == -EMSGSIZE);
2645 		kfree_skb(skb);
2646 		goto errout;
2647 	}
2648 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2649 		    info->nlh, gfp_any());
2650 	return;
2651 errout:
2652 	if (err < 0)
2653 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2654 }
2655 
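/*
 * netdevice notifier: once the loopback device registers in a namespace,
 * point that namespace's null (and prohibit/blackhole) entries at it.
 */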
2656 static int ip6_route_dev_notify(struct notifier_block *this,
2657 				unsigned long event, void *data)
2658 {
2659 	struct net_device *dev = (struct net_device *)data;
2660 	struct net *net = dev_net(dev);
2661 
2662 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2663 		net->ipv6.ip6_null_entry->dst.dev = dev;
2664 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2665 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2666 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2667 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2668 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2669 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2670 #endif
2671 	}
2672 
2673 	return NOTIFY_OK;
2674 }
2675 
2676 /*
2677  *	/proc
2678  */
2679 
2680 #ifdef CONFIG_PROC_FS
2681 
2682 struct rt6_proc_arg
2683 {
2684 	char *buffer;
2685 	int offset;
2686 	int length;
2687 	int skip;
2688 	int len;
2689 };
2690 
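/*
 * Emit one /proc/net/ipv6_route line:
 *	dst/plen  src/plen  gateway  metric  refcnt  use  flags  device
 * Addresses are printed as 32 hex digits, the counters as 8 hex digits.
 */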
2691 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2692 {
2693 	struct seq_file *m = p_arg;
2694 	struct neighbour *n;
2695 
2696 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2697 
2698 #ifdef CONFIG_IPV6_SUBTREES
2699 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2700 #else
2701 	seq_puts(m, "00000000000000000000000000000000 00 ");
2702 #endif
2703 	rcu_read_lock();
2704 	n = dst_get_neighbour_noref(&rt->dst);
2705 	if (n) {
2706 		seq_printf(m, "%pi6", n->primary_key);
2707 	} else {
2708 		seq_puts(m, "00000000000000000000000000000000");
2709 	}
2710 	rcu_read_unlock();
2711 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2712 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2713 		   rt->dst.__use, rt->rt6i_flags,
2714 		   rt->dst.dev ? rt->dst.dev->name : "");
2715 	return 0;
2716 }
2717 
2718 static int ipv6_route_show(struct seq_file *m, void *v)
2719 {
2720 	struct net *net = (struct net *)m->private;
2721 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2722 	return 0;
2723 }
2724 
2725 static int ipv6_route_open(struct inode *inode, struct file *file)
2726 {
2727 	return single_open_net(inode, file, ipv6_route_show);
2728 }
2729 
2730 static const struct file_operations ipv6_route_proc_fops = {
2731 	.owner		= THIS_MODULE,
2732 	.open		= ipv6_route_open,
2733 	.read		= seq_read,
2734 	.llseek		= seq_lseek,
2735 	.release	= single_release_net,
2736 };
2737 
2738 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2739 {
2740 	struct net *net = (struct net *)seq->private;
2741 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2742 		   net->ipv6.rt6_stats->fib_nodes,
2743 		   net->ipv6.rt6_stats->fib_route_nodes,
2744 		   net->ipv6.rt6_stats->fib_rt_alloc,
2745 		   net->ipv6.rt6_stats->fib_rt_entries,
2746 		   net->ipv6.rt6_stats->fib_rt_cache,
2747 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2748 		   net->ipv6.rt6_stats->fib_discarded_routes);
2749 
2750 	return 0;
2751 }
2752 
2753 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2754 {
2755 	return single_open_net(inode, file, rt6_stats_seq_show);
2756 }
2757 
2758 static const struct file_operations rt6_stats_seq_fops = {
2759 	.owner	 = THIS_MODULE,
2760 	.open	 = rt6_stats_seq_open,
2761 	.read	 = seq_read,
2762 	.llseek	 = seq_lseek,
2763 	.release = single_release_net,
2764 };
2765 #endif	/* CONFIG_PROC_FS */
2766 
2767 #ifdef CONFIG_SYSCTL
2768 
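/*
 * Write-only sysctl (/proc/sys/net/ipv6/route/flush): writing to it,
 * e.g. "echo 1 > /proc/sys/net/ipv6/route/flush", triggers a
 * garbage-collection run over the routing cache.
 */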
2769 static
2770 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2771 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2772 {
2773 	struct net *net;
2774 	int delay;
2775 	if (!write)
2776 		return -EINVAL;
2777 
2778 	net = (struct net *)ctl->extra1;
2779 	delay = net->ipv6.sysctl.flush_delay;
2780 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2781 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2782 	return 0;
2783 }
2784 
2785 ctl_table ipv6_route_table_template[] = {
2786 	{
2787 		.procname	=	"flush",
2788 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2789 		.maxlen		=	sizeof(int),
2790 		.mode		=	0200,
2791 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2792 	},
2793 	{
2794 		.procname	=	"gc_thresh",
2795 		.data		=	&ip6_dst_ops_template.gc_thresh,
2796 		.maxlen		=	sizeof(int),
2797 		.mode		=	0644,
2798 		.proc_handler	=	proc_dointvec,
2799 	},
2800 	{
2801 		.procname	=	"max_size",
2802 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2803 		.maxlen		=	sizeof(int),
2804 		.mode		=	0644,
2805 		.proc_handler	=	proc_dointvec,
2806 	},
2807 	{
2808 		.procname	=	"gc_min_interval",
2809 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2810 		.maxlen		=	sizeof(int),
2811 		.mode		=	0644,
2812 		.proc_handler	=	proc_dointvec_jiffies,
2813 	},
2814 	{
2815 		.procname	=	"gc_timeout",
2816 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2817 		.maxlen		=	sizeof(int),
2818 		.mode		=	0644,
2819 		.proc_handler	=	proc_dointvec_jiffies,
2820 	},
2821 	{
2822 		.procname	=	"gc_interval",
2823 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2824 		.maxlen		=	sizeof(int),
2825 		.mode		=	0644,
2826 		.proc_handler	=	proc_dointvec_jiffies,
2827 	},
2828 	{
2829 		.procname	=	"gc_elasticity",
2830 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2831 		.maxlen		=	sizeof(int),
2832 		.mode		=	0644,
2833 		.proc_handler	=	proc_dointvec,
2834 	},
2835 	{
2836 		.procname	=	"mtu_expires",
2837 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2838 		.maxlen		=	sizeof(int),
2839 		.mode		=	0644,
2840 		.proc_handler	=	proc_dointvec_jiffies,
2841 	},
2842 	{
2843 		.procname	=	"min_adv_mss",
2844 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2845 		.maxlen		=	sizeof(int),
2846 		.mode		=	0644,
2847 		.proc_handler	=	proc_dointvec,
2848 	},
2849 	{
2850 		.procname	=	"gc_min_interval_ms",
2851 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2852 		.maxlen		=	sizeof(int),
2853 		.mode		=	0644,
2854 		.proc_handler	=	proc_dointvec_ms_jiffies,
2855 	},
2856 	{ }
2857 };
2858 
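/*
 * Clone the sysctl template for a network namespace and re-point each
 * entry's ->data at that namespace's own fields.
 */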
2859 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2860 {
2861 	struct ctl_table *table;
2862 
2863 	table = kmemdup(ipv6_route_table_template,
2864 			sizeof(ipv6_route_table_template),
2865 			GFP_KERNEL);
2866 
2867 	if (table) {
2868 		table[0].data = &net->ipv6.sysctl.flush_delay;
2869 		table[0].extra1 = net;
2870 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2871 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2872 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2873 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2874 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2875 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2876 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2877 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2878 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2879 	}
2880 
2881 	return table;
2882 }
2883 #endif
2884 
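/*
 * Per-namespace init: copy the dst_ops template, clone the null (and
 * prohibit/blackhole) template routes, set default sysctl values and
 * create the /proc entries.
 */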
2885 static int __net_init ip6_route_net_init(struct net *net)
2886 {
2887 	int ret = -ENOMEM;
2888 
2889 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2890 	       sizeof(net->ipv6.ip6_dst_ops));
2891 
2892 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2893 		goto out_ip6_dst_ops;
2894 
2895 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2896 					   sizeof(*net->ipv6.ip6_null_entry),
2897 					   GFP_KERNEL);
2898 	if (!net->ipv6.ip6_null_entry)
2899 		goto out_ip6_dst_entries;
2900 	net->ipv6.ip6_null_entry->dst.path =
2901 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2902 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2903 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2904 			 ip6_template_metrics, true);
2905 
2906 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2907 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2908 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2909 					       GFP_KERNEL);
2910 	if (!net->ipv6.ip6_prohibit_entry)
2911 		goto out_ip6_null_entry;
2912 	net->ipv6.ip6_prohibit_entry->dst.path =
2913 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2914 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2915 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2916 			 ip6_template_metrics, true);
2917 
2918 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2919 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2920 					       GFP_KERNEL);
2921 	if (!net->ipv6.ip6_blk_hole_entry)
2922 		goto out_ip6_prohibit_entry;
2923 	net->ipv6.ip6_blk_hole_entry->dst.path =
2924 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2925 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2926 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2927 			 ip6_template_metrics, true);
2928 #endif
2929 
2930 	net->ipv6.sysctl.flush_delay = 0;
2931 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2932 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2933 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2934 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2935 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2936 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2937 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2938 
2939 #ifdef CONFIG_PROC_FS
2940 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2941 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2942 #endif
2943 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2944 
2945 	ret = 0;
2946 out:
2947 	return ret;
2948 
2949 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2950 out_ip6_prohibit_entry:
2951 	kfree(net->ipv6.ip6_prohibit_entry);
2952 out_ip6_null_entry:
2953 	kfree(net->ipv6.ip6_null_entry);
2954 #endif
2955 out_ip6_dst_entries:
2956 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2957 out_ip6_dst_ops:
2958 	goto out;
2959 }
2960 
2961 static void __net_exit ip6_route_net_exit(struct net *net)
2962 {
2963 #ifdef CONFIG_PROC_FS
2964 	proc_net_remove(net, "ipv6_route");
2965 	proc_net_remove(net, "rt6_stats");
2966 #endif
2967 	kfree(net->ipv6.ip6_null_entry);
2968 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2969 	kfree(net->ipv6.ip6_prohibit_entry);
2970 	kfree(net->ipv6.ip6_blk_hole_entry);
2971 #endif
2972 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2973 }
2974 
2975 static struct pernet_operations ip6_route_net_ops = {
2976 	.init = ip6_route_net_init,
2977 	.exit = ip6_route_net_exit,
2978 };
2979 
2980 static struct notifier_block ip6_route_dev_notifier = {
2981 	.notifier_call = ip6_route_dev_notify,
2982 	.priority = 0,
2983 };
2984 
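/*
 * Subsystem init: create the rt6_info slab cache, register per-netns
 * state, fib6, xfrm6 and policy rules, then the rtnetlink handlers and
 * the netdevice notifier.  Errors unwind in reverse order of setup.
 */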
2985 int __init ip6_route_init(void)
2986 {
2987 	int ret;
2988 
2989 	ret = -ENOMEM;
2990 	ip6_dst_ops_template.kmem_cachep =
2991 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2992 				  SLAB_HWCACHE_ALIGN, NULL);
2993 	if (!ip6_dst_ops_template.kmem_cachep)
2994 		goto out;
2995 
2996 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2997 	if (ret)
2998 		goto out_kmem_cache;
2999 
3000 	ret = register_pernet_subsys(&ip6_route_net_ops);
3001 	if (ret)
3002 		goto out_dst_entries;
3003 
3004 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3005 
3006 	/* The loopback device is registered before this portion of code runs,
3007 	 * so the loopback reference in rt6_info is not taken automatically;
3008 	 * take it manually for init_net. */
3009 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3010 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3011 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3012 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3013 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3014 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3015 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3016 #endif
3017 	ret = fib6_init();
3018 	if (ret)
3019 		goto out_register_subsys;
3020 
3021 	ret = xfrm6_init();
3022 	if (ret)
3023 		goto out_fib6_init;
3024 
3025 	ret = fib6_rules_init();
3026 	if (ret)
3027 		goto xfrm6_init;
3028 
3029 	ret = -ENOBUFS;
3030 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3031 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3032 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3033 		goto fib6_rules_init;
3034 
3035 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3036 	if (ret)
3037 		goto fib6_rules_init;
3038 
3039 out:
3040 	return ret;
3041 
3042 fib6_rules_init:
3043 	fib6_rules_cleanup();
3044 xfrm6_init:
3045 	xfrm6_fini();
3046 out_fib6_init:
3047 	fib6_gc_cleanup();
3048 out_register_subsys:
3049 	unregister_pernet_subsys(&ip6_route_net_ops);
3050 out_dst_entries:
3051 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3052 out_kmem_cache:
3053 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3054 	goto out;
3055 }
3056 
3057 void ip6_route_cleanup(void)
3058 {
3059 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3060 	fib6_rules_cleanup();
3061 	xfrm6_fini();
3062 	fib6_gc_cleanup();
3063 	unregister_pernet_subsys(&ip6_route_net_ops);
3064 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3065 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3066 }
3067