xref: /linux/net/ipv6/route.c (revision a67ff6a54095e27093ea501fb143fefe51a536c2)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
65 /* Set to 3 to get tracing. */
66 #define RT6_DEBUG 2
67 
68 #if RT6_DEBUG >= 3
69 #define RDBG(x) printk x
70 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #else
72 #define RDBG(x)
73 #define RT6_TRACE(x...) do { ; } while (0)
74 #endif
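/*
 * Usage note: rebuilding with RT6_DEBUG set to 3 turns the RT6_TRACE()
 * calls in this file (e.g. in rt6_select() below) into KERN_DEBUG printk
 * output; at the default level of 2 they compile away to empty statements.
 */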
75 
76 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 				    const struct in6_addr *dest);
78 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
80 static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
81 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82 static void		ip6_dst_destroy(struct dst_entry *);
83 static void		ip6_dst_ifdown(struct dst_entry *,
84 				       struct net_device *dev, int how);
85 static int		 ip6_dst_gc(struct dst_ops *ops);
86 
87 static int		ip6_pkt_discard(struct sk_buff *skb);
88 static int		ip6_pkt_discard_out(struct sk_buff *skb);
89 static void		ip6_link_failure(struct sk_buff *skb);
90 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91 
92 #ifdef CONFIG_IPV6_ROUTE_INFO
93 static struct rt6_info *rt6_add_route_info(struct net *net,
94 					   const struct in6_addr *prefix, int prefixlen,
95 					   const struct in6_addr *gwaddr, int ifindex,
96 					   unsigned pref);
97 static struct rt6_info *rt6_get_route_info(struct net *net,
98 					   const struct in6_addr *prefix, int prefixlen,
99 					   const struct in6_addr *gwaddr, int ifindex);
100 #endif
101 
102 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103 {
104 	struct rt6_info *rt = (struct rt6_info *) dst;
105 	struct inet_peer *peer;
106 	u32 *p = NULL;
107 
108 	if (!(rt->dst.flags & DST_HOST))
109 		return NULL;
110 
111 	if (!rt->rt6i_peer)
112 		rt6_bind_peer(rt, 1);
113 
114 	peer = rt->rt6i_peer;
115 	if (peer) {
116 		u32 *old_p = __DST_METRICS_PTR(old);
117 		unsigned long prev, new;
118 
119 		p = peer->metrics;
120 		if (inet_metrics_new(peer))
121 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122 
123 		new = (unsigned long) p;
124 		prev = cmpxchg(&dst->_metrics, old, new);
125 
126 		if (prev != old) {
127 			p = __DST_METRICS_PTR(prev);
128 			if (prev & DST_METRICS_READ_ONLY)
129 				p = NULL;
130 		}
131 	}
132 	return p;
133 }
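/*
 * The function above implements copy-on-write metrics: the values are
 * copied into the inet_peer and cmpxchg() swings dst->_metrics from the
 * shared read-only template to the writable copy.  If another CPU wins the
 * race, the pointer it installed is used instead, unless that pointer is
 * still marked DST_METRICS_READ_ONLY, in which case NULL is returned and
 * the caller must not write metrics.
 */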
134 
135 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136 {
137 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138 }
139 
140 static struct dst_ops ip6_dst_ops_template = {
141 	.family			=	AF_INET6,
142 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
143 	.gc			=	ip6_dst_gc,
144 	.gc_thresh		=	1024,
145 	.check			=	ip6_dst_check,
146 	.default_advmss		=	ip6_default_advmss,
147 	.default_mtu		=	ip6_default_mtu,
148 	.cow_metrics		=	ipv6_cow_metrics,
149 	.destroy		=	ip6_dst_destroy,
150 	.ifdown			=	ip6_dst_ifdown,
151 	.negative_advice	=	ip6_negative_advice,
152 	.link_failure		=	ip6_link_failure,
153 	.update_pmtu		=	ip6_rt_update_pmtu,
154 	.local_out		=	__ip6_local_out,
155 	.neigh_lookup		=	ip6_neigh_lookup,
156 };
157 
158 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
159 {
160 	return 0;
161 }
162 
163 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164 {
165 }
166 
167 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 					 unsigned long old)
169 {
170 	return NULL;
171 }
172 
173 static struct dst_ops ip6_dst_blackhole_ops = {
174 	.family			=	AF_INET6,
175 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
176 	.destroy		=	ip6_dst_destroy,
177 	.check			=	ip6_dst_check,
178 	.default_mtu		=	ip6_blackhole_default_mtu,
179 	.default_advmss		=	ip6_default_advmss,
180 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
181 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
182 	.neigh_lookup		=	ip6_neigh_lookup,
183 };
184 
185 static const u32 ip6_template_metrics[RTAX_MAX] = {
186 	[RTAX_HOPLIMIT - 1] = 255,
187 };
188 
189 static struct rt6_info ip6_null_entry_template = {
190 	.dst = {
191 		.__refcnt	= ATOMIC_INIT(1),
192 		.__use		= 1,
193 		.obsolete	= -1,
194 		.error		= -ENETUNREACH,
195 		.input		= ip6_pkt_discard,
196 		.output		= ip6_pkt_discard_out,
197 	},
198 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
199 	.rt6i_protocol  = RTPROT_KERNEL,
200 	.rt6i_metric	= ~(u32) 0,
201 	.rt6i_ref	= ATOMIC_INIT(1),
202 };
203 
204 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
205 
206 static int ip6_pkt_prohibit(struct sk_buff *skb);
207 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
208 
209 static struct rt6_info ip6_prohibit_entry_template = {
210 	.dst = {
211 		.__refcnt	= ATOMIC_INIT(1),
212 		.__use		= 1,
213 		.obsolete	= -1,
214 		.error		= -EACCES,
215 		.input		= ip6_pkt_prohibit,
216 		.output		= ip6_pkt_prohibit_out,
217 	},
218 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
219 	.rt6i_protocol  = RTPROT_KERNEL,
220 	.rt6i_metric	= ~(u32) 0,
221 	.rt6i_ref	= ATOMIC_INIT(1),
222 };
223 
224 static struct rt6_info ip6_blk_hole_entry_template = {
225 	.dst = {
226 		.__refcnt	= ATOMIC_INIT(1),
227 		.__use		= 1,
228 		.obsolete	= -1,
229 		.error		= -EINVAL,
230 		.input		= dst_discard,
231 		.output		= dst_discard,
232 	},
233 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
234 	.rt6i_protocol  = RTPROT_KERNEL,
235 	.rt6i_metric	= ~(u32) 0,
236 	.rt6i_ref	= ATOMIC_INIT(1),
237 };
238 
239 #endif
240 
241 /* allocate dst with ip6_dst_ops */
242 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
243 					     struct net_device *dev,
244 					     int flags)
245 {
246 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
247 
248 	if (rt != NULL)
249 		memset(&rt->rt6i_table, 0,
250 			sizeof(*rt) - sizeof(struct dst_entry));
251 
252 	return rt;
253 }
254 
255 static void ip6_dst_destroy(struct dst_entry *dst)
256 {
257 	struct rt6_info *rt = (struct rt6_info *)dst;
258 	struct inet6_dev *idev = rt->rt6i_idev;
259 	struct inet_peer *peer = rt->rt6i_peer;
260 
261 	if (!(rt->dst.flags & DST_HOST))
262 		dst_destroy_metrics_generic(dst);
263 
264 	if (idev != NULL) {
265 		rt->rt6i_idev = NULL;
266 		in6_dev_put(idev);
267 	}
268 	if (peer) {
269 		rt->rt6i_peer = NULL;
270 		inet_putpeer(peer);
271 	}
272 }
273 
274 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275 
276 static u32 rt6_peer_genid(void)
277 {
278 	return atomic_read(&__rt6_peer_genid);
279 }
280 
281 void rt6_bind_peer(struct rt6_info *rt, int create)
282 {
283 	struct inet_peer *peer;
284 
285 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 		inet_putpeer(peer);
288 	else
289 		rt->rt6i_peer_genid = rt6_peer_genid();
290 }
291 
292 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 			   int how)
294 {
295 	struct rt6_info *rt = (struct rt6_info *)dst;
296 	struct inet6_dev *idev = rt->rt6i_idev;
297 	struct net_device *loopback_dev =
298 		dev_net(dev)->loopback_dev;
299 
300 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 		struct inet6_dev *loopback_idev =
302 			in6_dev_get(loopback_dev);
303 		if (loopback_idev != NULL) {
304 			rt->rt6i_idev = loopback_idev;
305 			in6_dev_put(idev);
306 		}
307 	}
308 }
309 
310 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311 {
312 	return (rt->rt6i_flags & RTF_EXPIRES) &&
313 		time_after(jiffies, rt->rt6i_expires);
314 }
315 
316 static inline int rt6_need_strict(const struct in6_addr *daddr)
317 {
318 	return ipv6_addr_type(daddr) &
319 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
320 }
321 
322 /*
323  *	Route lookup. Any table->tb6_lock is implied.
324  */
325 
326 static inline struct rt6_info *rt6_device_match(struct net *net,
327 						    struct rt6_info *rt,
328 						    const struct in6_addr *saddr,
329 						    int oif,
330 						    int flags)
331 {
332 	struct rt6_info *local = NULL;
333 	struct rt6_info *sprt;
334 
335 	if (!oif && ipv6_addr_any(saddr))
336 		goto out;
337 
338 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
339 		struct net_device *dev = sprt->rt6i_dev;
340 
341 		if (oif) {
342 			if (dev->ifindex == oif)
343 				return sprt;
344 			if (dev->flags & IFF_LOOPBACK) {
345 				if (sprt->rt6i_idev == NULL ||
346 				    sprt->rt6i_idev->dev->ifindex != oif) {
347 					if (flags & RT6_LOOKUP_F_IFACE && oif)
348 						continue;
349 					if (local && (!oif ||
350 						      local->rt6i_idev->dev->ifindex == oif))
351 						continue;
352 				}
353 				local = sprt;
354 			}
355 		} else {
356 			if (ipv6_chk_addr(net, saddr, dev,
357 					  flags & RT6_LOOKUP_F_IFACE))
358 				return sprt;
359 		}
360 	}
361 
362 	if (oif) {
363 		if (local)
364 			return local;
365 
366 		if (flags & RT6_LOOKUP_F_IFACE)
367 			return net->ipv6.ip6_null_entry;
368 	}
369 out:
370 	return rt;
371 }
372 
373 #ifdef CONFIG_IPV6_ROUTER_PREF
374 static void rt6_probe(struct rt6_info *rt)
375 {
376 	struct neighbour *neigh;
377 	/*
378 	 * Okay, this does not seem to be appropriate
379 	 * for now, however, we need to check if it
380 	 * is really so; aka Router Reachability Probing.
381 	 *
382 	 * Router Reachability Probe MUST be rate-limited
383 	 * to no more than one per minute.
384 	 */
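	/*
	 * The rate limit is enforced below through neigh->updated and
	 * idev->cnf.rtr_probe_interval, a per-interface sysctl
	 * (router_probe_interval) that normally defaults to 60 seconds.
	 */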
385 	rcu_read_lock();
386 	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
387 	if (!neigh || (neigh->nud_state & NUD_VALID))
388 		goto out;
389 	read_lock_bh(&neigh->lock);
390 	if (!(neigh->nud_state & NUD_VALID) &&
391 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
392 		struct in6_addr mcaddr;
393 		struct in6_addr *target;
394 
395 		neigh->updated = jiffies;
396 		read_unlock_bh(&neigh->lock);
397 
398 		target = (struct in6_addr *)&neigh->primary_key;
399 		addrconf_addr_solict_mult(target, &mcaddr);
400 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
401 	} else {
402 		read_unlock_bh(&neigh->lock);
403 	}
404 out:
405 	rcu_read_unlock();
406 }
407 #else
408 static inline void rt6_probe(struct rt6_info *rt)
409 {
410 }
411 #endif
412 
413 /*
414  * Default Router Selection (RFC 2461 6.3.6)
415  */
416 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
417 {
418 	struct net_device *dev = rt->rt6i_dev;
419 	if (!oif || dev->ifindex == oif)
420 		return 2;
421 	if ((dev->flags & IFF_LOOPBACK) &&
422 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 		return 1;
424 	return 0;
425 }
426 
427 static inline int rt6_check_neigh(struct rt6_info *rt)
428 {
429 	struct neighbour *neigh;
430 	int m;
431 
432 	rcu_read_lock();
433 	neigh = dst_get_neighbour(&rt->dst);
434 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 	    !(rt->rt6i_flags & RTF_GATEWAY))
436 		m = 1;
437 	else if (neigh) {
438 		read_lock_bh(&neigh->lock);
439 		if (neigh->nud_state & NUD_VALID)
440 			m = 2;
441 #ifdef CONFIG_IPV6_ROUTER_PREF
442 		else if (neigh->nud_state & NUD_FAILED)
443 			m = 0;
444 #endif
445 		else
446 			m = 1;
447 		read_unlock_bh(&neigh->lock);
448 	} else
449 		m = 0;
450 	rcu_read_unlock();
451 	return m;
452 }
453 
454 static int rt6_score_route(struct rt6_info *rt, int oif,
455 			   int strict)
456 {
457 	int m, n;
458 
459 	m = rt6_check_dev(rt, oif);
460 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
461 		return -1;
462 #ifdef CONFIG_IPV6_ROUTER_PREF
463 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464 #endif
465 	n = rt6_check_neigh(rt);
466 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
467 		return -1;
468 	return m;
469 }
470 
471 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 				   int *mpri, struct rt6_info *match)
473 {
474 	int m;
475 
476 	if (rt6_check_expired(rt))
477 		goto out;
478 
479 	m = rt6_score_route(rt, oif, strict);
480 	if (m < 0)
481 		goto out;
482 
483 	if (m > *mpri) {
484 		if (strict & RT6_LOOKUP_F_REACHABLE)
485 			rt6_probe(match);
486 		*mpri = m;
487 		match = rt;
488 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 		rt6_probe(rt);
490 	}
491 
492 out:
493 	return match;
494 }
495 
496 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 				     struct rt6_info *rr_head,
498 				     u32 metric, int oif, int strict)
499 {
500 	struct rt6_info *rt, *match;
501 	int mpri = -1;
502 
503 	match = NULL;
504 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
505 	     rt = rt->dst.rt6_next)
506 		match = find_match(rt, oif, strict, &mpri, match);
507 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
508 	     rt = rt->dst.rt6_next)
509 		match = find_match(rt, oif, strict, &mpri, match);
510 
511 	return match;
512 }
513 
514 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515 {
516 	struct rt6_info *match, *rt0;
517 	struct net *net;
518 
519 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
520 		  __func__, fn->leaf, oif);
521 
522 	rt0 = fn->rr_ptr;
523 	if (!rt0)
524 		fn->rr_ptr = rt0 = fn->leaf;
525 
526 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
527 
528 	if (!match &&
529 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
530 		struct rt6_info *next = rt0->dst.rt6_next;
531 
532 		/* no entries matched; do round-robin */
533 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 			next = fn->leaf;
535 
536 		if (next != rt0)
537 			fn->rr_ptr = next;
538 	}
539 
540 	RT6_TRACE("%s() => %p\n",
541 		  __func__, match);
542 
543 	net = dev_net(rt0->rt6i_dev);
544 	return match ? match : net->ipv6.ip6_null_entry;
545 }
546 
547 #ifdef CONFIG_IPV6_ROUTE_INFO
548 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
549 		  const struct in6_addr *gwaddr)
550 {
551 	struct net *net = dev_net(dev);
552 	struct route_info *rinfo = (struct route_info *) opt;
553 	struct in6_addr prefix_buf, *prefix;
554 	unsigned int pref;
555 	unsigned long lifetime;
556 	struct rt6_info *rt;
557 
558 	if (len < sizeof(struct route_info)) {
559 		return -EINVAL;
560 	}
561 
562 	/* Sanity check for prefix_len and length */
563 	if (rinfo->length > 3) {
564 		return -EINVAL;
565 	} else if (rinfo->prefix_len > 128) {
566 		return -EINVAL;
567 	} else if (rinfo->prefix_len > 64) {
568 		if (rinfo->length < 2) {
569 			return -EINVAL;
570 		}
571 	} else if (rinfo->prefix_len > 0) {
572 		if (rinfo->length < 1) {
573 			return -EINVAL;
574 		}
575 	}
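	/*
	 * For reference: rinfo->length counts 8-octet units of the Route
	 * Information Option (RFC 4191).  Length 1 carries no prefix bytes,
	 * 2 carries the first 64 bits, and 3 carries the full 128-bit
	 * prefix, which is why only length 3 lets rinfo->prefix be used
	 * directly below.
	 */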
576 
577 	pref = rinfo->route_pref;
578 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
579 		return -EINVAL;
580 
581 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
582 
583 	if (rinfo->length == 3)
584 		prefix = (struct in6_addr *)rinfo->prefix;
585 	else {
586 		/* this function is safe */
587 		ipv6_addr_prefix(&prefix_buf,
588 				 (struct in6_addr *)rinfo->prefix,
589 				 rinfo->prefix_len);
590 		prefix = &prefix_buf;
591 	}
592 
593 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
594 				dev->ifindex);
595 
596 	if (rt && !lifetime) {
597 		ip6_del_rt(rt);
598 		rt = NULL;
599 	}
600 
601 	if (!rt && lifetime)
602 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
603 					pref);
604 	else if (rt)
605 		rt->rt6i_flags = RTF_ROUTEINFO |
606 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
607 
608 	if (rt) {
609 		if (!addrconf_finite_timeout(lifetime)) {
610 			rt->rt6i_flags &= ~RTF_EXPIRES;
611 		} else {
612 			rt->rt6i_expires = jiffies + HZ * lifetime;
613 			rt->rt6i_flags |= RTF_EXPIRES;
614 		}
615 		dst_release(&rt->dst);
616 	}
617 	return 0;
618 }
619 #endif
620 
621 #define BACKTRACK(__net, saddr)			\
622 do { \
623 	if (rt == __net->ipv6.ip6_null_entry) {	\
624 		struct fib6_node *pn; \
625 		while (1) { \
626 			if (fn->fn_flags & RTN_TL_ROOT) \
627 				goto out; \
628 			pn = fn->parent; \
629 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
630 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
631 			else \
632 				fn = pn; \
633 			if (fn->fn_flags & RTN_RTINFO) \
634 				goto restart; \
635 		} \
636 	} \
637 } while(0)
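/*
 * BACKTRACK() relies on its caller providing "restart" and "out" labels,
 * roughly along these lines (see ip6_pol_route_lookup() below for the real
 * thing):
 *
 *	fn = fib6_lookup(&table->tb6_root, ...);
 * restart:
 *	rt = <route picked from fn>;
 *	BACKTRACK(net, &fl6->saddr);
 * out:
 *	return rt;
 *
 * If only the null entry was found, it walks back up the tree (descending
 * into any source-routed subtree along the way) and jumps to "restart" at
 * the first node carrying route info, or to "out" at the tree root.
 */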
638 
639 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 					     struct fib6_table *table,
641 					     struct flowi6 *fl6, int flags)
642 {
643 	struct fib6_node *fn;
644 	struct rt6_info *rt;
645 
646 	read_lock_bh(&table->tb6_lock);
647 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
648 restart:
649 	rt = fn->leaf;
650 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 	BACKTRACK(net, &fl6->saddr);
652 out:
653 	dst_use(&rt->dst, jiffies);
654 	read_unlock_bh(&table->tb6_lock);
655 	return rt;
656 
657 }
658 
659 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 			    const struct in6_addr *saddr, int oif, int strict)
661 {
662 	struct flowi6 fl6 = {
663 		.flowi6_oif = oif,
664 		.daddr = *daddr,
665 	};
666 	struct dst_entry *dst;
667 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
668 
669 	if (saddr) {
670 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
671 		flags |= RT6_LOOKUP_F_HAS_SADDR;
672 	}
673 
674 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
675 	if (dst->error == 0)
676 		return (struct rt6_info *) dst;
677 
678 	dst_release(dst);
679 
680 	return NULL;
681 }
682 
683 EXPORT_SYMBOL(rt6_lookup);
684 
685 /* ip6_ins_rt is called with table->tb6_lock NOT held (it takes the
686    lock itself). It takes a new route entry; if the addition fails for
687    any reason the route is freed. In any case, if the caller does not
688    hold a reference to it, it may be destroyed.
689  */
690 
691 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
692 {
693 	int err;
694 	struct fib6_table *table;
695 
696 	table = rt->rt6i_table;
697 	write_lock_bh(&table->tb6_lock);
698 	err = fib6_add(&table->tb6_root, rt, info);
699 	write_unlock_bh(&table->tb6_lock);
700 
701 	return err;
702 }
703 
704 int ip6_ins_rt(struct rt6_info *rt)
705 {
706 	struct nl_info info = {
707 		.nl_net = dev_net(rt->rt6i_dev),
708 	};
709 	return __ip6_ins_rt(rt, &info);
710 }
711 
712 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 				      const struct in6_addr *daddr,
714 				      const struct in6_addr *saddr)
715 {
716 	struct rt6_info *rt;
717 
718 	/*
719 	 *	Clone the route.
720 	 */
721 
722 	rt = ip6_rt_copy(ort, daddr);
723 
724 	if (rt) {
725 		struct neighbour *neigh;
726 		int attempts = !in_softirq();
727 
728 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 			if (rt->rt6i_dst.plen != 128 &&
730 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
731 				rt->rt6i_flags |= RTF_ANYCAST;
732 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
733 		}
734 
735 		rt->rt6i_flags |= RTF_CACHE;
736 
737 #ifdef CONFIG_IPV6_SUBTREES
738 		if (rt->rt6i_src.plen && saddr) {
739 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
740 			rt->rt6i_src.plen = 128;
741 		}
742 #endif
743 
744 	retry:
745 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 		if (IS_ERR(neigh)) {
747 			struct net *net = dev_net(rt->rt6i_dev);
748 			int saved_rt_min_interval =
749 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 			int saved_rt_elasticity =
751 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
752 
753 			if (attempts-- > 0) {
754 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756 
757 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
758 
759 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 					saved_rt_elasticity;
761 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 					saved_rt_min_interval;
763 				goto retry;
764 			}
765 
766 			if (net_ratelimit())
767 				printk(KERN_WARNING
768 				       "ipv6: Neighbour table overflow.\n");
769 			dst_free(&rt->dst);
770 			return NULL;
771 		}
772 		dst_set_neighbour(&rt->dst, neigh);
773 
774 	}
775 
776 	return rt;
777 }
778 
779 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 					const struct in6_addr *daddr)
781 {
782 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783 
784 	if (rt) {
785 		rt->rt6i_flags |= RTF_CACHE;
786 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
787 	}
788 	return rt;
789 }
790 
791 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
792 				      struct flowi6 *fl6, int flags)
793 {
794 	struct fib6_node *fn;
795 	struct rt6_info *rt, *nrt;
796 	int strict = 0;
797 	int attempts = 3;
798 	int err;
799 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
800 
801 	strict |= flags & RT6_LOOKUP_F_IFACE;
802 
803 relookup:
804 	read_lock_bh(&table->tb6_lock);
805 
806 restart_2:
807 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
808 
809 restart:
810 	rt = rt6_select(fn, oif, strict | reachable);
811 
812 	BACKTRACK(net, &fl6->saddr);
813 	if (rt == net->ipv6.ip6_null_entry ||
814 	    rt->rt6i_flags & RTF_CACHE)
815 		goto out;
816 
817 	dst_hold(&rt->dst);
818 	read_unlock_bh(&table->tb6_lock);
819 
820 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
821 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
822 	else if (!(rt->dst.flags & DST_HOST))
823 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
824 	else
825 		goto out2;
826 
827 	dst_release(&rt->dst);
828 	rt = nrt ? : net->ipv6.ip6_null_entry;
829 
830 	dst_hold(&rt->dst);
831 	if (nrt) {
832 		err = ip6_ins_rt(nrt);
833 		if (!err)
834 			goto out2;
835 	}
836 
837 	if (--attempts <= 0)
838 		goto out2;
839 
840 	/*
841 	 * Race condition! In the gap, when table->tb6_lock was
842 	 * released someone could insert this route.  Relookup.
843 	 */
844 	dst_release(&rt->dst);
845 	goto relookup;
846 
847 out:
848 	if (reachable) {
849 		reachable = 0;
850 		goto restart_2;
851 	}
852 	dst_hold(&rt->dst);
853 	read_unlock_bh(&table->tb6_lock);
854 out2:
855 	rt->dst.lastuse = jiffies;
856 	rt->dst.__use++;
857 
858 	return rt;
859 }
860 
861 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
862 					    struct flowi6 *fl6, int flags)
863 {
864 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
865 }
866 
867 void ip6_route_input(struct sk_buff *skb)
868 {
869 	const struct ipv6hdr *iph = ipv6_hdr(skb);
870 	struct net *net = dev_net(skb->dev);
871 	int flags = RT6_LOOKUP_F_HAS_SADDR;
872 	struct flowi6 fl6 = {
873 		.flowi6_iif = skb->dev->ifindex,
874 		.daddr = iph->daddr,
875 		.saddr = iph->saddr,
876 		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 		.flowi6_mark = skb->mark,
878 		.flowi6_proto = iph->nexthdr,
879 	};
880 
881 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
882 		flags |= RT6_LOOKUP_F_IFACE;
883 
884 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
885 }
886 
887 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
888 					     struct flowi6 *fl6, int flags)
889 {
890 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
891 }
892 
893 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
894 				    struct flowi6 *fl6)
895 {
896 	int flags = 0;
897 
898 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
899 		flags |= RT6_LOOKUP_F_IFACE;
900 
901 	if (!ipv6_addr_any(&fl6->saddr))
902 		flags |= RT6_LOOKUP_F_HAS_SADDR;
903 	else if (sk)
904 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
905 
906 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
907 }
908 
909 EXPORT_SYMBOL(ip6_route_output);
910 
911 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
912 {
913 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
914 	struct dst_entry *new = NULL;
915 
916 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
917 	if (rt) {
918 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919 
920 		new = &rt->dst;
921 
922 		new->__use = 1;
923 		new->input = dst_discard;
924 		new->output = dst_discard;
925 
926 		if (dst_metrics_read_only(&ort->dst))
927 			new->_metrics = ort->dst._metrics;
928 		else
929 			dst_copy_metrics(new, &ort->dst);
930 		rt->rt6i_idev = ort->rt6i_idev;
931 		if (rt->rt6i_idev)
932 			in6_dev_hold(rt->rt6i_idev);
933 		rt->rt6i_expires = 0;
934 
935 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
936 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 		rt->rt6i_metric = 0;
938 
939 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940 #ifdef CONFIG_IPV6_SUBTREES
941 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942 #endif
943 
944 		dst_free(new);
945 	}
946 
947 	dst_release(dst_orig);
948 	return new ? new : ERR_PTR(-ENOMEM);
949 }
950 
951 /*
952  *	Destination cache support functions
953  */
954 
955 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956 {
957 	struct rt6_info *rt;
958 
959 	rt = (struct rt6_info *) dst;
960 
961 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 			if (!rt->rt6i_peer)
964 				rt6_bind_peer(rt, 0);
965 			rt->rt6i_peer_genid = rt6_peer_genid();
966 		}
967 		return dst;
968 	}
969 	return NULL;
970 }
971 
972 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973 {
974 	struct rt6_info *rt = (struct rt6_info *) dst;
975 
976 	if (rt) {
977 		if (rt->rt6i_flags & RTF_CACHE) {
978 			if (rt6_check_expired(rt)) {
979 				ip6_del_rt(rt);
980 				dst = NULL;
981 			}
982 		} else {
983 			dst_release(dst);
984 			dst = NULL;
985 		}
986 	}
987 	return dst;
988 }
989 
990 static void ip6_link_failure(struct sk_buff *skb)
991 {
992 	struct rt6_info *rt;
993 
994 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
995 
996 	rt = (struct rt6_info *) skb_dst(skb);
997 	if (rt) {
998 		if (rt->rt6i_flags&RTF_CACHE) {
999 			dst_set_expires(&rt->dst, 0);
1000 			rt->rt6i_flags |= RTF_EXPIRES;
1001 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 			rt->rt6i_node->fn_sernum = -1;
1003 	}
1004 }
1005 
1006 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007 {
1008 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1009 
1010 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 		rt6->rt6i_flags |= RTF_MODIFIED;
1012 		if (mtu < IPV6_MIN_MTU) {
1013 			u32 features = dst_metric(dst, RTAX_FEATURES);
1014 			mtu = IPV6_MIN_MTU;
1015 			features |= RTAX_FEATURE_ALLFRAG;
1016 			dst_metric_set(dst, RTAX_FEATURES, features);
1017 		}
1018 		dst_metric_set(dst, RTAX_MTU, mtu);
1019 	}
1020 }
1021 
1022 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1023 {
1024 	struct net_device *dev = dst->dev;
1025 	unsigned int mtu = dst_mtu(dst);
1026 	struct net *net = dev_net(dev);
1027 
1028 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029 
1030 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1032 
1033 	/*
1034 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1037 	 * rely only on pmtu discovery"
1038 	 */
1039 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 		mtu = IPV6_MAXPLEN;
1041 	return mtu;
1042 }
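/*
 * Worked example, assuming a standard 1500-byte Ethernet MTU:
 * ip6_default_advmss() above yields 1500 - 40 (IPv6 header) - 20 (TCP
 * header) = 1440, which normally sits above ip6_rt_min_advmss and well
 * below IPV6_MAXPLEN, so it is returned unchanged.
 */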
1043 
1044 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045 {
1046 	unsigned int mtu = IPV6_MIN_MTU;
1047 	struct inet6_dev *idev;
1048 
1049 	rcu_read_lock();
1050 	idev = __in6_dev_get(dst->dev);
1051 	if (idev)
1052 		mtu = idev->cnf.mtu6;
1053 	rcu_read_unlock();
1054 
1055 	return mtu;
1056 }
1057 
1058 static struct dst_entry *icmp6_dst_gc_list;
1059 static DEFINE_SPINLOCK(icmp6_dst_lock);
1060 
1061 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1062 				  struct neighbour *neigh,
1063 				  const struct in6_addr *addr)
1064 {
1065 	struct rt6_info *rt;
1066 	struct inet6_dev *idev = in6_dev_get(dev);
1067 	struct net *net = dev_net(dev);
1068 
1069 	if (unlikely(idev == NULL))
1070 		return NULL;
1071 
1072 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1073 	if (unlikely(rt == NULL)) {
1074 		in6_dev_put(idev);
1075 		goto out;
1076 	}
1077 
1078 	if (neigh)
1079 		neigh_hold(neigh);
1080 	else {
1081 		neigh = ndisc_get_neigh(dev, addr);
1082 		if (IS_ERR(neigh))
1083 			neigh = NULL;
1084 	}
1085 
1086 	rt->dst.flags |= DST_HOST;
1087 	rt->dst.output  = ip6_output;
1088 	dst_set_neighbour(&rt->dst, neigh);
1089 	atomic_set(&rt->dst.__refcnt, 1);
1090 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1091 	rt->rt6i_dst.plen = 128;
1092 	rt->rt6i_idev     = idev;
1093 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1094 
1095 	spin_lock_bh(&icmp6_dst_lock);
1096 	rt->dst.next = icmp6_dst_gc_list;
1097 	icmp6_dst_gc_list = &rt->dst;
1098 	spin_unlock_bh(&icmp6_dst_lock);
1099 
1100 	fib6_force_start_gc(net);
1101 
1102 out:
1103 	return &rt->dst;
1104 }
1105 
1106 int icmp6_dst_gc(void)
1107 {
1108 	struct dst_entry *dst, **pprev;
1109 	int more = 0;
1110 
1111 	spin_lock_bh(&icmp6_dst_lock);
1112 	pprev = &icmp6_dst_gc_list;
1113 
1114 	while ((dst = *pprev) != NULL) {
1115 		if (!atomic_read(&dst->__refcnt)) {
1116 			*pprev = dst->next;
1117 			dst_free(dst);
1118 		} else {
1119 			pprev = &dst->next;
1120 			++more;
1121 		}
1122 	}
1123 
1124 	spin_unlock_bh(&icmp6_dst_lock);
1125 
1126 	return more;
1127 }
1128 
1129 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 			    void *arg)
1131 {
1132 	struct dst_entry *dst, **pprev;
1133 
1134 	spin_lock_bh(&icmp6_dst_lock);
1135 	pprev = &icmp6_dst_gc_list;
1136 	while ((dst = *pprev) != NULL) {
1137 		struct rt6_info *rt = (struct rt6_info *) dst;
1138 		if (func(rt, arg)) {
1139 			*pprev = dst->next;
1140 			dst_free(dst);
1141 		} else {
1142 			pprev = &dst->next;
1143 		}
1144 	}
1145 	spin_unlock_bh(&icmp6_dst_lock);
1146 }
1147 
1148 static int ip6_dst_gc(struct dst_ops *ops)
1149 {
1150 	unsigned long now = jiffies;
1151 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157 	int entries;
1158 
1159 	entries = dst_entries_get_fast(ops);
1160 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161 	    entries <= rt_max_size)
1162 		goto out;
1163 
1164 	net->ipv6.ip6_rt_gc_expire++;
1165 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 	net->ipv6.ip6_rt_last_gc = now;
1167 	entries = dst_entries_get_slow(ops);
1168 	if (entries < ops->gc_thresh)
1169 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170 out:
1171 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172 	return entries > rt_max_size;
1173 }
1174 
1175 /* Clean host part of a prefix. Not necessary in radix tree,
1176    but results in cleaner routing tables.
1177 
1178    Remove it only once everything is known to work!
1179  */
1180 
1181 int ip6_dst_hoplimit(struct dst_entry *dst)
1182 {
1183 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184 	if (hoplimit == 0) {
1185 		struct net_device *dev = dst->dev;
1186 		struct inet6_dev *idev;
1187 
1188 		rcu_read_lock();
1189 		idev = __in6_dev_get(dev);
1190 		if (idev)
1191 			hoplimit = idev->cnf.hop_limit;
1192 		else
1193 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1194 		rcu_read_unlock();
1195 	}
1196 	return hoplimit;
1197 }
1198 EXPORT_SYMBOL(ip6_dst_hoplimit);
1199 
1200 /*
1201  *
1202  */
1203 
1204 int ip6_route_add(struct fib6_config *cfg)
1205 {
1206 	int err;
1207 	struct net *net = cfg->fc_nlinfo.nl_net;
1208 	struct rt6_info *rt = NULL;
1209 	struct net_device *dev = NULL;
1210 	struct inet6_dev *idev = NULL;
1211 	struct fib6_table *table;
1212 	int addr_type;
1213 
1214 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215 		return -EINVAL;
1216 #ifndef CONFIG_IPV6_SUBTREES
1217 	if (cfg->fc_src_len)
1218 		return -EINVAL;
1219 #endif
1220 	if (cfg->fc_ifindex) {
1221 		err = -ENODEV;
1222 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223 		if (!dev)
1224 			goto out;
1225 		idev = in6_dev_get(dev);
1226 		if (!idev)
1227 			goto out;
1228 	}
1229 
1230 	if (cfg->fc_metric == 0)
1231 		cfg->fc_metric = IP6_RT_PRIO_USER;
1232 
1233 	table = fib6_new_table(net, cfg->fc_table);
1234 	if (table == NULL) {
1235 		err = -ENOBUFS;
1236 		goto out;
1237 	}
1238 
1239 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240 
1241 	if (rt == NULL) {
1242 		err = -ENOMEM;
1243 		goto out;
1244 	}
1245 
1246 	rt->dst.obsolete = -1;
1247 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249 				0;
1250 
1251 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252 		cfg->fc_protocol = RTPROT_BOOT;
1253 	rt->rt6i_protocol = cfg->fc_protocol;
1254 
1255 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256 
1257 	if (addr_type & IPV6_ADDR_MULTICAST)
1258 		rt->dst.input = ip6_mc_input;
1259 	else if (cfg->fc_flags & RTF_LOCAL)
1260 		rt->dst.input = ip6_input;
1261 	else
1262 		rt->dst.input = ip6_forward;
1263 
1264 	rt->dst.output = ip6_output;
1265 
1266 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268 	if (rt->rt6i_dst.plen == 128)
1269 	       rt->dst.flags |= DST_HOST;
1270 
1271 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273 		if (!metrics) {
1274 			err = -ENOMEM;
1275 			goto out;
1276 		}
1277 		dst_init_metrics(&rt->dst, metrics, 0);
1278 	}
1279 #ifdef CONFIG_IPV6_SUBTREES
1280 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281 	rt->rt6i_src.plen = cfg->fc_src_len;
1282 #endif
1283 
1284 	rt->rt6i_metric = cfg->fc_metric;
1285 
1286 	/* We cannot add true routes via loopback here,
1287 	   they would result in kernel looping; promote them to reject routes
1288 	 */
1289 	if ((cfg->fc_flags & RTF_REJECT) ||
1290 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291 					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292 		/* hold loopback dev/idev if we haven't done so. */
1293 		if (dev != net->loopback_dev) {
1294 			if (dev) {
1295 				dev_put(dev);
1296 				in6_dev_put(idev);
1297 			}
1298 			dev = net->loopback_dev;
1299 			dev_hold(dev);
1300 			idev = in6_dev_get(dev);
1301 			if (!idev) {
1302 				err = -ENODEV;
1303 				goto out;
1304 			}
1305 		}
1306 		rt->dst.output = ip6_pkt_discard_out;
1307 		rt->dst.input = ip6_pkt_discard;
1308 		rt->dst.error = -ENETUNREACH;
1309 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310 		goto install_route;
1311 	}
1312 
1313 	if (cfg->fc_flags & RTF_GATEWAY) {
1314 		const struct in6_addr *gw_addr;
1315 		int gwa_type;
1316 
1317 		gw_addr = &cfg->fc_gateway;
1318 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319 		gwa_type = ipv6_addr_type(gw_addr);
1320 
1321 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322 			struct rt6_info *grt;
1323 
1324 			/* IPv6 strictly prohibits using non-link-local
1325 			   addresses as nexthop addresses.
1326 			   Otherwise, the router will not be able to send redirects.
1327 			   That is usually desirable, but in some (rare!) circumstances
1328 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329 			   some exceptions. --ANK
1330 			 */
1331 			err = -EINVAL;
1332 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333 				goto out;
1334 
1335 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336 
1337 			err = -EHOSTUNREACH;
1338 			if (grt == NULL)
1339 				goto out;
1340 			if (dev) {
1341 				if (dev != grt->rt6i_dev) {
1342 					dst_release(&grt->dst);
1343 					goto out;
1344 				}
1345 			} else {
1346 				dev = grt->rt6i_dev;
1347 				idev = grt->rt6i_idev;
1348 				dev_hold(dev);
1349 				in6_dev_hold(grt->rt6i_idev);
1350 			}
1351 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352 				err = 0;
1353 			dst_release(&grt->dst);
1354 
1355 			if (err)
1356 				goto out;
1357 		}
1358 		err = -EINVAL;
1359 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360 			goto out;
1361 	}
1362 
1363 	err = -ENODEV;
1364 	if (dev == NULL)
1365 		goto out;
1366 
1367 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369 			err = -EINVAL;
1370 			goto out;
1371 		}
1372 		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373 		rt->rt6i_prefsrc.plen = 128;
1374 	} else
1375 		rt->rt6i_prefsrc.plen = 0;
1376 
1377 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378 		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379 		if (IS_ERR(n)) {
1380 			err = PTR_ERR(n);
1381 			goto out;
1382 		}
1383 		dst_set_neighbour(&rt->dst, n);
1384 	}
1385 
1386 	rt->rt6i_flags = cfg->fc_flags;
1387 
1388 install_route:
1389 	if (cfg->fc_mx) {
1390 		struct nlattr *nla;
1391 		int remaining;
1392 
1393 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394 			int type = nla_type(nla);
1395 
1396 			if (type) {
1397 				if (type > RTAX_MAX) {
1398 					err = -EINVAL;
1399 					goto out;
1400 				}
1401 
1402 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403 			}
1404 		}
1405 	}
1406 
1407 	rt->dst.dev = dev;
1408 	rt->rt6i_idev = idev;
1409 	rt->rt6i_table = table;
1410 
1411 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412 
1413 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414 
1415 out:
1416 	if (dev)
1417 		dev_put(dev);
1418 	if (idev)
1419 		in6_dev_put(idev);
1420 	if (rt)
1421 		dst_free(&rt->dst);
1422 	return err;
1423 }
1424 
1425 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1426 {
1427 	int err;
1428 	struct fib6_table *table;
1429 	struct net *net = dev_net(rt->rt6i_dev);
1430 
1431 	if (rt == net->ipv6.ip6_null_entry)
1432 		return -ENOENT;
1433 
1434 	table = rt->rt6i_table;
1435 	write_lock_bh(&table->tb6_lock);
1436 
1437 	err = fib6_del(rt, info);
1438 	dst_release(&rt->dst);
1439 
1440 	write_unlock_bh(&table->tb6_lock);
1441 
1442 	return err;
1443 }
1444 
1445 int ip6_del_rt(struct rt6_info *rt)
1446 {
1447 	struct nl_info info = {
1448 		.nl_net = dev_net(rt->rt6i_dev),
1449 	};
1450 	return __ip6_del_rt(rt, &info);
1451 }
1452 
1453 static int ip6_route_del(struct fib6_config *cfg)
1454 {
1455 	struct fib6_table *table;
1456 	struct fib6_node *fn;
1457 	struct rt6_info *rt;
1458 	int err = -ESRCH;
1459 
1460 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461 	if (table == NULL)
1462 		return err;
1463 
1464 	read_lock_bh(&table->tb6_lock);
1465 
1466 	fn = fib6_locate(&table->tb6_root,
1467 			 &cfg->fc_dst, cfg->fc_dst_len,
1468 			 &cfg->fc_src, cfg->fc_src_len);
1469 
1470 	if (fn) {
1471 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472 			if (cfg->fc_ifindex &&
1473 			    (rt->rt6i_dev == NULL ||
1474 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475 				continue;
1476 			if (cfg->fc_flags & RTF_GATEWAY &&
1477 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478 				continue;
1479 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480 				continue;
1481 			dst_hold(&rt->dst);
1482 			read_unlock_bh(&table->tb6_lock);
1483 
1484 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1485 		}
1486 	}
1487 	read_unlock_bh(&table->tb6_lock);
1488 
1489 	return err;
1490 }
1491 
1492 /*
1493  *	Handle redirects
1494  */
1495 struct ip6rd_flowi {
1496 	struct flowi6 fl6;
1497 	struct in6_addr gateway;
1498 };
1499 
1500 static struct rt6_info *__ip6_route_redirect(struct net *net,
1501 					     struct fib6_table *table,
1502 					     struct flowi6 *fl6,
1503 					     int flags)
1504 {
1505 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506 	struct rt6_info *rt;
1507 	struct fib6_node *fn;
1508 
1509 	/*
1510 	 * Get the "current" route for this destination and
1511 	 * check if the redirect has come from an appropriate router.
1512 	 *
1513 	 * RFC 2461 specifies that redirects should only be
1514 	 * accepted if they come from the nexthop to the target.
1515 	 * Due to the way the routes are chosen, this notion
1516 	 * is a bit fuzzy and one might need to check all possible
1517 	 * routes.
1518 	 */
1519 
1520 	read_lock_bh(&table->tb6_lock);
1521 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522 restart:
1523 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524 		/*
1525 		 * Current route is on-link; redirect is always invalid.
1526 		 *
1527 		 * It seems the previous statement is not true. It could
1528 		 * be a node which regards us as on-link (e.g. a proxy ndisc
1529 		 * host). But then the router serving it might decide that we
1530 		 * should know the truth 8)8) --ANK (980726).
1531 		 */
1532 		if (rt6_check_expired(rt))
1533 			continue;
1534 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535 			continue;
1536 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537 			continue;
1538 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539 			continue;
1540 		break;
1541 	}
1542 
1543 	if (!rt)
1544 		rt = net->ipv6.ip6_null_entry;
1545 	BACKTRACK(net, &fl6->saddr);
1546 out:
1547 	dst_hold(&rt->dst);
1548 
1549 	read_unlock_bh(&table->tb6_lock);
1550 
1551 	return rt;
1552 };
1553 
1554 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555 					   const struct in6_addr *src,
1556 					   const struct in6_addr *gateway,
1557 					   struct net_device *dev)
1558 {
1559 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560 	struct net *net = dev_net(dev);
1561 	struct ip6rd_flowi rdfl = {
1562 		.fl6 = {
1563 			.flowi6_oif = dev->ifindex,
1564 			.daddr = *dest,
1565 			.saddr = *src,
1566 		},
1567 	};
1568 
1569 	ipv6_addr_copy(&rdfl.gateway, gateway);
1570 
1571 	if (rt6_need_strict(dest))
1572 		flags |= RT6_LOOKUP_F_IFACE;
1573 
1574 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575 						   flags, __ip6_route_redirect);
1576 }
1577 
1578 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579 		  const struct in6_addr *saddr,
1580 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581 {
1582 	struct rt6_info *rt, *nrt = NULL;
1583 	struct netevent_redirect netevent;
1584 	struct net *net = dev_net(neigh->dev);
1585 
1586 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587 
1588 	if (rt == net->ipv6.ip6_null_entry) {
1589 		if (net_ratelimit())
1590 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591 			       "for redirect target\n");
1592 		goto out;
1593 	}
1594 
1595 	/*
1596 	 *	We have finally decided to accept it.
1597 	 */
1598 
1599 	neigh_update(neigh, lladdr, NUD_STALE,
1600 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601 		     NEIGH_UPDATE_F_OVERRIDE|
1602 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603 				     NEIGH_UPDATE_F_ISROUTER))
1604 		     );
1605 
1606 	/*
1607 	 * Redirect received -> path was valid.
1608 	 * Look, redirects are sent only in response to data packets,
1609 	 * so this nexthop is apparently reachable. --ANK
1610 	 */
1611 	dst_confirm(&rt->dst);
1612 
1613 	/* Duplicate redirect: silently ignore. */
1614 	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615 		goto out;
1616 
1617 	nrt = ip6_rt_copy(rt, dest);
1618 	if (nrt == NULL)
1619 		goto out;
1620 
1621 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622 	if (on_link)
1623 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624 
1625 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627 
1628 	if (ip6_ins_rt(nrt))
1629 		goto out;
1630 
1631 	netevent.old = &rt->dst;
1632 	netevent.new = &nrt->dst;
1633 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634 
1635 	if (rt->rt6i_flags&RTF_CACHE) {
1636 		ip6_del_rt(rt);
1637 		return;
1638 	}
1639 
1640 out:
1641 	dst_release(&rt->dst);
1642 }
1643 
1644 /*
1645  *	Handle ICMP "packet too big" messages
1646  *	i.e. Path MTU discovery
1647  */
1648 
1649 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650 			     struct net *net, u32 pmtu, int ifindex)
1651 {
1652 	struct rt6_info *rt, *nrt;
1653 	int allfrag = 0;
1654 again:
1655 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656 	if (rt == NULL)
1657 		return;
1658 
1659 	if (rt6_check_expired(rt)) {
1660 		ip6_del_rt(rt);
1661 		goto again;
1662 	}
1663 
1664 	if (pmtu >= dst_mtu(&rt->dst))
1665 		goto out;
1666 
1667 	if (pmtu < IPV6_MIN_MTU) {
1668 		/*
1669 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670 		 * MTU (1280) and a fragment header should always be included
1671 		 * after a node receiving Too Big message reporting PMTU is
1672 		 * less than the IPv6 Minimum Link MTU.
1673 		 */
1674 		pmtu = IPV6_MIN_MTU;
1675 		allfrag = 1;
1676 	}
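	/*
	 * When allfrag is set, RTAX_FEATURE_ALLFRAG is added to the route
	 * metrics below; the output path then includes a fragment header in
	 * every packet for this destination, as RFC 2460 requires when a
	 * Packet Too Big message reports an MTU below the 1280-byte minimum.
	 */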
1677 
1678 	/* New mtu received -> path was valid.
1679 	   Packet Too Big messages are sent only in response to data packets,
1680 	   so this nexthop is apparently reachable. --ANK
1681 	 */
1682 	dst_confirm(&rt->dst);
1683 
1684 	/* Host route. If it is static, it would be better
1685 	/* Host route. If it is static, it would be better
1686 	   not to override it but to add a new one, so that
1687 	   when the cache entry expires the old pmtu
1688 	   is restored automatically.
1689 	if (rt->rt6i_flags & RTF_CACHE) {
1690 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691 		if (allfrag) {
1692 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693 			features |= RTAX_FEATURE_ALLFRAG;
1694 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695 		}
1696 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698 		goto out;
1699 	}
1700 
1701 	/* Network route.
1702 	   Two cases are possible:
1703 	   1. It is connected route. Action: COW
1704 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705 	 */
1706 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708 	else
1709 		nrt = rt6_alloc_clone(rt, daddr);
1710 
1711 	if (nrt) {
1712 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713 		if (allfrag) {
1714 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715 			features |= RTAX_FEATURE_ALLFRAG;
1716 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717 		}
1718 
1719 		/* According to RFC 1981, detection of a PMTU increase should not
1720 		 * be attempted within 5 minutes; the recommended timer is 10 minutes.
1721 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1722 		 * which defaults to 10 minutes. Once it expires, the decreased pmtu
1723 		 * is dropped and detection of a PMTU increase happens automatically.
1724 		 */
1725 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727 
1728 		ip6_ins_rt(nrt);
1729 	}
1730 out:
1731 	dst_release(&rt->dst);
1732 }
1733 
1734 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735 			struct net_device *dev, u32 pmtu)
1736 {
1737 	struct net *net = dev_net(dev);
1738 
1739 	/*
1740 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741 	 * is sending along the path" that caused the Packet Too Big message.
1742 	 * Since it's not possible in the general case to determine which
1743 	 * interface was used to send the original packet, we update the MTU
1744 	 * on the interface that will be used to send future packets. We also
1745 	 * update the MTU on the interface that received the Packet Too Big in
1746 	 * case the original packet was forced out that interface with
1747 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748 	 * correct behaviour, which would be to update the MTU on all
1749 	 * interfaces.
1750 	 */
1751 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753 }
1754 
1755 /*
1756  *	Misc support functions
1757  */
1758 
1759 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760 				    const struct in6_addr *dest)
1761 {
1762 	struct net *net = dev_net(ort->rt6i_dev);
1763 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764 					    ort->dst.dev, 0);
1765 
1766 	if (rt) {
1767 		rt->dst.input = ort->dst.input;
1768 		rt->dst.output = ort->dst.output;
1769 		rt->dst.flags |= DST_HOST;
1770 
1771 		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772 		rt->rt6i_dst.plen = 128;
1773 		dst_copy_metrics(&rt->dst, &ort->dst);
1774 		rt->dst.error = ort->dst.error;
1775 		rt->rt6i_idev = ort->rt6i_idev;
1776 		if (rt->rt6i_idev)
1777 			in6_dev_hold(rt->rt6i_idev);
1778 		rt->dst.lastuse = jiffies;
1779 		rt->rt6i_expires = 0;
1780 
1781 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783 		rt->rt6i_metric = 0;
1784 
1785 #ifdef CONFIG_IPV6_SUBTREES
1786 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787 #endif
1788 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789 		rt->rt6i_table = ort->rt6i_table;
1790 	}
1791 	return rt;
1792 }
1793 
1794 #ifdef CONFIG_IPV6_ROUTE_INFO
1795 static struct rt6_info *rt6_get_route_info(struct net *net,
1796 					   const struct in6_addr *prefix, int prefixlen,
1797 					   const struct in6_addr *gwaddr, int ifindex)
1798 {
1799 	struct fib6_node *fn;
1800 	struct rt6_info *rt = NULL;
1801 	struct fib6_table *table;
1802 
1803 	table = fib6_get_table(net, RT6_TABLE_INFO);
1804 	if (table == NULL)
1805 		return NULL;
1806 
1807 	write_lock_bh(&table->tb6_lock);
1808 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809 	if (!fn)
1810 		goto out;
1811 
1812 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813 		if (rt->rt6i_dev->ifindex != ifindex)
1814 			continue;
1815 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816 			continue;
1817 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818 			continue;
1819 		dst_hold(&rt->dst);
1820 		break;
1821 	}
1822 out:
1823 	write_unlock_bh(&table->tb6_lock);
1824 	return rt;
1825 }
1826 
1827 static struct rt6_info *rt6_add_route_info(struct net *net,
1828 					   const struct in6_addr *prefix, int prefixlen,
1829 					   const struct in6_addr *gwaddr, int ifindex,
1830 					   unsigned pref)
1831 {
1832 	struct fib6_config cfg = {
1833 		.fc_table	= RT6_TABLE_INFO,
1834 		.fc_metric	= IP6_RT_PRIO_USER,
1835 		.fc_ifindex	= ifindex,
1836 		.fc_dst_len	= prefixlen,
1837 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838 				  RTF_UP | RTF_PREF(pref),
1839 		.fc_nlinfo.pid = 0,
1840 		.fc_nlinfo.nlh = NULL,
1841 		.fc_nlinfo.nl_net = net,
1842 	};
1843 
1844 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846 
1847 	/* We should treat it as a default route if prefix length is 0. */
1848 	if (!prefixlen)
1849 		cfg.fc_flags |= RTF_DEFAULT;
1850 
1851 	ip6_route_add(&cfg);
1852 
1853 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854 }
1855 #endif
1856 
1857 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1858 {
1859 	struct rt6_info *rt;
1860 	struct fib6_table *table;
1861 
1862 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863 	if (table == NULL)
1864 		return NULL;
1865 
1866 	write_lock_bh(&table->tb6_lock);
1867 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868 		if (dev == rt->rt6i_dev &&
1869 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871 			break;
1872 	}
1873 	if (rt)
1874 		dst_hold(&rt->dst);
1875 	write_unlock_bh(&table->tb6_lock);
1876 	return rt;
1877 }
1878 
1879 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1880 				     struct net_device *dev,
1881 				     unsigned int pref)
1882 {
1883 	struct fib6_config cfg = {
1884 		.fc_table	= RT6_TABLE_DFLT,
1885 		.fc_metric	= IP6_RT_PRIO_USER,
1886 		.fc_ifindex	= dev->ifindex,
1887 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889 		.fc_nlinfo.pid = 0,
1890 		.fc_nlinfo.nlh = NULL,
1891 		.fc_nlinfo.nl_net = dev_net(dev),
1892 	};
1893 
1894 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895 
1896 	ip6_route_add(&cfg);
1897 
1898 	return rt6_get_dflt_router(gwaddr, dev);
1899 }
1900 
1901 void rt6_purge_dflt_routers(struct net *net)
1902 {
1903 	struct rt6_info *rt;
1904 	struct fib6_table *table;
1905 
1906 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908 	if (table == NULL)
1909 		return;
1910 
1911 restart:
1912 	read_lock_bh(&table->tb6_lock);
1913 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915 			dst_hold(&rt->dst);
1916 			read_unlock_bh(&table->tb6_lock);
1917 			ip6_del_rt(rt);
1918 			goto restart;
1919 		}
1920 	}
1921 	read_unlock_bh(&table->tb6_lock);
1922 }
1923 
1924 static void rtmsg_to_fib6_config(struct net *net,
1925 				 struct in6_rtmsg *rtmsg,
1926 				 struct fib6_config *cfg)
1927 {
1928 	memset(cfg, 0, sizeof(*cfg));
1929 
1930 	cfg->fc_table = RT6_TABLE_MAIN;
1931 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932 	cfg->fc_metric = rtmsg->rtmsg_metric;
1933 	cfg->fc_expires = rtmsg->rtmsg_info;
1934 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936 	cfg->fc_flags = rtmsg->rtmsg_flags;
1937 
1938 	cfg->fc_nlinfo.nl_net = net;
1939 
1940 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943 }
1944 
1945 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946 {
1947 	struct fib6_config cfg;
1948 	struct in6_rtmsg rtmsg;
1949 	int err;
1950 
1951 	switch(cmd) {
1952 	case SIOCADDRT:		/* Add a route */
1953 	case SIOCDELRT:		/* Delete a route */
1954 		if (!capable(CAP_NET_ADMIN))
1955 			return -EPERM;
1956 		err = copy_from_user(&rtmsg, arg,
1957 				     sizeof(struct in6_rtmsg));
1958 		if (err)
1959 			return -EFAULT;
1960 
1961 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962 
1963 		rtnl_lock();
1964 		switch (cmd) {
1965 		case SIOCADDRT:
1966 			err = ip6_route_add(&cfg);
1967 			break;
1968 		case SIOCDELRT:
1969 			err = ip6_route_del(&cfg);
1970 			break;
1971 		default:
1972 			err = -EINVAL;
1973 		}
1974 		rtnl_unlock();
1975 
1976 		return err;
1977 	}
1978 
1979 	return -EINVAL;
1980 }
1981 
1982 /*
1983  *	Drop the packet on the floor
1984  */
1985 
1986 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987 {
1988 	int type;
1989 	struct dst_entry *dst = skb_dst(skb);
1990 	switch (ipstats_mib_noroutes) {
1991 	case IPSTATS_MIB_INNOROUTES:
1992 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993 		if (type == IPV6_ADDR_ANY) {
1994 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995 				      IPSTATS_MIB_INADDRERRORS);
1996 			break;
1997 		}
1998 		/* FALLTHROUGH */
1999 	case IPSTATS_MIB_OUTNOROUTES:
2000 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001 			      ipstats_mib_noroutes);
2002 		break;
2003 	}
2004 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005 	kfree_skb(skb);
2006 	return 0;
2007 }
2008 
2009 static int ip6_pkt_discard(struct sk_buff *skb)
2010 {
2011 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012 }
2013 
2014 static int ip6_pkt_discard_out(struct sk_buff *skb)
2015 {
2016 	skb->dev = skb_dst(skb)->dev;
2017 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018 }
2019 
2020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021 
2022 static int ip6_pkt_prohibit(struct sk_buff *skb)
2023 {
2024 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025 }
2026 
2027 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028 {
2029 	skb->dev = skb_dst(skb)->dev;
2030 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031 }
2032 
2033 #endif
2034 
2035 /*
2036  *	Allocate a dst for local (unicast / anycast) address.
2037  */
2038 
2039 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040 				    const struct in6_addr *addr,
2041 				    int anycast)
2042 {
2043 	struct net *net = dev_net(idev->dev);
2044 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045 					    net->loopback_dev, 0);
2046 	struct neighbour *neigh;
2047 
2048 	if (rt == NULL) {
2049 		if (net_ratelimit())
2050 			pr_warning("IPv6:  Maximum number of routes reached,"
2051 				   " consider increasing route/max_size.\n");
2052 		return ERR_PTR(-ENOMEM);
2053 	}
2054 
2055 	in6_dev_hold(idev);
2056 
2057 	rt->dst.flags |= DST_HOST;
2058 	rt->dst.input = ip6_input;
2059 	rt->dst.output = ip6_output;
2060 	rt->rt6i_idev = idev;
2061 	rt->dst.obsolete = -1;
2062 
2063 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064 	if (anycast)
2065 		rt->rt6i_flags |= RTF_ANYCAST;
2066 	else
2067 		rt->rt6i_flags |= RTF_LOCAL;
2068 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069 	if (IS_ERR(neigh)) {
2070 		dst_free(&rt->dst);
2071 
2072 		return ERR_CAST(neigh);
2073 	}
2074 	dst_set_neighbour(&rt->dst, neigh);
2075 
2076 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077 	rt->rt6i_dst.plen = 128;
2078 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079 
2080 	atomic_set(&rt->dst.__refcnt, 1);
2081 
2082 	return rt;
2083 }
2084 
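/*
 *	Pick a source address for the given destination: prefer the
 *	route's configured prefsrc, otherwise fall back to the normal
 *	source address selection in ipv6_dev_get_saddr().
 */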
2085 int ip6_route_get_saddr(struct net *net,
2086 			struct rt6_info *rt,
2087 			const struct in6_addr *daddr,
2088 			unsigned int prefs,
2089 			struct in6_addr *saddr)
2090 {
2091 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092 	int err = 0;
2093 	if (rt->rt6i_prefsrc.plen)
2094 		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095 	else
2096 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097 					 daddr, prefs, saddr);
2098 	return err;
2099 }
2100 
2101 /* remove the deleted IP address from prefsrc entries */
2102 struct arg_dev_net_ip {
2103 	struct net_device *dev;
2104 	struct net *net;
2105 	struct in6_addr *addr;
2106 };
2107 
2108 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109 {
2110 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113 
2114 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115 	    rt != net->ipv6.ip6_null_entry &&
2116 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117 		/* remove prefsrc entry */
2118 		rt->rt6i_prefsrc.plen = 0;
2119 	}
2120 	return 0;
2121 }
2122 
2123 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124 {
2125 	struct net *net = dev_net(ifp->idev->dev);
2126 	struct arg_dev_net_ip adni = {
2127 		.dev = ifp->idev->dev,
2128 		.net = net,
2129 		.addr = &ifp->addr,
2130 	};
2131 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132 }
2133 
2134 struct arg_dev_net {
2135 	struct net_device *dev;
2136 	struct net *net;
2137 };
2138 
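/*
 *	Callback for fib6_clean_all()/icmp6_clean_all(): returning -1 asks
 *	the walker to delete routes bound to the device going down (or to
 *	any device when dev is NULL), sparing the namespace's null entry.
 */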
2139 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140 {
2141 	const struct arg_dev_net *adn = arg;
2142 	const struct net_device *dev = adn->dev;
2143 
2144 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145 	    rt != adn->net->ipv6.ip6_null_entry) {
2146 		RT6_TRACE("deleted by ifdown %p\n", rt);
2147 		return -1;
2148 	}
2149 	return 0;
2150 }
2151 
2152 void rt6_ifdown(struct net *net, struct net_device *dev)
2153 {
2154 	struct arg_dev_net adn = {
2155 		.dev = dev,
2156 		.net = net,
2157 	};
2158 
2159 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160 	icmp6_clean_all(fib6_ifdown, &adn);
2161 }
2162 
2163 struct rt6_mtu_change_arg
2164 {
2165 	struct net_device *dev;
2166 	unsigned mtu;
2167 };
2168 
2169 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170 {
2171 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172 	struct inet6_dev *idev;
2173 
2174 	/* In IPv6 PMTU discovery is not optional,
2175 	   so the RTAX_MTU lock cannot disable it.
2176 	   We still use this lock to block changes
2177 	   caused by addrconf/ndisc.
2178 	*/
2179 
2180 	idev = __in6_dev_get(arg->dev);
2181 	if (idev == NULL)
2182 		return 0;
2183 
2184 	/* For an administrative MTU increase there is no way to discover
2185 	   an IPv6 PMTU increase, so the PMTU has to be updated here.
2186 	   Since RFC 1981 doesn't cover administrative MTU increases,
2187 	   updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
2188 	 */
2189 	/*
2190 	   If the new MTU is less than the route PMTU, the new MTU will be
2191 	   the lowest MTU in the path; update the route PMTU to reflect the
2192 	   decrease.  If the new MTU is greater than the route PMTU, and the
2193 	   old MTU was the lowest MTU in the path, update the route PMTU to
2194 	   reflect the increase.  In that case, if another node in the path
2195 	   still has the lowest MTU, a Packet Too Big message will trigger
2196 	   PMTU discovery again.
2197 	 */
2198 	if (rt->rt6i_dev == arg->dev &&
2199 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201 	     (dst_mtu(&rt->dst) < arg->mtu &&
2202 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204 	}
2205 	return 0;
2206 }
2207 
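/*
 *	Propagate a device MTU change to every route using that device via
 *	the rt6_mtu_change_route() callback above.
 */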
2208 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209 {
2210 	struct rt6_mtu_change_arg arg = {
2211 		.dev = dev,
2212 		.mtu = mtu,
2213 	};
2214 
2215 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216 }
2217 
2218 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2219 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2220 	[RTA_OIF]               = { .type = NLA_U32 },
2221 	[RTA_IIF]		= { .type = NLA_U32 },
2222 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223 	[RTA_METRICS]           = { .type = NLA_NESTED },
2224 };
2225 
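/*
 *	Parse an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 *	struct fib6_config.
 */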
2226 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227 			      struct fib6_config *cfg)
2228 {
2229 	struct rtmsg *rtm;
2230 	struct nlattr *tb[RTA_MAX+1];
2231 	int err;
2232 
2233 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234 	if (err < 0)
2235 		goto errout;
2236 
2237 	err = -EINVAL;
2238 	rtm = nlmsg_data(nlh);
2239 	memset(cfg, 0, sizeof(*cfg));
2240 
2241 	cfg->fc_table = rtm->rtm_table;
2242 	cfg->fc_dst_len = rtm->rtm_dst_len;
2243 	cfg->fc_src_len = rtm->rtm_src_len;
2244 	cfg->fc_flags = RTF_UP;
2245 	cfg->fc_protocol = rtm->rtm_protocol;
2246 
2247 	if (rtm->rtm_type == RTN_UNREACHABLE)
2248 		cfg->fc_flags |= RTF_REJECT;
2249 
2250 	if (rtm->rtm_type == RTN_LOCAL)
2251 		cfg->fc_flags |= RTF_LOCAL;
2252 
2253 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254 	cfg->fc_nlinfo.nlh = nlh;
2255 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256 
2257 	if (tb[RTA_GATEWAY]) {
2258 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259 		cfg->fc_flags |= RTF_GATEWAY;
2260 	}
2261 
2262 	if (tb[RTA_DST]) {
2263 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264 
2265 		if (nla_len(tb[RTA_DST]) < plen)
2266 			goto errout;
2267 
2268 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269 	}
2270 
2271 	if (tb[RTA_SRC]) {
2272 		int plen = (rtm->rtm_src_len + 7) >> 3;
2273 
2274 		if (nla_len(tb[RTA_SRC]) < plen)
2275 			goto errout;
2276 
2277 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278 	}
2279 
2280 	if (tb[RTA_PREFSRC])
2281 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282 
2283 	if (tb[RTA_OIF])
2284 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285 
2286 	if (tb[RTA_PRIORITY])
2287 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288 
2289 	if (tb[RTA_METRICS]) {
2290 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292 	}
2293 
2294 	if (tb[RTA_TABLE])
2295 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296 
2297 	err = 0;
2298 errout:
2299 	return err;
2300 }
2301 
2302 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2303 {
2304 	struct fib6_config cfg;
2305 	int err;
2306 
2307 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308 	if (err < 0)
2309 		return err;
2310 
2311 	return ip6_route_del(&cfg);
2312 }
2313 
2314 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2315 {
2316 	struct fib6_config cfg;
2317 	int err;
2318 
2319 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320 	if (err < 0)
2321 		return err;
2322 
2323 	return ip6_route_add(&cfg);
2324 }
2325 
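/*
 *	Worst-case size of a single route message, used to size the skb
 *	allocated in inet6_rt_notify().
 */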
2326 static inline size_t rt6_nlmsg_size(void)
2327 {
2328 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329 	       + nla_total_size(16) /* RTA_SRC */
2330 	       + nla_total_size(16) /* RTA_DST */
2331 	       + nla_total_size(16) /* RTA_GATEWAY */
2332 	       + nla_total_size(16) /* RTA_PREFSRC */
2333 	       + nla_total_size(4) /* RTA_TABLE */
2334 	       + nla_total_size(4) /* RTA_IIF */
2335 	       + nla_total_size(4) /* RTA_OIF */
2336 	       + nla_total_size(4) /* RTA_PRIORITY */
2337 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2339 }
2340 
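/*
 *	Fill one netlink route message describing the given route.  Returns
 *	the message length on success, 1 when a non-prefix route is skipped
 *	in a prefix-only dump, or -EMSGSIZE if the skb runs out of room.
 */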
2341 static int rt6_fill_node(struct net *net,
2342 			 struct sk_buff *skb, struct rt6_info *rt,
2343 			 struct in6_addr *dst, struct in6_addr *src,
2344 			 int iif, int type, u32 pid, u32 seq,
2345 			 int prefix, int nowait, unsigned int flags)
2346 {
2347 	struct rtmsg *rtm;
2348 	struct nlmsghdr *nlh;
2349 	long expires;
2350 	u32 table;
2351 	struct neighbour *n;
2352 
2353 	if (prefix) {	/* user wants prefix routes only */
2354 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355 			/* success since this is not a prefix route */
2356 			return 1;
2357 		}
2358 	}
2359 
2360 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361 	if (nlh == NULL)
2362 		return -EMSGSIZE;
2363 
2364 	rtm = nlmsg_data(nlh);
2365 	rtm->rtm_family = AF_INET6;
2366 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367 	rtm->rtm_src_len = rt->rt6i_src.plen;
2368 	rtm->rtm_tos = 0;
2369 	if (rt->rt6i_table)
2370 		table = rt->rt6i_table->tb6_id;
2371 	else
2372 		table = RT6_TABLE_UNSPEC;
2373 	rtm->rtm_table = table;
2374 	NLA_PUT_U32(skb, RTA_TABLE, table);
2375 	if (rt->rt6i_flags&RTF_REJECT)
2376 		rtm->rtm_type = RTN_UNREACHABLE;
2377 	else if (rt->rt6i_flags&RTF_LOCAL)
2378 		rtm->rtm_type = RTN_LOCAL;
2379 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380 		rtm->rtm_type = RTN_LOCAL;
2381 	else
2382 		rtm->rtm_type = RTN_UNICAST;
2383 	rtm->rtm_flags = 0;
2384 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385 	rtm->rtm_protocol = rt->rt6i_protocol;
2386 	if (rt->rt6i_flags&RTF_DYNAMIC)
2387 		rtm->rtm_protocol = RTPROT_REDIRECT;
2388 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389 		rtm->rtm_protocol = RTPROT_KERNEL;
2390 	else if (rt->rt6i_flags&RTF_DEFAULT)
2391 		rtm->rtm_protocol = RTPROT_RA;
2392 
2393 	if (rt->rt6i_flags&RTF_CACHE)
2394 		rtm->rtm_flags |= RTM_F_CLONED;
2395 
2396 	if (dst) {
2397 		NLA_PUT(skb, RTA_DST, 16, dst);
2398 		rtm->rtm_dst_len = 128;
2399 	} else if (rtm->rtm_dst_len)
2400 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2401 #ifdef CONFIG_IPV6_SUBTREES
2402 	if (src) {
2403 		NLA_PUT(skb, RTA_SRC, 16, src);
2404 		rtm->rtm_src_len = 128;
2405 	} else if (rtm->rtm_src_len)
2406 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2407 #endif
2408 	if (iif) {
2409 #ifdef CONFIG_IPV6_MROUTE
2410 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412 			if (err <= 0) {
2413 				if (!nowait) {
2414 					if (err == 0)
2415 						return 0;
2416 					goto nla_put_failure;
2417 				} else {
2418 					if (err == -EMSGSIZE)
2419 						goto nla_put_failure;
2420 				}
2421 			}
2422 		} else
2423 #endif
2424 			NLA_PUT_U32(skb, RTA_IIF, iif);
2425 	} else if (dst) {
2426 		struct in6_addr saddr_buf;
2427 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2429 	}
2430 
2431 	if (rt->rt6i_prefsrc.plen) {
2432 		struct in6_addr saddr_buf;
2433 		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2435 	}
2436 
2437 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2438 		goto nla_put_failure;
2439 
2440 	rcu_read_lock();
2441 	n = dst_get_neighbour(&rt->dst);
2442 	if (n)
2443 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444 	rcu_read_unlock();
2445 
2446 	if (rt->dst.dev)
2447 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448 
2449 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2450 
2451 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452 		expires = 0;
2453 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454 		expires = rt->rt6i_expires - jiffies;
2455 	else
2456 		expires = INT_MAX;
2457 
2458 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459 			       expires, rt->dst.error) < 0)
2460 		goto nla_put_failure;
2461 
2462 	return nlmsg_end(skb, nlh);
2463 
2464 nla_put_failure:
2465 	nlmsg_cancel(skb, nlh);
2466 	return -EMSGSIZE;
2467 }
2468 
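/*
 *	Per-route callback for netlink route dumps: emit one RTM_NEWROUTE
 *	message per route, honoring the RTM_F_PREFIX filter.
 */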
2469 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2470 {
2471 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472 	int prefix;
2473 
2474 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477 	} else
2478 		prefix = 0;
2479 
2480 	return rt6_fill_node(arg->net,
2481 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483 		     prefix, 0, NLM_F_MULTI);
2484 }
2485 
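/*
 *	RTM_GETROUTE handler: perform a route lookup for the requested flow
 *	and unicast the resulting RTM_NEWROUTE message back to the caller.
 */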
2486 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2487 {
2488 	struct net *net = sock_net(in_skb->sk);
2489 	struct nlattr *tb[RTA_MAX+1];
2490 	struct rt6_info *rt;
2491 	struct sk_buff *skb;
2492 	struct rtmsg *rtm;
2493 	struct flowi6 fl6;
2494 	int err, iif = 0;
2495 
2496 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497 	if (err < 0)
2498 		goto errout;
2499 
2500 	err = -EINVAL;
2501 	memset(&fl6, 0, sizeof(fl6));
2502 
2503 	if (tb[RTA_SRC]) {
2504 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505 			goto errout;
2506 
2507 		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508 	}
2509 
2510 	if (tb[RTA_DST]) {
2511 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512 			goto errout;
2513 
2514 		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515 	}
2516 
2517 	if (tb[RTA_IIF])
2518 		iif = nla_get_u32(tb[RTA_IIF]);
2519 
2520 	if (tb[RTA_OIF])
2521 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2522 
2523 	if (iif) {
2524 		struct net_device *dev;
2525 		dev = __dev_get_by_index(net, iif);
2526 		if (!dev) {
2527 			err = -ENODEV;
2528 			goto errout;
2529 		}
2530 	}
2531 
2532 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533 	if (skb == NULL) {
2534 		err = -ENOBUFS;
2535 		goto errout;
2536 	}
2537 
2538 	/* Reserve room for dummy headers; this skb can pass
2539 	   through a good chunk of the routing engine.
2540 	 */
2541 	skb_reset_mac_header(skb);
2542 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543 
2544 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545 	skb_dst_set(skb, &rt->dst);
2546 
2547 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549 			    nlh->nlmsg_seq, 0, 0, 0);
2550 	if (err < 0) {
2551 		kfree_skb(skb);
2552 		goto errout;
2553 	}
2554 
2555 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556 errout:
2557 	return err;
2558 }
2559 
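/*
 *	Broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE) to
 *	RTNLGRP_IPV6_ROUTE listeners.
 */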
2560 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2561 {
2562 	struct sk_buff *skb;
2563 	struct net *net = info->nl_net;
2564 	u32 seq;
2565 	int err;
2566 
2567 	err = -ENOBUFS;
2568 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569 
2570 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571 	if (skb == NULL)
2572 		goto errout;
2573 
2574 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575 				event, info->pid, seq, 0, 0, 0);
2576 	if (err < 0) {
2577 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578 		WARN_ON(err == -EMSGSIZE);
2579 		kfree_skb(skb);
2580 		goto errout;
2581 	}
2582 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583 		    info->nlh, gfp_any());
2584 	return;
2585 errout:
2586 	if (err < 0)
2587 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588 }
2589 
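/*
 *	Netdevice notifier: once the loopback device is registered, bind
 *	the per-namespace special routes (null and, with policy routing,
 *	prohibit/blackhole) to it.
 */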
2590 static int ip6_route_dev_notify(struct notifier_block *this,
2591 				unsigned long event, void *data)
2592 {
2593 	struct net_device *dev = (struct net_device *)data;
2594 	struct net *net = dev_net(dev);
2595 
2596 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2597 		net->ipv6.ip6_null_entry->dst.dev = dev;
2598 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604 #endif
2605 	}
2606 
2607 	return NOTIFY_OK;
2608 }
2609 
2610 /*
2611  *	/proc
2612  */
2613 
2614 #ifdef CONFIG_PROC_FS
2615 
2616 struct rt6_proc_arg
2617 {
2618 	char *buffer;
2619 	int offset;
2620 	int length;
2621 	int skip;
2622 	int len;
2623 };
2624 
2625 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626 {
2627 	struct seq_file *m = p_arg;
2628 	struct neighbour *n;
2629 
2630 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631 
2632 #ifdef CONFIG_IPV6_SUBTREES
2633 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634 #else
2635 	seq_puts(m, "00000000000000000000000000000000 00 ");
2636 #endif
2637 	rcu_read_lock();
2638 	n = dst_get_neighbour(&rt->dst);
2639 	if (n) {
2640 		seq_printf(m, "%pi6", n->primary_key);
2641 	} else {
2642 		seq_puts(m, "00000000000000000000000000000000");
2643 	}
2644 	rcu_read_unlock();
2645 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647 		   rt->dst.__use, rt->rt6i_flags,
2648 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649 	return 0;
2650 }
2651 
2652 static int ipv6_route_show(struct seq_file *m, void *v)
2653 {
2654 	struct net *net = (struct net *)m->private;
2655 	fib6_clean_all(net, rt6_info_route, 0, m);
2656 	return 0;
2657 }
2658 
2659 static int ipv6_route_open(struct inode *inode, struct file *file)
2660 {
2661 	return single_open_net(inode, file, ipv6_route_show);
2662 }
2663 
2664 static const struct file_operations ipv6_route_proc_fops = {
2665 	.owner		= THIS_MODULE,
2666 	.open		= ipv6_route_open,
2667 	.read		= seq_read,
2668 	.llseek		= seq_lseek,
2669 	.release	= single_release_net,
2670 };
2671 
2672 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673 {
2674 	struct net *net = (struct net *)seq->private;
2675 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676 		   net->ipv6.rt6_stats->fib_nodes,
2677 		   net->ipv6.rt6_stats->fib_route_nodes,
2678 		   net->ipv6.rt6_stats->fib_rt_alloc,
2679 		   net->ipv6.rt6_stats->fib_rt_entries,
2680 		   net->ipv6.rt6_stats->fib_rt_cache,
2681 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682 		   net->ipv6.rt6_stats->fib_discarded_routes);
2683 
2684 	return 0;
2685 }
2686 
2687 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688 {
2689 	return single_open_net(inode, file, rt6_stats_seq_show);
2690 }
2691 
2692 static const struct file_operations rt6_stats_seq_fops = {
2693 	.owner	 = THIS_MODULE,
2694 	.open	 = rt6_stats_seq_open,
2695 	.read	 = seq_read,
2696 	.llseek	 = seq_lseek,
2697 	.release = single_release_net,
2698 };
2699 #endif	/* CONFIG_PROC_FS */
2700 
2701 #ifdef CONFIG_SYSCTL
2702 
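/*
 *	Handler for the write-only "flush" sysctl: writing a value forces a
 *	garbage-collection pass over the namespace's routing tables.
 */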
2703 static
2704 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706 {
2707 	struct net *net;
2708 	int delay;
2709 	if (!write)
2710 		return -EINVAL;
2711 
2712 	net = (struct net *)ctl->extra1;
2713 	delay = net->ipv6.sysctl.flush_delay;
2714 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716 	return 0;
2717 }
2718 
2719 ctl_table ipv6_route_table_template[] = {
2720 	{
2721 		.procname	=	"flush",
2722 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723 		.maxlen		=	sizeof(int),
2724 		.mode		=	0200,
2725 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726 	},
2727 	{
2728 		.procname	=	"gc_thresh",
2729 		.data		=	&ip6_dst_ops_template.gc_thresh,
2730 		.maxlen		=	sizeof(int),
2731 		.mode		=	0644,
2732 		.proc_handler	=	proc_dointvec,
2733 	},
2734 	{
2735 		.procname	=	"max_size",
2736 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737 		.maxlen		=	sizeof(int),
2738 		.mode		=	0644,
2739 		.proc_handler	=	proc_dointvec,
2740 	},
2741 	{
2742 		.procname	=	"gc_min_interval",
2743 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744 		.maxlen		=	sizeof(int),
2745 		.mode		=	0644,
2746 		.proc_handler	=	proc_dointvec_jiffies,
2747 	},
2748 	{
2749 		.procname	=	"gc_timeout",
2750 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751 		.maxlen		=	sizeof(int),
2752 		.mode		=	0644,
2753 		.proc_handler	=	proc_dointvec_jiffies,
2754 	},
2755 	{
2756 		.procname	=	"gc_interval",
2757 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758 		.maxlen		=	sizeof(int),
2759 		.mode		=	0644,
2760 		.proc_handler	=	proc_dointvec_jiffies,
2761 	},
2762 	{
2763 		.procname	=	"gc_elasticity",
2764 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765 		.maxlen		=	sizeof(int),
2766 		.mode		=	0644,
2767 		.proc_handler	=	proc_dointvec,
2768 	},
2769 	{
2770 		.procname	=	"mtu_expires",
2771 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772 		.maxlen		=	sizeof(int),
2773 		.mode		=	0644,
2774 		.proc_handler	=	proc_dointvec_jiffies,
2775 	},
2776 	{
2777 		.procname	=	"min_adv_mss",
2778 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779 		.maxlen		=	sizeof(int),
2780 		.mode		=	0644,
2781 		.proc_handler	=	proc_dointvec,
2782 	},
2783 	{
2784 		.procname	=	"gc_min_interval_ms",
2785 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786 		.maxlen		=	sizeof(int),
2787 		.mode		=	0644,
2788 		.proc_handler	=	proc_dointvec_ms_jiffies,
2789 	},
2790 	{ }
2791 };
2792 
2793 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794 {
2795 	struct ctl_table *table;
2796 
2797 	table = kmemdup(ipv6_route_table_template,
2798 			sizeof(ipv6_route_table_template),
2799 			GFP_KERNEL);
2800 
2801 	if (table) {
2802 		table[0].data = &net->ipv6.sysctl.flush_delay;
2803 		table[0].extra1 = net;
2804 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2813 	}
2814 
2815 	return table;
2816 }
2817 #endif
2818 
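/*
 *	Per-network-namespace setup: clone the dst_ops template, allocate
 *	the special null/prohibit/blackhole routes and install the default
 *	sysctl and GC settings.
 */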
2819 static int __net_init ip6_route_net_init(struct net *net)
2820 {
2821 	int ret = -ENOMEM;
2822 
2823 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824 	       sizeof(net->ipv6.ip6_dst_ops));
2825 
2826 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827 		goto out_ip6_dst_ops;
2828 
2829 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830 					   sizeof(*net->ipv6.ip6_null_entry),
2831 					   GFP_KERNEL);
2832 	if (!net->ipv6.ip6_null_entry)
2833 		goto out_ip6_dst_entries;
2834 	net->ipv6.ip6_null_entry->dst.path =
2835 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838 			 ip6_template_metrics, true);
2839 
2840 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843 					       GFP_KERNEL);
2844 	if (!net->ipv6.ip6_prohibit_entry)
2845 		goto out_ip6_null_entry;
2846 	net->ipv6.ip6_prohibit_entry->dst.path =
2847 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850 			 ip6_template_metrics, true);
2851 
2852 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854 					       GFP_KERNEL);
2855 	if (!net->ipv6.ip6_blk_hole_entry)
2856 		goto out_ip6_prohibit_entry;
2857 	net->ipv6.ip6_blk_hole_entry->dst.path =
2858 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861 			 ip6_template_metrics, true);
2862 #endif
2863 
2864 	net->ipv6.sysctl.flush_delay = 0;
2865 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872 
2873 #ifdef CONFIG_PROC_FS
2874 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876 #endif
2877 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878 
2879 	ret = 0;
2880 out:
2881 	return ret;
2882 
2883 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884 out_ip6_prohibit_entry:
2885 	kfree(net->ipv6.ip6_prohibit_entry);
2886 out_ip6_null_entry:
2887 	kfree(net->ipv6.ip6_null_entry);
2888 #endif
2889 out_ip6_dst_entries:
2890 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891 out_ip6_dst_ops:
2892 	goto out;
2893 }
2894 
2895 static void __net_exit ip6_route_net_exit(struct net *net)
2896 {
2897 #ifdef CONFIG_PROC_FS
2898 	proc_net_remove(net, "ipv6_route");
2899 	proc_net_remove(net, "rt6_stats");
2900 #endif
2901 	kfree(net->ipv6.ip6_null_entry);
2902 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903 	kfree(net->ipv6.ip6_prohibit_entry);
2904 	kfree(net->ipv6.ip6_blk_hole_entry);
2905 #endif
2906 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907 }
2908 
2909 static struct pernet_operations ip6_route_net_ops = {
2910 	.init = ip6_route_net_init,
2911 	.exit = ip6_route_net_exit,
2912 };
2913 
2914 static struct notifier_block ip6_route_dev_notifier = {
2915 	.notifier_call = ip6_route_dev_notify,
2916 	.priority = 0,
2917 };
2918 
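/*
 *	Boot-time initialization of the IPv6 routing subsystem: dst cache,
 *	per-namespace state, FIB, policy rules, rtnetlink handlers and the
 *	netdevice notifier.
 */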
2919 int __init ip6_route_init(void)
2920 {
2921 	int ret;
2922 
2923 	ret = -ENOMEM;
2924 	ip6_dst_ops_template.kmem_cachep =
2925 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926 				  SLAB_HWCACHE_ALIGN, NULL);
2927 	if (!ip6_dst_ops_template.kmem_cachep)
2928 		goto out;
2929 
2930 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931 	if (ret)
2932 		goto out_kmem_cache;
2933 
2934 	ret = register_pernet_subsys(&ip6_route_net_ops);
2935 	if (ret)
2936 		goto out_dst_entries;
2937 
2938 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939 
2940 	/* The loopback device is registered before this portion of code runs,
2941 	 * so the loopback reference in rt6_info is not taken automatically;
2942 	 * take it manually for init_net. */
2943 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950 #endif
2951 	ret = fib6_init();
2952 	if (ret)
2953 		goto out_register_subsys;
2954 
2955 	ret = xfrm6_init();
2956 	if (ret)
2957 		goto out_fib6_init;
2958 
2959 	ret = fib6_rules_init();
2960 	if (ret)
2961 		goto xfrm6_init;
2962 
2963 	ret = -ENOBUFS;
2964 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967 		goto fib6_rules_init;
2968 
2969 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970 	if (ret)
2971 		goto fib6_rules_init;
2972 
2973 out:
2974 	return ret;
2975 
2976 fib6_rules_init:
2977 	fib6_rules_cleanup();
2978 xfrm6_init:
2979 	xfrm6_fini();
2980 out_fib6_init:
2981 	fib6_gc_cleanup();
2982 out_register_subsys:
2983 	unregister_pernet_subsys(&ip6_route_net_ops);
2984 out_dst_entries:
2985 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986 out_kmem_cache:
2987 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988 	goto out;
2989 }
2990 
2991 void ip6_route_cleanup(void)
2992 {
2993 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2994 	fib6_rules_cleanup();
2995 	xfrm6_fini();
2996 	fib6_gc_cleanup();
2997 	unregister_pernet_subsys(&ip6_route_net_ops);
2998 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000 }
3001