xref: /linux/net/ipv6/route.c (revision e978aa7d7d57d04eb5f88a7507c4fb98577def77)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * RDBG() and RT6_TRACE() expand to printk() calls only when RT6_DEBUG is
 * at least 3; at lower levels both compile away to nothing.
 */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74 
/*
 * Forward declarations of static helpers that are referenced (mostly from
 * the dst_ops tables and route templates below) before they are defined.
 */
static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

/* Route Information option helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
100 
101 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 {
103 	struct rt6_info *rt = (struct rt6_info *) dst;
104 	struct inet_peer *peer;
105 	u32 *p = NULL;
106 
107 	if (!(rt->dst.flags & DST_HOST))
108 		return NULL;
109 
110 	if (!rt->rt6i_peer)
111 		rt6_bind_peer(rt, 1);
112 
113 	peer = rt->rt6i_peer;
114 	if (peer) {
115 		u32 *old_p = __DST_METRICS_PTR(old);
116 		unsigned long prev, new;
117 
118 		p = peer->metrics;
119 		if (inet_metrics_new(peer))
120 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121 
122 		new = (unsigned long) p;
123 		prev = cmpxchg(&dst->_metrics, old, new);
124 
125 		if (prev != old) {
126 			p = __DST_METRICS_PTR(prev);
127 			if (prev & DST_METRICS_READ_ONLY)
128 				p = NULL;
129 		}
130 	}
131 	return p;
132 }
133 
134 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135 {
136 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
137 }
138 
/*
 * dst_ops template for IPv6 routes: binds the generic dst callbacks to the
 * IPv6 implementations in this file and sets the gc threshold to 1024.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.default_mtu		=	ip6_default_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
156 
/* default_mtu hook for blackhole routes: always reports 0, @dst is unused. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int mtu = 0;

	return mtu;
}
161 
/* update_pmtu hook for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
165 
166 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
167 					 unsigned long old)
168 {
169 	return NULL;
170 }
171 
/*
 * dst_ops used for blackhole routes (see ip6_blackhole_route()): shares the
 * regular destroy/check/advmss/neigh_lookup hooks but uses the blackhole
 * variants for default_mtu, update_pmtu and cow_metrics.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.default_mtu		=	ip6_blackhole_default_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
183 
/* Read-only metrics array with only the hop limit slot preset (to 255). */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};
187 
/*
 * Template for the "null" route entry: a reject route whose input/output
 * handlers are ip6_pkt_discard()/ip6_pkt_discard_out() and whose dst error
 * is -ENETUNREACH.  It carries the largest possible metric.
 */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
202 
203 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
204 
/* Packet handlers used by the prohibit route template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route that hands
 * packets to ip6_pkt_prohibit()/ip6_pkt_prohibit_out() and reports -EACCES.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
222 
/*
 * Template for the "blackhole" route entry: a reject route whose input and
 * output handlers are both dst_discard() and whose dst error is -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
237 
238 #endif
239 
240 /* allocate dst with ip6_dst_ops */
241 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
242 					     struct net_device *dev,
243 					     int flags)
244 {
245 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
246 
247 	if (rt != NULL)
248 		memset(&rt->rt6i_table, 0,
249 			sizeof(*rt) - sizeof(struct dst_entry));
250 
251 	return rt;
252 }
253 
254 static void ip6_dst_destroy(struct dst_entry *dst)
255 {
256 	struct rt6_info *rt = (struct rt6_info *)dst;
257 	struct inet6_dev *idev = rt->rt6i_idev;
258 	struct inet_peer *peer = rt->rt6i_peer;
259 
260 	if (!(rt->dst.flags & DST_HOST))
261 		dst_destroy_metrics_generic(dst);
262 
263 	if (idev != NULL) {
264 		rt->rt6i_idev = NULL;
265 		in6_dev_put(idev);
266 	}
267 	if (peer) {
268 		rt->rt6i_peer = NULL;
269 		inet_putpeer(peer);
270 	}
271 }
272 
273 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
274 
275 static u32 rt6_peer_genid(void)
276 {
277 	return atomic_read(&__rt6_peer_genid);
278 }
279 
280 void rt6_bind_peer(struct rt6_info *rt, int create)
281 {
282 	struct inet_peer *peer;
283 
284 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
285 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
286 		inet_putpeer(peer);
287 	else
288 		rt->rt6i_peer_genid = rt6_peer_genid();
289 }
290 
291 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
292 			   int how)
293 {
294 	struct rt6_info *rt = (struct rt6_info *)dst;
295 	struct inet6_dev *idev = rt->rt6i_idev;
296 	struct net_device *loopback_dev =
297 		dev_net(dev)->loopback_dev;
298 
299 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
300 		struct inet6_dev *loopback_idev =
301 			in6_dev_get(loopback_dev);
302 		if (loopback_idev != NULL) {
303 			rt->rt6i_idev = loopback_idev;
304 			in6_dev_put(idev);
305 		}
306 	}
307 }
308 
309 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
310 {
311 	return (rt->rt6i_flags & RTF_EXPIRES) &&
312 		time_after(jiffies, rt->rt6i_expires);
313 }
314 
315 static inline int rt6_need_strict(const struct in6_addr *daddr)
316 {
317 	return ipv6_addr_type(daddr) &
318 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
319 }
320 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/*
 * Walk the sibling list starting at @rt and pick the entry that matches
 * the requested output interface @oif or, when no @oif is given, the
 * source address @saddr.
 *
 * Returns the matching route; a loopback route remembered as fallback when
 * only that matched; the namespace's null entry when @oif is set, nothing
 * matched and RT6_LOOKUP_F_IFACE is in @flags; and @rt itself otherwise.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to match against: keep the head of the list. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			/* Exact device match wins immediately. */
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/*
				 * Loopback route whose inet6_dev is not @oif:
				 * skipped under strict interface matching, or
				 * when a fallback bound to @oif is already held.
				 */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on the source address being on dev. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
371 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the route's neighbour when that
 * neighbour is not in a NUD_VALID state and more than rtr_probe_interval
 * jiffies have passed since neigh->updated.  A NULL @rt, or a route
 * without a neighbour, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	/* Unlocked fast path; the state is re-checked under neigh->lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Restart the rate-limit window before sending the probe. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Probe goes to the solicited-node multicast of the target. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
411 
412 /*
413  * Default Router Selection (RFC 2461 6.3.6)
414  */
415 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
416 {
417 	struct net_device *dev = rt->rt6i_dev;
418 	if (!oif || dev->ifindex == oif)
419 		return 2;
420 	if ((dev->flags & IFF_LOOPBACK) &&
421 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
422 		return 1;
423 	return 0;
424 }
425 
426 static inline int rt6_check_neigh(struct rt6_info *rt)
427 {
428 	struct neighbour *neigh;
429 	int m;
430 
431 	rcu_read_lock();
432 	neigh = dst_get_neighbour(&rt->dst);
433 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
434 	    !(rt->rt6i_flags & RTF_GATEWAY))
435 		m = 1;
436 	else if (neigh) {
437 		read_lock_bh(&neigh->lock);
438 		if (neigh->nud_state & NUD_VALID)
439 			m = 2;
440 #ifdef CONFIG_IPV6_ROUTER_PREF
441 		else if (neigh->nud_state & NUD_FAILED)
442 			m = 0;
443 #endif
444 		else
445 			m = 1;
446 		read_unlock_bh(&neigh->lock);
447 	} else
448 		m = 0;
449 	rcu_read_unlock();
450 	return m;
451 }
452 
453 static int rt6_score_route(struct rt6_info *rt, int oif,
454 			   int strict)
455 {
456 	int m, n;
457 
458 	m = rt6_check_dev(rt, oif);
459 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
460 		return -1;
461 #ifdef CONFIG_IPV6_ROUTER_PREF
462 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
463 #endif
464 	n = rt6_check_neigh(rt);
465 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
466 		return -1;
467 	return m;
468 }
469 
470 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
471 				   int *mpri, struct rt6_info *match)
472 {
473 	int m;
474 
475 	if (rt6_check_expired(rt))
476 		goto out;
477 
478 	m = rt6_score_route(rt, oif, strict);
479 	if (m < 0)
480 		goto out;
481 
482 	if (m > *mpri) {
483 		if (strict & RT6_LOOKUP_F_REACHABLE)
484 			rt6_probe(match);
485 		*mpri = m;
486 		match = rt;
487 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
488 		rt6_probe(rt);
489 	}
490 
491 out:
492 	return match;
493 }
494 
495 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
496 				     struct rt6_info *rr_head,
497 				     u32 metric, int oif, int strict)
498 {
499 	struct rt6_info *rt, *match;
500 	int mpri = -1;
501 
502 	match = NULL;
503 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
504 	     rt = rt->dst.rt6_next)
505 		match = find_match(rt, oif, strict, &mpri, match);
506 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
507 	     rt = rt->dst.rt6_next)
508 		match = find_match(rt, oif, strict, &mpri, match);
509 
510 	return match;
511 }
512 
513 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
514 {
515 	struct rt6_info *match, *rt0;
516 	struct net *net;
517 
518 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
519 		  __func__, fn->leaf, oif);
520 
521 	rt0 = fn->rr_ptr;
522 	if (!rt0)
523 		fn->rr_ptr = rt0 = fn->leaf;
524 
525 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
526 
527 	if (!match &&
528 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
529 		struct rt6_info *next = rt0->dst.rt6_next;
530 
531 		/* no entries matched; do round-robin */
532 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
533 			next = fn->leaf;
534 
535 		if (next != rt0)
536 			fn->rr_ptr = next;
537 	}
538 
539 	RT6_TRACE("%s() => %p\n",
540 		  __func__, match);
541 
542 	net = dev_net(rt0->rt6i_dev);
543 	return match ? match : net->ipv6.ip6_null_entry;
544 }
545 
546 #ifdef CONFIG_IPV6_ROUTE_INFO
547 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
548 		  const struct in6_addr *gwaddr)
549 {
550 	struct net *net = dev_net(dev);
551 	struct route_info *rinfo = (struct route_info *) opt;
552 	struct in6_addr prefix_buf, *prefix;
553 	unsigned int pref;
554 	unsigned long lifetime;
555 	struct rt6_info *rt;
556 
557 	if (len < sizeof(struct route_info)) {
558 		return -EINVAL;
559 	}
560 
561 	/* Sanity check for prefix_len and length */
562 	if (rinfo->length > 3) {
563 		return -EINVAL;
564 	} else if (rinfo->prefix_len > 128) {
565 		return -EINVAL;
566 	} else if (rinfo->prefix_len > 64) {
567 		if (rinfo->length < 2) {
568 			return -EINVAL;
569 		}
570 	} else if (rinfo->prefix_len > 0) {
571 		if (rinfo->length < 1) {
572 			return -EINVAL;
573 		}
574 	}
575 
576 	pref = rinfo->route_pref;
577 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
578 		return -EINVAL;
579 
580 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
581 
582 	if (rinfo->length == 3)
583 		prefix = (struct in6_addr *)rinfo->prefix;
584 	else {
585 		/* this function is safe */
586 		ipv6_addr_prefix(&prefix_buf,
587 				 (struct in6_addr *)rinfo->prefix,
588 				 rinfo->prefix_len);
589 		prefix = &prefix_buf;
590 	}
591 
592 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
593 				dev->ifindex);
594 
595 	if (rt && !lifetime) {
596 		ip6_del_rt(rt);
597 		rt = NULL;
598 	}
599 
600 	if (!rt && lifetime)
601 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
602 					pref);
603 	else if (rt)
604 		rt->rt6i_flags = RTF_ROUTEINFO |
605 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
606 
607 	if (rt) {
608 		if (!addrconf_finite_timeout(lifetime)) {
609 			rt->rt6i_flags &= ~RTF_EXPIRES;
610 		} else {
611 			rt->rt6i_expires = jiffies + HZ * lifetime;
612 			rt->rt6i_flags |= RTF_EXPIRES;
613 		}
614 		dst_release(&rt->dst);
615 	}
616 	return 0;
617 }
618 #endif
619 
/*
 * BACKTRACK(): when the current result `rt` is the namespace's null entry,
 * climb from `fn` towards the tree root looking for another node that
 * carries route info (RTN_RTINFO).  If the parent has a subtree that is not
 * the current node, that subtree is searched with fib6_lookup() on @saddr.
 *
 * The macro relies on the expanding function providing the variables `rt`
 * and `fn` and the labels `restart` (taken when a candidate node is found)
 * and `out` (taken when the top-level root is reached).
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
637 
/*
 * Lookup callback used by rt6_lookup(): find the node for the flow in
 * @table under the table read lock, pick the entry matching the flow's
 * source address / output interface, and backtrack towards the root while
 * the result is the null entry.  The returned route has been passed to
 * dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* BACKTRACK() jumps back here with fn moved to another node. */
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
	/* Also reached from BACKTRACK() once the top-level root is hit. */
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
657 
658 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
659 			    const struct in6_addr *saddr, int oif, int strict)
660 {
661 	struct flowi6 fl6 = {
662 		.flowi6_oif = oif,
663 		.daddr = *daddr,
664 	};
665 	struct dst_entry *dst;
666 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
667 
668 	if (saddr) {
669 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
670 		flags |= RT6_LOOKUP_F_HAS_SADDR;
671 	}
672 
673 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
674 	if (dst->error == 0)
675 		return (struct rt6_info *) dst;
676 
677 	dst_release(dst);
678 
679 	return NULL;
680 }
681 
682 EXPORT_SYMBOL(rt6_lookup);
683 
684 /* ip6_ins_rt is called with FREE table->tb6_lock.
685    It takes new route entry, the addition fails by any reason the
686    route is freed. In any case, if caller does not hold it, it may
687    be destroyed.
688  */
689 
690 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
691 {
692 	int err;
693 	struct fib6_table *table;
694 
695 	table = rt->rt6i_table;
696 	write_lock_bh(&table->tb6_lock);
697 	err = fib6_add(&table->tb6_root, rt, info);
698 	write_unlock_bh(&table->tb6_lock);
699 
700 	return err;
701 }
702 
703 int ip6_ins_rt(struct rt6_info *rt)
704 {
705 	struct nl_info info = {
706 		.nl_net = dev_net(rt->rt6i_dev),
707 	};
708 	return __ip6_ins_rt(rt, &info);
709 }
710 
711 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
712 				      const struct in6_addr *daddr,
713 				      const struct in6_addr *saddr)
714 {
715 	struct rt6_info *rt;
716 
717 	/*
718 	 *	Clone the route.
719 	 */
720 
721 	rt = ip6_rt_copy(ort, daddr);
722 
723 	if (rt) {
724 		struct neighbour *neigh;
725 		int attempts = !in_softirq();
726 
727 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
728 			if (rt->rt6i_dst.plen != 128 &&
729 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
730 				rt->rt6i_flags |= RTF_ANYCAST;
731 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
732 		}
733 
734 		rt->rt6i_flags |= RTF_CACHE;
735 
736 #ifdef CONFIG_IPV6_SUBTREES
737 		if (rt->rt6i_src.plen && saddr) {
738 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
739 			rt->rt6i_src.plen = 128;
740 		}
741 #endif
742 
743 	retry:
744 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
745 		if (IS_ERR(neigh)) {
746 			struct net *net = dev_net(rt->rt6i_dev);
747 			int saved_rt_min_interval =
748 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
749 			int saved_rt_elasticity =
750 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
751 
752 			if (attempts-- > 0) {
753 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
754 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
755 
756 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
757 
758 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
759 					saved_rt_elasticity;
760 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
761 					saved_rt_min_interval;
762 				goto retry;
763 			}
764 
765 			if (net_ratelimit())
766 				printk(KERN_WARNING
767 				       "ipv6: Neighbour table overflow.\n");
768 			dst_free(&rt->dst);
769 			return NULL;
770 		}
771 		dst_set_neighbour(&rt->dst, neigh);
772 
773 	}
774 
775 	return rt;
776 }
777 
778 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
779 					const struct in6_addr *daddr)
780 {
781 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
782 
783 	if (rt) {
784 		rt->rt6i_flags |= RTF_CACHE;
785 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
786 	}
787 	return rt;
788 }
789 
/*
 * Core policy-route lookup shared by the input and output paths.
 *
 * Finds the best route for @fl6 in @table (restricted to @oif when
 * RT6_LOOKUP_F_IFACE is in @flags).  If the result is neither the null
 * entry nor already a cache entry, a per-destination cache route is created
 * with rt6_alloc_cow() or rt6_alloc_clone() and inserted into the table.
 *
 * Always returns a route with a reference held (possibly the null entry),
 * with lastuse/__use updated.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Unless forwarding is enabled, first insist on reachable routers. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

	/* BACKTRACK() jumps here after moving fn to another node. */
restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin the route; the table lock is dropped while cloning it. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	/* Switch to the clone, or to the null entry if cloning failed. */
	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	/* Give up after a bounded number of relookups. */
	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/* Retry once without the reachability requirement. */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
859 
860 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
861 					    struct flowi6 *fl6, int flags)
862 {
863 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
864 }
865 
866 void ip6_route_input(struct sk_buff *skb)
867 {
868 	const struct ipv6hdr *iph = ipv6_hdr(skb);
869 	struct net *net = dev_net(skb->dev);
870 	int flags = RT6_LOOKUP_F_HAS_SADDR;
871 	struct flowi6 fl6 = {
872 		.flowi6_iif = skb->dev->ifindex,
873 		.daddr = iph->daddr,
874 		.saddr = iph->saddr,
875 		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
876 		.flowi6_mark = skb->mark,
877 		.flowi6_proto = iph->nexthdr,
878 	};
879 
880 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
881 		flags |= RT6_LOOKUP_F_IFACE;
882 
883 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
884 }
885 
886 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
887 					     struct flowi6 *fl6, int flags)
888 {
889 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
890 }
891 
892 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
893 				    struct flowi6 *fl6)
894 {
895 	int flags = 0;
896 
897 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
898 		flags |= RT6_LOOKUP_F_IFACE;
899 
900 	if (!ipv6_addr_any(&fl6->saddr))
901 		flags |= RT6_LOOKUP_F_HAS_SADDR;
902 	else if (sk)
903 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
904 
905 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
906 }
907 
908 EXPORT_SYMBOL(ip6_route_output);
909 
910 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
911 {
912 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
913 	struct dst_entry *new = NULL;
914 
915 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
916 	if (rt) {
917 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
918 
919 		new = &rt->dst;
920 
921 		new->__use = 1;
922 		new->input = dst_discard;
923 		new->output = dst_discard;
924 
925 		if (dst_metrics_read_only(&ort->dst))
926 			new->_metrics = ort->dst._metrics;
927 		else
928 			dst_copy_metrics(new, &ort->dst);
929 		rt->rt6i_idev = ort->rt6i_idev;
930 		if (rt->rt6i_idev)
931 			in6_dev_hold(rt->rt6i_idev);
932 		rt->rt6i_expires = 0;
933 
934 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
935 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
936 		rt->rt6i_metric = 0;
937 
938 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
939 #ifdef CONFIG_IPV6_SUBTREES
940 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
941 #endif
942 
943 		dst_free(new);
944 	}
945 
946 	dst_release(dst_orig);
947 	return new ? new : ERR_PTR(-ENOMEM);
948 }
949 
950 /*
951  *	Destination cache support functions
952  */
953 
954 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
955 {
956 	struct rt6_info *rt;
957 
958 	rt = (struct rt6_info *) dst;
959 
960 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
961 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
962 			if (!rt->rt6i_peer)
963 				rt6_bind_peer(rt, 0);
964 			rt->rt6i_peer_genid = rt6_peer_genid();
965 		}
966 		return dst;
967 	}
968 	return NULL;
969 }
970 
971 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
972 {
973 	struct rt6_info *rt = (struct rt6_info *) dst;
974 
975 	if (rt) {
976 		if (rt->rt6i_flags & RTF_CACHE) {
977 			if (rt6_check_expired(rt)) {
978 				ip6_del_rt(rt);
979 				dst = NULL;
980 			}
981 		} else {
982 			dst_release(dst);
983 			dst = NULL;
984 		}
985 	}
986 	return dst;
987 }
988 
989 static void ip6_link_failure(struct sk_buff *skb)
990 {
991 	struct rt6_info *rt;
992 
993 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
994 
995 	rt = (struct rt6_info *) skb_dst(skb);
996 	if (rt) {
997 		if (rt->rt6i_flags&RTF_CACHE) {
998 			dst_set_expires(&rt->dst, 0);
999 			rt->rt6i_flags |= RTF_EXPIRES;
1000 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001 			rt->rt6i_node->fn_sernum = -1;
1002 	}
1003 }
1004 
1005 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006 {
1007 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1008 
1009 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010 		rt6->rt6i_flags |= RTF_MODIFIED;
1011 		if (mtu < IPV6_MIN_MTU) {
1012 			u32 features = dst_metric(dst, RTAX_FEATURES);
1013 			mtu = IPV6_MIN_MTU;
1014 			features |= RTAX_FEATURE_ALLFRAG;
1015 			dst_metric_set(dst, RTAX_FEATURES, features);
1016 		}
1017 		dst_metric_set(dst, RTAX_MTU, mtu);
1018 	}
1019 }
1020 
1021 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022 {
1023 	struct net_device *dev = dst->dev;
1024 	unsigned int mtu = dst_mtu(dst);
1025 	struct net *net = dev_net(dev);
1026 
1027 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028 
1029 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031 
1032 	/*
1033 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036 	 * rely only on pmtu discovery"
1037 	 */
1038 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039 		mtu = IPV6_MAXPLEN;
1040 	return mtu;
1041 }
1042 
1043 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044 {
1045 	unsigned int mtu = IPV6_MIN_MTU;
1046 	struct inet6_dev *idev;
1047 
1048 	rcu_read_lock();
1049 	idev = __in6_dev_get(dst->dev);
1050 	if (idev)
1051 		mtu = idev->cnf.mtu6;
1052 	rcu_read_unlock();
1053 
1054 	return mtu;
1055 }
1056 
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * icmp6_dst_alloc(), and the spinlock that protects it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1059 
1060 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061 				  struct neighbour *neigh,
1062 				  const struct in6_addr *addr)
1063 {
1064 	struct rt6_info *rt;
1065 	struct inet6_dev *idev = in6_dev_get(dev);
1066 	struct net *net = dev_net(dev);
1067 
1068 	if (unlikely(idev == NULL))
1069 		return NULL;
1070 
1071 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072 	if (unlikely(rt == NULL)) {
1073 		in6_dev_put(idev);
1074 		goto out;
1075 	}
1076 
1077 	if (neigh)
1078 		neigh_hold(neigh);
1079 	else {
1080 		neigh = ndisc_get_neigh(dev, addr);
1081 		if (IS_ERR(neigh))
1082 			neigh = NULL;
1083 	}
1084 
1085 	rt->dst.flags |= DST_HOST;
1086 	rt->dst.output  = ip6_output;
1087 	dst_set_neighbour(&rt->dst, neigh);
1088 	atomic_set(&rt->dst.__refcnt, 1);
1089 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090 
1091 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092 	rt->rt6i_dst.plen = 128;
1093 	rt->rt6i_idev     = idev;
1094 
1095 	spin_lock_bh(&icmp6_dst_lock);
1096 	rt->dst.next = icmp6_dst_gc_list;
1097 	icmp6_dst_gc_list = &rt->dst;
1098 	spin_unlock_bh(&icmp6_dst_lock);
1099 
1100 	fib6_force_start_gc(net);
1101 
1102 out:
1103 	return &rt->dst;
1104 }
1105 
1106 int icmp6_dst_gc(void)
1107 {
1108 	struct dst_entry *dst, **pprev;
1109 	int more = 0;
1110 
1111 	spin_lock_bh(&icmp6_dst_lock);
1112 	pprev = &icmp6_dst_gc_list;
1113 
1114 	while ((dst = *pprev) != NULL) {
1115 		if (!atomic_read(&dst->__refcnt)) {
1116 			*pprev = dst->next;
1117 			dst_free(dst);
1118 		} else {
1119 			pprev = &dst->next;
1120 			++more;
1121 		}
1122 	}
1123 
1124 	spin_unlock_bh(&icmp6_dst_lock);
1125 
1126 	return more;
1127 }
1128 
1129 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 			    void *arg)
1131 {
1132 	struct dst_entry *dst, **pprev;
1133 
1134 	spin_lock_bh(&icmp6_dst_lock);
1135 	pprev = &icmp6_dst_gc_list;
1136 	while ((dst = *pprev) != NULL) {
1137 		struct rt6_info *rt = (struct rt6_info *) dst;
1138 		if (func(rt, arg)) {
1139 			*pprev = dst->next;
1140 			dst_free(dst);
1141 		} else {
1142 			pprev = &dst->next;
1143 		}
1144 	}
1145 	spin_unlock_bh(&icmp6_dst_lock);
1146 }
1147 
1148 static int ip6_dst_gc(struct dst_ops *ops)
1149 {
1150 	unsigned long now = jiffies;
1151 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157 	int entries;
1158 
1159 	entries = dst_entries_get_fast(ops);
1160 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161 	    entries <= rt_max_size)
1162 		goto out;
1163 
1164 	net->ipv6.ip6_rt_gc_expire++;
1165 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 	net->ipv6.ip6_rt_last_gc = now;
1167 	entries = dst_entries_get_slow(ops);
1168 	if (entries < ops->gc_thresh)
1169 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170 out:
1171 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172 	return entries > rt_max_size;
1173 }
1174 
/* Clean the host part of a prefix. Not necessary in a radix tree,
   but results in cleaner routing tables.

   Remove it only once everything works!
 */
1180 
1181 int ip6_dst_hoplimit(struct dst_entry *dst)
1182 {
1183 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184 	if (hoplimit == 0) {
1185 		struct net_device *dev = dst->dev;
1186 		struct inet6_dev *idev;
1187 
1188 		rcu_read_lock();
1189 		idev = __in6_dev_get(dev);
1190 		if (idev)
1191 			hoplimit = idev->cnf.hop_limit;
1192 		else
1193 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1194 		rcu_read_unlock();
1195 	}
1196 	return hoplimit;
1197 }
1198 EXPORT_SYMBOL(ip6_dst_hoplimit);
1199 
1200 /*
1201  *
1202  */
1203 
1204 int ip6_route_add(struct fib6_config *cfg)
1205 {
1206 	int err;
1207 	struct net *net = cfg->fc_nlinfo.nl_net;
1208 	struct rt6_info *rt = NULL;
1209 	struct net_device *dev = NULL;
1210 	struct inet6_dev *idev = NULL;
1211 	struct fib6_table *table;
1212 	int addr_type;
1213 
1214 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215 		return -EINVAL;
1216 #ifndef CONFIG_IPV6_SUBTREES
1217 	if (cfg->fc_src_len)
1218 		return -EINVAL;
1219 #endif
1220 	if (cfg->fc_ifindex) {
1221 		err = -ENODEV;
1222 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223 		if (!dev)
1224 			goto out;
1225 		idev = in6_dev_get(dev);
1226 		if (!idev)
1227 			goto out;
1228 	}
1229 
1230 	if (cfg->fc_metric == 0)
1231 		cfg->fc_metric = IP6_RT_PRIO_USER;
1232 
1233 	table = fib6_new_table(net, cfg->fc_table);
1234 	if (table == NULL) {
1235 		err = -ENOBUFS;
1236 		goto out;
1237 	}
1238 
1239 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240 
1241 	if (rt == NULL) {
1242 		err = -ENOMEM;
1243 		goto out;
1244 	}
1245 
1246 	rt->dst.obsolete = -1;
1247 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249 				0;
1250 
1251 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252 		cfg->fc_protocol = RTPROT_BOOT;
1253 	rt->rt6i_protocol = cfg->fc_protocol;
1254 
1255 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256 
1257 	if (addr_type & IPV6_ADDR_MULTICAST)
1258 		rt->dst.input = ip6_mc_input;
1259 	else if (cfg->fc_flags & RTF_LOCAL)
1260 		rt->dst.input = ip6_input;
1261 	else
1262 		rt->dst.input = ip6_forward;
1263 
1264 	rt->dst.output = ip6_output;
1265 
1266 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268 	if (rt->rt6i_dst.plen == 128)
1269 	       rt->dst.flags |= DST_HOST;
1270 
1271 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273 		if (!metrics) {
1274 			err = -ENOMEM;
1275 			goto out;
1276 		}
1277 		dst_init_metrics(&rt->dst, metrics, 0);
1278 	}
1279 #ifdef CONFIG_IPV6_SUBTREES
1280 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281 	rt->rt6i_src.plen = cfg->fc_src_len;
1282 #endif
1283 
1284 	rt->rt6i_metric = cfg->fc_metric;
1285 
1286 	/* We cannot add true routes via loopback here,
1287 	   they would result in kernel looping; promote them to reject routes
1288 	 */
1289 	if ((cfg->fc_flags & RTF_REJECT) ||
1290 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291 					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292 		/* hold loopback dev/idev if we haven't done so. */
1293 		if (dev != net->loopback_dev) {
1294 			if (dev) {
1295 				dev_put(dev);
1296 				in6_dev_put(idev);
1297 			}
1298 			dev = net->loopback_dev;
1299 			dev_hold(dev);
1300 			idev = in6_dev_get(dev);
1301 			if (!idev) {
1302 				err = -ENODEV;
1303 				goto out;
1304 			}
1305 		}
1306 		rt->dst.output = ip6_pkt_discard_out;
1307 		rt->dst.input = ip6_pkt_discard;
1308 		rt->dst.error = -ENETUNREACH;
1309 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310 		goto install_route;
1311 	}
1312 
1313 	if (cfg->fc_flags & RTF_GATEWAY) {
1314 		const struct in6_addr *gw_addr;
1315 		int gwa_type;
1316 
1317 		gw_addr = &cfg->fc_gateway;
1318 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319 		gwa_type = ipv6_addr_type(gw_addr);
1320 
1321 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322 			struct rt6_info *grt;
1323 
1324 			/* IPv6 strictly inhibits using not link-local
1325 			   addresses as nexthop address.
1326 			   Otherwise, router will not able to send redirects.
1327 			   It is very good, but in some (rare!) circumstances
1328 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329 			   some exceptions. --ANK
1330 			 */
1331 			err = -EINVAL;
1332 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333 				goto out;
1334 
1335 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336 
1337 			err = -EHOSTUNREACH;
1338 			if (grt == NULL)
1339 				goto out;
1340 			if (dev) {
1341 				if (dev != grt->rt6i_dev) {
1342 					dst_release(&grt->dst);
1343 					goto out;
1344 				}
1345 			} else {
1346 				dev = grt->rt6i_dev;
1347 				idev = grt->rt6i_idev;
1348 				dev_hold(dev);
1349 				in6_dev_hold(grt->rt6i_idev);
1350 			}
1351 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352 				err = 0;
1353 			dst_release(&grt->dst);
1354 
1355 			if (err)
1356 				goto out;
1357 		}
1358 		err = -EINVAL;
1359 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360 			goto out;
1361 	}
1362 
1363 	err = -ENODEV;
1364 	if (dev == NULL)
1365 		goto out;
1366 
1367 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369 			err = -EINVAL;
1370 			goto out;
1371 		}
1372 		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373 		rt->rt6i_prefsrc.plen = 128;
1374 	} else
1375 		rt->rt6i_prefsrc.plen = 0;
1376 
1377 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378 		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379 		if (IS_ERR(n)) {
1380 			err = PTR_ERR(n);
1381 			goto out;
1382 		}
1383 		dst_set_neighbour(&rt->dst, n);
1384 	}
1385 
1386 	rt->rt6i_flags = cfg->fc_flags;
1387 
1388 install_route:
1389 	if (cfg->fc_mx) {
1390 		struct nlattr *nla;
1391 		int remaining;
1392 
1393 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394 			int type = nla_type(nla);
1395 
1396 			if (type) {
1397 				if (type > RTAX_MAX) {
1398 					err = -EINVAL;
1399 					goto out;
1400 				}
1401 
1402 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403 			}
1404 		}
1405 	}
1406 
1407 	rt->dst.dev = dev;
1408 	rt->rt6i_idev = idev;
1409 	rt->rt6i_table = table;
1410 
1411 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412 
1413 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414 
1415 out:
1416 	if (dev)
1417 		dev_put(dev);
1418 	if (idev)
1419 		in6_dev_put(idev);
1420 	if (rt)
1421 		dst_free(&rt->dst);
1422 	return err;
1423 }
1424 
1425 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1426 {
1427 	int err;
1428 	struct fib6_table *table;
1429 	struct net *net = dev_net(rt->rt6i_dev);
1430 
1431 	if (rt == net->ipv6.ip6_null_entry)
1432 		return -ENOENT;
1433 
1434 	table = rt->rt6i_table;
1435 	write_lock_bh(&table->tb6_lock);
1436 
1437 	err = fib6_del(rt, info);
1438 	dst_release(&rt->dst);
1439 
1440 	write_unlock_bh(&table->tb6_lock);
1441 
1442 	return err;
1443 }
1444 
1445 int ip6_del_rt(struct rt6_info *rt)
1446 {
1447 	struct nl_info info = {
1448 		.nl_net = dev_net(rt->rt6i_dev),
1449 	};
1450 	return __ip6_del_rt(rt, &info);
1451 }
1452 
1453 static int ip6_route_del(struct fib6_config *cfg)
1454 {
1455 	struct fib6_table *table;
1456 	struct fib6_node *fn;
1457 	struct rt6_info *rt;
1458 	int err = -ESRCH;
1459 
1460 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461 	if (table == NULL)
1462 		return err;
1463 
1464 	read_lock_bh(&table->tb6_lock);
1465 
1466 	fn = fib6_locate(&table->tb6_root,
1467 			 &cfg->fc_dst, cfg->fc_dst_len,
1468 			 &cfg->fc_src, cfg->fc_src_len);
1469 
1470 	if (fn) {
1471 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472 			if (cfg->fc_ifindex &&
1473 			    (rt->rt6i_dev == NULL ||
1474 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475 				continue;
1476 			if (cfg->fc_flags & RTF_GATEWAY &&
1477 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478 				continue;
1479 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480 				continue;
1481 			dst_hold(&rt->dst);
1482 			read_unlock_bh(&table->tb6_lock);
1483 
1484 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1485 		}
1486 	}
1487 	read_unlock_bh(&table->tb6_lock);
1488 
1489 	return err;
1490 }
1491 
1492 /*
1493  *	Handle redirects
1494  */
/*
 * Lookup key used while validating a redirect: the ordinary flow plus the
 * gateway address the candidate routes are compared against.
 * __ip6_route_redirect() receives a struct flowi6 pointer and casts it back
 * to this structure, so fl6 has to stay the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow handed to fib6_rule_lookup() */
	struct in6_addr gateway;	/* must equal the route's rt6i_gateway */
};
1499 
1500 static struct rt6_info *__ip6_route_redirect(struct net *net,
1501 					     struct fib6_table *table,
1502 					     struct flowi6 *fl6,
1503 					     int flags)
1504 {
1505 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506 	struct rt6_info *rt;
1507 	struct fib6_node *fn;
1508 
1509 	/*
1510 	 * Get the "current" route for this destination and
1511 	 * check if the redirect has come from approriate router.
1512 	 *
1513 	 * RFC 2461 specifies that redirects should only be
1514 	 * accepted if they come from the nexthop to the target.
1515 	 * Due to the way the routes are chosen, this notion
1516 	 * is a bit fuzzy and one might need to check all possible
1517 	 * routes.
1518 	 */
1519 
1520 	read_lock_bh(&table->tb6_lock);
1521 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522 restart:
1523 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524 		/*
1525 		 * Current route is on-link; redirect is always invalid.
1526 		 *
1527 		 * Seems, previous statement is not true. It could
1528 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529 		 * But then router serving it might decide, that we should
1530 		 * know truth 8)8) --ANK (980726).
1531 		 */
1532 		if (rt6_check_expired(rt))
1533 			continue;
1534 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535 			continue;
1536 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537 			continue;
1538 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539 			continue;
1540 		break;
1541 	}
1542 
1543 	if (!rt)
1544 		rt = net->ipv6.ip6_null_entry;
1545 	BACKTRACK(net, &fl6->saddr);
1546 out:
1547 	dst_hold(&rt->dst);
1548 
1549 	read_unlock_bh(&table->tb6_lock);
1550 
1551 	return rt;
1552 };
1553 
1554 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555 					   const struct in6_addr *src,
1556 					   const struct in6_addr *gateway,
1557 					   struct net_device *dev)
1558 {
1559 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560 	struct net *net = dev_net(dev);
1561 	struct ip6rd_flowi rdfl = {
1562 		.fl6 = {
1563 			.flowi6_oif = dev->ifindex,
1564 			.daddr = *dest,
1565 			.saddr = *src,
1566 		},
1567 	};
1568 
1569 	ipv6_addr_copy(&rdfl.gateway, gateway);
1570 
1571 	if (rt6_need_strict(dest))
1572 		flags |= RT6_LOOKUP_F_IFACE;
1573 
1574 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575 						   flags, __ip6_route_redirect);
1576 }
1577 
1578 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579 		  const struct in6_addr *saddr,
1580 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581 {
1582 	struct rt6_info *rt, *nrt = NULL;
1583 	struct netevent_redirect netevent;
1584 	struct net *net = dev_net(neigh->dev);
1585 
1586 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587 
1588 	if (rt == net->ipv6.ip6_null_entry) {
1589 		if (net_ratelimit())
1590 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591 			       "for redirect target\n");
1592 		goto out;
1593 	}
1594 
1595 	/*
1596 	 *	We have finally decided to accept it.
1597 	 */
1598 
1599 	neigh_update(neigh, lladdr, NUD_STALE,
1600 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601 		     NEIGH_UPDATE_F_OVERRIDE|
1602 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603 				     NEIGH_UPDATE_F_ISROUTER))
1604 		     );
1605 
1606 	/*
1607 	 * Redirect received -> path was valid.
1608 	 * Look, redirects are sent only in response to data packets,
1609 	 * so that this nexthop apparently is reachable. --ANK
1610 	 */
1611 	dst_confirm(&rt->dst);
1612 
1613 	/* Duplicate redirect: silently ignore. */
1614 	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615 		goto out;
1616 
1617 	nrt = ip6_rt_copy(rt, dest);
1618 	if (nrt == NULL)
1619 		goto out;
1620 
1621 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622 	if (on_link)
1623 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624 
1625 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627 
1628 	if (ip6_ins_rt(nrt))
1629 		goto out;
1630 
1631 	netevent.old = &rt->dst;
1632 	netevent.new = &nrt->dst;
1633 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634 
1635 	if (rt->rt6i_flags&RTF_CACHE) {
1636 		ip6_del_rt(rt);
1637 		return;
1638 	}
1639 
1640 out:
1641 	dst_release(&rt->dst);
1642 }
1643 
1644 /*
1645  *	Handle ICMP "packet too big" messages
1646  *	i.e. Path MTU discovery
1647  */
1648 
1649 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650 			     struct net *net, u32 pmtu, int ifindex)
1651 {
1652 	struct rt6_info *rt, *nrt;
1653 	int allfrag = 0;
1654 again:
1655 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656 	if (rt == NULL)
1657 		return;
1658 
1659 	if (rt6_check_expired(rt)) {
1660 		ip6_del_rt(rt);
1661 		goto again;
1662 	}
1663 
1664 	if (pmtu >= dst_mtu(&rt->dst))
1665 		goto out;
1666 
1667 	if (pmtu < IPV6_MIN_MTU) {
1668 		/*
1669 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670 		 * MTU (1280) and a fragment header should always be included
1671 		 * after a node receiving Too Big message reporting PMTU is
1672 		 * less than the IPv6 Minimum Link MTU.
1673 		 */
1674 		pmtu = IPV6_MIN_MTU;
1675 		allfrag = 1;
1676 	}
1677 
1678 	/* New mtu received -> path was valid.
1679 	   They are sent only in response to data packets,
1680 	   so that this nexthop apparently is reachable. --ANK
1681 	 */
1682 	dst_confirm(&rt->dst);
1683 
1684 	/* Host route. If it is static, it would be better
1685 	   not to override it, but add new one, so that
1686 	   when cache entry will expire old pmtu
1687 	   would return automatically.
1688 	 */
1689 	if (rt->rt6i_flags & RTF_CACHE) {
1690 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691 		if (allfrag) {
1692 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693 			features |= RTAX_FEATURE_ALLFRAG;
1694 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695 		}
1696 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698 		goto out;
1699 	}
1700 
1701 	/* Network route.
1702 	   Two cases are possible:
1703 	   1. It is connected route. Action: COW
1704 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705 	 */
1706 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708 	else
1709 		nrt = rt6_alloc_clone(rt, daddr);
1710 
1711 	if (nrt) {
1712 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713 		if (allfrag) {
1714 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715 			features |= RTAX_FEATURE_ALLFRAG;
1716 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717 		}
1718 
1719 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1720 		 * happened within 5 mins, the recommended timer is 10 mins.
1721 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1722 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723 		 * and detecting PMTU increase will be automatically happened.
1724 		 */
1725 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727 
1728 		ip6_ins_rt(nrt);
1729 	}
1730 out:
1731 	dst_release(&rt->dst);
1732 }
1733 
1734 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735 			struct net_device *dev, u32 pmtu)
1736 {
1737 	struct net *net = dev_net(dev);
1738 
1739 	/*
1740 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741 	 * is sending along the path" that caused the Packet Too Big message.
1742 	 * Since it's not possible in the general case to determine which
1743 	 * interface was used to send the original packet, we update the MTU
1744 	 * on the interface that will be used to send future packets. We also
1745 	 * update the MTU on the interface that received the Packet Too Big in
1746 	 * case the original packet was forced out that interface with
1747 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748 	 * correct behaviour, which would be to update the MTU on all
1749 	 * interfaces.
1750 	 */
1751 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753 }
1754 
1755 /*
1756  *	Misc support functions
1757  */
1758 
1759 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760 				    const struct in6_addr *dest)
1761 {
1762 	struct net *net = dev_net(ort->rt6i_dev);
1763 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764 					    ort->dst.dev, 0);
1765 
1766 	if (rt) {
1767 		rt->dst.input = ort->dst.input;
1768 		rt->dst.output = ort->dst.output;
1769 		rt->dst.flags |= DST_HOST;
1770 
1771 		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772 		rt->rt6i_dst.plen = 128;
1773 		dst_copy_metrics(&rt->dst, &ort->dst);
1774 		rt->dst.error = ort->dst.error;
1775 		rt->rt6i_idev = ort->rt6i_idev;
1776 		if (rt->rt6i_idev)
1777 			in6_dev_hold(rt->rt6i_idev);
1778 		rt->dst.lastuse = jiffies;
1779 		rt->rt6i_expires = 0;
1780 
1781 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783 		rt->rt6i_metric = 0;
1784 
1785 #ifdef CONFIG_IPV6_SUBTREES
1786 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787 #endif
1788 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789 		rt->rt6i_table = ort->rt6i_table;
1790 	}
1791 	return rt;
1792 }
1793 
1794 #ifdef CONFIG_IPV6_ROUTE_INFO
1795 static struct rt6_info *rt6_get_route_info(struct net *net,
1796 					   const struct in6_addr *prefix, int prefixlen,
1797 					   const struct in6_addr *gwaddr, int ifindex)
1798 {
1799 	struct fib6_node *fn;
1800 	struct rt6_info *rt = NULL;
1801 	struct fib6_table *table;
1802 
1803 	table = fib6_get_table(net, RT6_TABLE_INFO);
1804 	if (table == NULL)
1805 		return NULL;
1806 
1807 	write_lock_bh(&table->tb6_lock);
1808 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809 	if (!fn)
1810 		goto out;
1811 
1812 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813 		if (rt->rt6i_dev->ifindex != ifindex)
1814 			continue;
1815 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816 			continue;
1817 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818 			continue;
1819 		dst_hold(&rt->dst);
1820 		break;
1821 	}
1822 out:
1823 	write_unlock_bh(&table->tb6_lock);
1824 	return rt;
1825 }
1826 
1827 static struct rt6_info *rt6_add_route_info(struct net *net,
1828 					   const struct in6_addr *prefix, int prefixlen,
1829 					   const struct in6_addr *gwaddr, int ifindex,
1830 					   unsigned pref)
1831 {
1832 	struct fib6_config cfg = {
1833 		.fc_table	= RT6_TABLE_INFO,
1834 		.fc_metric	= IP6_RT_PRIO_USER,
1835 		.fc_ifindex	= ifindex,
1836 		.fc_dst_len	= prefixlen,
1837 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838 				  RTF_UP | RTF_PREF(pref),
1839 		.fc_nlinfo.pid = 0,
1840 		.fc_nlinfo.nlh = NULL,
1841 		.fc_nlinfo.nl_net = net,
1842 	};
1843 
1844 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846 
1847 	/* We should treat it as a default route if prefix length is 0. */
1848 	if (!prefixlen)
1849 		cfg.fc_flags |= RTF_DEFAULT;
1850 
1851 	ip6_route_add(&cfg);
1852 
1853 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854 }
1855 #endif
1856 
1857 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1858 {
1859 	struct rt6_info *rt;
1860 	struct fib6_table *table;
1861 
1862 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863 	if (table == NULL)
1864 		return NULL;
1865 
1866 	write_lock_bh(&table->tb6_lock);
1867 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868 		if (dev == rt->rt6i_dev &&
1869 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871 			break;
1872 	}
1873 	if (rt)
1874 		dst_hold(&rt->dst);
1875 	write_unlock_bh(&table->tb6_lock);
1876 	return rt;
1877 }
1878 
1879 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1880 				     struct net_device *dev,
1881 				     unsigned int pref)
1882 {
1883 	struct fib6_config cfg = {
1884 		.fc_table	= RT6_TABLE_DFLT,
1885 		.fc_metric	= IP6_RT_PRIO_USER,
1886 		.fc_ifindex	= dev->ifindex,
1887 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889 		.fc_nlinfo.pid = 0,
1890 		.fc_nlinfo.nlh = NULL,
1891 		.fc_nlinfo.nl_net = dev_net(dev),
1892 	};
1893 
1894 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895 
1896 	ip6_route_add(&cfg);
1897 
1898 	return rt6_get_dflt_router(gwaddr, dev);
1899 }
1900 
1901 void rt6_purge_dflt_routers(struct net *net)
1902 {
1903 	struct rt6_info *rt;
1904 	struct fib6_table *table;
1905 
1906 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908 	if (table == NULL)
1909 		return;
1910 
1911 restart:
1912 	read_lock_bh(&table->tb6_lock);
1913 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915 			dst_hold(&rt->dst);
1916 			read_unlock_bh(&table->tb6_lock);
1917 			ip6_del_rt(rt);
1918 			goto restart;
1919 		}
1920 	}
1921 	read_unlock_bh(&table->tb6_lock);
1922 }
1923 
1924 static void rtmsg_to_fib6_config(struct net *net,
1925 				 struct in6_rtmsg *rtmsg,
1926 				 struct fib6_config *cfg)
1927 {
1928 	memset(cfg, 0, sizeof(*cfg));
1929 
1930 	cfg->fc_table = RT6_TABLE_MAIN;
1931 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932 	cfg->fc_metric = rtmsg->rtmsg_metric;
1933 	cfg->fc_expires = rtmsg->rtmsg_info;
1934 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936 	cfg->fc_flags = rtmsg->rtmsg_flags;
1937 
1938 	cfg->fc_nlinfo.nl_net = net;
1939 
1940 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943 }
1944 
1945 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946 {
1947 	struct fib6_config cfg;
1948 	struct in6_rtmsg rtmsg;
1949 	int err;
1950 
1951 	switch(cmd) {
1952 	case SIOCADDRT:		/* Add a route */
1953 	case SIOCDELRT:		/* Delete a route */
1954 		if (!capable(CAP_NET_ADMIN))
1955 			return -EPERM;
1956 		err = copy_from_user(&rtmsg, arg,
1957 				     sizeof(struct in6_rtmsg));
1958 		if (err)
1959 			return -EFAULT;
1960 
1961 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962 
1963 		rtnl_lock();
1964 		switch (cmd) {
1965 		case SIOCADDRT:
1966 			err = ip6_route_add(&cfg);
1967 			break;
1968 		case SIOCDELRT:
1969 			err = ip6_route_del(&cfg);
1970 			break;
1971 		default:
1972 			err = -EINVAL;
1973 		}
1974 		rtnl_unlock();
1975 
1976 		return err;
1977 	}
1978 
1979 	return -EINVAL;
1980 }
1981 
1982 /*
1983  *	Drop the packet on the floor
1984  */
1985 
1986 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987 {
1988 	int type;
1989 	struct dst_entry *dst = skb_dst(skb);
1990 	switch (ipstats_mib_noroutes) {
1991 	case IPSTATS_MIB_INNOROUTES:
1992 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993 		if (type == IPV6_ADDR_ANY) {
1994 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995 				      IPSTATS_MIB_INADDRERRORS);
1996 			break;
1997 		}
1998 		/* FALLTHROUGH */
1999 	case IPSTATS_MIB_OUTNOROUTES:
2000 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001 			      ipstats_mib_noroutes);
2002 		break;
2003 	}
2004 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005 	kfree_skb(skb);
2006 	return 0;
2007 }
2008 
2009 static int ip6_pkt_discard(struct sk_buff *skb)
2010 {
2011 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012 }
2013 
2014 static int ip6_pkt_discard_out(struct sk_buff *skb)
2015 {
2016 	skb->dev = skb_dst(skb)->dev;
2017 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018 }
2019 
2020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021 
2022 static int ip6_pkt_prohibit(struct sk_buff *skb)
2023 {
2024 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025 }
2026 
2027 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028 {
2029 	skb->dev = skb_dst(skb)->dev;
2030 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031 }
2032 
2033 #endif
2034 
2035 /*
2036  *	Allocate a dst for local (unicast / anycast) address.
2037  */
2038 
2039 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040 				    const struct in6_addr *addr,
2041 				    int anycast)
2042 {
2043 	struct net *net = dev_net(idev->dev);
2044 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045 					    net->loopback_dev, 0);
2046 	struct neighbour *neigh;
2047 
2048 	if (rt == NULL) {
2049 		if (net_ratelimit())
2050 			pr_warning("IPv6:  Maximum number of routes reached,"
2051 				   " consider increasing route/max_size.\n");
2052 		return ERR_PTR(-ENOMEM);
2053 	}
2054 
2055 	in6_dev_hold(idev);
2056 
2057 	rt->dst.flags |= DST_HOST;
2058 	rt->dst.input = ip6_input;
2059 	rt->dst.output = ip6_output;
2060 	rt->rt6i_idev = idev;
2061 	rt->dst.obsolete = -1;
2062 
2063 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064 	if (anycast)
2065 		rt->rt6i_flags |= RTF_ANYCAST;
2066 	else
2067 		rt->rt6i_flags |= RTF_LOCAL;
2068 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069 	if (IS_ERR(neigh)) {
2070 		dst_free(&rt->dst);
2071 
2072 		return ERR_CAST(neigh);
2073 	}
2074 	dst_set_neighbour(&rt->dst, neigh);
2075 
2076 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077 	rt->rt6i_dst.plen = 128;
2078 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079 
2080 	atomic_set(&rt->dst.__refcnt, 1);
2081 
2082 	return rt;
2083 }
2084 
2085 int ip6_route_get_saddr(struct net *net,
2086 			struct rt6_info *rt,
2087 			const struct in6_addr *daddr,
2088 			unsigned int prefs,
2089 			struct in6_addr *saddr)
2090 {
2091 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092 	int err = 0;
2093 	if (rt->rt6i_prefsrc.plen)
2094 		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095 	else
2096 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097 					 daddr, prefs, saddr);
2098 	return err;
2099 }
2100 
2101 /* remove deleted ip from prefsrc entries */
/* Cookie passed to fib6_remove_prefsrc() by rt6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared with each route's prefsrc */
};
2107 
2108 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109 {
2110 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113 
2114 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115 	    rt != net->ipv6.ip6_null_entry &&
2116 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117 		/* remove prefsrc entry */
2118 		rt->rt6i_prefsrc.plen = 0;
2119 	}
2120 	return 0;
2121 }
2122 
2123 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124 {
2125 	struct net *net = dev_net(ifp->idev->dev);
2126 	struct arg_dev_net_ip adni = {
2127 		.dev = ifp->idev->dev,
2128 		.net = net,
2129 		.addr = &ifp->addr,
2130 	};
2131 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132 }
2133 
/* Cookie passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* routes on this device match; NULL matches any */
	struct net *net;	/* namespace whose null entry is never matched */
};
2138 
2139 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140 {
2141 	const struct arg_dev_net *adn = arg;
2142 	const struct net_device *dev = adn->dev;
2143 
2144 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145 	    rt != adn->net->ipv6.ip6_null_entry) {
2146 		RT6_TRACE("deleted by ifdown %p\n", rt);
2147 		return -1;
2148 	}
2149 	return 0;
2150 }
2151 
2152 void rt6_ifdown(struct net *net, struct net_device *dev)
2153 {
2154 	struct arg_dev_net adn = {
2155 		.dev = dev,
2156 		.net = net,
2157 	};
2158 
2159 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160 	icmp6_clean_all(fib6_ifdown, &adn);
2161 }
2162 
/* Cookie passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* only routes on this device are touched */
	unsigned int mtu;	/* value written to RTAX_MTU on matching routes */
};
2168 
2169 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170 {
2171 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172 	struct inet6_dev *idev;
2173 
2174 	/* In IPv6 pmtu discovery is not optional,
2175 	   so that RTAX_MTU lock cannot disable it.
2176 	   We still use this lock to block changes
2177 	   caused by addrconf/ndisc.
2178 	*/
2179 
2180 	idev = __in6_dev_get(arg->dev);
2181 	if (idev == NULL)
2182 		return 0;
2183 
2184 	/* For administrative MTU increase, there is no way to discover
2185 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2186 	   Since RFC 1981 doesn't include administrative MTU increase
2187 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2188 	 */
2189 	/*
2190 	   If new MTU is less than route PMTU, this new MTU will be the
2191 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192 	   decreases; if new MTU is greater than route PMTU, and the
2193 	   old MTU is the lowest MTU in the path, update the route PMTU
2194 	   to reflect the increase. In this case if the other nodes' MTU
2195 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196 	   PMTU discouvery.
2197 	 */
2198 	if (rt->rt6i_dev == arg->dev &&
2199 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201 	     (dst_mtu(&rt->dst) < arg->mtu &&
2202 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204 	}
2205 	return 0;
2206 }
2207 
2208 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209 {
2210 	struct rt6_mtu_change_arg arg = {
2211 		.dev = dev,
2212 		.mtu = mtu,
2213 	};
2214 
2215 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216 }
2217 
2218 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2219 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2220 	[RTA_OIF]               = { .type = NLA_U32 },
2221 	[RTA_IIF]		= { .type = NLA_U32 },
2222 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223 	[RTA_METRICS]           = { .type = NLA_NESTED },
2224 };
2225 
2226 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227 			      struct fib6_config *cfg)
2228 {
2229 	struct rtmsg *rtm;
2230 	struct nlattr *tb[RTA_MAX+1];
2231 	int err;
2232 
2233 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234 	if (err < 0)
2235 		goto errout;
2236 
2237 	err = -EINVAL;
2238 	rtm = nlmsg_data(nlh);
2239 	memset(cfg, 0, sizeof(*cfg));
2240 
2241 	cfg->fc_table = rtm->rtm_table;
2242 	cfg->fc_dst_len = rtm->rtm_dst_len;
2243 	cfg->fc_src_len = rtm->rtm_src_len;
2244 	cfg->fc_flags = RTF_UP;
2245 	cfg->fc_protocol = rtm->rtm_protocol;
2246 
2247 	if (rtm->rtm_type == RTN_UNREACHABLE)
2248 		cfg->fc_flags |= RTF_REJECT;
2249 
2250 	if (rtm->rtm_type == RTN_LOCAL)
2251 		cfg->fc_flags |= RTF_LOCAL;
2252 
2253 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254 	cfg->fc_nlinfo.nlh = nlh;
2255 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256 
2257 	if (tb[RTA_GATEWAY]) {
2258 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259 		cfg->fc_flags |= RTF_GATEWAY;
2260 	}
2261 
2262 	if (tb[RTA_DST]) {
2263 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264 
2265 		if (nla_len(tb[RTA_DST]) < plen)
2266 			goto errout;
2267 
2268 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269 	}
2270 
2271 	if (tb[RTA_SRC]) {
2272 		int plen = (rtm->rtm_src_len + 7) >> 3;
2273 
2274 		if (nla_len(tb[RTA_SRC]) < plen)
2275 			goto errout;
2276 
2277 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278 	}
2279 
2280 	if (tb[RTA_PREFSRC])
2281 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282 
2283 	if (tb[RTA_OIF])
2284 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285 
2286 	if (tb[RTA_PRIORITY])
2287 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288 
2289 	if (tb[RTA_METRICS]) {
2290 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292 	}
2293 
2294 	if (tb[RTA_TABLE])
2295 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296 
2297 	err = 0;
2298 errout:
2299 	return err;
2300 }
2301 
2302 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2303 {
2304 	struct fib6_config cfg;
2305 	int err;
2306 
2307 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308 	if (err < 0)
2309 		return err;
2310 
2311 	return ip6_route_del(&cfg);
2312 }
2313 
2314 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2315 {
2316 	struct fib6_config cfg;
2317 	int err;
2318 
2319 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320 	if (err < 0)
2321 		return err;
2322 
2323 	return ip6_route_add(&cfg);
2324 }
2325 
2326 static inline size_t rt6_nlmsg_size(void)
2327 {
2328 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329 	       + nla_total_size(16) /* RTA_SRC */
2330 	       + nla_total_size(16) /* RTA_DST */
2331 	       + nla_total_size(16) /* RTA_GATEWAY */
2332 	       + nla_total_size(16) /* RTA_PREFSRC */
2333 	       + nla_total_size(4) /* RTA_TABLE */
2334 	       + nla_total_size(4) /* RTA_IIF */
2335 	       + nla_total_size(4) /* RTA_OIF */
2336 	       + nla_total_size(4) /* RTA_PRIORITY */
2337 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2339 }
2340 
2341 static int rt6_fill_node(struct net *net,
2342 			 struct sk_buff *skb, struct rt6_info *rt,
2343 			 struct in6_addr *dst, struct in6_addr *src,
2344 			 int iif, int type, u32 pid, u32 seq,
2345 			 int prefix, int nowait, unsigned int flags)
2346 {
2347 	struct rtmsg *rtm;
2348 	struct nlmsghdr *nlh;
2349 	long expires;
2350 	u32 table;
2351 	struct neighbour *n;
2352 
2353 	if (prefix) {	/* user wants prefix routes only */
2354 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355 			/* success since this is not a prefix route */
2356 			return 1;
2357 		}
2358 	}
2359 
2360 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361 	if (nlh == NULL)
2362 		return -EMSGSIZE;
2363 
2364 	rtm = nlmsg_data(nlh);
2365 	rtm->rtm_family = AF_INET6;
2366 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367 	rtm->rtm_src_len = rt->rt6i_src.plen;
2368 	rtm->rtm_tos = 0;
2369 	if (rt->rt6i_table)
2370 		table = rt->rt6i_table->tb6_id;
2371 	else
2372 		table = RT6_TABLE_UNSPEC;
2373 	rtm->rtm_table = table;
2374 	NLA_PUT_U32(skb, RTA_TABLE, table);
2375 	if (rt->rt6i_flags&RTF_REJECT)
2376 		rtm->rtm_type = RTN_UNREACHABLE;
2377 	else if (rt->rt6i_flags&RTF_LOCAL)
2378 		rtm->rtm_type = RTN_LOCAL;
2379 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380 		rtm->rtm_type = RTN_LOCAL;
2381 	else
2382 		rtm->rtm_type = RTN_UNICAST;
2383 	rtm->rtm_flags = 0;
2384 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385 	rtm->rtm_protocol = rt->rt6i_protocol;
2386 	if (rt->rt6i_flags&RTF_DYNAMIC)
2387 		rtm->rtm_protocol = RTPROT_REDIRECT;
2388 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389 		rtm->rtm_protocol = RTPROT_KERNEL;
2390 	else if (rt->rt6i_flags&RTF_DEFAULT)
2391 		rtm->rtm_protocol = RTPROT_RA;
2392 
2393 	if (rt->rt6i_flags&RTF_CACHE)
2394 		rtm->rtm_flags |= RTM_F_CLONED;
2395 
2396 	if (dst) {
2397 		NLA_PUT(skb, RTA_DST, 16, dst);
2398 		rtm->rtm_dst_len = 128;
2399 	} else if (rtm->rtm_dst_len)
2400 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2401 #ifdef CONFIG_IPV6_SUBTREES
2402 	if (src) {
2403 		NLA_PUT(skb, RTA_SRC, 16, src);
2404 		rtm->rtm_src_len = 128;
2405 	} else if (rtm->rtm_src_len)
2406 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2407 #endif
2408 	if (iif) {
2409 #ifdef CONFIG_IPV6_MROUTE
2410 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412 			if (err <= 0) {
2413 				if (!nowait) {
2414 					if (err == 0)
2415 						return 0;
2416 					goto nla_put_failure;
2417 				} else {
2418 					if (err == -EMSGSIZE)
2419 						goto nla_put_failure;
2420 				}
2421 			}
2422 		} else
2423 #endif
2424 			NLA_PUT_U32(skb, RTA_IIF, iif);
2425 	} else if (dst) {
2426 		struct in6_addr saddr_buf;
2427 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2429 	}
2430 
2431 	if (rt->rt6i_prefsrc.plen) {
2432 		struct in6_addr saddr_buf;
2433 		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2435 	}
2436 
2437 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2438 		goto nla_put_failure;
2439 
2440 	rcu_read_lock();
2441 	n = dst_get_neighbour(&rt->dst);
2442 	if (n)
2443 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444 	rcu_read_unlock();
2445 
2446 	if (rt->dst.dev)
2447 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448 
2449 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2450 
2451 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452 		expires = 0;
2453 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454 		expires = rt->rt6i_expires - jiffies;
2455 	else
2456 		expires = INT_MAX;
2457 
2458 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459 			       expires, rt->dst.error) < 0)
2460 		goto nla_put_failure;
2461 
2462 	return nlmsg_end(skb, nlh);
2463 
2464 nla_put_failure:
2465 	nlmsg_cancel(skb, nlh);
2466 	return -EMSGSIZE;
2467 }
2468 
2469 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2470 {
2471 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472 	int prefix;
2473 
2474 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477 	} else
2478 		prefix = 0;
2479 
2480 	return rt6_fill_node(arg->net,
2481 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483 		     prefix, 0, NLM_F_MULTI);
2484 }
2485 
2486 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2487 {
2488 	struct net *net = sock_net(in_skb->sk);
2489 	struct nlattr *tb[RTA_MAX+1];
2490 	struct rt6_info *rt;
2491 	struct sk_buff *skb;
2492 	struct rtmsg *rtm;
2493 	struct flowi6 fl6;
2494 	int err, iif = 0;
2495 
2496 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497 	if (err < 0)
2498 		goto errout;
2499 
2500 	err = -EINVAL;
2501 	memset(&fl6, 0, sizeof(fl6));
2502 
2503 	if (tb[RTA_SRC]) {
2504 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505 			goto errout;
2506 
2507 		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508 	}
2509 
2510 	if (tb[RTA_DST]) {
2511 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512 			goto errout;
2513 
2514 		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515 	}
2516 
2517 	if (tb[RTA_IIF])
2518 		iif = nla_get_u32(tb[RTA_IIF]);
2519 
2520 	if (tb[RTA_OIF])
2521 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2522 
2523 	if (iif) {
2524 		struct net_device *dev;
2525 		dev = __dev_get_by_index(net, iif);
2526 		if (!dev) {
2527 			err = -ENODEV;
2528 			goto errout;
2529 		}
2530 	}
2531 
2532 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533 	if (skb == NULL) {
2534 		err = -ENOBUFS;
2535 		goto errout;
2536 	}
2537 
2538 	/* Reserve room for dummy headers, this skb can pass
2539 	   through good chunk of routing engine.
2540 	 */
2541 	skb_reset_mac_header(skb);
2542 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543 
2544 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545 	skb_dst_set(skb, &rt->dst);
2546 
2547 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549 			    nlh->nlmsg_seq, 0, 0, 0);
2550 	if (err < 0) {
2551 		kfree_skb(skb);
2552 		goto errout;
2553 	}
2554 
2555 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556 errout:
2557 	return err;
2558 }
2559 
2560 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2561 {
2562 	struct sk_buff *skb;
2563 	struct net *net = info->nl_net;
2564 	u32 seq;
2565 	int err;
2566 
2567 	err = -ENOBUFS;
2568 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569 
2570 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571 	if (skb == NULL)
2572 		goto errout;
2573 
2574 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575 				event, info->pid, seq, 0, 0, 0);
2576 	if (err < 0) {
2577 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578 		WARN_ON(err == -EMSGSIZE);
2579 		kfree_skb(skb);
2580 		goto errout;
2581 	}
2582 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583 		    info->nlh, gfp_any());
2584 	return;
2585 errout:
2586 	if (err < 0)
2587 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588 }
2589 
2590 static int ip6_route_dev_notify(struct notifier_block *this,
2591 				unsigned long event, void *data)
2592 {
2593 	struct net_device *dev = (struct net_device *)data;
2594 	struct net *net = dev_net(dev);
2595 
2596 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2597 		net->ipv6.ip6_null_entry->dst.dev = dev;
2598 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604 #endif
2605 	}
2606 
2607 	return NOTIFY_OK;
2608 }
2609 
2610 /*
2611  *	/proc
2612  */
2613 
2614 #ifdef CONFIG_PROC_FS
2615 
/*
 * Bookkeeping for writing route entries into a caller-supplied buffer.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc handlers below use seq_file); it looks like a
 * leftover from an older buffer-based /proc interface - confirm before
 * removing.  The field meanings are not established by the visible code.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2624 
2625 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626 {
2627 	struct seq_file *m = p_arg;
2628 	struct neighbour *n;
2629 
2630 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631 
2632 #ifdef CONFIG_IPV6_SUBTREES
2633 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634 #else
2635 	seq_puts(m, "00000000000000000000000000000000 00 ");
2636 #endif
2637 	rcu_read_lock();
2638 	n = dst_get_neighbour(&rt->dst);
2639 	if (n) {
2640 		seq_printf(m, "%pi6", n->primary_key);
2641 	} else {
2642 		seq_puts(m, "00000000000000000000000000000000");
2643 	}
2644 	rcu_read_unlock();
2645 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647 		   rt->dst.__use, rt->rt6i_flags,
2648 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649 	return 0;
2650 }
2651 
2652 static int ipv6_route_show(struct seq_file *m, void *v)
2653 {
2654 	struct net *net = (struct net *)m->private;
2655 	fib6_clean_all(net, rt6_info_route, 0, m);
2656 	return 0;
2657 }
2658 
/* open() for /proc/net/ipv6_route: per-namespace single-shot seq_file. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2663 
/* File operations for /proc/net/ipv6_route (registered in ip6_route_net_init). */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2671 
2672 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673 {
2674 	struct net *net = (struct net *)seq->private;
2675 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676 		   net->ipv6.rt6_stats->fib_nodes,
2677 		   net->ipv6.rt6_stats->fib_route_nodes,
2678 		   net->ipv6.rt6_stats->fib_rt_alloc,
2679 		   net->ipv6.rt6_stats->fib_rt_entries,
2680 		   net->ipv6.rt6_stats->fib_rt_cache,
2681 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682 		   net->ipv6.rt6_stats->fib_discarded_routes);
2683 
2684 	return 0;
2685 }
2686 
/* open() for /proc/net/rt6_stats: per-namespace single-shot seq_file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2691 
/* File operations for /proc/net/rt6_stats (registered in ip6_route_net_init). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2699 #endif	/* CONFIG_PROC_FS */
2700 
2701 #ifdef CONFIG_SYSCTL
2702 
2703 static
2704 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706 {
2707 	struct net *net;
2708 	int delay;
2709 	if (!write)
2710 		return -EINVAL;
2711 
2712 	net = (struct net *)ctl->extra1;
2713 	delay = net->ipv6.sysctl.flush_delay;
2714 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716 	return 0;
2717 }
2718 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The .data pointers here refer to init_net (or to the dst ops template);
 * ipv6_route_sysctl_init() duplicates the table and re-points each entry
 * at the target namespace BY INDEX, so the order of the entries below
 * must stay in sync with that function.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* [0] write-only; triggers a GC run via the custom handler */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [3] same variable as [9], handled by proc_dointvec_jiffies */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [9] same variable as [3], handled by proc_dointvec_ms_jiffies */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* terminator */
};
2792 
2793 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794 {
2795 	struct ctl_table *table;
2796 
2797 	table = kmemdup(ipv6_route_table_template,
2798 			sizeof(ipv6_route_table_template),
2799 			GFP_KERNEL);
2800 
2801 	if (table) {
2802 		table[0].data = &net->ipv6.sysctl.flush_delay;
2803 		table[0].extra1 = net;
2804 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2813 	}
2814 
2815 	return table;
2816 }
2817 #endif
2818 
/*
 * Per-namespace initialisation of the IPv6 routing front-end.
 *
 * Copies the dst ops template, allocates the namespace's special route
 * entries (null, and with CONFIG_IPV6_MULTIPLE_TABLES also prohibit and
 * blackhole) from their templates, sets the sysctl defaults and creates
 * the /proc files.
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated so far
 * is released through the unwind labels at the end.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each special entry is its own dst path and uses this namespace's ops. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default values for the route sysctls of this namespace. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	/* NOTE(review): the results of these two calls are not checked. */
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwind: release in reverse order of allocation. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2894 
/*
 * Per-namespace teardown, the counterpart of ip6_route_net_init(): remove
 * the /proc files, free the special route entries and destroy the dst
 * entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2908 
/* Per-namespace init/exit hooks, registered in ip6_route_init(). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2913 
/* Netdevice notifier that binds the special routes to the loopback device. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2918 
/*
 * Module-level initialisation of the IPv6 routing front-end.
 *
 * In order: create the dst slab cache, initialise the blackhole dst
 * counter, register the per-namespace operations, bind init_net's special
 * routes to the loopback device, initialise the FIB, xfrm6 and policy
 * rules, register the rtnetlink route handlers and finally the netdevice
 * notifier.
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * completed are undone through the labels at the end, in reverse order.
 *
 * NOTE(review): the error paths do not drop the in6_dev_get() references
 * taken on the loopback device, and do not unregister the rtnetlink
 * handlers if register_netdevice_notifier() fails - confirm whether that
 * matters for a failure during boot-time init.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	/* Blackhole dsts are allocated from the same slab cache. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	/* Any failed rtnetlink registration is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* Each label undoes the step it is named after, then falls through. */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2990 
/*
 * Undo ip6_route_init(): the calls mirror the init steps in reverse
 * order, ending with the destruction of the dst slab cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3001