xref: /linux/net/ipv6/route.c (revision d39d0ed196aa1685bb24771e92f78633c66ac9cb)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: RDBG() and
 * RT6_TRACE() then expand to printk() calls; at lower levels both
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already carry a
 * nexthop (via rt6_alloc_clone()) instead of returning them directly.
 */
#define CLONE_OFFLINK_ROUTE 0
76 
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void		ip6_dst_destroy(struct dst_entry *);
81 static void		ip6_dst_ifdown(struct dst_entry *,
82 				       struct net_device *dev, int how);
83 static int		 ip6_dst_gc(struct dst_ops *ops);
84 
85 static int		ip6_pkt_discard(struct sk_buff *skb);
86 static int		ip6_pkt_discard_out(struct sk_buff *skb);
87 static void		ip6_link_failure(struct sk_buff *skb);
88 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 					   struct in6_addr *prefix, int prefixlen,
93 					   struct in6_addr *gwaddr, int ifindex,
94 					   unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 					   struct in6_addr *prefix, int prefixlen,
97 					   struct in6_addr *gwaddr, int ifindex);
98 #endif
99 
100 static struct dst_ops ip6_dst_ops_template = {
101 	.family			=	AF_INET6,
102 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
103 	.gc			=	ip6_dst_gc,
104 	.gc_thresh		=	1024,
105 	.check			=	ip6_dst_check,
106 	.destroy		=	ip6_dst_destroy,
107 	.ifdown			=	ip6_dst_ifdown,
108 	.negative_advice	=	ip6_negative_advice,
109 	.link_failure		=	ip6_link_failure,
110 	.update_pmtu		=	ip6_rt_update_pmtu,
111 	.local_out		=	__ip6_local_out,
112 	.entries		=	ATOMIC_INIT(0),
113 };
114 
115 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116 {
117 }
118 
119 static struct dst_ops ip6_dst_blackhole_ops = {
120 	.family			=	AF_INET6,
121 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
122 	.destroy		=	ip6_dst_destroy,
123 	.check			=	ip6_dst_check,
124 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
125 	.entries		=	ATOMIC_INIT(0),
126 };
127 
128 static struct rt6_info ip6_null_entry_template = {
129 	.dst = {
130 		.__refcnt	= ATOMIC_INIT(1),
131 		.__use		= 1,
132 		.obsolete	= -1,
133 		.error		= -ENETUNREACH,
134 		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
135 		.input		= ip6_pkt_discard,
136 		.output		= ip6_pkt_discard_out,
137 	},
138 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
139 	.rt6i_protocol  = RTPROT_KERNEL,
140 	.rt6i_metric	= ~(u32) 0,
141 	.rt6i_ref	= ATOMIC_INIT(1),
142 };
143 
144 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
145 
146 static int ip6_pkt_prohibit(struct sk_buff *skb);
147 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
148 
149 static struct rt6_info ip6_prohibit_entry_template = {
150 	.dst = {
151 		.__refcnt	= ATOMIC_INIT(1),
152 		.__use		= 1,
153 		.obsolete	= -1,
154 		.error		= -EACCES,
155 		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
156 		.input		= ip6_pkt_prohibit,
157 		.output		= ip6_pkt_prohibit_out,
158 	},
159 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
160 	.rt6i_protocol  = RTPROT_KERNEL,
161 	.rt6i_metric	= ~(u32) 0,
162 	.rt6i_ref	= ATOMIC_INIT(1),
163 };
164 
165 static struct rt6_info ip6_blk_hole_entry_template = {
166 	.dst = {
167 		.__refcnt	= ATOMIC_INIT(1),
168 		.__use		= 1,
169 		.obsolete	= -1,
170 		.error		= -EINVAL,
171 		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
172 		.input		= dst_discard,
173 		.output		= dst_discard,
174 	},
175 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
176 	.rt6i_protocol  = RTPROT_KERNEL,
177 	.rt6i_metric	= ~(u32) 0,
178 	.rt6i_ref	= ATOMIC_INIT(1),
179 };
180 
181 #endif
182 
/*
 * Allocate a route entry from @ops.  The result of dst_alloc() is
 * handed back as a struct rt6_info pointer; it is returned unchecked,
 * so callers must test it for NULL.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
188 
189 static void ip6_dst_destroy(struct dst_entry *dst)
190 {
191 	struct rt6_info *rt = (struct rt6_info *)dst;
192 	struct inet6_dev *idev = rt->rt6i_idev;
193 
194 	if (idev != NULL) {
195 		rt->rt6i_idev = NULL;
196 		in6_dev_put(idev);
197 	}
198 }
199 
200 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
201 			   int how)
202 {
203 	struct rt6_info *rt = (struct rt6_info *)dst;
204 	struct inet6_dev *idev = rt->rt6i_idev;
205 	struct net_device *loopback_dev =
206 		dev_net(dev)->loopback_dev;
207 
208 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
209 		struct inet6_dev *loopback_idev =
210 			in6_dev_get(loopback_dev);
211 		if (loopback_idev != NULL) {
212 			rt->rt6i_idev = loopback_idev;
213 			in6_dev_put(idev);
214 		}
215 	}
216 }
217 
218 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219 {
220 	return (rt->rt6i_flags & RTF_EXPIRES &&
221 		time_after(jiffies, rt->rt6i_expires));
222 }
223 
224 static inline int rt6_need_strict(struct in6_addr *daddr)
225 {
226 	return (ipv6_addr_type(daddr) &
227 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
228 }
229 
230 /*
231  *	Route lookup. Any table->tb6_lock is implied.
232  */
233 
234 static inline struct rt6_info *rt6_device_match(struct net *net,
235 						    struct rt6_info *rt,
236 						    struct in6_addr *saddr,
237 						    int oif,
238 						    int flags)
239 {
240 	struct rt6_info *local = NULL;
241 	struct rt6_info *sprt;
242 
243 	if (!oif && ipv6_addr_any(saddr))
244 		goto out;
245 
246 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
247 		struct net_device *dev = sprt->rt6i_dev;
248 
249 		if (oif) {
250 			if (dev->ifindex == oif)
251 				return sprt;
252 			if (dev->flags & IFF_LOOPBACK) {
253 				if (sprt->rt6i_idev == NULL ||
254 				    sprt->rt6i_idev->dev->ifindex != oif) {
255 					if (flags & RT6_LOOKUP_F_IFACE && oif)
256 						continue;
257 					if (local && (!oif ||
258 						      local->rt6i_idev->dev->ifindex == oif))
259 						continue;
260 				}
261 				local = sprt;
262 			}
263 		} else {
264 			if (ipv6_chk_addr(net, saddr, dev,
265 					  flags & RT6_LOOKUP_F_IFACE))
266 				return sprt;
267 		}
268 	}
269 
270 	if (oif) {
271 		if (local)
272 			return local;
273 
274 		if (flags & RT6_LOOKUP_F_IFACE)
275 			return net->ipv6.ip6_null_entry;
276 	}
277 out:
278 	return rt;
279 }
280 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not
 * in a NUD_VALID state and more than rtr_probe_interval has passed
 * since the neighbour entry was last updated, send a neighbour
 * solicitation for it.  The interval check is what rate-limits the
 * probes (the requirement is no more than one per minute).
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *nb;
	struct in6_addr solicited;
	struct in6_addr *tgt;
	int due;

	if (!rt)
		return;

	nb = rt->rt6i_nexthop;
	if (!nb || (nb->nud_state & NUD_VALID))
		return;

	read_lock_bh(&nb->lock);
	/* re-check the state now that the lock is held */
	due = !(nb->nud_state & NUD_VALID) &&
	      time_after(jiffies, nb->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (!due) {
		read_unlock_bh(&nb->lock);
		return;
	}

	nb->updated = jiffies;
	read_unlock_bh(&nb->lock);

	tgt = (struct in6_addr *)&nb->primary_key;
	addrconf_addr_solict_mult(tgt, &solicited);
	ndisc_send_ns(rt->rt6i_dev, NULL, tgt, &solicited, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
315 
316 /*
317  * Default Router Selection (RFC 2461 6.3.6)
318  */
319 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
320 {
321 	struct net_device *dev = rt->rt6i_dev;
322 	if (!oif || dev->ifindex == oif)
323 		return 2;
324 	if ((dev->flags & IFF_LOOPBACK) &&
325 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
326 		return 1;
327 	return 0;
328 }
329 
330 static inline int rt6_check_neigh(struct rt6_info *rt)
331 {
332 	struct neighbour *neigh = rt->rt6i_nexthop;
333 	int m;
334 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
335 	    !(rt->rt6i_flags & RTF_GATEWAY))
336 		m = 1;
337 	else if (neigh) {
338 		read_lock_bh(&neigh->lock);
339 		if (neigh->nud_state & NUD_VALID)
340 			m = 2;
341 #ifdef CONFIG_IPV6_ROUTER_PREF
342 		else if (neigh->nud_state & NUD_FAILED)
343 			m = 0;
344 #endif
345 		else
346 			m = 1;
347 		read_unlock_bh(&neigh->lock);
348 	} else
349 		m = 0;
350 	return m;
351 }
352 
353 static int rt6_score_route(struct rt6_info *rt, int oif,
354 			   int strict)
355 {
356 	int m, n;
357 
358 	m = rt6_check_dev(rt, oif);
359 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
360 		return -1;
361 #ifdef CONFIG_IPV6_ROUTER_PREF
362 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363 #endif
364 	n = rt6_check_neigh(rt);
365 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
366 		return -1;
367 	return m;
368 }
369 
370 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371 				   int *mpri, struct rt6_info *match)
372 {
373 	int m;
374 
375 	if (rt6_check_expired(rt))
376 		goto out;
377 
378 	m = rt6_score_route(rt, oif, strict);
379 	if (m < 0)
380 		goto out;
381 
382 	if (m > *mpri) {
383 		if (strict & RT6_LOOKUP_F_REACHABLE)
384 			rt6_probe(match);
385 		*mpri = m;
386 		match = rt;
387 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
388 		rt6_probe(rt);
389 	}
390 
391 out:
392 	return match;
393 }
394 
395 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396 				     struct rt6_info *rr_head,
397 				     u32 metric, int oif, int strict)
398 {
399 	struct rt6_info *rt, *match;
400 	int mpri = -1;
401 
402 	match = NULL;
403 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
404 	     rt = rt->dst.rt6_next)
405 		match = find_match(rt, oif, strict, &mpri, match);
406 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
407 	     rt = rt->dst.rt6_next)
408 		match = find_match(rt, oif, strict, &mpri, match);
409 
410 	return match;
411 }
412 
413 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414 {
415 	struct rt6_info *match, *rt0;
416 	struct net *net;
417 
418 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
419 		  __func__, fn->leaf, oif);
420 
421 	rt0 = fn->rr_ptr;
422 	if (!rt0)
423 		fn->rr_ptr = rt0 = fn->leaf;
424 
425 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
426 
427 	if (!match &&
428 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
429 		struct rt6_info *next = rt0->dst.rt6_next;
430 
431 		/* no entries matched; do round-robin */
432 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
433 			next = fn->leaf;
434 
435 		if (next != rt0)
436 			fn->rr_ptr = next;
437 	}
438 
439 	RT6_TRACE("%s() => %p\n",
440 		  __func__, match);
441 
442 	net = dev_net(rt0->rt6i_dev);
443 	return (match ? match : net->ipv6.ip6_null_entry);
444 }
445 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option.
 *
 * @dev:    device the option is associated with
 * @opt:    option bytes, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address passed on to the route-info helpers
 *
 * After validation the matching route is looked up.  It is deleted when
 * the lifetime is zero, created when it is missing and the lifetime is
 * non-zero, and otherwise has its preference flags refreshed.  The
 * expiry of the resulting route is then set from the lifetime.
 *
 * Returns 0 on success and -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked;
	struct in6_addr *prefix = &masked;
	struct rt6_info *rt;
	unsigned long lifetime;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* the option's prefix field is used in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime: withdraw an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
519 
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree after a failed match.
 *
 * This macro is not hygienic: it reads and writes the caller's local
 * variables `rt` and `fn`, and it jumps to the caller's labels `out` and
 * `restart`, so every user must provide all four.
 *
 * It only acts when `rt` is the namespace's null entry.  Starting from
 * `fn` it repeatedly:
 *   - jumps to `out` if `fn` is flagged RTN_TL_ROOT;
 *   - otherwise moves to the parent, or, if the parent has a subtree
 *     that is not `fn` itself, looks @saddr up in that subtree;
 *   - jumps to `restart` as soon as the new `fn` is flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
537 
/*
 * Per-table lookup callback passed to fib6_rule_lookup() by rt6_lookup().
 *
 * Finds the FIB node for the flow's destination/source, picks an entry
 * with rt6_device_match() and backtracks towards the root while the
 * result is the null entry.  The whole walk runs under the table's read
 * lock.  The returned route has been passed through dst_use() before
 * the lock is dropped; it may be the null entry.
 *
 * The local names `rt`/`fn` and the labels `restart`/`out` are required
 * by the BACKTRACK() macro.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	/* BACKTRACK() jumps here after moving fn to another node */
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);
out:
	/* reached by fall-through, or from BACKTRACK() at the tree root */
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
557 
558 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
559 			    const struct in6_addr *saddr, int oif, int strict)
560 {
561 	struct flowi fl = {
562 		.oif = oif,
563 		.nl_u = {
564 			.ip6_u = {
565 				.daddr = *daddr,
566 			},
567 		},
568 	};
569 	struct dst_entry *dst;
570 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571 
572 	if (saddr) {
573 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
574 		flags |= RT6_LOOKUP_F_HAS_SADDR;
575 	}
576 
577 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
578 	if (dst->error == 0)
579 		return (struct rt6_info *) dst;
580 
581 	dst_release(dst);
582 
583 	return NULL;
584 }
585 
586 EXPORT_SYMBOL(rt6_lookup);
587 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
593 
594 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 {
596 	int err;
597 	struct fib6_table *table;
598 
599 	table = rt->rt6i_table;
600 	write_lock_bh(&table->tb6_lock);
601 	err = fib6_add(&table->tb6_root, rt, info);
602 	write_unlock_bh(&table->tb6_lock);
603 
604 	return err;
605 }
606 
607 int ip6_ins_rt(struct rt6_info *rt)
608 {
609 	struct nl_info info = {
610 		.nl_net = dev_net(rt->rt6i_dev),
611 	};
612 	return __ip6_ins_rt(rt, &info);
613 }
614 
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 				      struct in6_addr *saddr)
617 {
618 	struct rt6_info *rt;
619 
620 	/*
621 	 *	Clone the route.
622 	 */
623 
624 	rt = ip6_rt_copy(ort);
625 
626 	if (rt) {
627 		struct neighbour *neigh;
628 		int attempts = !in_softirq();
629 
630 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 			if (rt->rt6i_dst.plen != 128 &&
632 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 				rt->rt6i_flags |= RTF_ANYCAST;
634 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
635 		}
636 
637 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
638 		rt->rt6i_dst.plen = 128;
639 		rt->rt6i_flags |= RTF_CACHE;
640 		rt->dst.flags |= DST_HOST;
641 
642 #ifdef CONFIG_IPV6_SUBTREES
643 		if (rt->rt6i_src.plen && saddr) {
644 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 			rt->rt6i_src.plen = 128;
646 		}
647 #endif
648 
649 	retry:
650 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
651 		if (IS_ERR(neigh)) {
652 			struct net *net = dev_net(rt->rt6i_dev);
653 			int saved_rt_min_interval =
654 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
655 			int saved_rt_elasticity =
656 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
657 
658 			if (attempts-- > 0) {
659 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
660 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
661 
662 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
663 
664 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
665 					saved_rt_elasticity;
666 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
667 					saved_rt_min_interval;
668 				goto retry;
669 			}
670 
671 			if (net_ratelimit())
672 				printk(KERN_WARNING
673 				       "Neighbour table overflow.\n");
674 			dst_free(&rt->dst);
675 			return NULL;
676 		}
677 		rt->rt6i_nexthop = neigh;
678 
679 	}
680 
681 	return rt;
682 }
683 
684 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
685 {
686 	struct rt6_info *rt = ip6_rt_copy(ort);
687 	if (rt) {
688 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
689 		rt->rt6i_dst.plen = 128;
690 		rt->rt6i_flags |= RTF_CACHE;
691 		rt->dst.flags |= DST_HOST;
692 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
693 	}
694 	return rt;
695 }
696 
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * @oif is the interface index to match (callers pass fl->iif or
 * fl->oif).  Only RT6_LOOKUP_F_IFACE is taken from @flags.  The first
 * pass additionally requires RT6_LOOKUP_F_REACHABLE unless forwarding
 * is enabled in devconf_all.
 *
 * The returned route always has a reference held for the caller and has
 * its lastuse/__use updated; it may be the namespace's null entry.
 *
 * The local names `rt`/`fn` and the labels `restart`/`out` are required
 * by the BACKTRACK() macro.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl->fl6_src);
	/* null entry or an existing cache entry: nothing to clone */
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* pin the route so it survives dropping the table lock */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		/* use the route as is; the reference taken above is returned */
		goto out2;
#endif
	}

	/* switch from the original route to the clone (or the null entry) */
	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	/* give up retrying and return what we have */
	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/*
	 * If this pass required reachability, repeat the lookup once
	 * without that requirement before accepting the result.
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
769 
770 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
771 					    struct flowi *fl, int flags)
772 {
773 	return ip6_pol_route(net, table, fl->iif, fl, flags);
774 }
775 
776 void ip6_route_input(struct sk_buff *skb)
777 {
778 	struct ipv6hdr *iph = ipv6_hdr(skb);
779 	struct net *net = dev_net(skb->dev);
780 	int flags = RT6_LOOKUP_F_HAS_SADDR;
781 	struct flowi fl = {
782 		.iif = skb->dev->ifindex,
783 		.nl_u = {
784 			.ip6_u = {
785 				.daddr = iph->daddr,
786 				.saddr = iph->saddr,
787 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
788 			},
789 		},
790 		.mark = skb->mark,
791 		.proto = iph->nexthdr,
792 	};
793 
794 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
795 		flags |= RT6_LOOKUP_F_IFACE;
796 
797 	skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
798 }
799 
800 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
801 					     struct flowi *fl, int flags)
802 {
803 	return ip6_pol_route(net, table, fl->oif, fl, flags);
804 }
805 
806 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
807 				    struct flowi *fl)
808 {
809 	int flags = 0;
810 
811 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
812 		flags |= RT6_LOOKUP_F_IFACE;
813 
814 	if (!ipv6_addr_any(&fl->fl6_src))
815 		flags |= RT6_LOOKUP_F_HAS_SADDR;
816 	else if (sk)
817 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
818 
819 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
820 }
821 
822 EXPORT_SYMBOL(ip6_route_output);
823 
824 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
825 {
826 	struct rt6_info *ort = (struct rt6_info *) *dstp;
827 	struct rt6_info *rt = (struct rt6_info *)
828 		dst_alloc(&ip6_dst_blackhole_ops);
829 	struct dst_entry *new = NULL;
830 
831 	if (rt) {
832 		new = &rt->dst;
833 
834 		atomic_set(&new->__refcnt, 1);
835 		new->__use = 1;
836 		new->input = dst_discard;
837 		new->output = dst_discard;
838 
839 		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
840 		new->dev = ort->dst.dev;
841 		if (new->dev)
842 			dev_hold(new->dev);
843 		rt->rt6i_idev = ort->rt6i_idev;
844 		if (rt->rt6i_idev)
845 			in6_dev_hold(rt->rt6i_idev);
846 		rt->rt6i_expires = 0;
847 
848 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
849 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
850 		rt->rt6i_metric = 0;
851 
852 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
853 #ifdef CONFIG_IPV6_SUBTREES
854 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
855 #endif
856 
857 		dst_free(new);
858 	}
859 
860 	dst_release(*dstp);
861 	*dstp = new;
862 	return (new ? 0 : -ENOMEM);
863 }
864 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 
866 /*
867  *	Destination cache support functions
868  */
869 
870 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
871 {
872 	struct rt6_info *rt;
873 
874 	rt = (struct rt6_info *) dst;
875 
876 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
877 		return dst;
878 
879 	return NULL;
880 }
881 
882 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
883 {
884 	struct rt6_info *rt = (struct rt6_info *) dst;
885 
886 	if (rt) {
887 		if (rt->rt6i_flags & RTF_CACHE) {
888 			if (rt6_check_expired(rt)) {
889 				ip6_del_rt(rt);
890 				dst = NULL;
891 			}
892 		} else {
893 			dst_release(dst);
894 			dst = NULL;
895 		}
896 	}
897 	return dst;
898 }
899 
900 static void ip6_link_failure(struct sk_buff *skb)
901 {
902 	struct rt6_info *rt;
903 
904 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
905 
906 	rt = (struct rt6_info *) skb_dst(skb);
907 	if (rt) {
908 		if (rt->rt6i_flags&RTF_CACHE) {
909 			dst_set_expires(&rt->dst, 0);
910 			rt->rt6i_flags |= RTF_EXPIRES;
911 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
912 			rt->rt6i_node->fn_sernum = -1;
913 	}
914 }
915 
916 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
917 {
918 	struct rt6_info *rt6 = (struct rt6_info*)dst;
919 
920 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
921 		rt6->rt6i_flags |= RTF_MODIFIED;
922 		if (mtu < IPV6_MIN_MTU) {
923 			mtu = IPV6_MIN_MTU;
924 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
925 		}
926 		dst->metrics[RTAX_MTU-1] = mtu;
927 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
928 	}
929 }
930 
931 static int ipv6_get_mtu(struct net_device *dev);
932 
933 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
934 {
935 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
936 
937 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
938 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
939 
940 	/*
941 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
942 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
943 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
944 	 * rely only on pmtu discovery"
945 	 */
946 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
947 		mtu = IPV6_MAXPLEN;
948 	return mtu;
949 }
950 
/*
 * Singly linked list (through dst->next) of the entries created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and
 * free entries from it.  All accesses are made under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
953 
954 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
955 				  struct neighbour *neigh,
956 				  const struct in6_addr *addr)
957 {
958 	struct rt6_info *rt;
959 	struct inet6_dev *idev = in6_dev_get(dev);
960 	struct net *net = dev_net(dev);
961 
962 	if (unlikely(idev == NULL))
963 		return NULL;
964 
965 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
966 	if (unlikely(rt == NULL)) {
967 		in6_dev_put(idev);
968 		goto out;
969 	}
970 
971 	dev_hold(dev);
972 	if (neigh)
973 		neigh_hold(neigh);
974 	else {
975 		neigh = ndisc_get_neigh(dev, addr);
976 		if (IS_ERR(neigh))
977 			neigh = NULL;
978 	}
979 
980 	rt->rt6i_dev	  = dev;
981 	rt->rt6i_idev     = idev;
982 	rt->rt6i_nexthop  = neigh;
983 	atomic_set(&rt->dst.__refcnt, 1);
984 	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
985 	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
986 	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
987 	rt->dst.output  = ip6_output;
988 
989 #if 0	/* there's no chance to use these for ndisc */
990 	rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
991 				? DST_HOST
992 				: 0;
993 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
994 	rt->rt6i_dst.plen = 128;
995 #endif
996 
997 	spin_lock_bh(&icmp6_dst_lock);
998 	rt->dst.next = icmp6_dst_gc_list;
999 	icmp6_dst_gc_list = &rt->dst;
1000 	spin_unlock_bh(&icmp6_dst_lock);
1001 
1002 	fib6_force_start_gc(net);
1003 
1004 out:
1005 	return &rt->dst;
1006 }
1007 
1008 int icmp6_dst_gc(void)
1009 {
1010 	struct dst_entry *dst, *next, **pprev;
1011 	int more = 0;
1012 
1013 	next = NULL;
1014 
1015 	spin_lock_bh(&icmp6_dst_lock);
1016 	pprev = &icmp6_dst_gc_list;
1017 
1018 	while ((dst = *pprev) != NULL) {
1019 		if (!atomic_read(&dst->__refcnt)) {
1020 			*pprev = dst->next;
1021 			dst_free(dst);
1022 		} else {
1023 			pprev = &dst->next;
1024 			++more;
1025 		}
1026 	}
1027 
1028 	spin_unlock_bh(&icmp6_dst_lock);
1029 
1030 	return more;
1031 }
1032 
1033 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1034 			    void *arg)
1035 {
1036 	struct dst_entry *dst, **pprev;
1037 
1038 	spin_lock_bh(&icmp6_dst_lock);
1039 	pprev = &icmp6_dst_gc_list;
1040 	while ((dst = *pprev) != NULL) {
1041 		struct rt6_info *rt = (struct rt6_info *) dst;
1042 		if (func(rt, arg)) {
1043 			*pprev = dst->next;
1044 			dst_free(dst);
1045 		} else {
1046 			pprev = &dst->next;
1047 		}
1048 	}
1049 	spin_unlock_bh(&icmp6_dst_lock);
1050 }
1051 
1052 static int ip6_dst_gc(struct dst_ops *ops)
1053 {
1054 	unsigned long now = jiffies;
1055 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1056 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1057 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1058 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1061 
1062 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 	    atomic_read(&ops->entries) <= rt_max_size)
1064 		goto out;
1065 
1066 	net->ipv6.ip6_rt_gc_expire++;
1067 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 	net->ipv6.ip6_rt_last_gc = now;
1069 	if (atomic_read(&ops->entries) < ops->gc_thresh)
1070 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071 out:
1072 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 	return (atomic_read(&ops->entries) > rt_max_size);
1074 }
1075 
/* Clean the host part of a prefix. This is not necessary in a radix
   tree, but it results in cleaner routing tables.

   Remove it only once everything works!
 */
1081 
1082 static int ipv6_get_mtu(struct net_device *dev)
1083 {
1084 	int mtu = IPV6_MIN_MTU;
1085 	struct inet6_dev *idev;
1086 
1087 	rcu_read_lock();
1088 	idev = __in6_dev_get(dev);
1089 	if (idev)
1090 		mtu = idev->cnf.mtu6;
1091 	rcu_read_unlock();
1092 	return mtu;
1093 }
1094 
1095 int ip6_dst_hoplimit(struct dst_entry *dst)
1096 {
1097 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1098 	if (hoplimit < 0) {
1099 		struct net_device *dev = dst->dev;
1100 		struct inet6_dev *idev;
1101 
1102 		rcu_read_lock();
1103 		idev = __in6_dev_get(dev);
1104 		if (idev)
1105 			hoplimit = idev->cnf.hop_limit;
1106 		else
1107 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1108 		rcu_read_unlock();
1109 	}
1110 	return hoplimit;
1111 }
1112 
1113 /*
1114  *
1115  */
1116 
1117 int ip6_route_add(struct fib6_config *cfg)
1118 {
1119 	int err;
1120 	struct net *net = cfg->fc_nlinfo.nl_net;
1121 	struct rt6_info *rt = NULL;
1122 	struct net_device *dev = NULL;
1123 	struct inet6_dev *idev = NULL;
1124 	struct fib6_table *table;
1125 	int addr_type;
1126 
1127 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1128 		return -EINVAL;
1129 #ifndef CONFIG_IPV6_SUBTREES
1130 	if (cfg->fc_src_len)
1131 		return -EINVAL;
1132 #endif
1133 	if (cfg->fc_ifindex) {
1134 		err = -ENODEV;
1135 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1136 		if (!dev)
1137 			goto out;
1138 		idev = in6_dev_get(dev);
1139 		if (!idev)
1140 			goto out;
1141 	}
1142 
1143 	if (cfg->fc_metric == 0)
1144 		cfg->fc_metric = IP6_RT_PRIO_USER;
1145 
1146 	table = fib6_new_table(net, cfg->fc_table);
1147 	if (table == NULL) {
1148 		err = -ENOBUFS;
1149 		goto out;
1150 	}
1151 
1152 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1153 
1154 	if (rt == NULL) {
1155 		err = -ENOMEM;
1156 		goto out;
1157 	}
1158 
1159 	rt->dst.obsolete = -1;
1160 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1161 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1162 				0;
1163 
1164 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1165 		cfg->fc_protocol = RTPROT_BOOT;
1166 	rt->rt6i_protocol = cfg->fc_protocol;
1167 
1168 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1169 
1170 	if (addr_type & IPV6_ADDR_MULTICAST)
1171 		rt->dst.input = ip6_mc_input;
1172 	else
1173 		rt->dst.input = ip6_forward;
1174 
1175 	rt->dst.output = ip6_output;
1176 
1177 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1178 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1179 	if (rt->rt6i_dst.plen == 128)
1180 	       rt->dst.flags = DST_HOST;
1181 
1182 #ifdef CONFIG_IPV6_SUBTREES
1183 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1184 	rt->rt6i_src.plen = cfg->fc_src_len;
1185 #endif
1186 
1187 	rt->rt6i_metric = cfg->fc_metric;
1188 
1189 	/* We cannot add true routes via loopback here,
1190 	   they would result in kernel looping; promote them to reject routes
1191 	 */
1192 	if ((cfg->fc_flags & RTF_REJECT) ||
1193 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1194 		/* hold loopback dev/idev if we haven't done so. */
1195 		if (dev != net->loopback_dev) {
1196 			if (dev) {
1197 				dev_put(dev);
1198 				in6_dev_put(idev);
1199 			}
1200 			dev = net->loopback_dev;
1201 			dev_hold(dev);
1202 			idev = in6_dev_get(dev);
1203 			if (!idev) {
1204 				err = -ENODEV;
1205 				goto out;
1206 			}
1207 		}
1208 		rt->dst.output = ip6_pkt_discard_out;
1209 		rt->dst.input = ip6_pkt_discard;
1210 		rt->dst.error = -ENETUNREACH;
1211 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1212 		goto install_route;
1213 	}
1214 
1215 	if (cfg->fc_flags & RTF_GATEWAY) {
1216 		struct in6_addr *gw_addr;
1217 		int gwa_type;
1218 
1219 		gw_addr = &cfg->fc_gateway;
1220 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1221 		gwa_type = ipv6_addr_type(gw_addr);
1222 
1223 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1224 			struct rt6_info *grt;
1225 
1226 			/* IPv6 strictly inhibits using not link-local
1227 			   addresses as nexthop address.
1228 			   Otherwise, router will not able to send redirects.
1229 			   It is very good, but in some (rare!) circumstances
1230 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1231 			   some exceptions. --ANK
1232 			 */
1233 			err = -EINVAL;
1234 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1235 				goto out;
1236 
1237 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1238 
1239 			err = -EHOSTUNREACH;
1240 			if (grt == NULL)
1241 				goto out;
1242 			if (dev) {
1243 				if (dev != grt->rt6i_dev) {
1244 					dst_release(&grt->dst);
1245 					goto out;
1246 				}
1247 			} else {
1248 				dev = grt->rt6i_dev;
1249 				idev = grt->rt6i_idev;
1250 				dev_hold(dev);
1251 				in6_dev_hold(grt->rt6i_idev);
1252 			}
1253 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1254 				err = 0;
1255 			dst_release(&grt->dst);
1256 
1257 			if (err)
1258 				goto out;
1259 		}
1260 		err = -EINVAL;
1261 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1262 			goto out;
1263 	}
1264 
1265 	err = -ENODEV;
1266 	if (dev == NULL)
1267 		goto out;
1268 
1269 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1270 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1271 		if (IS_ERR(rt->rt6i_nexthop)) {
1272 			err = PTR_ERR(rt->rt6i_nexthop);
1273 			rt->rt6i_nexthop = NULL;
1274 			goto out;
1275 		}
1276 	}
1277 
1278 	rt->rt6i_flags = cfg->fc_flags;
1279 
1280 install_route:
1281 	if (cfg->fc_mx) {
1282 		struct nlattr *nla;
1283 		int remaining;
1284 
1285 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1286 			int type = nla_type(nla);
1287 
1288 			if (type) {
1289 				if (type > RTAX_MAX) {
1290 					err = -EINVAL;
1291 					goto out;
1292 				}
1293 
1294 				rt->dst.metrics[type - 1] = nla_get_u32(nla);
1295 			}
1296 		}
1297 	}
1298 
1299 	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1300 		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1301 	if (!dst_mtu(&rt->dst))
1302 		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1303 	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1304 		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1305 	rt->dst.dev = dev;
1306 	rt->rt6i_idev = idev;
1307 	rt->rt6i_table = table;
1308 
1309 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1310 
1311 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1312 
1313 out:
1314 	if (dev)
1315 		dev_put(dev);
1316 	if (idev)
1317 		in6_dev_put(idev);
1318 	if (rt)
1319 		dst_free(&rt->dst);
1320 	return err;
1321 }
1322 
1323 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1324 {
1325 	int err;
1326 	struct fib6_table *table;
1327 	struct net *net = dev_net(rt->rt6i_dev);
1328 
1329 	if (rt == net->ipv6.ip6_null_entry)
1330 		return -ENOENT;
1331 
1332 	table = rt->rt6i_table;
1333 	write_lock_bh(&table->tb6_lock);
1334 
1335 	err = fib6_del(rt, info);
1336 	dst_release(&rt->dst);
1337 
1338 	write_unlock_bh(&table->tb6_lock);
1339 
1340 	return err;
1341 }
1342 
1343 int ip6_del_rt(struct rt6_info *rt)
1344 {
1345 	struct nl_info info = {
1346 		.nl_net = dev_net(rt->rt6i_dev),
1347 	};
1348 	return __ip6_del_rt(rt, &info);
1349 }
1350 
1351 static int ip6_route_del(struct fib6_config *cfg)
1352 {
1353 	struct fib6_table *table;
1354 	struct fib6_node *fn;
1355 	struct rt6_info *rt;
1356 	int err = -ESRCH;
1357 
1358 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1359 	if (table == NULL)
1360 		return err;
1361 
1362 	read_lock_bh(&table->tb6_lock);
1363 
1364 	fn = fib6_locate(&table->tb6_root,
1365 			 &cfg->fc_dst, cfg->fc_dst_len,
1366 			 &cfg->fc_src, cfg->fc_src_len);
1367 
1368 	if (fn) {
1369 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1370 			if (cfg->fc_ifindex &&
1371 			    (rt->rt6i_dev == NULL ||
1372 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1373 				continue;
1374 			if (cfg->fc_flags & RTF_GATEWAY &&
1375 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1376 				continue;
1377 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1378 				continue;
1379 			dst_hold(&rt->dst);
1380 			read_unlock_bh(&table->tb6_lock);
1381 
1382 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1383 		}
1384 	}
1385 	read_unlock_bh(&table->tb6_lock);
1386 
1387 	return err;
1388 }
1389 
1390 /*
1391  *	Handle redirects
1392  */
/*
 * Flow key used for redirect lookups: a plain flowi extended with the
 * gateway address the redirect has to be validated against.
 *
 * ip6_route_redirect() passes a pointer to this structure cast to
 * struct flowi *, and __ip6_route_redirect() casts it back, so @fl must
 * stay the first member.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* regular lookup key */
	struct in6_addr gateway;	/* gateway a candidate route must use */
};
1397 
1398 static struct rt6_info *__ip6_route_redirect(struct net *net,
1399 					     struct fib6_table *table,
1400 					     struct flowi *fl,
1401 					     int flags)
1402 {
1403 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1404 	struct rt6_info *rt;
1405 	struct fib6_node *fn;
1406 
1407 	/*
1408 	 * Get the "current" route for this destination and
1409 	 * check if the redirect has come from approriate router.
1410 	 *
1411 	 * RFC 2461 specifies that redirects should only be
1412 	 * accepted if they come from the nexthop to the target.
1413 	 * Due to the way the routes are chosen, this notion
1414 	 * is a bit fuzzy and one might need to check all possible
1415 	 * routes.
1416 	 */
1417 
1418 	read_lock_bh(&table->tb6_lock);
1419 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1420 restart:
1421 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1422 		/*
1423 		 * Current route is on-link; redirect is always invalid.
1424 		 *
1425 		 * Seems, previous statement is not true. It could
1426 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1427 		 * But then router serving it might decide, that we should
1428 		 * know truth 8)8) --ANK (980726).
1429 		 */
1430 		if (rt6_check_expired(rt))
1431 			continue;
1432 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1433 			continue;
1434 		if (fl->oif != rt->rt6i_dev->ifindex)
1435 			continue;
1436 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1437 			continue;
1438 		break;
1439 	}
1440 
1441 	if (!rt)
1442 		rt = net->ipv6.ip6_null_entry;
1443 	BACKTRACK(net, &fl->fl6_src);
1444 out:
1445 	dst_hold(&rt->dst);
1446 
1447 	read_unlock_bh(&table->tb6_lock);
1448 
1449 	return rt;
1450 };
1451 
1452 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1453 					   struct in6_addr *src,
1454 					   struct in6_addr *gateway,
1455 					   struct net_device *dev)
1456 {
1457 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1458 	struct net *net = dev_net(dev);
1459 	struct ip6rd_flowi rdfl = {
1460 		.fl = {
1461 			.oif = dev->ifindex,
1462 			.nl_u = {
1463 				.ip6_u = {
1464 					.daddr = *dest,
1465 					.saddr = *src,
1466 				},
1467 			},
1468 		},
1469 	};
1470 
1471 	ipv6_addr_copy(&rdfl.gateway, gateway);
1472 
1473 	if (rt6_need_strict(dest))
1474 		flags |= RT6_LOOKUP_F_IFACE;
1475 
1476 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1477 						   flags, __ip6_route_redirect);
1478 }
1479 
/*
 * rt6_redirect - act on a redirect for @dest.
 *
 * @dest:    destination the redirect applies to
 * @src:     flow source address used for the route lookup
 * @saddr:   address passed as the gateway the current route must use
 *           (presumably the router that sent the redirect)
 * @neigh:   neighbour entry of the new next hop
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the new route must not carry RTF_GATEWAY
 *
 * If the lookup yields the null entry the redirect is rejected (rate
 * limited debug message).  Otherwise the neighbour is updated, the old
 * route is confirmed and a /128 cache route via @neigh is inserted.  A
 * redirect pointing at the neighbour the route already uses is ignored.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	/* Returns a route with a reference held; released at "out". */
	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	/* The copy becomes a dynamic host cache route; on-link targets have no gateway. */
	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
							dst_mtu(&nrt->dst));

	if (ip6_ins_rt(nrt))
		goto out;

	/* Tell netevent listeners which dst replaces which. */
	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/*
		 * The old cache route is superseded.  Return without the
		 * dst_release() below: the delete path (__ip6_del_rt())
		 * drops a reference on rt itself.
		 */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1553 
1554 /*
1555  *	Handle ICMP "packet too big" messages
1556  *	i.e. Path MTU discovery
1557  */
1558 
1559 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1560 			struct net_device *dev, u32 pmtu)
1561 {
1562 	struct rt6_info *rt, *nrt;
1563 	struct net *net = dev_net(dev);
1564 	int allfrag = 0;
1565 
1566 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1567 	if (rt == NULL)
1568 		return;
1569 
1570 	if (pmtu >= dst_mtu(&rt->dst))
1571 		goto out;
1572 
1573 	if (pmtu < IPV6_MIN_MTU) {
1574 		/*
1575 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1576 		 * MTU (1280) and a fragment header should always be included
1577 		 * after a node receiving Too Big message reporting PMTU is
1578 		 * less than the IPv6 Minimum Link MTU.
1579 		 */
1580 		pmtu = IPV6_MIN_MTU;
1581 		allfrag = 1;
1582 	}
1583 
1584 	/* New mtu received -> path was valid.
1585 	   They are sent only in response to data packets,
1586 	   so that this nexthop apparently is reachable. --ANK
1587 	 */
1588 	dst_confirm(&rt->dst);
1589 
1590 	/* Host route. If it is static, it would be better
1591 	   not to override it, but add new one, so that
1592 	   when cache entry will expire old pmtu
1593 	   would return automatically.
1594 	 */
1595 	if (rt->rt6i_flags & RTF_CACHE) {
1596 		rt->dst.metrics[RTAX_MTU-1] = pmtu;
1597 		if (allfrag)
1598 			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1599 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1600 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1601 		goto out;
1602 	}
1603 
1604 	/* Network route.
1605 	   Two cases are possible:
1606 	   1. It is connected route. Action: COW
1607 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1608 	 */
1609 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1610 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1611 	else
1612 		nrt = rt6_alloc_clone(rt, daddr);
1613 
1614 	if (nrt) {
1615 		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
1616 		if (allfrag)
1617 			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1618 
1619 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1620 		 * happened within 5 mins, the recommended timer is 10 mins.
1621 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1622 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1623 		 * and detecting PMTU increase will be automatically happened.
1624 		 */
1625 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1626 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1627 
1628 		ip6_ins_rt(nrt);
1629 	}
1630 out:
1631 	dst_release(&rt->dst);
1632 }
1633 
1634 /*
1635  *	Misc support functions
1636  */
1637 
1638 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1639 {
1640 	struct net *net = dev_net(ort->rt6i_dev);
1641 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1642 
1643 	if (rt) {
1644 		rt->dst.input = ort->dst.input;
1645 		rt->dst.output = ort->dst.output;
1646 
1647 		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1648 		rt->dst.error = ort->dst.error;
1649 		rt->dst.dev = ort->dst.dev;
1650 		if (rt->dst.dev)
1651 			dev_hold(rt->dst.dev);
1652 		rt->rt6i_idev = ort->rt6i_idev;
1653 		if (rt->rt6i_idev)
1654 			in6_dev_hold(rt->rt6i_idev);
1655 		rt->dst.lastuse = jiffies;
1656 		rt->rt6i_expires = 0;
1657 
1658 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1659 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1660 		rt->rt6i_metric = 0;
1661 
1662 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1663 #ifdef CONFIG_IPV6_SUBTREES
1664 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1665 #endif
1666 		rt->rt6i_table = ort->rt6i_table;
1667 	}
1668 	return rt;
1669 }
1670 
1671 #ifdef CONFIG_IPV6_ROUTE_INFO
1672 static struct rt6_info *rt6_get_route_info(struct net *net,
1673 					   struct in6_addr *prefix, int prefixlen,
1674 					   struct in6_addr *gwaddr, int ifindex)
1675 {
1676 	struct fib6_node *fn;
1677 	struct rt6_info *rt = NULL;
1678 	struct fib6_table *table;
1679 
1680 	table = fib6_get_table(net, RT6_TABLE_INFO);
1681 	if (table == NULL)
1682 		return NULL;
1683 
1684 	write_lock_bh(&table->tb6_lock);
1685 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1686 	if (!fn)
1687 		goto out;
1688 
1689 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1690 		if (rt->rt6i_dev->ifindex != ifindex)
1691 			continue;
1692 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1693 			continue;
1694 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1695 			continue;
1696 		dst_hold(&rt->dst);
1697 		break;
1698 	}
1699 out:
1700 	write_unlock_bh(&table->tb6_lock);
1701 	return rt;
1702 }
1703 
1704 static struct rt6_info *rt6_add_route_info(struct net *net,
1705 					   struct in6_addr *prefix, int prefixlen,
1706 					   struct in6_addr *gwaddr, int ifindex,
1707 					   unsigned pref)
1708 {
1709 	struct fib6_config cfg = {
1710 		.fc_table	= RT6_TABLE_INFO,
1711 		.fc_metric	= IP6_RT_PRIO_USER,
1712 		.fc_ifindex	= ifindex,
1713 		.fc_dst_len	= prefixlen,
1714 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1715 				  RTF_UP | RTF_PREF(pref),
1716 		.fc_nlinfo.pid = 0,
1717 		.fc_nlinfo.nlh = NULL,
1718 		.fc_nlinfo.nl_net = net,
1719 	};
1720 
1721 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1722 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1723 
1724 	/* We should treat it as a default route if prefix length is 0. */
1725 	if (!prefixlen)
1726 		cfg.fc_flags |= RTF_DEFAULT;
1727 
1728 	ip6_route_add(&cfg);
1729 
1730 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1731 }
1732 #endif
1733 
1734 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1735 {
1736 	struct rt6_info *rt;
1737 	struct fib6_table *table;
1738 
1739 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1740 	if (table == NULL)
1741 		return NULL;
1742 
1743 	write_lock_bh(&table->tb6_lock);
1744 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1745 		if (dev == rt->rt6i_dev &&
1746 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1747 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1748 			break;
1749 	}
1750 	if (rt)
1751 		dst_hold(&rt->dst);
1752 	write_unlock_bh(&table->tb6_lock);
1753 	return rt;
1754 }
1755 
1756 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1757 				     struct net_device *dev,
1758 				     unsigned int pref)
1759 {
1760 	struct fib6_config cfg = {
1761 		.fc_table	= RT6_TABLE_DFLT,
1762 		.fc_metric	= IP6_RT_PRIO_USER,
1763 		.fc_ifindex	= dev->ifindex,
1764 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1765 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1766 		.fc_nlinfo.pid = 0,
1767 		.fc_nlinfo.nlh = NULL,
1768 		.fc_nlinfo.nl_net = dev_net(dev),
1769 	};
1770 
1771 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1772 
1773 	ip6_route_add(&cfg);
1774 
1775 	return rt6_get_dflt_router(gwaddr, dev);
1776 }
1777 
1778 void rt6_purge_dflt_routers(struct net *net)
1779 {
1780 	struct rt6_info *rt;
1781 	struct fib6_table *table;
1782 
1783 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1784 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1785 	if (table == NULL)
1786 		return;
1787 
1788 restart:
1789 	read_lock_bh(&table->tb6_lock);
1790 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1791 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1792 			dst_hold(&rt->dst);
1793 			read_unlock_bh(&table->tb6_lock);
1794 			ip6_del_rt(rt);
1795 			goto restart;
1796 		}
1797 	}
1798 	read_unlock_bh(&table->tb6_lock);
1799 }
1800 
1801 static void rtmsg_to_fib6_config(struct net *net,
1802 				 struct in6_rtmsg *rtmsg,
1803 				 struct fib6_config *cfg)
1804 {
1805 	memset(cfg, 0, sizeof(*cfg));
1806 
1807 	cfg->fc_table = RT6_TABLE_MAIN;
1808 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1809 	cfg->fc_metric = rtmsg->rtmsg_metric;
1810 	cfg->fc_expires = rtmsg->rtmsg_info;
1811 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1812 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1813 	cfg->fc_flags = rtmsg->rtmsg_flags;
1814 
1815 	cfg->fc_nlinfo.nl_net = net;
1816 
1817 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1818 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1819 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1820 }
1821 
1822 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1823 {
1824 	struct fib6_config cfg;
1825 	struct in6_rtmsg rtmsg;
1826 	int err;
1827 
1828 	switch(cmd) {
1829 	case SIOCADDRT:		/* Add a route */
1830 	case SIOCDELRT:		/* Delete a route */
1831 		if (!capable(CAP_NET_ADMIN))
1832 			return -EPERM;
1833 		err = copy_from_user(&rtmsg, arg,
1834 				     sizeof(struct in6_rtmsg));
1835 		if (err)
1836 			return -EFAULT;
1837 
1838 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1839 
1840 		rtnl_lock();
1841 		switch (cmd) {
1842 		case SIOCADDRT:
1843 			err = ip6_route_add(&cfg);
1844 			break;
1845 		case SIOCDELRT:
1846 			err = ip6_route_del(&cfg);
1847 			break;
1848 		default:
1849 			err = -EINVAL;
1850 		}
1851 		rtnl_unlock();
1852 
1853 		return err;
1854 	}
1855 
1856 	return -EINVAL;
1857 }
1858 
1859 /*
1860  *	Drop the packet on the floor
1861  */
1862 
1863 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1864 {
1865 	int type;
1866 	struct dst_entry *dst = skb_dst(skb);
1867 	switch (ipstats_mib_noroutes) {
1868 	case IPSTATS_MIB_INNOROUTES:
1869 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1870 		if (type == IPV6_ADDR_ANY) {
1871 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1872 				      IPSTATS_MIB_INADDRERRORS);
1873 			break;
1874 		}
1875 		/* FALLTHROUGH */
1876 	case IPSTATS_MIB_OUTNOROUTES:
1877 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1878 			      ipstats_mib_noroutes);
1879 		break;
1880 	}
1881 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1882 	kfree_skb(skb);
1883 	return 0;
1884 }
1885 
1886 static int ip6_pkt_discard(struct sk_buff *skb)
1887 {
1888 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1889 }
1890 
1891 static int ip6_pkt_discard_out(struct sk_buff *skb)
1892 {
1893 	skb->dev = skb_dst(skb)->dev;
1894 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1895 }
1896 
1897 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1898 
1899 static int ip6_pkt_prohibit(struct sk_buff *skb)
1900 {
1901 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1902 }
1903 
1904 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1905 {
1906 	skb->dev = skb_dst(skb)->dev;
1907 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1908 }
1909 
1910 #endif
1911 
1912 /*
1913  *	Allocate a dst for local (unicast / anycast) address.
1914  */
1915 
1916 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1917 				    const struct in6_addr *addr,
1918 				    int anycast)
1919 {
1920 	struct net *net = dev_net(idev->dev);
1921 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1922 	struct neighbour *neigh;
1923 
1924 	if (rt == NULL)
1925 		return ERR_PTR(-ENOMEM);
1926 
1927 	dev_hold(net->loopback_dev);
1928 	in6_dev_hold(idev);
1929 
1930 	rt->dst.flags = DST_HOST;
1931 	rt->dst.input = ip6_input;
1932 	rt->dst.output = ip6_output;
1933 	rt->rt6i_dev = net->loopback_dev;
1934 	rt->rt6i_idev = idev;
1935 	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1936 	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1937 	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1938 	rt->dst.obsolete = -1;
1939 
1940 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1941 	if (anycast)
1942 		rt->rt6i_flags |= RTF_ANYCAST;
1943 	else
1944 		rt->rt6i_flags |= RTF_LOCAL;
1945 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1946 	if (IS_ERR(neigh)) {
1947 		dst_free(&rt->dst);
1948 
1949 		/* We are casting this because that is the return
1950 		 * value type.  But an errno encoded pointer is the
1951 		 * same regardless of the underlying pointer type,
1952 		 * and that's what we are returning.  So this is OK.
1953 		 */
1954 		return (struct rt6_info *) neigh;
1955 	}
1956 	rt->rt6i_nexthop = neigh;
1957 
1958 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1959 	rt->rt6i_dst.plen = 128;
1960 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1961 
1962 	atomic_set(&rt->dst.__refcnt, 1);
1963 
1964 	return rt;
1965 }
1966 
/*
 * Argument bundle handed to fib6_ifdown() through the void *arg of the
 * route walkers called from rt6_ifdown().
 */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every route */
	struct net *net;	/* namespace whose null entry must be kept */
};
1971 
1972 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1973 {
1974 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1975 	struct net *net = ((struct arg_dev_net *)arg)->net;
1976 
1977 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1978 	    rt != net->ipv6.ip6_null_entry) {
1979 		RT6_TRACE("deleted by ifdown %p\n", rt);
1980 		return -1;
1981 	}
1982 	return 0;
1983 }
1984 
1985 void rt6_ifdown(struct net *net, struct net_device *dev)
1986 {
1987 	struct arg_dev_net adn = {
1988 		.dev = dev,
1989 		.net = net,
1990 	};
1991 
1992 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
1993 	icmp6_clean_all(fib6_ifdown, &adn);
1994 }
1995 
/*
 * Argument bundle handed to rt6_mtu_change_route() through the void *
 * argument of fib6_clean_all() in rt6_mtu_change().
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2001 
2002 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2003 {
2004 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2005 	struct inet6_dev *idev;
2006 	struct net *net = dev_net(arg->dev);
2007 
2008 	/* In IPv6 pmtu discovery is not optional,
2009 	   so that RTAX_MTU lock cannot disable it.
2010 	   We still use this lock to block changes
2011 	   caused by addrconf/ndisc.
2012 	*/
2013 
2014 	idev = __in6_dev_get(arg->dev);
2015 	if (idev == NULL)
2016 		return 0;
2017 
2018 	/* For administrative MTU increase, there is no way to discover
2019 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2020 	   Since RFC 1981 doesn't include administrative MTU increase
2021 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2022 	 */
2023 	/*
2024 	   If new MTU is less than route PMTU, this new MTU will be the
2025 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2026 	   decreases; if new MTU is greater than route PMTU, and the
2027 	   old MTU is the lowest MTU in the path, update the route PMTU
2028 	   to reflect the increase. In this case if the other nodes' MTU
2029 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2030 	   PMTU discouvery.
2031 	 */
2032 	if (rt->rt6i_dev == arg->dev &&
2033 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2034 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2035 	     (dst_mtu(&rt->dst) < arg->mtu &&
2036 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2037 		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2038 		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2039 	}
2040 	return 0;
2041 }
2042 
2043 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2044 {
2045 	struct rt6_mtu_change_arg arg = {
2046 		.dev = dev,
2047 		.mtu = mtu,
2048 	};
2049 
2050 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2051 }
2052 
2053 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2054 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2055 	[RTA_OIF]               = { .type = NLA_U32 },
2056 	[RTA_IIF]		= { .type = NLA_U32 },
2057 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2058 	[RTA_METRICS]           = { .type = NLA_NESTED },
2059 };
2060 
2061 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2062 			      struct fib6_config *cfg)
2063 {
2064 	struct rtmsg *rtm;
2065 	struct nlattr *tb[RTA_MAX+1];
2066 	int err;
2067 
2068 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2069 	if (err < 0)
2070 		goto errout;
2071 
2072 	err = -EINVAL;
2073 	rtm = nlmsg_data(nlh);
2074 	memset(cfg, 0, sizeof(*cfg));
2075 
2076 	cfg->fc_table = rtm->rtm_table;
2077 	cfg->fc_dst_len = rtm->rtm_dst_len;
2078 	cfg->fc_src_len = rtm->rtm_src_len;
2079 	cfg->fc_flags = RTF_UP;
2080 	cfg->fc_protocol = rtm->rtm_protocol;
2081 
2082 	if (rtm->rtm_type == RTN_UNREACHABLE)
2083 		cfg->fc_flags |= RTF_REJECT;
2084 
2085 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2086 	cfg->fc_nlinfo.nlh = nlh;
2087 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2088 
2089 	if (tb[RTA_GATEWAY]) {
2090 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2091 		cfg->fc_flags |= RTF_GATEWAY;
2092 	}
2093 
2094 	if (tb[RTA_DST]) {
2095 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2096 
2097 		if (nla_len(tb[RTA_DST]) < plen)
2098 			goto errout;
2099 
2100 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2101 	}
2102 
2103 	if (tb[RTA_SRC]) {
2104 		int plen = (rtm->rtm_src_len + 7) >> 3;
2105 
2106 		if (nla_len(tb[RTA_SRC]) < plen)
2107 			goto errout;
2108 
2109 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2110 	}
2111 
2112 	if (tb[RTA_OIF])
2113 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2114 
2115 	if (tb[RTA_PRIORITY])
2116 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2117 
2118 	if (tb[RTA_METRICS]) {
2119 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2120 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2121 	}
2122 
2123 	if (tb[RTA_TABLE])
2124 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2125 
2126 	err = 0;
2127 errout:
2128 	return err;
2129 }
2130 
2131 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2132 {
2133 	struct fib6_config cfg;
2134 	int err;
2135 
2136 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2137 	if (err < 0)
2138 		return err;
2139 
2140 	return ip6_route_del(&cfg);
2141 }
2142 
2143 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2144 {
2145 	struct fib6_config cfg;
2146 	int err;
2147 
2148 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2149 	if (err < 0)
2150 		return err;
2151 
2152 	return ip6_route_add(&cfg);
2153 }
2154 
2155 static inline size_t rt6_nlmsg_size(void)
2156 {
2157 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2158 	       + nla_total_size(16) /* RTA_SRC */
2159 	       + nla_total_size(16) /* RTA_DST */
2160 	       + nla_total_size(16) /* RTA_GATEWAY */
2161 	       + nla_total_size(16) /* RTA_PREFSRC */
2162 	       + nla_total_size(4) /* RTA_TABLE */
2163 	       + nla_total_size(4) /* RTA_IIF */
2164 	       + nla_total_size(4) /* RTA_OIF */
2165 	       + nla_total_size(4) /* RTA_PRIORITY */
2166 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2167 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2168 }
2169 
/*
 * rt6_fill_node - append one route message describing @rt to @skb.
 *
 * @dst, @src: when non-NULL, reported as /128 RTA_DST / RTA_SRC instead
 *             of the route's own prefix
 * @iif:       when non-zero, reported as RTA_IIF (or handed to the
 *             multicast routing code for multicast destinations)
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the header
 * @prefix:    when non-zero, only RTF_PREFIX_RT routes are reported
 * @nowait:    passed on to ip6mr_get_route()
 *
 * Returns 1 when @prefix filtering skips the route, 0 when
 * ip6mr_get_route() returned 0 with @nowait unset, -EMSGSIZE when the
 * message does not fit (the partial message is cancelled), otherwise
 * the result of nlmsg_end().
 *
 * The NLA_PUT* macros are defined outside this file; they presumably
 * branch to the nla_put_failure label when the attribute does not fit.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes into both the header field and RTA_TABLE. */
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Route flags, checked in this order, override the stored protocol. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst / @src is reported as a host (/128) address. */
	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* With nowait only -EMSGSIZE aborts the message. */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* No input interface: report the source address that would be used for @dst. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
		goto nla_put_failure;

	/* The gateway reported is the key of the route's neighbour entry. */
	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/*
	 * Remaining lifetime in jiffies, capped at INT_MAX; 0 for routes
	 * without RTF_EXPIRES.
	 * NOTE(review): if rt6i_expires is an unsigned jiffies value, a
	 * deadline already in the past wraps to a large difference and is
	 * reported as INT_MAX - confirm this is intended.
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Remove the partially built message from the skb. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2287 
2288 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2289 {
2290 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2291 	int prefix;
2292 
2293 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2294 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2295 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2296 	} else
2297 		prefix = 0;
2298 
2299 	return rt6_fill_node(arg->net,
2300 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2301 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2302 		     prefix, 0, NLM_F_MULTI);
2303 }
2304 
2305 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2306 {
2307 	struct net *net = sock_net(in_skb->sk);
2308 	struct nlattr *tb[RTA_MAX+1];
2309 	struct rt6_info *rt;
2310 	struct sk_buff *skb;
2311 	struct rtmsg *rtm;
2312 	struct flowi fl;
2313 	int err, iif = 0;
2314 
2315 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2316 	if (err < 0)
2317 		goto errout;
2318 
2319 	err = -EINVAL;
2320 	memset(&fl, 0, sizeof(fl));
2321 
2322 	if (tb[RTA_SRC]) {
2323 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2324 			goto errout;
2325 
2326 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2327 	}
2328 
2329 	if (tb[RTA_DST]) {
2330 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2331 			goto errout;
2332 
2333 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2334 	}
2335 
2336 	if (tb[RTA_IIF])
2337 		iif = nla_get_u32(tb[RTA_IIF]);
2338 
2339 	if (tb[RTA_OIF])
2340 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2341 
2342 	if (iif) {
2343 		struct net_device *dev;
2344 		dev = __dev_get_by_index(net, iif);
2345 		if (!dev) {
2346 			err = -ENODEV;
2347 			goto errout;
2348 		}
2349 	}
2350 
2351 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2352 	if (skb == NULL) {
2353 		err = -ENOBUFS;
2354 		goto errout;
2355 	}
2356 
2357 	/* Reserve room for dummy headers, this skb can pass
2358 	   through good chunk of routing engine.
2359 	 */
2360 	skb_reset_mac_header(skb);
2361 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2362 
2363 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2364 	skb_dst_set(skb, &rt->dst);
2365 
2366 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2367 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2368 			    nlh->nlmsg_seq, 0, 0, 0);
2369 	if (err < 0) {
2370 		kfree_skb(skb);
2371 		goto errout;
2372 	}
2373 
2374 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2375 errout:
2376 	return err;
2377 }
2378 
2379 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2380 {
2381 	struct sk_buff *skb;
2382 	struct net *net = info->nl_net;
2383 	u32 seq;
2384 	int err;
2385 
2386 	err = -ENOBUFS;
2387 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2388 
2389 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2390 	if (skb == NULL)
2391 		goto errout;
2392 
2393 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2394 				event, info->pid, seq, 0, 0, 0);
2395 	if (err < 0) {
2396 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2397 		WARN_ON(err == -EMSGSIZE);
2398 		kfree_skb(skb);
2399 		goto errout;
2400 	}
2401 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2402 		    info->nlh, gfp_any());
2403 	return;
2404 errout:
2405 	if (err < 0)
2406 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2407 }
2408 
2409 static int ip6_route_dev_notify(struct notifier_block *this,
2410 				unsigned long event, void *data)
2411 {
2412 	struct net_device *dev = (struct net_device *)data;
2413 	struct net *net = dev_net(dev);
2414 
2415 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2416 		net->ipv6.ip6_null_entry->dst.dev = dev;
2417 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2418 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2419 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2420 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2421 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2422 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2423 #endif
2424 	}
2425 
2426 	return NOTIFY_OK;
2427 }
2428 
2429 /*
2430  *	/proc
2431  */
2432 
2433 #ifdef CONFIG_PROC_FS
2434 
/*
 * NOTE(review): presumably the length of one formatted /proc route line;
 * neither this macro nor struct rt6_proc_arg is referenced by the
 * seq_file based code visible in this part of the file - confirm before
 * relying on either.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2445 
2446 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2447 {
2448 	struct seq_file *m = p_arg;
2449 
2450 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2451 
2452 #ifdef CONFIG_IPV6_SUBTREES
2453 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2454 #else
2455 	seq_puts(m, "00000000000000000000000000000000 00 ");
2456 #endif
2457 
2458 	if (rt->rt6i_nexthop) {
2459 		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2460 	} else {
2461 		seq_puts(m, "00000000000000000000000000000000");
2462 	}
2463 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2464 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2465 		   rt->dst.__use, rt->rt6i_flags,
2466 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2467 	return 0;
2468 }
2469 
2470 static int ipv6_route_show(struct seq_file *m, void *v)
2471 {
2472 	struct net *net = (struct net *)m->private;
2473 	fib6_clean_all(net, rt6_info_route, 0, m);
2474 	return 0;
2475 }
2476 
2477 static int ipv6_route_open(struct inode *inode, struct file *file)
2478 {
2479 	return single_open_net(inode, file, ipv6_route_show);
2480 }
2481 
2482 static const struct file_operations ipv6_route_proc_fops = {
2483 	.owner		= THIS_MODULE,
2484 	.open		= ipv6_route_open,
2485 	.read		= seq_read,
2486 	.llseek		= seq_lseek,
2487 	.release	= single_release_net,
2488 };
2489 
2490 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2491 {
2492 	struct net *net = (struct net *)seq->private;
2493 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2494 		   net->ipv6.rt6_stats->fib_nodes,
2495 		   net->ipv6.rt6_stats->fib_route_nodes,
2496 		   net->ipv6.rt6_stats->fib_rt_alloc,
2497 		   net->ipv6.rt6_stats->fib_rt_entries,
2498 		   net->ipv6.rt6_stats->fib_rt_cache,
2499 		   atomic_read(&net->ipv6.ip6_dst_ops.entries),
2500 		   net->ipv6.rt6_stats->fib_discarded_routes);
2501 
2502 	return 0;
2503 }
2504 
2505 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2506 {
2507 	return single_open_net(inode, file, rt6_stats_seq_show);
2508 }
2509 
2510 static const struct file_operations rt6_stats_seq_fops = {
2511 	.owner	 = THIS_MODULE,
2512 	.open	 = rt6_stats_seq_open,
2513 	.read	 = seq_read,
2514 	.llseek	 = seq_lseek,
2515 	.release = single_release_net,
2516 };
2517 #endif	/* CONFIG_PROC_FS */
2518 
2519 #ifdef CONFIG_SYSCTL
2520 
2521 static
2522 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2523 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2524 {
2525 	struct net *net = current->nsproxy->net_ns;
2526 	int delay = net->ipv6.sysctl.flush_delay;
2527 	if (write) {
2528 		proc_dointvec(ctl, write, buffer, lenp, ppos);
2529 		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2530 		return 0;
2531 	} else
2532 		return -EINVAL;
2533 }
2534 
2535 ctl_table ipv6_route_table_template[] = {
2536 	{
2537 		.procname	=	"flush",
2538 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2539 		.maxlen		=	sizeof(int),
2540 		.mode		=	0200,
2541 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2542 	},
2543 	{
2544 		.procname	=	"gc_thresh",
2545 		.data		=	&ip6_dst_ops_template.gc_thresh,
2546 		.maxlen		=	sizeof(int),
2547 		.mode		=	0644,
2548 		.proc_handler	=	proc_dointvec,
2549 	},
2550 	{
2551 		.procname	=	"max_size",
2552 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2553 		.maxlen		=	sizeof(int),
2554 		.mode		=	0644,
2555 		.proc_handler	=	proc_dointvec,
2556 	},
2557 	{
2558 		.procname	=	"gc_min_interval",
2559 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2560 		.maxlen		=	sizeof(int),
2561 		.mode		=	0644,
2562 		.proc_handler	=	proc_dointvec_jiffies,
2563 	},
2564 	{
2565 		.procname	=	"gc_timeout",
2566 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2567 		.maxlen		=	sizeof(int),
2568 		.mode		=	0644,
2569 		.proc_handler	=	proc_dointvec_jiffies,
2570 	},
2571 	{
2572 		.procname	=	"gc_interval",
2573 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2574 		.maxlen		=	sizeof(int),
2575 		.mode		=	0644,
2576 		.proc_handler	=	proc_dointvec_jiffies,
2577 	},
2578 	{
2579 		.procname	=	"gc_elasticity",
2580 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2581 		.maxlen		=	sizeof(int),
2582 		.mode		=	0644,
2583 		.proc_handler	=	proc_dointvec,
2584 	},
2585 	{
2586 		.procname	=	"mtu_expires",
2587 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2588 		.maxlen		=	sizeof(int),
2589 		.mode		=	0644,
2590 		.proc_handler	=	proc_dointvec_jiffies,
2591 	},
2592 	{
2593 		.procname	=	"min_adv_mss",
2594 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2595 		.maxlen		=	sizeof(int),
2596 		.mode		=	0644,
2597 		.proc_handler	=	proc_dointvec,
2598 	},
2599 	{
2600 		.procname	=	"gc_min_interval_ms",
2601 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2602 		.maxlen		=	sizeof(int),
2603 		.mode		=	0644,
2604 		.proc_handler	=	proc_dointvec_ms_jiffies,
2605 	},
2606 	{ }
2607 };
2608 
2609 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2610 {
2611 	struct ctl_table *table;
2612 
2613 	table = kmemdup(ipv6_route_table_template,
2614 			sizeof(ipv6_route_table_template),
2615 			GFP_KERNEL);
2616 
2617 	if (table) {
2618 		table[0].data = &net->ipv6.sysctl.flush_delay;
2619 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2620 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2621 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2622 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2623 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2624 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2625 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2626 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2627 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2628 	}
2629 
2630 	return table;
2631 }
2632 #endif
2633 
2634 static int __net_init ip6_route_net_init(struct net *net)
2635 {
2636 	int ret = -ENOMEM;
2637 
2638 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2639 	       sizeof(net->ipv6.ip6_dst_ops));
2640 
2641 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2642 					   sizeof(*net->ipv6.ip6_null_entry),
2643 					   GFP_KERNEL);
2644 	if (!net->ipv6.ip6_null_entry)
2645 		goto out_ip6_dst_ops;
2646 	net->ipv6.ip6_null_entry->dst.path =
2647 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2648 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2649 
2650 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2651 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2652 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2653 					       GFP_KERNEL);
2654 	if (!net->ipv6.ip6_prohibit_entry)
2655 		goto out_ip6_null_entry;
2656 	net->ipv6.ip6_prohibit_entry->dst.path =
2657 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2658 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2659 
2660 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2661 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2662 					       GFP_KERNEL);
2663 	if (!net->ipv6.ip6_blk_hole_entry)
2664 		goto out_ip6_prohibit_entry;
2665 	net->ipv6.ip6_blk_hole_entry->dst.path =
2666 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2667 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2668 #endif
2669 
2670 	net->ipv6.sysctl.flush_delay = 0;
2671 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2672 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2673 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2674 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2675 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2676 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2677 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2678 
2679 #ifdef CONFIG_PROC_FS
2680 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2681 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2682 #endif
2683 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2684 
2685 	ret = 0;
2686 out:
2687 	return ret;
2688 
2689 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2690 out_ip6_prohibit_entry:
2691 	kfree(net->ipv6.ip6_prohibit_entry);
2692 out_ip6_null_entry:
2693 	kfree(net->ipv6.ip6_null_entry);
2694 #endif
2695 out_ip6_dst_ops:
2696 	goto out;
2697 }
2698 
2699 static void __net_exit ip6_route_net_exit(struct net *net)
2700 {
2701 #ifdef CONFIG_PROC_FS
2702 	proc_net_remove(net, "ipv6_route");
2703 	proc_net_remove(net, "rt6_stats");
2704 #endif
2705 	kfree(net->ipv6.ip6_null_entry);
2706 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2707 	kfree(net->ipv6.ip6_prohibit_entry);
2708 	kfree(net->ipv6.ip6_blk_hole_entry);
2709 #endif
2710 }
2711 
2712 static struct pernet_operations ip6_route_net_ops = {
2713 	.init = ip6_route_net_init,
2714 	.exit = ip6_route_net_exit,
2715 };
2716 
2717 static struct notifier_block ip6_route_dev_notifier = {
2718 	.notifier_call = ip6_route_dev_notify,
2719 	.priority = 0,
2720 };
2721 
2722 int __init ip6_route_init(void)
2723 {
2724 	int ret;
2725 
2726 	ret = -ENOMEM;
2727 	ip6_dst_ops_template.kmem_cachep =
2728 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2729 				  SLAB_HWCACHE_ALIGN, NULL);
2730 	if (!ip6_dst_ops_template.kmem_cachep)
2731 		goto out;
2732 
2733 	ret = register_pernet_subsys(&ip6_route_net_ops);
2734 	if (ret)
2735 		goto out_kmem_cache;
2736 
2737 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2738 
2739 	/* Registering of the loopback is done before this portion of code,
2740 	 * the loopback reference in rt6_info will not be taken, do it
2741 	 * manually for init_net */
2742 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2743 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2744   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2745 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2746 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2747 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2748 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2749   #endif
2750 	ret = fib6_init();
2751 	if (ret)
2752 		goto out_register_subsys;
2753 
2754 	ret = xfrm6_init();
2755 	if (ret)
2756 		goto out_fib6_init;
2757 
2758 	ret = fib6_rules_init();
2759 	if (ret)
2760 		goto xfrm6_init;
2761 
2762 	ret = -ENOBUFS;
2763 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2764 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2765 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2766 		goto fib6_rules_init;
2767 
2768 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2769 	if (ret)
2770 		goto fib6_rules_init;
2771 
2772 out:
2773 	return ret;
2774 
2775 fib6_rules_init:
2776 	fib6_rules_cleanup();
2777 xfrm6_init:
2778 	xfrm6_fini();
2779 out_fib6_init:
2780 	fib6_gc_cleanup();
2781 out_register_subsys:
2782 	unregister_pernet_subsys(&ip6_route_net_ops);
2783 out_kmem_cache:
2784 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2785 	goto out;
2786 }
2787 
2788 void ip6_route_cleanup(void)
2789 {
2790 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2791 	fib6_rules_cleanup();
2792 	xfrm6_fini();
2793 	fib6_gc_cleanup();
2794 	unregister_pernet_subsys(&ip6_route_net_ops);
2795 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2796 }
2797