xref: /linux/net/ipv6/route.c (revision ba6e8564f459211117ce300eae2c7fdd23befe34)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
66 /* Set to 3 to get tracing. */
67 #define RT6_DEBUG 2
68 
69 #if RT6_DEBUG >= 3
70 #define RDBG(x) printk x
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
76 
77 #define CLONE_OFFLINK_ROUTE 0
78 
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 static struct dst_ops ip6_dst_ops = {
109 	.family			=	AF_INET6,
110 	.protocol		=	__constant_htons(ETH_P_IPV6),
111 	.gc			=	ip6_dst_gc,
112 	.gc_thresh		=	1024,
113 	.check			=	ip6_dst_check,
114 	.destroy		=	ip6_dst_destroy,
115 	.ifdown			=	ip6_dst_ifdown,
116 	.negative_advice	=	ip6_negative_advice,
117 	.link_failure		=	ip6_link_failure,
118 	.update_pmtu		=	ip6_rt_update_pmtu,
119 	.entry_size		=	sizeof(struct rt6_info),
120 };
121 
122 struct rt6_info ip6_null_entry = {
123 	.u = {
124 		.dst = {
125 			.__refcnt	= ATOMIC_INIT(1),
126 			.__use		= 1,
127 			.dev		= &loopback_dev,
128 			.obsolete	= -1,
129 			.error		= -ENETUNREACH,
130 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
131 			.input		= ip6_pkt_discard,
132 			.output		= ip6_pkt_discard_out,
133 			.ops		= &ip6_dst_ops,
134 			.path		= (struct dst_entry*)&ip6_null_entry,
135 		}
136 	},
137 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
138 	.rt6i_metric	= ~(u32) 0,
139 	.rt6i_ref	= ATOMIC_INIT(1),
140 };
141 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static reject route whose dst error is -EACCES and whose handlers are
 * the ip6_pkt_prohibit{,_out} functions.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry *)&ip6_prohibit_entry,
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Static reject route whose dst error is -EINVAL; both directions use
 * ip6_pkt_blk_hole as the handler.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry *)&ip6_blk_hole_entry,
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
189 
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195 
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198 	struct rt6_info *rt = (struct rt6_info *)dst;
199 	struct inet6_dev *idev = rt->rt6i_idev;
200 
201 	if (idev != NULL) {
202 		rt->rt6i_idev = NULL;
203 		in6_dev_put(idev);
204 	}
205 }
206 
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 			   int how)
209 {
210 	struct rt6_info *rt = (struct rt6_info *)dst;
211 	struct inet6_dev *idev = rt->rt6i_idev;
212 
213 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 		if (loopback_idev != NULL) {
216 			rt->rt6i_idev = loopback_idev;
217 			in6_dev_put(idev);
218 		}
219 	}
220 }
221 
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 	return (rt->rt6i_flags & RTF_EXPIRES &&
225 		time_after(jiffies, rt->rt6i_expires));
226 }
227 
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 	return (ipv6_addr_type(daddr) &
231 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233 
234 /*
235  *	Route lookup. Any table->tb6_lock is implied.
236  */
237 
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 						    int oif,
240 						    int strict)
241 {
242 	struct rt6_info *local = NULL;
243 	struct rt6_info *sprt;
244 
245 	if (oif) {
246 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
247 			struct net_device *dev = sprt->rt6i_dev;
248 			if (dev->ifindex == oif)
249 				return sprt;
250 			if (dev->flags & IFF_LOOPBACK) {
251 				if (sprt->rt6i_idev == NULL ||
252 				    sprt->rt6i_idev->dev->ifindex != oif) {
253 					if (strict && oif)
254 						continue;
255 					if (local && (!oif ||
256 						      local->rt6i_idev->dev->ifindex == oif))
257 						continue;
258 				}
259 				local = sprt;
260 			}
261 		}
262 
263 		if (local)
264 			return local;
265 
266 		if (strict)
267 			return &ip6_null_entry;
268 	}
269 	return rt;
270 }
271 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation for the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state
 * and more than rtr_probe_interval jiffies have passed since the entry
 * was last updated.  Does nothing for a NULL @rt or a route without a
 * next hop.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr *target;
	struct in6_addr mcaddr;
	int due;

	/* Cheap unlocked test first. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;	/* restart the rate-limit window */
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* Solicit the neighbour through its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
307 
308 /*
309  * Default Router Selection (RFC 2461 6.3.6)
310  */
311 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313 	struct net_device *dev = rt->rt6i_dev;
314 	int ret = 0;
315 
316 	if (!oif)
317 		return 2;
318 	if (dev->flags & IFF_LOOPBACK) {
319 		if (!WARN_ON(rt->rt6i_idev == NULL) &&
320 		    rt->rt6i_idev->dev->ifindex == oif)
321 			ret = 1;
322 		else
323 			return 0;
324 	}
325 	if (dev->ifindex == oif)
326 		return 2;
327 
328 	return ret;
329 }
330 
331 static inline int rt6_check_neigh(struct rt6_info *rt)
332 {
333 	struct neighbour *neigh = rt->rt6i_nexthop;
334 	int m = 0;
335 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
336 	    !(rt->rt6i_flags & RTF_GATEWAY))
337 		m = 1;
338 	else if (neigh) {
339 		read_lock_bh(&neigh->lock);
340 		if (neigh->nud_state & NUD_VALID)
341 			m = 2;
342 		else if (!(neigh->nud_state & NUD_FAILED))
343 			m = 1;
344 		read_unlock_bh(&neigh->lock);
345 	}
346 	return m;
347 }
348 
349 static int rt6_score_route(struct rt6_info *rt, int oif,
350 			   int strict)
351 {
352 	int m, n;
353 
354 	m = rt6_check_dev(rt, oif);
355 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
356 		return -1;
357 #ifdef CONFIG_IPV6_ROUTER_PREF
358 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359 #endif
360 	n = rt6_check_neigh(rt);
361 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
362 		return -1;
363 	return m;
364 }
365 
366 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
367 				   int *mpri, struct rt6_info *match)
368 {
369 	int m;
370 
371 	if (rt6_check_expired(rt))
372 		goto out;
373 
374 	m = rt6_score_route(rt, oif, strict);
375 	if (m < 0)
376 		goto out;
377 
378 	if (m > *mpri) {
379 		if (strict & RT6_LOOKUP_F_REACHABLE)
380 			rt6_probe(match);
381 		*mpri = m;
382 		match = rt;
383 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
384 		rt6_probe(rt);
385 	}
386 
387 out:
388 	return match;
389 }
390 
391 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
392 				     struct rt6_info *rr_head,
393 				     u32 metric, int oif, int strict)
394 {
395 	struct rt6_info *rt, *match;
396 	int mpri = -1;
397 
398 	match = NULL;
399 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
400 	     rt = rt->u.dst.rt6_next)
401 		match = find_match(rt, oif, strict, &mpri, match);
402 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
403 	     rt = rt->u.dst.rt6_next)
404 		match = find_match(rt, oif, strict, &mpri, match);
405 
406 	return match;
407 }
408 
409 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
410 {
411 	struct rt6_info *match, *rt0;
412 
413 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
414 		  __FUNCTION__, fn->leaf, oif);
415 
416 	rt0 = fn->rr_ptr;
417 	if (!rt0)
418 		fn->rr_ptr = rt0 = fn->leaf;
419 
420 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
421 
422 	if (!match &&
423 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
424 		struct rt6_info *next = rt0->u.dst.rt6_next;
425 
426 		/* no entries matched; do round-robin */
427 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
428 			next = fn->leaf;
429 
430 		if (next != rt0)
431 			fn->rr_ptr = next;
432 	}
433 
434 	RT6_TRACE("%s() => %p\n",
435 		  __FUNCTION__, match);
436 
437 	return (match ? match : &ip6_null_entry);
438 }
439 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one route information option received on @dev.
 *
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that advertised the option
 *
 * Returns -EINVAL for a truncated or inconsistent option, 0 otherwise.
 * A zero lifetime deletes an existing route; a non-zero lifetime adds
 * the route if it is missing, or refreshes the preference and expiry
 * of the existing one.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct rt6_info *rt;
	unsigned int pref;
	u32 lifetime;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* 0xffffffff means infinity; clamp anything else that would overflow. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt) {
		if (!lifetime) {
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
517 
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed match.
 *
 * Expands inside a function that has locals `rt` and `fn` and labels
 * `out` and `restart`.  When rt is &ip6_null_entry it walks from fn
 * towards the root: at each step it moves to the parent, or looks up
 * @saddr in the parent's subtree when that subtree exists and is not
 * the node just left.  It jumps to `restart` at the first node that
 * has RTN_RTINFO set and to `out` once a node flagged RTN_TL_ROOT is
 * reached.  When rt is anything else the macro does nothing.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) { \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			} else { \
				fn = pn; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
535 
536 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
537 					     struct flowi *fl, int flags)
538 {
539 	struct fib6_node *fn;
540 	struct rt6_info *rt;
541 
542 	read_lock_bh(&table->tb6_lock);
543 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
544 restart:
545 	rt = fn->leaf;
546 	rt = rt6_device_match(rt, fl->oif, flags);
547 	BACKTRACK(&fl->fl6_src);
548 out:
549 	dst_hold(&rt->u.dst);
550 	read_unlock_bh(&table->tb6_lock);
551 
552 	rt->u.dst.lastuse = jiffies;
553 	rt->u.dst.__use++;
554 
555 	return rt;
556 
557 }
558 
559 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
560 			    int oif, int strict)
561 {
562 	struct flowi fl = {
563 		.oif = oif,
564 		.nl_u = {
565 			.ip6_u = {
566 				.daddr = *daddr,
567 			},
568 		},
569 	};
570 	struct dst_entry *dst;
571 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
572 
573 	if (saddr) {
574 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
575 		flags |= RT6_LOOKUP_F_HAS_SADDR;
576 	}
577 
578 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
579 	if (dst->error == 0)
580 		return (struct rt6_info *) dst;
581 
582 	dst_release(dst);
583 
584 	return NULL;
585 }
586 
587 /* ip6_ins_rt is called with FREE table->tb6_lock.
588    It takes new route entry, the addition fails by any reason the
589    route is freed. In any case, if caller does not hold it, it may
590    be destroyed.
591  */
592 
593 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
594 {
595 	int err;
596 	struct fib6_table *table;
597 
598 	table = rt->rt6i_table;
599 	write_lock_bh(&table->tb6_lock);
600 	err = fib6_add(&table->tb6_root, rt, info);
601 	write_unlock_bh(&table->tb6_lock);
602 
603 	return err;
604 }
605 
606 int ip6_ins_rt(struct rt6_info *rt)
607 {
608 	return __ip6_ins_rt(rt, NULL);
609 }
610 
611 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
612 				      struct in6_addr *saddr)
613 {
614 	struct rt6_info *rt;
615 
616 	/*
617 	 *	Clone the route.
618 	 */
619 
620 	rt = ip6_rt_copy(ort);
621 
622 	if (rt) {
623 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
624 			if (rt->rt6i_dst.plen != 128 &&
625 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
626 				rt->rt6i_flags |= RTF_ANYCAST;
627 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
628 		}
629 
630 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
631 		rt->rt6i_dst.plen = 128;
632 		rt->rt6i_flags |= RTF_CACHE;
633 		rt->u.dst.flags |= DST_HOST;
634 
635 #ifdef CONFIG_IPV6_SUBTREES
636 		if (rt->rt6i_src.plen && saddr) {
637 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
638 			rt->rt6i_src.plen = 128;
639 		}
640 #endif
641 
642 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
643 
644 	}
645 
646 	return rt;
647 }
648 
649 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
650 {
651 	struct rt6_info *rt = ip6_rt_copy(ort);
652 	if (rt) {
653 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
654 		rt->rt6i_dst.plen = 128;
655 		rt->rt6i_flags |= RTF_CACHE;
656 		rt->u.dst.flags |= DST_HOST;
657 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
658 	}
659 	return rt;
660 }
661 
662 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
663 					    struct flowi *fl, int flags)
664 {
665 	struct fib6_node *fn;
666 	struct rt6_info *rt, *nrt;
667 	int strict = 0;
668 	int attempts = 3;
669 	int err;
670 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
671 
672 	strict |= flags & RT6_LOOKUP_F_IFACE;
673 
674 relookup:
675 	read_lock_bh(&table->tb6_lock);
676 
677 restart_2:
678 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
679 
680 restart:
681 	rt = rt6_select(fn, fl->iif, strict | reachable);
682 	BACKTRACK(&fl->fl6_src);
683 	if (rt == &ip6_null_entry ||
684 	    rt->rt6i_flags & RTF_CACHE)
685 		goto out;
686 
687 	dst_hold(&rt->u.dst);
688 	read_unlock_bh(&table->tb6_lock);
689 
690 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
691 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
692 	else {
693 #if CLONE_OFFLINK_ROUTE
694 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
695 #else
696 		goto out2;
697 #endif
698 	}
699 
700 	dst_release(&rt->u.dst);
701 	rt = nrt ? : &ip6_null_entry;
702 
703 	dst_hold(&rt->u.dst);
704 	if (nrt) {
705 		err = ip6_ins_rt(nrt);
706 		if (!err)
707 			goto out2;
708 	}
709 
710 	if (--attempts <= 0)
711 		goto out2;
712 
713 	/*
714 	 * Race condition! In the gap, when table->tb6_lock was
715 	 * released someone could insert this route.  Relookup.
716 	 */
717 	dst_release(&rt->u.dst);
718 	goto relookup;
719 
720 out:
721 	if (reachable) {
722 		reachable = 0;
723 		goto restart_2;
724 	}
725 	dst_hold(&rt->u.dst);
726 	read_unlock_bh(&table->tb6_lock);
727 out2:
728 	rt->u.dst.lastuse = jiffies;
729 	rt->u.dst.__use++;
730 
731 	return rt;
732 }
733 
734 void ip6_route_input(struct sk_buff *skb)
735 {
736 	struct ipv6hdr *iph = skb->nh.ipv6h;
737 	int flags = RT6_LOOKUP_F_HAS_SADDR;
738 	struct flowi fl = {
739 		.iif = skb->dev->ifindex,
740 		.nl_u = {
741 			.ip6_u = {
742 				.daddr = iph->daddr,
743 				.saddr = iph->saddr,
744 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
745 			},
746 		},
747 		.mark = skb->mark,
748 		.proto = iph->nexthdr,
749 	};
750 
751 	if (rt6_need_strict(&iph->daddr))
752 		flags |= RT6_LOOKUP_F_IFACE;
753 
754 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
755 }
756 
757 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
758 					     struct flowi *fl, int flags)
759 {
760 	struct fib6_node *fn;
761 	struct rt6_info *rt, *nrt;
762 	int strict = 0;
763 	int attempts = 3;
764 	int err;
765 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
766 
767 	strict |= flags & RT6_LOOKUP_F_IFACE;
768 
769 relookup:
770 	read_lock_bh(&table->tb6_lock);
771 
772 restart_2:
773 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
774 
775 restart:
776 	rt = rt6_select(fn, fl->oif, strict | reachable);
777 	BACKTRACK(&fl->fl6_src);
778 	if (rt == &ip6_null_entry ||
779 	    rt->rt6i_flags & RTF_CACHE)
780 		goto out;
781 
782 	dst_hold(&rt->u.dst);
783 	read_unlock_bh(&table->tb6_lock);
784 
785 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
786 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
787 	else {
788 #if CLONE_OFFLINK_ROUTE
789 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
790 #else
791 		goto out2;
792 #endif
793 	}
794 
795 	dst_release(&rt->u.dst);
796 	rt = nrt ? : &ip6_null_entry;
797 
798 	dst_hold(&rt->u.dst);
799 	if (nrt) {
800 		err = ip6_ins_rt(nrt);
801 		if (!err)
802 			goto out2;
803 	}
804 
805 	if (--attempts <= 0)
806 		goto out2;
807 
808 	/*
809 	 * Race condition! In the gap, when table->tb6_lock was
810 	 * released someone could insert this route.  Relookup.
811 	 */
812 	dst_release(&rt->u.dst);
813 	goto relookup;
814 
815 out:
816 	if (reachable) {
817 		reachable = 0;
818 		goto restart_2;
819 	}
820 	dst_hold(&rt->u.dst);
821 	read_unlock_bh(&table->tb6_lock);
822 out2:
823 	rt->u.dst.lastuse = jiffies;
824 	rt->u.dst.__use++;
825 	return rt;
826 }
827 
828 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
829 {
830 	int flags = 0;
831 
832 	if (rt6_need_strict(&fl->fl6_dst))
833 		flags |= RT6_LOOKUP_F_IFACE;
834 
835 	if (!ipv6_addr_any(&fl->fl6_src))
836 		flags |= RT6_LOOKUP_F_HAS_SADDR;
837 
838 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
839 }
840 
841 
842 /*
843  *	Destination cache support functions
844  */
845 
846 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
847 {
848 	struct rt6_info *rt;
849 
850 	rt = (struct rt6_info *) dst;
851 
852 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
853 		return dst;
854 
855 	return NULL;
856 }
857 
858 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
859 {
860 	struct rt6_info *rt = (struct rt6_info *) dst;
861 
862 	if (rt) {
863 		if (rt->rt6i_flags & RTF_CACHE)
864 			ip6_del_rt(rt);
865 		else
866 			dst_release(dst);
867 	}
868 	return NULL;
869 }
870 
871 static void ip6_link_failure(struct sk_buff *skb)
872 {
873 	struct rt6_info *rt;
874 
875 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
876 
877 	rt = (struct rt6_info *) skb->dst;
878 	if (rt) {
879 		if (rt->rt6i_flags&RTF_CACHE) {
880 			dst_set_expires(&rt->u.dst, 0);
881 			rt->rt6i_flags |= RTF_EXPIRES;
882 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
883 			rt->rt6i_node->fn_sernum = -1;
884 	}
885 }
886 
887 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
888 {
889 	struct rt6_info *rt6 = (struct rt6_info*)dst;
890 
891 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
892 		rt6->rt6i_flags |= RTF_MODIFIED;
893 		if (mtu < IPV6_MIN_MTU) {
894 			mtu = IPV6_MIN_MTU;
895 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
896 		}
897 		dst->metrics[RTAX_MTU-1] = mtu;
898 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
899 	}
900 }
901 
902 static int ipv6_get_mtu(struct net_device *dev);
903 
904 static inline unsigned int ipv6_advmss(unsigned int mtu)
905 {
906 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
907 
908 	if (mtu < ip6_rt_min_advmss)
909 		mtu = ip6_rt_min_advmss;
910 
911 	/*
912 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
913 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
914 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
915 	 * rely only on pmtu discovery"
916 	 */
917 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
918 		mtu = IPV6_MAXPLEN;
919 	return mtu;
920 }
921 
922 static struct dst_entry *ndisc_dst_gc_list;
923 static DEFINE_SPINLOCK(ndisc_lock);
924 
925 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
926 				  struct neighbour *neigh,
927 				  struct in6_addr *addr,
928 				  int (*output)(struct sk_buff *))
929 {
930 	struct rt6_info *rt;
931 	struct inet6_dev *idev = in6_dev_get(dev);
932 
933 	if (unlikely(idev == NULL))
934 		return NULL;
935 
936 	rt = ip6_dst_alloc();
937 	if (unlikely(rt == NULL)) {
938 		in6_dev_put(idev);
939 		goto out;
940 	}
941 
942 	dev_hold(dev);
943 	if (neigh)
944 		neigh_hold(neigh);
945 	else
946 		neigh = ndisc_get_neigh(dev, addr);
947 
948 	rt->rt6i_dev	  = dev;
949 	rt->rt6i_idev     = idev;
950 	rt->rt6i_nexthop  = neigh;
951 	atomic_set(&rt->u.dst.__refcnt, 1);
952 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
953 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
954 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
955 	rt->u.dst.output  = output;
956 
957 #if 0	/* there's no chance to use these for ndisc */
958 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
959 				? DST_HOST
960 				: 0;
961 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
962 	rt->rt6i_dst.plen = 128;
963 #endif
964 
965 	spin_lock_bh(&ndisc_lock);
966 	rt->u.dst.next = ndisc_dst_gc_list;
967 	ndisc_dst_gc_list = &rt->u.dst;
968 	spin_unlock_bh(&ndisc_lock);
969 
970 	fib6_force_start_gc();
971 
972 out:
973 	return &rt->u.dst;
974 }
975 
976 int ndisc_dst_gc(int *more)
977 {
978 	struct dst_entry *dst, *next, **pprev;
979 	int freed;
980 
981 	next = NULL;
982 	freed = 0;
983 
984 	spin_lock_bh(&ndisc_lock);
985 	pprev = &ndisc_dst_gc_list;
986 
987 	while ((dst = *pprev) != NULL) {
988 		if (!atomic_read(&dst->__refcnt)) {
989 			*pprev = dst->next;
990 			dst_free(dst);
991 			freed++;
992 		} else {
993 			pprev = &dst->next;
994 			(*more)++;
995 		}
996 	}
997 
998 	spin_unlock_bh(&ndisc_lock);
999 
1000 	return freed;
1001 }
1002 
1003 static int ip6_dst_gc(void)
1004 {
1005 	static unsigned expire = 30*HZ;
1006 	static unsigned long last_gc;
1007 	unsigned long now = jiffies;
1008 
1009 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1010 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1011 		goto out;
1012 
1013 	expire++;
1014 	fib6_run_gc(expire);
1015 	last_gc = now;
1016 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1017 		expire = ip6_rt_gc_timeout>>1;
1018 
1019 out:
1020 	expire -= expire>>ip6_rt_gc_elasticity;
1021 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1022 }
1023 
1024 /* Clean host part of a prefix. Not necessary in radix tree,
1025    but results in cleaner routing tables.
1026 
1027    Remove it only when all the things will work!
1028  */
1029 
1030 static int ipv6_get_mtu(struct net_device *dev)
1031 {
1032 	int mtu = IPV6_MIN_MTU;
1033 	struct inet6_dev *idev;
1034 
1035 	idev = in6_dev_get(dev);
1036 	if (idev) {
1037 		mtu = idev->cnf.mtu6;
1038 		in6_dev_put(idev);
1039 	}
1040 	return mtu;
1041 }
1042 
1043 int ipv6_get_hoplimit(struct net_device *dev)
1044 {
1045 	int hoplimit = ipv6_devconf.hop_limit;
1046 	struct inet6_dev *idev;
1047 
1048 	idev = in6_dev_get(dev);
1049 	if (idev) {
1050 		hoplimit = idev->cnf.hop_limit;
1051 		in6_dev_put(idev);
1052 	}
1053 	return hoplimit;
1054 }
1055 
1056 /*
1057  *
1058  */
1059 
1060 int ip6_route_add(struct fib6_config *cfg)
1061 {
1062 	int err;
1063 	struct rt6_info *rt = NULL;
1064 	struct net_device *dev = NULL;
1065 	struct inet6_dev *idev = NULL;
1066 	struct fib6_table *table;
1067 	int addr_type;
1068 
1069 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1070 		return -EINVAL;
1071 #ifndef CONFIG_IPV6_SUBTREES
1072 	if (cfg->fc_src_len)
1073 		return -EINVAL;
1074 #endif
1075 	if (cfg->fc_ifindex) {
1076 		err = -ENODEV;
1077 		dev = dev_get_by_index(cfg->fc_ifindex);
1078 		if (!dev)
1079 			goto out;
1080 		idev = in6_dev_get(dev);
1081 		if (!idev)
1082 			goto out;
1083 	}
1084 
1085 	if (cfg->fc_metric == 0)
1086 		cfg->fc_metric = IP6_RT_PRIO_USER;
1087 
1088 	table = fib6_new_table(cfg->fc_table);
1089 	if (table == NULL) {
1090 		err = -ENOBUFS;
1091 		goto out;
1092 	}
1093 
1094 	rt = ip6_dst_alloc();
1095 
1096 	if (rt == NULL) {
1097 		err = -ENOMEM;
1098 		goto out;
1099 	}
1100 
1101 	rt->u.dst.obsolete = -1;
1102 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1103 
1104 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1105 		cfg->fc_protocol = RTPROT_BOOT;
1106 	rt->rt6i_protocol = cfg->fc_protocol;
1107 
1108 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1109 
1110 	if (addr_type & IPV6_ADDR_MULTICAST)
1111 		rt->u.dst.input = ip6_mc_input;
1112 	else
1113 		rt->u.dst.input = ip6_forward;
1114 
1115 	rt->u.dst.output = ip6_output;
1116 
1117 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1118 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1119 	if (rt->rt6i_dst.plen == 128)
1120 	       rt->u.dst.flags = DST_HOST;
1121 
1122 #ifdef CONFIG_IPV6_SUBTREES
1123 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1124 	rt->rt6i_src.plen = cfg->fc_src_len;
1125 #endif
1126 
1127 	rt->rt6i_metric = cfg->fc_metric;
1128 
1129 	/* We cannot add true routes via loopback here,
1130 	   they would result in kernel looping; promote them to reject routes
1131 	 */
1132 	if ((cfg->fc_flags & RTF_REJECT) ||
1133 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1134 		/* hold loopback dev/idev if we haven't done so. */
1135 		if (dev != &loopback_dev) {
1136 			if (dev) {
1137 				dev_put(dev);
1138 				in6_dev_put(idev);
1139 			}
1140 			dev = &loopback_dev;
1141 			dev_hold(dev);
1142 			idev = in6_dev_get(dev);
1143 			if (!idev) {
1144 				err = -ENODEV;
1145 				goto out;
1146 			}
1147 		}
1148 		rt->u.dst.output = ip6_pkt_discard_out;
1149 		rt->u.dst.input = ip6_pkt_discard;
1150 		rt->u.dst.error = -ENETUNREACH;
1151 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1152 		goto install_route;
1153 	}
1154 
1155 	if (cfg->fc_flags & RTF_GATEWAY) {
1156 		struct in6_addr *gw_addr;
1157 		int gwa_type;
1158 
1159 		gw_addr = &cfg->fc_gateway;
1160 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1161 		gwa_type = ipv6_addr_type(gw_addr);
1162 
1163 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1164 			struct rt6_info *grt;
1165 
1166 			/* IPv6 strictly inhibits using not link-local
1167 			   addresses as nexthop address.
1168 			   Otherwise, router will not able to send redirects.
1169 			   It is very good, but in some (rare!) circumstances
1170 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1171 			   some exceptions. --ANK
1172 			 */
1173 			err = -EINVAL;
1174 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1175 				goto out;
1176 
1177 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1178 
1179 			err = -EHOSTUNREACH;
1180 			if (grt == NULL)
1181 				goto out;
1182 			if (dev) {
1183 				if (dev != grt->rt6i_dev) {
1184 					dst_release(&grt->u.dst);
1185 					goto out;
1186 				}
1187 			} else {
1188 				dev = grt->rt6i_dev;
1189 				idev = grt->rt6i_idev;
1190 				dev_hold(dev);
1191 				in6_dev_hold(grt->rt6i_idev);
1192 			}
1193 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1194 				err = 0;
1195 			dst_release(&grt->u.dst);
1196 
1197 			if (err)
1198 				goto out;
1199 		}
1200 		err = -EINVAL;
1201 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1202 			goto out;
1203 	}
1204 
1205 	err = -ENODEV;
1206 	if (dev == NULL)
1207 		goto out;
1208 
1209 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1210 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1211 		if (IS_ERR(rt->rt6i_nexthop)) {
1212 			err = PTR_ERR(rt->rt6i_nexthop);
1213 			rt->rt6i_nexthop = NULL;
1214 			goto out;
1215 		}
1216 	}
1217 
1218 	rt->rt6i_flags = cfg->fc_flags;
1219 
1220 install_route:
1221 	if (cfg->fc_mx) {
1222 		struct nlattr *nla;
1223 		int remaining;
1224 
1225 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1226 			int type = nla->nla_type;
1227 
1228 			if (type) {
1229 				if (type > RTAX_MAX) {
1230 					err = -EINVAL;
1231 					goto out;
1232 				}
1233 
1234 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1235 			}
1236 		}
1237 	}
1238 
1239 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1240 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1241 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1242 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1243 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1244 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1245 	rt->u.dst.dev = dev;
1246 	rt->rt6i_idev = idev;
1247 	rt->rt6i_table = table;
1248 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1249 
1250 out:
1251 	if (dev)
1252 		dev_put(dev);
1253 	if (idev)
1254 		in6_dev_put(idev);
1255 	if (rt)
1256 		dst_free(&rt->u.dst);
1257 	return err;
1258 }
1259 
1260 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1261 {
1262 	int err;
1263 	struct fib6_table *table;
1264 
1265 	if (rt == &ip6_null_entry)
1266 		return -ENOENT;
1267 
1268 	table = rt->rt6i_table;
1269 	write_lock_bh(&table->tb6_lock);
1270 
1271 	err = fib6_del(rt, info);
1272 	dst_release(&rt->u.dst);
1273 
1274 	write_unlock_bh(&table->tb6_lock);
1275 
1276 	return err;
1277 }
1278 
1279 int ip6_del_rt(struct rt6_info *rt)
1280 {
1281 	return __ip6_del_rt(rt, NULL);
1282 }
1283 
1284 static int ip6_route_del(struct fib6_config *cfg)
1285 {
1286 	struct fib6_table *table;
1287 	struct fib6_node *fn;
1288 	struct rt6_info *rt;
1289 	int err = -ESRCH;
1290 
1291 	table = fib6_get_table(cfg->fc_table);
1292 	if (table == NULL)
1293 		return err;
1294 
1295 	read_lock_bh(&table->tb6_lock);
1296 
1297 	fn = fib6_locate(&table->tb6_root,
1298 			 &cfg->fc_dst, cfg->fc_dst_len,
1299 			 &cfg->fc_src, cfg->fc_src_len);
1300 
1301 	if (fn) {
1302 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1303 			if (cfg->fc_ifindex &&
1304 			    (rt->rt6i_dev == NULL ||
1305 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1306 				continue;
1307 			if (cfg->fc_flags & RTF_GATEWAY &&
1308 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1309 				continue;
1310 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1311 				continue;
1312 			dst_hold(&rt->u.dst);
1313 			read_unlock_bh(&table->tb6_lock);
1314 
1315 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1316 		}
1317 	}
1318 	read_unlock_bh(&table->tb6_lock);
1319 
1320 	return err;
1321 }
1322 
1323 /*
1324  *	Handle redirects
1325  */
/*
 * Flow key used for redirect lookups: a regular flowi followed by the
 * address of the gateway to match.  The flowi has to stay the first
 * member, because the lookup code in this file casts between
 * struct flowi * and struct ip6rd_flowi *.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* ordinary lookup key */
	struct in6_addr gateway;	/* gateway address compared against rt6i_gateway */
};
1330 
1331 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1332 					     struct flowi *fl,
1333 					     int flags)
1334 {
1335 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1336 	struct rt6_info *rt;
1337 	struct fib6_node *fn;
1338 
1339 	/*
1340 	 * Get the "current" route for this destination and
1341 	 * check if the redirect has come from approriate router.
1342 	 *
1343 	 * RFC 2461 specifies that redirects should only be
1344 	 * accepted if they come from the nexthop to the target.
1345 	 * Due to the way the routes are chosen, this notion
1346 	 * is a bit fuzzy and one might need to check all possible
1347 	 * routes.
1348 	 */
1349 
1350 	read_lock_bh(&table->tb6_lock);
1351 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1352 restart:
1353 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1354 		/*
1355 		 * Current route is on-link; redirect is always invalid.
1356 		 *
1357 		 * Seems, previous statement is not true. It could
1358 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1359 		 * But then router serving it might decide, that we should
1360 		 * know truth 8)8) --ANK (980726).
1361 		 */
1362 		if (rt6_check_expired(rt))
1363 			continue;
1364 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1365 			continue;
1366 		if (fl->oif != rt->rt6i_dev->ifindex)
1367 			continue;
1368 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1369 			continue;
1370 		break;
1371 	}
1372 
1373 	if (!rt)
1374 		rt = &ip6_null_entry;
1375 	BACKTRACK(&fl->fl6_src);
1376 out:
1377 	dst_hold(&rt->u.dst);
1378 
1379 	read_unlock_bh(&table->tb6_lock);
1380 
1381 	return rt;
1382 };
1383 
1384 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1385 					   struct in6_addr *src,
1386 					   struct in6_addr *gateway,
1387 					   struct net_device *dev)
1388 {
1389 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1390 	struct ip6rd_flowi rdfl = {
1391 		.fl = {
1392 			.oif = dev->ifindex,
1393 			.nl_u = {
1394 				.ip6_u = {
1395 					.daddr = *dest,
1396 					.saddr = *src,
1397 				},
1398 			},
1399 		},
1400 		.gateway = *gateway,
1401 	};
1402 
1403 	if (rt6_need_strict(dest))
1404 		flags |= RT6_LOOKUP_F_IFACE;
1405 
1406 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1407 }
1408 
1409 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1410 		  struct in6_addr *saddr,
1411 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1412 {
1413 	struct rt6_info *rt, *nrt = NULL;
1414 	struct netevent_redirect netevent;
1415 
1416 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1417 
1418 	if (rt == &ip6_null_entry) {
1419 		if (net_ratelimit())
1420 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1421 			       "for redirect target\n");
1422 		goto out;
1423 	}
1424 
1425 	/*
1426 	 *	We have finally decided to accept it.
1427 	 */
1428 
1429 	neigh_update(neigh, lladdr, NUD_STALE,
1430 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1431 		     NEIGH_UPDATE_F_OVERRIDE|
1432 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1433 				     NEIGH_UPDATE_F_ISROUTER))
1434 		     );
1435 
1436 	/*
1437 	 * Redirect received -> path was valid.
1438 	 * Look, redirects are sent only in response to data packets,
1439 	 * so that this nexthop apparently is reachable. --ANK
1440 	 */
1441 	dst_confirm(&rt->u.dst);
1442 
1443 	/* Duplicate redirect: silently ignore. */
1444 	if (neigh == rt->u.dst.neighbour)
1445 		goto out;
1446 
1447 	nrt = ip6_rt_copy(rt);
1448 	if (nrt == NULL)
1449 		goto out;
1450 
1451 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1452 	if (on_link)
1453 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1454 
1455 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1456 	nrt->rt6i_dst.plen = 128;
1457 	nrt->u.dst.flags |= DST_HOST;
1458 
1459 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1460 	nrt->rt6i_nexthop = neigh_clone(neigh);
1461 	/* Reset pmtu, it may be better */
1462 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1463 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1464 
1465 	if (ip6_ins_rt(nrt))
1466 		goto out;
1467 
1468 	netevent.old = &rt->u.dst;
1469 	netevent.new = &nrt->u.dst;
1470 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1471 
1472 	if (rt->rt6i_flags&RTF_CACHE) {
1473 		ip6_del_rt(rt);
1474 		return;
1475 	}
1476 
1477 out:
1478 	dst_release(&rt->u.dst);
1479 	return;
1480 }
1481 
1482 /*
1483  *	Handle ICMP "packet too big" messages
1484  *	i.e. Path MTU discovery
1485  */
1486 
1487 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1488 			struct net_device *dev, u32 pmtu)
1489 {
1490 	struct rt6_info *rt, *nrt;
1491 	int allfrag = 0;
1492 
1493 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1494 	if (rt == NULL)
1495 		return;
1496 
1497 	if (pmtu >= dst_mtu(&rt->u.dst))
1498 		goto out;
1499 
1500 	if (pmtu < IPV6_MIN_MTU) {
1501 		/*
1502 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1503 		 * MTU (1280) and a fragment header should always be included
1504 		 * after a node receiving Too Big message reporting PMTU is
1505 		 * less than the IPv6 Minimum Link MTU.
1506 		 */
1507 		pmtu = IPV6_MIN_MTU;
1508 		allfrag = 1;
1509 	}
1510 
1511 	/* New mtu received -> path was valid.
1512 	   They are sent only in response to data packets,
1513 	   so that this nexthop apparently is reachable. --ANK
1514 	 */
1515 	dst_confirm(&rt->u.dst);
1516 
1517 	/* Host route. If it is static, it would be better
1518 	   not to override it, but add new one, so that
1519 	   when cache entry will expire old pmtu
1520 	   would return automatically.
1521 	 */
1522 	if (rt->rt6i_flags & RTF_CACHE) {
1523 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1524 		if (allfrag)
1525 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1526 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1527 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1528 		goto out;
1529 	}
1530 
1531 	/* Network route.
1532 	   Two cases are possible:
1533 	   1. It is connected route. Action: COW
1534 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1535 	 */
1536 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1537 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1538 	else
1539 		nrt = rt6_alloc_clone(rt, daddr);
1540 
1541 	if (nrt) {
1542 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1543 		if (allfrag)
1544 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1545 
1546 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1547 		 * happened within 5 mins, the recommended timer is 10 mins.
1548 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1549 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1550 		 * and detecting PMTU increase will be automatically happened.
1551 		 */
1552 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1553 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1554 
1555 		ip6_ins_rt(nrt);
1556 	}
1557 out:
1558 	dst_release(&rt->u.dst);
1559 }
1560 
1561 /*
1562  *	Misc support functions
1563  */
1564 
1565 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1566 {
1567 	struct rt6_info *rt = ip6_dst_alloc();
1568 
1569 	if (rt) {
1570 		rt->u.dst.input = ort->u.dst.input;
1571 		rt->u.dst.output = ort->u.dst.output;
1572 
1573 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1574 		rt->u.dst.error = ort->u.dst.error;
1575 		rt->u.dst.dev = ort->u.dst.dev;
1576 		if (rt->u.dst.dev)
1577 			dev_hold(rt->u.dst.dev);
1578 		rt->rt6i_idev = ort->rt6i_idev;
1579 		if (rt->rt6i_idev)
1580 			in6_dev_hold(rt->rt6i_idev);
1581 		rt->u.dst.lastuse = jiffies;
1582 		rt->rt6i_expires = 0;
1583 
1584 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1585 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1586 		rt->rt6i_metric = 0;
1587 
1588 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1589 #ifdef CONFIG_IPV6_SUBTREES
1590 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1591 #endif
1592 		rt->rt6i_table = ort->rt6i_table;
1593 	}
1594 	return rt;
1595 }
1596 
1597 #ifdef CONFIG_IPV6_ROUTE_INFO
1598 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1599 					   struct in6_addr *gwaddr, int ifindex)
1600 {
1601 	struct fib6_node *fn;
1602 	struct rt6_info *rt = NULL;
1603 	struct fib6_table *table;
1604 
1605 	table = fib6_get_table(RT6_TABLE_INFO);
1606 	if (table == NULL)
1607 		return NULL;
1608 
1609 	write_lock_bh(&table->tb6_lock);
1610 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1611 	if (!fn)
1612 		goto out;
1613 
1614 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1615 		if (rt->rt6i_dev->ifindex != ifindex)
1616 			continue;
1617 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1618 			continue;
1619 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1620 			continue;
1621 		dst_hold(&rt->u.dst);
1622 		break;
1623 	}
1624 out:
1625 	write_unlock_bh(&table->tb6_lock);
1626 	return rt;
1627 }
1628 
1629 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1630 					   struct in6_addr *gwaddr, int ifindex,
1631 					   unsigned pref)
1632 {
1633 	struct fib6_config cfg = {
1634 		.fc_table	= RT6_TABLE_INFO,
1635 		.fc_metric	= 1024,
1636 		.fc_ifindex	= ifindex,
1637 		.fc_dst_len	= prefixlen,
1638 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1639 				  RTF_UP | RTF_PREF(pref),
1640 	};
1641 
1642 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1643 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1644 
1645 	/* We should treat it as a default route if prefix length is 0. */
1646 	if (!prefixlen)
1647 		cfg.fc_flags |= RTF_DEFAULT;
1648 
1649 	ip6_route_add(&cfg);
1650 
1651 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1652 }
1653 #endif
1654 
1655 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1656 {
1657 	struct rt6_info *rt;
1658 	struct fib6_table *table;
1659 
1660 	table = fib6_get_table(RT6_TABLE_DFLT);
1661 	if (table == NULL)
1662 		return NULL;
1663 
1664 	write_lock_bh(&table->tb6_lock);
1665 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1666 		if (dev == rt->rt6i_dev &&
1667 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1668 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1669 			break;
1670 	}
1671 	if (rt)
1672 		dst_hold(&rt->u.dst);
1673 	write_unlock_bh(&table->tb6_lock);
1674 	return rt;
1675 }
1676 
1677 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1678 				     struct net_device *dev,
1679 				     unsigned int pref)
1680 {
1681 	struct fib6_config cfg = {
1682 		.fc_table	= RT6_TABLE_DFLT,
1683 		.fc_metric	= 1024,
1684 		.fc_ifindex	= dev->ifindex,
1685 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1686 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1687 	};
1688 
1689 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1690 
1691 	ip6_route_add(&cfg);
1692 
1693 	return rt6_get_dflt_router(gwaddr, dev);
1694 }
1695 
1696 void rt6_purge_dflt_routers(void)
1697 {
1698 	struct rt6_info *rt;
1699 	struct fib6_table *table;
1700 
1701 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1702 	table = fib6_get_table(RT6_TABLE_DFLT);
1703 	if (table == NULL)
1704 		return;
1705 
1706 restart:
1707 	read_lock_bh(&table->tb6_lock);
1708 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1709 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1710 			dst_hold(&rt->u.dst);
1711 			read_unlock_bh(&table->tb6_lock);
1712 			ip6_del_rt(rt);
1713 			goto restart;
1714 		}
1715 	}
1716 	read_unlock_bh(&table->tb6_lock);
1717 }
1718 
1719 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1720 				 struct fib6_config *cfg)
1721 {
1722 	memset(cfg, 0, sizeof(*cfg));
1723 
1724 	cfg->fc_table = RT6_TABLE_MAIN;
1725 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1726 	cfg->fc_metric = rtmsg->rtmsg_metric;
1727 	cfg->fc_expires = rtmsg->rtmsg_info;
1728 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1729 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1730 	cfg->fc_flags = rtmsg->rtmsg_flags;
1731 
1732 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1733 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1734 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1735 }
1736 
1737 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1738 {
1739 	struct fib6_config cfg;
1740 	struct in6_rtmsg rtmsg;
1741 	int err;
1742 
1743 	switch(cmd) {
1744 	case SIOCADDRT:		/* Add a route */
1745 	case SIOCDELRT:		/* Delete a route */
1746 		if (!capable(CAP_NET_ADMIN))
1747 			return -EPERM;
1748 		err = copy_from_user(&rtmsg, arg,
1749 				     sizeof(struct in6_rtmsg));
1750 		if (err)
1751 			return -EFAULT;
1752 
1753 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1754 
1755 		rtnl_lock();
1756 		switch (cmd) {
1757 		case SIOCADDRT:
1758 			err = ip6_route_add(&cfg);
1759 			break;
1760 		case SIOCDELRT:
1761 			err = ip6_route_del(&cfg);
1762 			break;
1763 		default:
1764 			err = -EINVAL;
1765 		}
1766 		rtnl_unlock();
1767 
1768 		return err;
1769 	};
1770 
1771 	return -EINVAL;
1772 }
1773 
1774 /*
1775  *	Drop the packet on the floor
1776  */
1777 
1778 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1779 {
1780 	int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1781 	if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1782 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1783 
1784 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
1785 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1786 	kfree_skb(skb);
1787 	return 0;
1788 }
1789 
1790 static int ip6_pkt_discard(struct sk_buff *skb)
1791 {
1792 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1793 }
1794 
1795 static int ip6_pkt_discard_out(struct sk_buff *skb)
1796 {
1797 	skb->dev = skb->dst->dev;
1798 	return ip6_pkt_discard(skb);
1799 }
1800 
1801 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1802 
1803 static int ip6_pkt_prohibit(struct sk_buff *skb)
1804 {
1805 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1806 }
1807 
1808 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1809 {
1810 	skb->dev = skb->dst->dev;
1811 	return ip6_pkt_prohibit(skb);
1812 }
1813 
/* Free @skb without sending anything back; always returns 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	const int ret = 0;

	kfree_skb(skb);

	return ret;
}
1819 
1820 #endif
1821 
1822 /*
1823  *	Allocate a dst for local (unicast / anycast) address.
1824  */
1825 
1826 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1827 				    const struct in6_addr *addr,
1828 				    int anycast)
1829 {
1830 	struct rt6_info *rt = ip6_dst_alloc();
1831 
1832 	if (rt == NULL)
1833 		return ERR_PTR(-ENOMEM);
1834 
1835 	dev_hold(&loopback_dev);
1836 	in6_dev_hold(idev);
1837 
1838 	rt->u.dst.flags = DST_HOST;
1839 	rt->u.dst.input = ip6_input;
1840 	rt->u.dst.output = ip6_output;
1841 	rt->rt6i_dev = &loopback_dev;
1842 	rt->rt6i_idev = idev;
1843 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1844 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1845 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1846 	rt->u.dst.obsolete = -1;
1847 
1848 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1849 	if (anycast)
1850 		rt->rt6i_flags |= RTF_ANYCAST;
1851 	else
1852 		rt->rt6i_flags |= RTF_LOCAL;
1853 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1854 	if (rt->rt6i_nexthop == NULL) {
1855 		dst_free(&rt->u.dst);
1856 		return ERR_PTR(-ENOMEM);
1857 	}
1858 
1859 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1860 	rt->rt6i_dst.plen = 128;
1861 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1862 
1863 	atomic_set(&rt->u.dst.__refcnt, 1);
1864 
1865 	return rt;
1866 }
1867 
1868 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1869 {
1870 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1871 	    rt != &ip6_null_entry) {
1872 		RT6_TRACE("deleted by ifdown %p\n", rt);
1873 		return -1;
1874 	}
1875 	return 0;
1876 }
1877 
1878 void rt6_ifdown(struct net_device *dev)
1879 {
1880 	fib6_clean_all(fib6_ifdown, 0, dev);
1881 }
1882 
/* Argument block passed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
1888 
1889 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1890 {
1891 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1892 	struct inet6_dev *idev;
1893 
1894 	/* In IPv6 pmtu discovery is not optional,
1895 	   so that RTAX_MTU lock cannot disable it.
1896 	   We still use this lock to block changes
1897 	   caused by addrconf/ndisc.
1898 	*/
1899 
1900 	idev = __in6_dev_get(arg->dev);
1901 	if (idev == NULL)
1902 		return 0;
1903 
1904 	/* For administrative MTU increase, there is no way to discover
1905 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1906 	   Since RFC 1981 doesn't include administrative MTU increase
1907 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1908 	 */
1909 	/*
1910 	   If new MTU is less than route PMTU, this new MTU will be the
1911 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1912 	   decreases; if new MTU is greater than route PMTU, and the
1913 	   old MTU is the lowest MTU in the path, update the route PMTU
1914 	   to reflect the increase. In this case if the other nodes' MTU
1915 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1916 	   PMTU discouvery.
1917 	 */
1918 	if (rt->rt6i_dev == arg->dev &&
1919 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1920 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1921 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1922 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1923 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1924 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1925 	return 0;
1926 }
1927 
1928 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1929 {
1930 	struct rt6_mtu_change_arg arg = {
1931 		.dev = dev,
1932 		.mtu = mtu,
1933 	};
1934 
1935 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1936 }
1937 
/*
 * Attribute policy handed to nlmsg_parse() for IPv6 route messages.
 * Attribute types without an entry here (e.g. RTA_DST, RTA_SRC,
 * RTA_TABLE) are zero-initialised in this table.
 */
static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
	/* length constraint of one IPv6 address; see nla_policy for the exact .len semantics */
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
1945 
1946 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1947 			      struct fib6_config *cfg)
1948 {
1949 	struct rtmsg *rtm;
1950 	struct nlattr *tb[RTA_MAX+1];
1951 	int err;
1952 
1953 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1954 	if (err < 0)
1955 		goto errout;
1956 
1957 	err = -EINVAL;
1958 	rtm = nlmsg_data(nlh);
1959 	memset(cfg, 0, sizeof(*cfg));
1960 
1961 	cfg->fc_table = rtm->rtm_table;
1962 	cfg->fc_dst_len = rtm->rtm_dst_len;
1963 	cfg->fc_src_len = rtm->rtm_src_len;
1964 	cfg->fc_flags = RTF_UP;
1965 	cfg->fc_protocol = rtm->rtm_protocol;
1966 
1967 	if (rtm->rtm_type == RTN_UNREACHABLE)
1968 		cfg->fc_flags |= RTF_REJECT;
1969 
1970 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1971 	cfg->fc_nlinfo.nlh = nlh;
1972 
1973 	if (tb[RTA_GATEWAY]) {
1974 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1975 		cfg->fc_flags |= RTF_GATEWAY;
1976 	}
1977 
1978 	if (tb[RTA_DST]) {
1979 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1980 
1981 		if (nla_len(tb[RTA_DST]) < plen)
1982 			goto errout;
1983 
1984 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1985 	}
1986 
1987 	if (tb[RTA_SRC]) {
1988 		int plen = (rtm->rtm_src_len + 7) >> 3;
1989 
1990 		if (nla_len(tb[RTA_SRC]) < plen)
1991 			goto errout;
1992 
1993 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1994 	}
1995 
1996 	if (tb[RTA_OIF])
1997 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1998 
1999 	if (tb[RTA_PRIORITY])
2000 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2001 
2002 	if (tb[RTA_METRICS]) {
2003 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2004 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2005 	}
2006 
2007 	if (tb[RTA_TABLE])
2008 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2009 
2010 	err = 0;
2011 errout:
2012 	return err;
2013 }
2014 
2015 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2016 {
2017 	struct fib6_config cfg;
2018 	int err;
2019 
2020 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2021 	if (err < 0)
2022 		return err;
2023 
2024 	return ip6_route_del(&cfg);
2025 }
2026 
2027 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2028 {
2029 	struct fib6_config cfg;
2030 	int err;
2031 
2032 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2033 	if (err < 0)
2034 		return err;
2035 
2036 	return ip6_route_add(&cfg);
2037 }
2038 
2039 static inline size_t rt6_nlmsg_size(void)
2040 {
2041 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2042 	       + nla_total_size(16) /* RTA_SRC */
2043 	       + nla_total_size(16) /* RTA_DST */
2044 	       + nla_total_size(16) /* RTA_GATEWAY */
2045 	       + nla_total_size(16) /* RTA_PREFSRC */
2046 	       + nla_total_size(4) /* RTA_TABLE */
2047 	       + nla_total_size(4) /* RTA_IIF */
2048 	       + nla_total_size(4) /* RTA_OIF */
2049 	       + nla_total_size(4) /* RTA_PRIORITY */
2050 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2051 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2052 }
2053 
/*
 * Append one route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL, reported as /128 RTA_DST / RTA_SRC instead of
 *            the route's own keys (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 * @iif:      when non-zero, reported as RTA_IIF; otherwise, if @dst is
 *            given, a preferred source address is looked up and reported.
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the message header.
 * @prefix:   when non-zero, only routes with RTF_PREFIX_RT are emitted.
 *
 * Returns 1 when the route is skipped by the @prefix filter, the value of
 * nlmsg_end() on success, and -EMSGSIZE when the message does not fit.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both into the header and into an RTA_TABLE attribute. */
	rtm->rtm_table = table;
	/*
	 * NOTE(review): the NLA_PUT* macros presumably jump to
	 * nla_put_failure when the attribute does not fit - confirm
	 * against their definition.
	 */
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Start from the stored protocol, then let the route flags override it. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		NLA_PUT_U32(skb, RTA_IIF, iif);
	else if (dst) {
		/* No input interface given: report the source address chosen for @dst. */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto nla_put_failure;

	/* The gateway is reported from the neighbour entry's key. */
	if (rt->u.dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);

	if (rt->u.dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Remaining lifetime in jiffies; 0 when no expiry time is set. */
	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
			       expires, rt->u.dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Undo the partially built message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2147 
2148 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2149 {
2150 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2151 	int prefix;
2152 
2153 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2154 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2155 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2156 	} else
2157 		prefix = 0;
2158 
2159 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2160 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2161 		     prefix, NLM_F_MULTI);
2162 }
2163 
2164 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2165 {
2166 	struct nlattr *tb[RTA_MAX+1];
2167 	struct rt6_info *rt;
2168 	struct sk_buff *skb;
2169 	struct rtmsg *rtm;
2170 	struct flowi fl;
2171 	int err, iif = 0;
2172 
2173 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2174 	if (err < 0)
2175 		goto errout;
2176 
2177 	err = -EINVAL;
2178 	memset(&fl, 0, sizeof(fl));
2179 
2180 	if (tb[RTA_SRC]) {
2181 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2182 			goto errout;
2183 
2184 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2185 	}
2186 
2187 	if (tb[RTA_DST]) {
2188 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2189 			goto errout;
2190 
2191 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2192 	}
2193 
2194 	if (tb[RTA_IIF])
2195 		iif = nla_get_u32(tb[RTA_IIF]);
2196 
2197 	if (tb[RTA_OIF])
2198 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2199 
2200 	if (iif) {
2201 		struct net_device *dev;
2202 		dev = __dev_get_by_index(iif);
2203 		if (!dev) {
2204 			err = -ENODEV;
2205 			goto errout;
2206 		}
2207 	}
2208 
2209 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2210 	if (skb == NULL) {
2211 		err = -ENOBUFS;
2212 		goto errout;
2213 	}
2214 
2215 	/* Reserve room for dummy headers, this skb can pass
2216 	   through good chunk of routing engine.
2217 	 */
2218 	skb->mac.raw = skb->data;
2219 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2220 
2221 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2222 	skb->dst = &rt->u.dst;
2223 
2224 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2225 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2226 			    nlh->nlmsg_seq, 0, 0);
2227 	if (err < 0) {
2228 		kfree_skb(skb);
2229 		goto errout;
2230 	}
2231 
2232 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2233 errout:
2234 	return err;
2235 }
2236 
2237 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2238 {
2239 	struct sk_buff *skb;
2240 	u32 pid = 0, seq = 0;
2241 	struct nlmsghdr *nlh = NULL;
2242 	int err = -ENOBUFS;
2243 
2244 	if (info) {
2245 		pid = info->pid;
2246 		nlh = info->nlh;
2247 		if (nlh)
2248 			seq = nlh->nlmsg_seq;
2249 	}
2250 
2251 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2252 	if (skb == NULL)
2253 		goto errout;
2254 
2255 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2256 	if (err < 0) {
2257 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2258 		WARN_ON(err == -EMSGSIZE);
2259 		kfree_skb(skb);
2260 		goto errout;
2261 	}
2262 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2263 errout:
2264 	if (err < 0)
2265 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2266 }
2267 
2268 /*
2269  *	/proc
2270  */
2271 
2272 #ifdef CONFIG_PROC_FS
2273 
/*
 * Length of one route record as used by the /proc read arithmetic below
 * (offset / RT6_INFO_LEN gives the number of records to skip).
 * NOTE(review): the terms presumably mirror the fields printed by
 * rt6_info_route() - confirm before changing either.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its rt6_info_route() callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2284 
2285 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2286 {
2287 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2288 
2289 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2290 		arg->skip++;
2291 		return 0;
2292 	}
2293 
2294 	if (arg->len >= arg->length)
2295 		return 0;
2296 
2297 	arg->len += sprintf(arg->buffer + arg->len,
2298 			    NIP6_SEQFMT " %02x ",
2299 			    NIP6(rt->rt6i_dst.addr),
2300 			    rt->rt6i_dst.plen);
2301 
2302 #ifdef CONFIG_IPV6_SUBTREES
2303 	arg->len += sprintf(arg->buffer + arg->len,
2304 			    NIP6_SEQFMT " %02x ",
2305 			    NIP6(rt->rt6i_src.addr),
2306 			    rt->rt6i_src.plen);
2307 #else
2308 	arg->len += sprintf(arg->buffer + arg->len,
2309 			    "00000000000000000000000000000000 00 ");
2310 #endif
2311 
2312 	if (rt->rt6i_nexthop) {
2313 		arg->len += sprintf(arg->buffer + arg->len,
2314 				    NIP6_SEQFMT,
2315 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2316 	} else {
2317 		arg->len += sprintf(arg->buffer + arg->len,
2318 				    "00000000000000000000000000000000");
2319 	}
2320 	arg->len += sprintf(arg->buffer + arg->len,
2321 			    " %08x %08x %08x %08x %8s\n",
2322 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2323 			    rt->u.dst.__use, rt->rt6i_flags,
2324 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2325 	return 0;
2326 }
2327 
2328 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2329 {
2330 	struct rt6_proc_arg arg = {
2331 		.buffer = buffer,
2332 		.offset = offset,
2333 		.length = length,
2334 	};
2335 
2336 	fib6_clean_all(rt6_info_route, 0, &arg);
2337 
2338 	*start = buffer;
2339 	if (offset)
2340 		*start += offset % RT6_INFO_LEN;
2341 
2342 	arg.len -= offset % RT6_INFO_LEN;
2343 
2344 	if (arg.len > length)
2345 		arg.len = length;
2346 	if (arg.len < 0)
2347 		arg.len = 0;
2348 
2349 	return arg.len;
2350 }
2351 
2352 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2353 {
2354 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2355 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2356 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2357 		      rt6_stats.fib_rt_cache,
2358 		      atomic_read(&ip6_dst_ops.entries),
2359 		      rt6_stats.fib_discarded_routes);
2360 
2361 	return 0;
2362 }
2363 
2364 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2365 {
2366 	return single_open(file, rt6_stats_seq_show, NULL);
2367 }
2368 
2369 static const struct file_operations rt6_stats_seq_fops = {
2370 	.owner	 = THIS_MODULE,
2371 	.open	 = rt6_stats_seq_open,
2372 	.read	 = seq_read,
2373 	.llseek	 = seq_lseek,
2374 	.release = single_release,
2375 };
2376 #endif	/* CONFIG_PROC_FS */
2377 
2378 #ifdef CONFIG_SYSCTL
2379 
2380 static int flush_delay;
2381 
2382 static
2383 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2384 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2385 {
2386 	if (write) {
2387 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2388 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2389 		return 0;
2390 	} else
2391 		return -EINVAL;
2392 }
2393 
2394 ctl_table ipv6_route_table[] = {
2395 	{
2396 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2397 		.procname	=	"flush",
2398 		.data		=	&flush_delay,
2399 		.maxlen		=	sizeof(int),
2400 		.mode		=	0200,
2401 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2402 	},
2403 	{
2404 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2405 		.procname	=	"gc_thresh",
2406 		.data		=	&ip6_dst_ops.gc_thresh,
2407 		.maxlen		=	sizeof(int),
2408 		.mode		=	0644,
2409 		.proc_handler	=	&proc_dointvec,
2410 	},
2411 	{
2412 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2413 		.procname	=	"max_size",
2414 		.data		=	&ip6_rt_max_size,
2415 		.maxlen		=	sizeof(int),
2416 		.mode		=	0644,
2417 		.proc_handler	=	&proc_dointvec,
2418 	},
2419 	{
2420 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2421 		.procname	=	"gc_min_interval",
2422 		.data		=	&ip6_rt_gc_min_interval,
2423 		.maxlen		=	sizeof(int),
2424 		.mode		=	0644,
2425 		.proc_handler	=	&proc_dointvec_jiffies,
2426 		.strategy	=	&sysctl_jiffies,
2427 	},
2428 	{
2429 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2430 		.procname	=	"gc_timeout",
2431 		.data		=	&ip6_rt_gc_timeout,
2432 		.maxlen		=	sizeof(int),
2433 		.mode		=	0644,
2434 		.proc_handler	=	&proc_dointvec_jiffies,
2435 		.strategy	=	&sysctl_jiffies,
2436 	},
2437 	{
2438 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2439 		.procname	=	"gc_interval",
2440 		.data		=	&ip6_rt_gc_interval,
2441 		.maxlen		=	sizeof(int),
2442 		.mode		=	0644,
2443 		.proc_handler	=	&proc_dointvec_jiffies,
2444 		.strategy	=	&sysctl_jiffies,
2445 	},
2446 	{
2447 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2448 		.procname	=	"gc_elasticity",
2449 		.data		=	&ip6_rt_gc_elasticity,
2450 		.maxlen		=	sizeof(int),
2451 		.mode		=	0644,
2452 		.proc_handler	=	&proc_dointvec_jiffies,
2453 		.strategy	=	&sysctl_jiffies,
2454 	},
2455 	{
2456 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2457 		.procname	=	"mtu_expires",
2458 		.data		=	&ip6_rt_mtu_expires,
2459 		.maxlen		=	sizeof(int),
2460 		.mode		=	0644,
2461 		.proc_handler	=	&proc_dointvec_jiffies,
2462 		.strategy	=	&sysctl_jiffies,
2463 	},
2464 	{
2465 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2466 		.procname	=	"min_adv_mss",
2467 		.data		=	&ip6_rt_min_advmss,
2468 		.maxlen		=	sizeof(int),
2469 		.mode		=	0644,
2470 		.proc_handler	=	&proc_dointvec_jiffies,
2471 		.strategy	=	&sysctl_jiffies,
2472 	},
2473 	{
2474 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2475 		.procname	=	"gc_min_interval_ms",
2476 		.data		=	&ip6_rt_gc_min_interval,
2477 		.maxlen		=	sizeof(int),
2478 		.mode		=	0644,
2479 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2480 		.strategy	=	&sysctl_ms_jiffies,
2481 	},
2482 	{ .ctl_name = 0 }
2483 };
2484 
2485 #endif
2486 
2487 void __init ip6_route_init(void)
2488 {
2489 	struct proc_dir_entry *p;
2490 
2491 	ip6_dst_ops.kmem_cachep =
2492 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2493 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2494 	fib6_init();
2495 #ifdef 	CONFIG_PROC_FS
2496 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2497 	if (p)
2498 		p->owner = THIS_MODULE;
2499 
2500 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2501 #endif
2502 #ifdef CONFIG_XFRM
2503 	xfrm6_init();
2504 #endif
2505 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2506 	fib6_rules_init();
2507 #endif
2508 }
2509 
2510 void ip6_route_cleanup(void)
2511 {
2512 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2513 	fib6_rules_cleanup();
2514 #endif
2515 #ifdef CONFIG_PROC_FS
2516 	proc_net_remove("ipv6_route");
2517 	proc_net_remove("rt6_stats");
2518 #endif
2519 #ifdef CONFIG_XFRM
2520 	xfrm6_fini();
2521 #endif
2522 	rt6_ifdown(NULL);
2523 	fib6_gc_cleanup();
2524 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2525 }
2526