xref: /linux/net/ipv6/route.c (revision a7edd0e676d51145ae634a2acf7a447e319200fa)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk() calls; below it both expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/*
 * When non-zero, the policy route lookups clone routes that already have
 * a nexthop (through rt6_alloc_clone()) instead of returning them as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
78 
/*
 * Tunables for the IPv6 routing cache and its garbage collection.
 * Values built from HZ are expressed in jiffies.
 */
/* entry count above which ip6_dst_gc() always runs and reports pressure */
static int ip6_rt_max_size = 4096;
/* minimum spacing between GC runs in ip6_dst_gc() while at or below ip6_rt_max_size */
static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry argument to half of this once below gc_thresh */
static int ip6_rt_gc_timeout = 60*HZ;
/* non-static; not referenced in the part of the file reviewed here */
int ip6_rt_gc_interval = 30*HZ;
/* shift used by ip6_dst_gc() to decay its expiry argument on every call */
static int ip6_rt_gc_elasticity = 9;
/* NOTE(review): presumably the lifetime of learned path MTUs; unused in the code visible here - confirm */
static int ip6_rt_mtu_expires = 10*60*HZ;
/* lower bound applied by ipv6_advmss() */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
/*
 * dst_ops shared by every rt6_info allocated through ip6_dst_alloc().
 * It wires the generic destination cache callbacks to the IPv6
 * implementations in this file; entry_size makes each allocation
 * large enough for a full struct rt6_info.
 */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
121 
/*
 * Statically allocated reject route.  Lookups in this file return it
 * when nothing matches (see rt6_device_match() and rt6_select()).
 * Packets routed through it go to ip6_pkt_discard() /
 * ip6_pkt_discard_out(), and dst.error is -ENETUNREACH.  It starts
 * with a reference count of 1 and is bound to the loopback device.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
141 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static "prohibit" route, only built with multiple routing tables.
 * Same layout as ip6_null_entry, but dst.error is -EACCES and packets
 * are handed to ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static "blackhole" route, only built with multiple routing tables.
 * dst.error is -EINVAL and both the input and the output path use
 * ip6_pkt_blk_hole().
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
189 
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195 
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198 	struct rt6_info *rt = (struct rt6_info *)dst;
199 	struct inet6_dev *idev = rt->rt6i_idev;
200 
201 	if (idev != NULL) {
202 		rt->rt6i_idev = NULL;
203 		in6_dev_put(idev);
204 	}
205 }
206 
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 			   int how)
209 {
210 	struct rt6_info *rt = (struct rt6_info *)dst;
211 	struct inet6_dev *idev = rt->rt6i_idev;
212 
213 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 		if (loopback_idev != NULL) {
216 			rt->rt6i_idev = loopback_idev;
217 			in6_dev_put(idev);
218 		}
219 	}
220 }
221 
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 	return (rt->rt6i_flags & RTF_EXPIRES &&
225 		time_after(jiffies, rt->rt6i_expires));
226 }
227 
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 	return (ipv6_addr_type(daddr) &
231 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233 
234 /*
235  *	Route lookup. Any table->tb6_lock is implied.
236  */
237 
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 						    int oif,
240 						    int strict)
241 {
242 	struct rt6_info *local = NULL;
243 	struct rt6_info *sprt;
244 
245 	if (oif) {
246 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
247 			struct net_device *dev = sprt->rt6i_dev;
248 			if (dev->ifindex == oif)
249 				return sprt;
250 			if (dev->flags & IFF_LOOPBACK) {
251 				if (sprt->rt6i_idev == NULL ||
252 				    sprt->rt6i_idev->dev->ifindex != oif) {
253 					if (strict && oif)
254 						continue;
255 					if (local && (!oif ||
256 						      local->rt6i_idev->dev->ifindex == oif))
257 						continue;
258 				}
259 				local = sprt;
260 			}
261 		}
262 
263 		if (local)
264 			return local;
265 
266 		if (strict)
267 			return &ip6_null_entry;
268 	}
269 	return rt;
270 }
271 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state and the
 * per-interface rtr_probe_interval has elapsed since the neighbour was
 * last updated, stamp the neighbour and send a Neighbour Solicitation
 * to its solicited-node multicast address.  A NULL @rt or a route
 * without nexthop is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the "updated" timestamp check provides the rate limit.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked pre-check */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation is sent after the lock has been dropped */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
307 
308 /*
309  * Default Router Selection (RFC 2461 6.3.6)
310  */
311 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313 	struct net_device *dev = rt->rt6i_dev;
314 	if (!oif || dev->ifindex == oif)
315 		return 2;
316 	if ((dev->flags & IFF_LOOPBACK) &&
317 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
318 		return 1;
319 	return 0;
320 }
321 
322 static inline int rt6_check_neigh(struct rt6_info *rt)
323 {
324 	struct neighbour *neigh = rt->rt6i_nexthop;
325 	int m = 0;
326 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
327 	    !(rt->rt6i_flags & RTF_GATEWAY))
328 		m = 1;
329 	else if (neigh) {
330 		read_lock_bh(&neigh->lock);
331 		if (neigh->nud_state & NUD_VALID)
332 			m = 2;
333 		else if (!(neigh->nud_state & NUD_FAILED))
334 			m = 1;
335 		read_unlock_bh(&neigh->lock);
336 	}
337 	return m;
338 }
339 
340 static int rt6_score_route(struct rt6_info *rt, int oif,
341 			   int strict)
342 {
343 	int m, n;
344 
345 	m = rt6_check_dev(rt, oif);
346 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
347 		return -1;
348 #ifdef CONFIG_IPV6_ROUTER_PREF
349 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
350 #endif
351 	n = rt6_check_neigh(rt);
352 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
353 		return -1;
354 	return m;
355 }
356 
357 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
358 				   int *mpri, struct rt6_info *match)
359 {
360 	int m;
361 
362 	if (rt6_check_expired(rt))
363 		goto out;
364 
365 	m = rt6_score_route(rt, oif, strict);
366 	if (m < 0)
367 		goto out;
368 
369 	if (m > *mpri) {
370 		if (strict & RT6_LOOKUP_F_REACHABLE)
371 			rt6_probe(match);
372 		*mpri = m;
373 		match = rt;
374 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
375 		rt6_probe(rt);
376 	}
377 
378 out:
379 	return match;
380 }
381 
382 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
383 				     struct rt6_info *rr_head,
384 				     u32 metric, int oif, int strict)
385 {
386 	struct rt6_info *rt, *match;
387 	int mpri = -1;
388 
389 	match = NULL;
390 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
391 	     rt = rt->u.dst.rt6_next)
392 		match = find_match(rt, oif, strict, &mpri, match);
393 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
394 	     rt = rt->u.dst.rt6_next)
395 		match = find_match(rt, oif, strict, &mpri, match);
396 
397 	return match;
398 }
399 
400 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
401 {
402 	struct rt6_info *match, *rt0;
403 
404 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
405 		  __FUNCTION__, fn->leaf, oif);
406 
407 	rt0 = fn->rr_ptr;
408 	if (!rt0)
409 		fn->rr_ptr = rt0 = fn->leaf;
410 
411 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
412 
413 	if (!match &&
414 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
415 		struct rt6_info *next = rt0->u.dst.rt6_next;
416 
417 		/* no entries matched; do round-robin */
418 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
419 			next = fn->leaf;
420 
421 		if (next != rt0)
422 			fn->rr_ptr = next;
423 	}
424 
425 	RT6_TRACE("%s() => %p\n",
426 		  __FUNCTION__, match);
427 
428 	return (match ? match : &ip6_null_entry);
429 }
430 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt/@len describe the raw option.  After validating the option
 * length against the prefix length, the matching route-info route is
 * looked up and then
 *   - deleted when the advertised lifetime is zero,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise updated with the advertised preference.
 * A lifetime of 0xffffffff means "never expires"; any other lifetime is
 * clamped so that HZ * lifetime cannot overflow, and sets RTF_EXPIRES.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct rt6_info *rt;
	unsigned int pref;
	u32 lifetime;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	/* 0xffffffff is "infinity"; clamp everything else to avoid overflow */
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		/* full 128-bit prefix present in the option */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
508 
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed leaf selection.
 *
 * This macro is not self-contained.  The enclosing function must provide:
 *   - a local "struct rt6_info *rt" holding the result of the selection,
 *   - a local "struct fib6_node *fn" holding the current node,
 *   - a label "restart" that redoes the selection on the new fn,
 *   - a label "out" that ends the lookup with rt still &ip6_null_entry.
 *
 * It does nothing unless rt == &ip6_null_entry.  Otherwise it walks
 * towards the root: when the parent has a subtree other than the node
 * we came from, that subtree is searched with @saddr; else the parent
 * itself becomes the current node.  The walk jumps to "restart" at the
 * first node flagged RTN_RTINFO and to "out" when the current node is
 * flagged RTN_TL_ROOT.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
526 
527 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
528 					     struct flowi *fl, int flags)
529 {
530 	struct fib6_node *fn;
531 	struct rt6_info *rt;
532 
533 	read_lock_bh(&table->tb6_lock);
534 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
535 restart:
536 	rt = fn->leaf;
537 	rt = rt6_device_match(rt, fl->oif, flags);
538 	BACKTRACK(&fl->fl6_src);
539 out:
540 	dst_hold(&rt->u.dst);
541 	read_unlock_bh(&table->tb6_lock);
542 
543 	rt->u.dst.lastuse = jiffies;
544 	rt->u.dst.__use++;
545 
546 	return rt;
547 
548 }
549 
550 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
551 			    int oif, int strict)
552 {
553 	struct flowi fl = {
554 		.oif = oif,
555 		.nl_u = {
556 			.ip6_u = {
557 				.daddr = *daddr,
558 			},
559 		},
560 	};
561 	struct dst_entry *dst;
562 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
563 
564 	if (saddr) {
565 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
566 		flags |= RT6_LOOKUP_F_HAS_SADDR;
567 	}
568 
569 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
570 	if (dst->error == 0)
571 		return (struct rt6_info *) dst;
572 
573 	dst_release(dst);
574 
575 	return NULL;
576 }
577 
578 /* ip6_ins_rt is called with FREE table->tb6_lock.
579    It takes new route entry, the addition fails by any reason the
580    route is freed. In any case, if caller does not hold it, it may
581    be destroyed.
582  */
583 
584 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
585 {
586 	int err;
587 	struct fib6_table *table;
588 
589 	table = rt->rt6i_table;
590 	write_lock_bh(&table->tb6_lock);
591 	err = fib6_add(&table->tb6_root, rt, info);
592 	write_unlock_bh(&table->tb6_lock);
593 
594 	return err;
595 }
596 
597 int ip6_ins_rt(struct rt6_info *rt)
598 {
599 	return __ip6_ins_rt(rt, NULL);
600 }
601 
602 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
603 				      struct in6_addr *saddr)
604 {
605 	struct rt6_info *rt;
606 
607 	/*
608 	 *	Clone the route.
609 	 */
610 
611 	rt = ip6_rt_copy(ort);
612 
613 	if (rt) {
614 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
615 			if (rt->rt6i_dst.plen != 128 &&
616 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
617 				rt->rt6i_flags |= RTF_ANYCAST;
618 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
619 		}
620 
621 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
622 		rt->rt6i_dst.plen = 128;
623 		rt->rt6i_flags |= RTF_CACHE;
624 		rt->u.dst.flags |= DST_HOST;
625 
626 #ifdef CONFIG_IPV6_SUBTREES
627 		if (rt->rt6i_src.plen && saddr) {
628 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
629 			rt->rt6i_src.plen = 128;
630 		}
631 #endif
632 
633 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
634 
635 	}
636 
637 	return rt;
638 }
639 
640 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
641 {
642 	struct rt6_info *rt = ip6_rt_copy(ort);
643 	if (rt) {
644 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
645 		rt->rt6i_dst.plen = 128;
646 		rt->rt6i_flags |= RTF_CACHE;
647 		rt->u.dst.flags |= DST_HOST;
648 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
649 	}
650 	return rt;
651 }
652 
653 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
654 					    struct flowi *fl, int flags)
655 {
656 	struct fib6_node *fn;
657 	struct rt6_info *rt, *nrt;
658 	int strict = 0;
659 	int attempts = 3;
660 	int err;
661 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
662 
663 	strict |= flags & RT6_LOOKUP_F_IFACE;
664 
665 relookup:
666 	read_lock_bh(&table->tb6_lock);
667 
668 restart_2:
669 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
670 
671 restart:
672 	rt = rt6_select(fn, fl->iif, strict | reachable);
673 	BACKTRACK(&fl->fl6_src);
674 	if (rt == &ip6_null_entry ||
675 	    rt->rt6i_flags & RTF_CACHE)
676 		goto out;
677 
678 	dst_hold(&rt->u.dst);
679 	read_unlock_bh(&table->tb6_lock);
680 
681 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
682 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
683 	else {
684 #if CLONE_OFFLINK_ROUTE
685 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
686 #else
687 		goto out2;
688 #endif
689 	}
690 
691 	dst_release(&rt->u.dst);
692 	rt = nrt ? : &ip6_null_entry;
693 
694 	dst_hold(&rt->u.dst);
695 	if (nrt) {
696 		err = ip6_ins_rt(nrt);
697 		if (!err)
698 			goto out2;
699 	}
700 
701 	if (--attempts <= 0)
702 		goto out2;
703 
704 	/*
705 	 * Race condition! In the gap, when table->tb6_lock was
706 	 * released someone could insert this route.  Relookup.
707 	 */
708 	dst_release(&rt->u.dst);
709 	goto relookup;
710 
711 out:
712 	if (reachable) {
713 		reachable = 0;
714 		goto restart_2;
715 	}
716 	dst_hold(&rt->u.dst);
717 	read_unlock_bh(&table->tb6_lock);
718 out2:
719 	rt->u.dst.lastuse = jiffies;
720 	rt->u.dst.__use++;
721 
722 	return rt;
723 }
724 
725 void ip6_route_input(struct sk_buff *skb)
726 {
727 	struct ipv6hdr *iph = skb->nh.ipv6h;
728 	int flags = RT6_LOOKUP_F_HAS_SADDR;
729 	struct flowi fl = {
730 		.iif = skb->dev->ifindex,
731 		.nl_u = {
732 			.ip6_u = {
733 				.daddr = iph->daddr,
734 				.saddr = iph->saddr,
735 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
736 			},
737 		},
738 		.mark = skb->mark,
739 		.proto = iph->nexthdr,
740 	};
741 
742 	if (rt6_need_strict(&iph->daddr))
743 		flags |= RT6_LOOKUP_F_IFACE;
744 
745 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
746 }
747 
748 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
749 					     struct flowi *fl, int flags)
750 {
751 	struct fib6_node *fn;
752 	struct rt6_info *rt, *nrt;
753 	int strict = 0;
754 	int attempts = 3;
755 	int err;
756 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
757 
758 	strict |= flags & RT6_LOOKUP_F_IFACE;
759 
760 relookup:
761 	read_lock_bh(&table->tb6_lock);
762 
763 restart_2:
764 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
765 
766 restart:
767 	rt = rt6_select(fn, fl->oif, strict | reachable);
768 	BACKTRACK(&fl->fl6_src);
769 	if (rt == &ip6_null_entry ||
770 	    rt->rt6i_flags & RTF_CACHE)
771 		goto out;
772 
773 	dst_hold(&rt->u.dst);
774 	read_unlock_bh(&table->tb6_lock);
775 
776 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
777 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
778 	else {
779 #if CLONE_OFFLINK_ROUTE
780 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
781 #else
782 		goto out2;
783 #endif
784 	}
785 
786 	dst_release(&rt->u.dst);
787 	rt = nrt ? : &ip6_null_entry;
788 
789 	dst_hold(&rt->u.dst);
790 	if (nrt) {
791 		err = ip6_ins_rt(nrt);
792 		if (!err)
793 			goto out2;
794 	}
795 
796 	if (--attempts <= 0)
797 		goto out2;
798 
799 	/*
800 	 * Race condition! In the gap, when table->tb6_lock was
801 	 * released someone could insert this route.  Relookup.
802 	 */
803 	dst_release(&rt->u.dst);
804 	goto relookup;
805 
806 out:
807 	if (reachable) {
808 		reachable = 0;
809 		goto restart_2;
810 	}
811 	dst_hold(&rt->u.dst);
812 	read_unlock_bh(&table->tb6_lock);
813 out2:
814 	rt->u.dst.lastuse = jiffies;
815 	rt->u.dst.__use++;
816 	return rt;
817 }
818 
819 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
820 {
821 	int flags = 0;
822 
823 	if (rt6_need_strict(&fl->fl6_dst))
824 		flags |= RT6_LOOKUP_F_IFACE;
825 
826 	if (!ipv6_addr_any(&fl->fl6_src))
827 		flags |= RT6_LOOKUP_F_HAS_SADDR;
828 
829 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
830 }
831 
832 
833 /*
834  *	Destination cache support functions
835  */
836 
837 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
838 {
839 	struct rt6_info *rt;
840 
841 	rt = (struct rt6_info *) dst;
842 
843 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
844 		return dst;
845 
846 	return NULL;
847 }
848 
849 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
850 {
851 	struct rt6_info *rt = (struct rt6_info *) dst;
852 
853 	if (rt) {
854 		if (rt->rt6i_flags & RTF_CACHE)
855 			ip6_del_rt(rt);
856 		else
857 			dst_release(dst);
858 	}
859 	return NULL;
860 }
861 
862 static void ip6_link_failure(struct sk_buff *skb)
863 {
864 	struct rt6_info *rt;
865 
866 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
867 
868 	rt = (struct rt6_info *) skb->dst;
869 	if (rt) {
870 		if (rt->rt6i_flags&RTF_CACHE) {
871 			dst_set_expires(&rt->u.dst, 0);
872 			rt->rt6i_flags |= RTF_EXPIRES;
873 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874 			rt->rt6i_node->fn_sernum = -1;
875 	}
876 }
877 
878 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
879 {
880 	struct rt6_info *rt6 = (struct rt6_info*)dst;
881 
882 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883 		rt6->rt6i_flags |= RTF_MODIFIED;
884 		if (mtu < IPV6_MIN_MTU) {
885 			mtu = IPV6_MIN_MTU;
886 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
887 		}
888 		dst->metrics[RTAX_MTU-1] = mtu;
889 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
890 	}
891 }
892 
893 static int ipv6_get_mtu(struct net_device *dev);
894 
895 static inline unsigned int ipv6_advmss(unsigned int mtu)
896 {
897 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898 
899 	if (mtu < ip6_rt_min_advmss)
900 		mtu = ip6_rt_min_advmss;
901 
902 	/*
903 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
906 	 * rely only on pmtu discovery"
907 	 */
908 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
909 		mtu = IPV6_MAXPLEN;
910 	return mtu;
911 }
912 
913 static struct dst_entry *ndisc_dst_gc_list;
914 static DEFINE_SPINLOCK(ndisc_lock);
915 
916 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
917 				  struct neighbour *neigh,
918 				  struct in6_addr *addr,
919 				  int (*output)(struct sk_buff *))
920 {
921 	struct rt6_info *rt;
922 	struct inet6_dev *idev = in6_dev_get(dev);
923 
924 	if (unlikely(idev == NULL))
925 		return NULL;
926 
927 	rt = ip6_dst_alloc();
928 	if (unlikely(rt == NULL)) {
929 		in6_dev_put(idev);
930 		goto out;
931 	}
932 
933 	dev_hold(dev);
934 	if (neigh)
935 		neigh_hold(neigh);
936 	else
937 		neigh = ndisc_get_neigh(dev, addr);
938 
939 	rt->rt6i_dev	  = dev;
940 	rt->rt6i_idev     = idev;
941 	rt->rt6i_nexthop  = neigh;
942 	atomic_set(&rt->u.dst.__refcnt, 1);
943 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
945 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
946 	rt->u.dst.output  = output;
947 
948 #if 0	/* there's no chance to use these for ndisc */
949 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
950 				? DST_HOST
951 				: 0;
952 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953 	rt->rt6i_dst.plen = 128;
954 #endif
955 
956 	spin_lock_bh(&ndisc_lock);
957 	rt->u.dst.next = ndisc_dst_gc_list;
958 	ndisc_dst_gc_list = &rt->u.dst;
959 	spin_unlock_bh(&ndisc_lock);
960 
961 	fib6_force_start_gc();
962 
963 out:
964 	return &rt->u.dst;
965 }
966 
967 int ndisc_dst_gc(int *more)
968 {
969 	struct dst_entry *dst, *next, **pprev;
970 	int freed;
971 
972 	next = NULL;
973 	freed = 0;
974 
975 	spin_lock_bh(&ndisc_lock);
976 	pprev = &ndisc_dst_gc_list;
977 
978 	while ((dst = *pprev) != NULL) {
979 		if (!atomic_read(&dst->__refcnt)) {
980 			*pprev = dst->next;
981 			dst_free(dst);
982 			freed++;
983 		} else {
984 			pprev = &dst->next;
985 			(*more)++;
986 		}
987 	}
988 
989 	spin_unlock_bh(&ndisc_lock);
990 
991 	return freed;
992 }
993 
994 static int ip6_dst_gc(void)
995 {
996 	static unsigned expire = 30*HZ;
997 	static unsigned long last_gc;
998 	unsigned long now = jiffies;
999 
1000 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1001 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1002 		goto out;
1003 
1004 	expire++;
1005 	fib6_run_gc(expire);
1006 	last_gc = now;
1007 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1008 		expire = ip6_rt_gc_timeout>>1;
1009 
1010 out:
1011 	expire -= expire>>ip6_rt_gc_elasticity;
1012 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1013 }
1014 
1015 /* Clean host part of a prefix. Not necessary in radix tree,
1016    but results in cleaner routing tables.
1017 
1018    Remove it only when all the things will work!
1019  */
1020 
1021 static int ipv6_get_mtu(struct net_device *dev)
1022 {
1023 	int mtu = IPV6_MIN_MTU;
1024 	struct inet6_dev *idev;
1025 
1026 	idev = in6_dev_get(dev);
1027 	if (idev) {
1028 		mtu = idev->cnf.mtu6;
1029 		in6_dev_put(idev);
1030 	}
1031 	return mtu;
1032 }
1033 
1034 int ipv6_get_hoplimit(struct net_device *dev)
1035 {
1036 	int hoplimit = ipv6_devconf.hop_limit;
1037 	struct inet6_dev *idev;
1038 
1039 	idev = in6_dev_get(dev);
1040 	if (idev) {
1041 		hoplimit = idev->cnf.hop_limit;
1042 		in6_dev_put(idev);
1043 	}
1044 	return hoplimit;
1045 }
1046 
/*
 * ip6_route_add - build a route from @cfg and insert it into its table.
 *
 * @cfg: route description.  Note that cfg->fc_metric and
 *       cfg->fc_protocol are rewritten in place when they were left at
 *       0 / RTPROT_UNSPEC.
 *
 * The device is taken from cfg->fc_ifindex when given; for gateway
 * routes without an interface it is derived from a route lookup of the
 * gateway address.  Reject routes, and non-loopback destinations routed
 * via a loopback device, are turned into reject routes bound to the
 * loopback device.
 *
 * References on dev and idev taken here are stored in the route on the
 * install path and dropped again on every error path.
 *
 * Returns the result of __ip6_ins_rt() on the install path, otherwise a
 * negative errno: -EINVAL (bad prefix length, bad gateway, bad metric
 * attribute), -ENODEV (no usable device), -ENOBUFS (no table),
 * -ENOMEM (no route entry), -EHOSTUNREACH (gateway not directly
 * reachable) or the error of the neighbour lookup.
 */

int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* source-specific routes need subtree support */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* default handlers; the reject path below overrides them */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	/* store only the prefix bits of the destination */
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* the gateway itself must be reachable without a gateway */
			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* no interface given: inherit it from the gateway's route */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* err stays -EHOSTUNREACH if the gateway is itself behind a gateway */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		/* copy user supplied metrics; attribute type N goes to metrics[N - 1] */
		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla->nla_type;

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	/* fill in defaults for metrics the caller did not set */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* the dev/idev references taken above are stored in the route here */
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* error path: undo every reference taken so far */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->u.dst);
	return err;
}
1250 
1251 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1252 {
1253 	int err;
1254 	struct fib6_table *table;
1255 
1256 	if (rt == &ip6_null_entry)
1257 		return -ENOENT;
1258 
1259 	table = rt->rt6i_table;
1260 	write_lock_bh(&table->tb6_lock);
1261 
1262 	err = fib6_del(rt, info);
1263 	dst_release(&rt->u.dst);
1264 
1265 	write_unlock_bh(&table->tb6_lock);
1266 
1267 	return err;
1268 }
1269 
1270 int ip6_del_rt(struct rt6_info *rt)
1271 {
1272 	return __ip6_del_rt(rt, NULL);
1273 }
1274 
1275 static int ip6_route_del(struct fib6_config *cfg)
1276 {
1277 	struct fib6_table *table;
1278 	struct fib6_node *fn;
1279 	struct rt6_info *rt;
1280 	int err = -ESRCH;
1281 
1282 	table = fib6_get_table(cfg->fc_table);
1283 	if (table == NULL)
1284 		return err;
1285 
1286 	read_lock_bh(&table->tb6_lock);
1287 
1288 	fn = fib6_locate(&table->tb6_root,
1289 			 &cfg->fc_dst, cfg->fc_dst_len,
1290 			 &cfg->fc_src, cfg->fc_src_len);
1291 
1292 	if (fn) {
1293 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1294 			if (cfg->fc_ifindex &&
1295 			    (rt->rt6i_dev == NULL ||
1296 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1297 				continue;
1298 			if (cfg->fc_flags & RTF_GATEWAY &&
1299 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1300 				continue;
1301 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1302 				continue;
1303 			dst_hold(&rt->u.dst);
1304 			read_unlock_bh(&table->tb6_lock);
1305 
1306 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1307 		}
1308 	}
1309 	read_unlock_bh(&table->tb6_lock);
1310 
1311 	return err;
1312 }
1313 
1314 /*
1315  *	Handle redirects
1316  */
1317 struct ip6rd_flowi {
1318 	struct flowi fl;
1319 	struct in6_addr gateway;
1320 };
1321 
1322 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1323 					     struct flowi *fl,
1324 					     int flags)
1325 {
1326 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1327 	struct rt6_info *rt;
1328 	struct fib6_node *fn;
1329 
1330 	/*
1331 	 * Get the "current" route for this destination and
1332 	 * check if the redirect has come from approriate router.
1333 	 *
1334 	 * RFC 2461 specifies that redirects should only be
1335 	 * accepted if they come from the nexthop to the target.
1336 	 * Due to the way the routes are chosen, this notion
1337 	 * is a bit fuzzy and one might need to check all possible
1338 	 * routes.
1339 	 */
1340 
1341 	read_lock_bh(&table->tb6_lock);
1342 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1343 restart:
1344 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1345 		/*
1346 		 * Current route is on-link; redirect is always invalid.
1347 		 *
1348 		 * Seems, previous statement is not true. It could
1349 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1350 		 * But then router serving it might decide, that we should
1351 		 * know truth 8)8) --ANK (980726).
1352 		 */
1353 		if (rt6_check_expired(rt))
1354 			continue;
1355 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1356 			continue;
1357 		if (fl->oif != rt->rt6i_dev->ifindex)
1358 			continue;
1359 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1360 			continue;
1361 		break;
1362 	}
1363 
1364 	if (!rt)
1365 		rt = &ip6_null_entry;
1366 	BACKTRACK(&fl->fl6_src);
1367 out:
1368 	dst_hold(&rt->u.dst);
1369 
1370 	read_unlock_bh(&table->tb6_lock);
1371 
1372 	return rt;
1373 };
1374 
1375 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1376 					   struct in6_addr *src,
1377 					   struct in6_addr *gateway,
1378 					   struct net_device *dev)
1379 {
1380 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1381 	struct ip6rd_flowi rdfl = {
1382 		.fl = {
1383 			.oif = dev->ifindex,
1384 			.nl_u = {
1385 				.ip6_u = {
1386 					.daddr = *dest,
1387 					.saddr = *src,
1388 				},
1389 			},
1390 		},
1391 		.gateway = *gateway,
1392 	};
1393 
1394 	if (rt6_need_strict(dest))
1395 		flags |= RT6_LOOKUP_F_IFACE;
1396 
1397 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1398 }
1399 
1400 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1401 		  struct in6_addr *saddr,
1402 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1403 {
1404 	struct rt6_info *rt, *nrt = NULL;
1405 	struct netevent_redirect netevent;
1406 
1407 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1408 
1409 	if (rt == &ip6_null_entry) {
1410 		if (net_ratelimit())
1411 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1412 			       "for redirect target\n");
1413 		goto out;
1414 	}
1415 
1416 	/*
1417 	 *	We have finally decided to accept it.
1418 	 */
1419 
1420 	neigh_update(neigh, lladdr, NUD_STALE,
1421 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1422 		     NEIGH_UPDATE_F_OVERRIDE|
1423 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1424 				     NEIGH_UPDATE_F_ISROUTER))
1425 		     );
1426 
1427 	/*
1428 	 * Redirect received -> path was valid.
1429 	 * Look, redirects are sent only in response to data packets,
1430 	 * so that this nexthop apparently is reachable. --ANK
1431 	 */
1432 	dst_confirm(&rt->u.dst);
1433 
1434 	/* Duplicate redirect: silently ignore. */
1435 	if (neigh == rt->u.dst.neighbour)
1436 		goto out;
1437 
1438 	nrt = ip6_rt_copy(rt);
1439 	if (nrt == NULL)
1440 		goto out;
1441 
1442 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1443 	if (on_link)
1444 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1445 
1446 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1447 	nrt->rt6i_dst.plen = 128;
1448 	nrt->u.dst.flags |= DST_HOST;
1449 
1450 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1451 	nrt->rt6i_nexthop = neigh_clone(neigh);
1452 	/* Reset pmtu, it may be better */
1453 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1454 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1455 
1456 	if (ip6_ins_rt(nrt))
1457 		goto out;
1458 
1459 	netevent.old = &rt->u.dst;
1460 	netevent.new = &nrt->u.dst;
1461 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1462 
1463 	if (rt->rt6i_flags&RTF_CACHE) {
1464 		ip6_del_rt(rt);
1465 		return;
1466 	}
1467 
1468 out:
1469 	dst_release(&rt->u.dst);
1470 	return;
1471 }
1472 
1473 /*
1474  *	Handle ICMP "packet too big" messages
1475  *	i.e. Path MTU discovery
1476  */
1477 
1478 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1479 			struct net_device *dev, u32 pmtu)
1480 {
1481 	struct rt6_info *rt, *nrt;
1482 	int allfrag = 0;
1483 
1484 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1485 	if (rt == NULL)
1486 		return;
1487 
1488 	if (pmtu >= dst_mtu(&rt->u.dst))
1489 		goto out;
1490 
1491 	if (pmtu < IPV6_MIN_MTU) {
1492 		/*
1493 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1494 		 * MTU (1280) and a fragment header should always be included
1495 		 * after a node receiving Too Big message reporting PMTU is
1496 		 * less than the IPv6 Minimum Link MTU.
1497 		 */
1498 		pmtu = IPV6_MIN_MTU;
1499 		allfrag = 1;
1500 	}
1501 
1502 	/* New mtu received -> path was valid.
1503 	   They are sent only in response to data packets,
1504 	   so that this nexthop apparently is reachable. --ANK
1505 	 */
1506 	dst_confirm(&rt->u.dst);
1507 
1508 	/* Host route. If it is static, it would be better
1509 	   not to override it, but add new one, so that
1510 	   when cache entry will expire old pmtu
1511 	   would return automatically.
1512 	 */
1513 	if (rt->rt6i_flags & RTF_CACHE) {
1514 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1515 		if (allfrag)
1516 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1517 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1518 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1519 		goto out;
1520 	}
1521 
1522 	/* Network route.
1523 	   Two cases are possible:
1524 	   1. It is connected route. Action: COW
1525 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1526 	 */
1527 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1528 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1529 	else
1530 		nrt = rt6_alloc_clone(rt, daddr);
1531 
1532 	if (nrt) {
1533 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1534 		if (allfrag)
1535 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1536 
1537 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1538 		 * happened within 5 mins, the recommended timer is 10 mins.
1539 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1540 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1541 		 * and detecting PMTU increase will be automatically happened.
1542 		 */
1543 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1544 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1545 
1546 		ip6_ins_rt(nrt);
1547 	}
1548 out:
1549 	dst_release(&rt->u.dst);
1550 }
1551 
1552 /*
1553  *	Misc support functions
1554  */
1555 
1556 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1557 {
1558 	struct rt6_info *rt = ip6_dst_alloc();
1559 
1560 	if (rt) {
1561 		rt->u.dst.input = ort->u.dst.input;
1562 		rt->u.dst.output = ort->u.dst.output;
1563 
1564 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1565 		rt->u.dst.error = ort->u.dst.error;
1566 		rt->u.dst.dev = ort->u.dst.dev;
1567 		if (rt->u.dst.dev)
1568 			dev_hold(rt->u.dst.dev);
1569 		rt->rt6i_idev = ort->rt6i_idev;
1570 		if (rt->rt6i_idev)
1571 			in6_dev_hold(rt->rt6i_idev);
1572 		rt->u.dst.lastuse = jiffies;
1573 		rt->rt6i_expires = 0;
1574 
1575 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1576 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1577 		rt->rt6i_metric = 0;
1578 
1579 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1580 #ifdef CONFIG_IPV6_SUBTREES
1581 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1582 #endif
1583 		rt->rt6i_table = ort->rt6i_table;
1584 	}
1585 	return rt;
1586 }
1587 
1588 #ifdef CONFIG_IPV6_ROUTE_INFO
1589 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1590 					   struct in6_addr *gwaddr, int ifindex)
1591 {
1592 	struct fib6_node *fn;
1593 	struct rt6_info *rt = NULL;
1594 	struct fib6_table *table;
1595 
1596 	table = fib6_get_table(RT6_TABLE_INFO);
1597 	if (table == NULL)
1598 		return NULL;
1599 
1600 	write_lock_bh(&table->tb6_lock);
1601 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1602 	if (!fn)
1603 		goto out;
1604 
1605 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1606 		if (rt->rt6i_dev->ifindex != ifindex)
1607 			continue;
1608 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1609 			continue;
1610 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1611 			continue;
1612 		dst_hold(&rt->u.dst);
1613 		break;
1614 	}
1615 out:
1616 	write_unlock_bh(&table->tb6_lock);
1617 	return rt;
1618 }
1619 
1620 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1621 					   struct in6_addr *gwaddr, int ifindex,
1622 					   unsigned pref)
1623 {
1624 	struct fib6_config cfg = {
1625 		.fc_table	= RT6_TABLE_INFO,
1626 		.fc_metric	= 1024,
1627 		.fc_ifindex	= ifindex,
1628 		.fc_dst_len	= prefixlen,
1629 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1630 				  RTF_UP | RTF_PREF(pref),
1631 	};
1632 
1633 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1634 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1635 
1636 	/* We should treat it as a default route if prefix length is 0. */
1637 	if (!prefixlen)
1638 		cfg.fc_flags |= RTF_DEFAULT;
1639 
1640 	ip6_route_add(&cfg);
1641 
1642 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1643 }
1644 #endif
1645 
1646 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1647 {
1648 	struct rt6_info *rt;
1649 	struct fib6_table *table;
1650 
1651 	table = fib6_get_table(RT6_TABLE_DFLT);
1652 	if (table == NULL)
1653 		return NULL;
1654 
1655 	write_lock_bh(&table->tb6_lock);
1656 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1657 		if (dev == rt->rt6i_dev &&
1658 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1659 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1660 			break;
1661 	}
1662 	if (rt)
1663 		dst_hold(&rt->u.dst);
1664 	write_unlock_bh(&table->tb6_lock);
1665 	return rt;
1666 }
1667 
1668 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1669 				     struct net_device *dev,
1670 				     unsigned int pref)
1671 {
1672 	struct fib6_config cfg = {
1673 		.fc_table	= RT6_TABLE_DFLT,
1674 		.fc_metric	= 1024,
1675 		.fc_ifindex	= dev->ifindex,
1676 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1677 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1678 	};
1679 
1680 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1681 
1682 	ip6_route_add(&cfg);
1683 
1684 	return rt6_get_dflt_router(gwaddr, dev);
1685 }
1686 
1687 void rt6_purge_dflt_routers(void)
1688 {
1689 	struct rt6_info *rt;
1690 	struct fib6_table *table;
1691 
1692 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1693 	table = fib6_get_table(RT6_TABLE_DFLT);
1694 	if (table == NULL)
1695 		return;
1696 
1697 restart:
1698 	read_lock_bh(&table->tb6_lock);
1699 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1700 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1701 			dst_hold(&rt->u.dst);
1702 			read_unlock_bh(&table->tb6_lock);
1703 			ip6_del_rt(rt);
1704 			goto restart;
1705 		}
1706 	}
1707 	read_unlock_bh(&table->tb6_lock);
1708 }
1709 
1710 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1711 				 struct fib6_config *cfg)
1712 {
1713 	memset(cfg, 0, sizeof(*cfg));
1714 
1715 	cfg->fc_table = RT6_TABLE_MAIN;
1716 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1717 	cfg->fc_metric = rtmsg->rtmsg_metric;
1718 	cfg->fc_expires = rtmsg->rtmsg_info;
1719 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1720 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1721 	cfg->fc_flags = rtmsg->rtmsg_flags;
1722 
1723 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1724 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1725 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1726 }
1727 
1728 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1729 {
1730 	struct fib6_config cfg;
1731 	struct in6_rtmsg rtmsg;
1732 	int err;
1733 
1734 	switch(cmd) {
1735 	case SIOCADDRT:		/* Add a route */
1736 	case SIOCDELRT:		/* Delete a route */
1737 		if (!capable(CAP_NET_ADMIN))
1738 			return -EPERM;
1739 		err = copy_from_user(&rtmsg, arg,
1740 				     sizeof(struct in6_rtmsg));
1741 		if (err)
1742 			return -EFAULT;
1743 
1744 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1745 
1746 		rtnl_lock();
1747 		switch (cmd) {
1748 		case SIOCADDRT:
1749 			err = ip6_route_add(&cfg);
1750 			break;
1751 		case SIOCDELRT:
1752 			err = ip6_route_del(&cfg);
1753 			break;
1754 		default:
1755 			err = -EINVAL;
1756 		}
1757 		rtnl_unlock();
1758 
1759 		return err;
1760 	};
1761 
1762 	return -EINVAL;
1763 }
1764 
1765 /*
1766  *	Drop the packet on the floor
1767  */
1768 
1769 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1770 {
1771 	int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1772 	if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1773 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1774 
1775 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
1776 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1777 	kfree_skb(skb);
1778 	return 0;
1779 }
1780 
1781 static int ip6_pkt_discard(struct sk_buff *skb)
1782 {
1783 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1784 }
1785 
1786 static int ip6_pkt_discard_out(struct sk_buff *skb)
1787 {
1788 	skb->dev = skb->dst->dev;
1789 	return ip6_pkt_discard(skb);
1790 }
1791 
1792 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1793 
1794 static int ip6_pkt_prohibit(struct sk_buff *skb)
1795 {
1796 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1797 }
1798 
1799 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1800 {
1801 	skb->dev = skb->dst->dev;
1802 	return ip6_pkt_prohibit(skb);
1803 }
1804 
/*
 * Free @skb and report success.  Unlike the discard/prohibit handlers
 * no ICMPv6 error is sent and no counter is touched.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1810 
1811 #endif
1812 
1813 /*
1814  *	Allocate a dst for local (unicast / anycast) address.
1815  */
1816 
1817 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1818 				    const struct in6_addr *addr,
1819 				    int anycast)
1820 {
1821 	struct rt6_info *rt = ip6_dst_alloc();
1822 
1823 	if (rt == NULL)
1824 		return ERR_PTR(-ENOMEM);
1825 
1826 	dev_hold(&loopback_dev);
1827 	in6_dev_hold(idev);
1828 
1829 	rt->u.dst.flags = DST_HOST;
1830 	rt->u.dst.input = ip6_input;
1831 	rt->u.dst.output = ip6_output;
1832 	rt->rt6i_dev = &loopback_dev;
1833 	rt->rt6i_idev = idev;
1834 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1835 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1836 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1837 	rt->u.dst.obsolete = -1;
1838 
1839 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1840 	if (anycast)
1841 		rt->rt6i_flags |= RTF_ANYCAST;
1842 	else
1843 		rt->rt6i_flags |= RTF_LOCAL;
1844 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1845 	if (rt->rt6i_nexthop == NULL) {
1846 		dst_free(&rt->u.dst);
1847 		return ERR_PTR(-ENOMEM);
1848 	}
1849 
1850 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1851 	rt->rt6i_dst.plen = 128;
1852 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1853 
1854 	atomic_set(&rt->u.dst.__refcnt, 1);
1855 
1856 	return rt;
1857 }
1858 
1859 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1860 {
1861 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1862 	    rt != &ip6_null_entry) {
1863 		RT6_TRACE("deleted by ifdown %p\n", rt);
1864 		return -1;
1865 	}
1866 	return 0;
1867 }
1868 
1869 void rt6_ifdown(struct net_device *dev)
1870 {
1871 	fib6_clean_all(fib6_ifdown, 0, dev);
1872 }
1873 
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1879 
1880 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1881 {
1882 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1883 	struct inet6_dev *idev;
1884 
1885 	/* In IPv6 pmtu discovery is not optional,
1886 	   so that RTAX_MTU lock cannot disable it.
1887 	   We still use this lock to block changes
1888 	   caused by addrconf/ndisc.
1889 	*/
1890 
1891 	idev = __in6_dev_get(arg->dev);
1892 	if (idev == NULL)
1893 		return 0;
1894 
1895 	/* For administrative MTU increase, there is no way to discover
1896 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1897 	   Since RFC 1981 doesn't include administrative MTU increase
1898 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1899 	 */
1900 	/*
1901 	   If new MTU is less than route PMTU, this new MTU will be the
1902 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1903 	   decreases; if new MTU is greater than route PMTU, and the
1904 	   old MTU is the lowest MTU in the path, update the route PMTU
1905 	   to reflect the increase. In this case if the other nodes' MTU
1906 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1907 	   PMTU discouvery.
1908 	 */
1909 	if (rt->rt6i_dev == arg->dev &&
1910 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1911 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1912 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1913 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1914 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1915 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1916 	return 0;
1917 }
1918 
1919 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1920 {
1921 	struct rt6_mtu_change_arg arg = {
1922 		.dev = dev,
1923 		.mtu = mtu,
1924 	};
1925 
1926 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1927 }
1928 
1929 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1930 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1931 	[RTA_OIF]               = { .type = NLA_U32 },
1932 	[RTA_IIF]		= { .type = NLA_U32 },
1933 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1934 	[RTA_METRICS]           = { .type = NLA_NESTED },
1935 };
1936 
1937 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1938 			      struct fib6_config *cfg)
1939 {
1940 	struct rtmsg *rtm;
1941 	struct nlattr *tb[RTA_MAX+1];
1942 	int err;
1943 
1944 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1945 	if (err < 0)
1946 		goto errout;
1947 
1948 	err = -EINVAL;
1949 	rtm = nlmsg_data(nlh);
1950 	memset(cfg, 0, sizeof(*cfg));
1951 
1952 	cfg->fc_table = rtm->rtm_table;
1953 	cfg->fc_dst_len = rtm->rtm_dst_len;
1954 	cfg->fc_src_len = rtm->rtm_src_len;
1955 	cfg->fc_flags = RTF_UP;
1956 	cfg->fc_protocol = rtm->rtm_protocol;
1957 
1958 	if (rtm->rtm_type == RTN_UNREACHABLE)
1959 		cfg->fc_flags |= RTF_REJECT;
1960 
1961 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1962 	cfg->fc_nlinfo.nlh = nlh;
1963 
1964 	if (tb[RTA_GATEWAY]) {
1965 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1966 		cfg->fc_flags |= RTF_GATEWAY;
1967 	}
1968 
1969 	if (tb[RTA_DST]) {
1970 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1971 
1972 		if (nla_len(tb[RTA_DST]) < plen)
1973 			goto errout;
1974 
1975 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1976 	}
1977 
1978 	if (tb[RTA_SRC]) {
1979 		int plen = (rtm->rtm_src_len + 7) >> 3;
1980 
1981 		if (nla_len(tb[RTA_SRC]) < plen)
1982 			goto errout;
1983 
1984 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1985 	}
1986 
1987 	if (tb[RTA_OIF])
1988 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1989 
1990 	if (tb[RTA_PRIORITY])
1991 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1992 
1993 	if (tb[RTA_METRICS]) {
1994 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1995 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1996 	}
1997 
1998 	if (tb[RTA_TABLE])
1999 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2000 
2001 	err = 0;
2002 errout:
2003 	return err;
2004 }
2005 
2006 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007 {
2008 	struct fib6_config cfg;
2009 	int err;
2010 
2011 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2012 	if (err < 0)
2013 		return err;
2014 
2015 	return ip6_route_del(&cfg);
2016 }
2017 
2018 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2019 {
2020 	struct fib6_config cfg;
2021 	int err;
2022 
2023 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2024 	if (err < 0)
2025 		return err;
2026 
2027 	return ip6_route_add(&cfg);
2028 }
2029 
2030 static inline size_t rt6_nlmsg_size(void)
2031 {
2032 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2033 	       + nla_total_size(16) /* RTA_SRC */
2034 	       + nla_total_size(16) /* RTA_DST */
2035 	       + nla_total_size(16) /* RTA_GATEWAY */
2036 	       + nla_total_size(16) /* RTA_PREFSRC */
2037 	       + nla_total_size(4) /* RTA_TABLE */
2038 	       + nla_total_size(4) /* RTA_IIF */
2039 	       + nla_total_size(4) /* RTA_OIF */
2040 	       + nla_total_size(4) /* RTA_PRIORITY */
2041 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2042 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2043 }
2044 
2045 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2046 			 struct in6_addr *dst, struct in6_addr *src,
2047 			 int iif, int type, u32 pid, u32 seq,
2048 			 int prefix, unsigned int flags)
2049 {
2050 	struct rtmsg *rtm;
2051 	struct nlmsghdr *nlh;
2052 	long expires;
2053 	u32 table;
2054 
2055 	if (prefix) {	/* user wants prefix routes only */
2056 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2057 			/* success since this is not a prefix route */
2058 			return 1;
2059 		}
2060 	}
2061 
2062 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2063 	if (nlh == NULL)
2064 		return -EMSGSIZE;
2065 
2066 	rtm = nlmsg_data(nlh);
2067 	rtm->rtm_family = AF_INET6;
2068 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2069 	rtm->rtm_src_len = rt->rt6i_src.plen;
2070 	rtm->rtm_tos = 0;
2071 	if (rt->rt6i_table)
2072 		table = rt->rt6i_table->tb6_id;
2073 	else
2074 		table = RT6_TABLE_UNSPEC;
2075 	rtm->rtm_table = table;
2076 	NLA_PUT_U32(skb, RTA_TABLE, table);
2077 	if (rt->rt6i_flags&RTF_REJECT)
2078 		rtm->rtm_type = RTN_UNREACHABLE;
2079 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2080 		rtm->rtm_type = RTN_LOCAL;
2081 	else
2082 		rtm->rtm_type = RTN_UNICAST;
2083 	rtm->rtm_flags = 0;
2084 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2085 	rtm->rtm_protocol = rt->rt6i_protocol;
2086 	if (rt->rt6i_flags&RTF_DYNAMIC)
2087 		rtm->rtm_protocol = RTPROT_REDIRECT;
2088 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2089 		rtm->rtm_protocol = RTPROT_KERNEL;
2090 	else if (rt->rt6i_flags&RTF_DEFAULT)
2091 		rtm->rtm_protocol = RTPROT_RA;
2092 
2093 	if (rt->rt6i_flags&RTF_CACHE)
2094 		rtm->rtm_flags |= RTM_F_CLONED;
2095 
2096 	if (dst) {
2097 		NLA_PUT(skb, RTA_DST, 16, dst);
2098 		rtm->rtm_dst_len = 128;
2099 	} else if (rtm->rtm_dst_len)
2100 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2101 #ifdef CONFIG_IPV6_SUBTREES
2102 	if (src) {
2103 		NLA_PUT(skb, RTA_SRC, 16, src);
2104 		rtm->rtm_src_len = 128;
2105 	} else if (rtm->rtm_src_len)
2106 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2107 #endif
2108 	if (iif)
2109 		NLA_PUT_U32(skb, RTA_IIF, iif);
2110 	else if (dst) {
2111 		struct in6_addr saddr_buf;
2112 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2113 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2114 	}
2115 
2116 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2117 		goto nla_put_failure;
2118 
2119 	if (rt->u.dst.neighbour)
2120 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2121 
2122 	if (rt->u.dst.dev)
2123 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2124 
2125 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2126 
2127 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2128 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2129 			       expires, rt->u.dst.error) < 0)
2130 		goto nla_put_failure;
2131 
2132 	return nlmsg_end(skb, nlh);
2133 
2134 nla_put_failure:
2135 	nlmsg_cancel(skb, nlh);
2136 	return -EMSGSIZE;
2137 }
2138 
2139 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2140 {
2141 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2142 	int prefix;
2143 
2144 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2145 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2146 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2147 	} else
2148 		prefix = 0;
2149 
2150 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2151 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2152 		     prefix, NLM_F_MULTI);
2153 }
2154 
2155 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2156 {
2157 	struct nlattr *tb[RTA_MAX+1];
2158 	struct rt6_info *rt;
2159 	struct sk_buff *skb;
2160 	struct rtmsg *rtm;
2161 	struct flowi fl;
2162 	int err, iif = 0;
2163 
2164 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2165 	if (err < 0)
2166 		goto errout;
2167 
2168 	err = -EINVAL;
2169 	memset(&fl, 0, sizeof(fl));
2170 
2171 	if (tb[RTA_SRC]) {
2172 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2173 			goto errout;
2174 
2175 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2176 	}
2177 
2178 	if (tb[RTA_DST]) {
2179 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2180 			goto errout;
2181 
2182 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2183 	}
2184 
2185 	if (tb[RTA_IIF])
2186 		iif = nla_get_u32(tb[RTA_IIF]);
2187 
2188 	if (tb[RTA_OIF])
2189 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2190 
2191 	if (iif) {
2192 		struct net_device *dev;
2193 		dev = __dev_get_by_index(iif);
2194 		if (!dev) {
2195 			err = -ENODEV;
2196 			goto errout;
2197 		}
2198 	}
2199 
2200 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2201 	if (skb == NULL) {
2202 		err = -ENOBUFS;
2203 		goto errout;
2204 	}
2205 
2206 	/* Reserve room for dummy headers, this skb can pass
2207 	   through good chunk of routing engine.
2208 	 */
2209 	skb->mac.raw = skb->data;
2210 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2211 
2212 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2213 	skb->dst = &rt->u.dst;
2214 
2215 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2216 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2217 			    nlh->nlmsg_seq, 0, 0);
2218 	if (err < 0) {
2219 		kfree_skb(skb);
2220 		goto errout;
2221 	}
2222 
2223 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2224 errout:
2225 	return err;
2226 }
2227 
2228 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2229 {
2230 	struct sk_buff *skb;
2231 	u32 pid = 0, seq = 0;
2232 	struct nlmsghdr *nlh = NULL;
2233 	int err = -ENOBUFS;
2234 
2235 	if (info) {
2236 		pid = info->pid;
2237 		nlh = info->nlh;
2238 		if (nlh)
2239 			seq = nlh->nlmsg_seq;
2240 	}
2241 
2242 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2243 	if (skb == NULL)
2244 		goto errout;
2245 
2246 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2247 	if (err < 0) {
2248 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2249 		WARN_ON(err == -EMSGSIZE);
2250 		kfree_skb(skb);
2251 		goto errout;
2252 	}
2253 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2254 errout:
2255 	if (err < 0)
2256 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2257 }
2258 
2259 /*
2260  *	/proc
2261  */
2262 
2263 #ifdef CONFIG_PROC_FS
2264 
/*
 * Assumed length of one line produced by rt6_info_route().
 * NOTE(review): the format strings there add up to this value only
 * when the device name fits the "%8s" field - confirm for longer names.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written to @buffer so far */
};
2275 
2276 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2277 {
2278 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2279 
2280 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2281 		arg->skip++;
2282 		return 0;
2283 	}
2284 
2285 	if (arg->len >= arg->length)
2286 		return 0;
2287 
2288 	arg->len += sprintf(arg->buffer + arg->len,
2289 			    NIP6_SEQFMT " %02x ",
2290 			    NIP6(rt->rt6i_dst.addr),
2291 			    rt->rt6i_dst.plen);
2292 
2293 #ifdef CONFIG_IPV6_SUBTREES
2294 	arg->len += sprintf(arg->buffer + arg->len,
2295 			    NIP6_SEQFMT " %02x ",
2296 			    NIP6(rt->rt6i_src.addr),
2297 			    rt->rt6i_src.plen);
2298 #else
2299 	arg->len += sprintf(arg->buffer + arg->len,
2300 			    "00000000000000000000000000000000 00 ");
2301 #endif
2302 
2303 	if (rt->rt6i_nexthop) {
2304 		arg->len += sprintf(arg->buffer + arg->len,
2305 				    NIP6_SEQFMT,
2306 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2307 	} else {
2308 		arg->len += sprintf(arg->buffer + arg->len,
2309 				    "00000000000000000000000000000000");
2310 	}
2311 	arg->len += sprintf(arg->buffer + arg->len,
2312 			    " %08x %08x %08x %08x %8s\n",
2313 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2314 			    rt->u.dst.__use, rt->rt6i_flags,
2315 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2316 	return 0;
2317 }
2318 
2319 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2320 {
2321 	struct rt6_proc_arg arg = {
2322 		.buffer = buffer,
2323 		.offset = offset,
2324 		.length = length,
2325 	};
2326 
2327 	fib6_clean_all(rt6_info_route, 0, &arg);
2328 
2329 	*start = buffer;
2330 	if (offset)
2331 		*start += offset % RT6_INFO_LEN;
2332 
2333 	arg.len -= offset % RT6_INFO_LEN;
2334 
2335 	if (arg.len > length)
2336 		arg.len = length;
2337 	if (arg.len < 0)
2338 		arg.len = 0;
2339 
2340 	return arg.len;
2341 }
2342 
2343 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2344 {
2345 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2346 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2347 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2348 		      rt6_stats.fib_rt_cache,
2349 		      atomic_read(&ip6_dst_ops.entries),
2350 		      rt6_stats.fib_discarded_routes);
2351 
2352 	return 0;
2353 }
2354 
2355 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2356 {
2357 	return single_open(file, rt6_stats_seq_show, NULL);
2358 }
2359 
2360 static const struct file_operations rt6_stats_seq_fops = {
2361 	.owner	 = THIS_MODULE,
2362 	.open	 = rt6_stats_seq_open,
2363 	.read	 = seq_read,
2364 	.llseek	 = seq_lseek,
2365 	.release = single_release,
2366 };
2367 #endif	/* CONFIG_PROC_FS */
2368 
2369 #ifdef CONFIG_SYSCTL
2370 
2371 static int flush_delay;
2372 
2373 static
2374 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2375 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2376 {
2377 	if (write) {
2378 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2379 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2380 		return 0;
2381 	} else
2382 		return -EINVAL;
2383 }
2384 
2385 ctl_table ipv6_route_table[] = {
2386 	{
2387 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2388 		.procname	=	"flush",
2389 		.data		=	&flush_delay,
2390 		.maxlen		=	sizeof(int),
2391 		.mode		=	0200,
2392 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2393 	},
2394 	{
2395 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2396 		.procname	=	"gc_thresh",
2397 		.data		=	&ip6_dst_ops.gc_thresh,
2398 		.maxlen		=	sizeof(int),
2399 		.mode		=	0644,
2400 		.proc_handler	=	&proc_dointvec,
2401 	},
2402 	{
2403 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2404 		.procname	=	"max_size",
2405 		.data		=	&ip6_rt_max_size,
2406 		.maxlen		=	sizeof(int),
2407 		.mode		=	0644,
2408 		.proc_handler	=	&proc_dointvec,
2409 	},
2410 	{
2411 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2412 		.procname	=	"gc_min_interval",
2413 		.data		=	&ip6_rt_gc_min_interval,
2414 		.maxlen		=	sizeof(int),
2415 		.mode		=	0644,
2416 		.proc_handler	=	&proc_dointvec_jiffies,
2417 		.strategy	=	&sysctl_jiffies,
2418 	},
2419 	{
2420 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2421 		.procname	=	"gc_timeout",
2422 		.data		=	&ip6_rt_gc_timeout,
2423 		.maxlen		=	sizeof(int),
2424 		.mode		=	0644,
2425 		.proc_handler	=	&proc_dointvec_jiffies,
2426 		.strategy	=	&sysctl_jiffies,
2427 	},
2428 	{
2429 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2430 		.procname	=	"gc_interval",
2431 		.data		=	&ip6_rt_gc_interval,
2432 		.maxlen		=	sizeof(int),
2433 		.mode		=	0644,
2434 		.proc_handler	=	&proc_dointvec_jiffies,
2435 		.strategy	=	&sysctl_jiffies,
2436 	},
2437 	{
2438 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2439 		.procname	=	"gc_elasticity",
2440 		.data		=	&ip6_rt_gc_elasticity,
2441 		.maxlen		=	sizeof(int),
2442 		.mode		=	0644,
2443 		.proc_handler	=	&proc_dointvec_jiffies,
2444 		.strategy	=	&sysctl_jiffies,
2445 	},
2446 	{
2447 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2448 		.procname	=	"mtu_expires",
2449 		.data		=	&ip6_rt_mtu_expires,
2450 		.maxlen		=	sizeof(int),
2451 		.mode		=	0644,
2452 		.proc_handler	=	&proc_dointvec_jiffies,
2453 		.strategy	=	&sysctl_jiffies,
2454 	},
2455 	{
2456 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2457 		.procname	=	"min_adv_mss",
2458 		.data		=	&ip6_rt_min_advmss,
2459 		.maxlen		=	sizeof(int),
2460 		.mode		=	0644,
2461 		.proc_handler	=	&proc_dointvec_jiffies,
2462 		.strategy	=	&sysctl_jiffies,
2463 	},
2464 	{
2465 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2466 		.procname	=	"gc_min_interval_ms",
2467 		.data		=	&ip6_rt_gc_min_interval,
2468 		.maxlen		=	sizeof(int),
2469 		.mode		=	0644,
2470 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2471 		.strategy	=	&sysctl_ms_jiffies,
2472 	},
2473 	{ .ctl_name = 0 }
2474 };
2475 
2476 #endif
2477 
2478 void __init ip6_route_init(void)
2479 {
2480 	struct proc_dir_entry *p;
2481 
2482 	ip6_dst_ops.kmem_cachep =
2483 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2484 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2485 	fib6_init();
2486 #ifdef 	CONFIG_PROC_FS
2487 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2488 	if (p)
2489 		p->owner = THIS_MODULE;
2490 
2491 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2492 #endif
2493 #ifdef CONFIG_XFRM
2494 	xfrm6_init();
2495 #endif
2496 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2497 	fib6_rules_init();
2498 #endif
2499 }
2500 
/*
 * Tear down what ip6_route_init() set up: policy-routing rules, the
 * "ipv6_route" and "rt6_stats" proc entries and the xfrm hooks (each only
 * if configured), then the routes, the FIB garbage collector and finally
 * the dst slab cache.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	fib6_rules_cleanup();
#endif
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	/* NOTE(review): NULL presumably means "all devices" - confirm
	 * against rt6_ifdown(). */
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	/* Destroyed last, after the route and GC teardown calls above. */
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}
2517