xref: /linux/net/ipv6/route.c (revision 7f3edee81fbd49114c28057512906f169caa0bed)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65 
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73 
74 #define CLONE_OFFLINK_ROUTE 0
75 
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83 
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void		ip6_dst_destroy(struct dst_entry *);
88 static void		ip6_dst_ifdown(struct dst_entry *,
89 				       struct net_device *dev, int how);
90 static int		 ip6_dst_gc(void);
91 
92 static int		ip6_pkt_discard(struct sk_buff *skb);
93 static int		ip6_pkt_discard_out(struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96 
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
99 					   struct in6_addr *gwaddr, int ifindex,
100 					   unsigned pref);
101 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex);
103 #endif
104 
105 static struct dst_ops ip6_dst_ops = {
106 	.family			=	AF_INET6,
107 	.protocol		=	__constant_htons(ETH_P_IPV6),
108 	.gc			=	ip6_dst_gc,
109 	.gc_thresh		=	1024,
110 	.check			=	ip6_dst_check,
111 	.destroy		=	ip6_dst_destroy,
112 	.ifdown			=	ip6_dst_ifdown,
113 	.negative_advice	=	ip6_negative_advice,
114 	.link_failure		=	ip6_link_failure,
115 	.update_pmtu		=	ip6_rt_update_pmtu,
116 	.entry_size		=	sizeof(struct rt6_info),
117 };
118 
119 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
120 {
121 }
122 
123 static struct dst_ops ip6_dst_blackhole_ops = {
124 	.family			=	AF_INET6,
125 	.protocol		=	__constant_htons(ETH_P_IPV6),
126 	.destroy		=	ip6_dst_destroy,
127 	.check			=	ip6_dst_check,
128 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
129 	.entry_size		=	sizeof(struct rt6_info),
130 };
131 
132 struct rt6_info ip6_null_entry = {
133 	.u = {
134 		.dst = {
135 			.__refcnt	= ATOMIC_INIT(1),
136 			.__use		= 1,
137 			.obsolete	= -1,
138 			.error		= -ENETUNREACH,
139 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
140 			.input		= ip6_pkt_discard,
141 			.output		= ip6_pkt_discard_out,
142 			.ops		= &ip6_dst_ops,
143 			.path		= (struct dst_entry*)&ip6_null_entry,
144 		}
145 	},
146 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
147 	.rt6i_metric	= ~(u32) 0,
148 	.rt6i_ref	= ATOMIC_INIT(1),
149 };
150 
151 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
152 
153 static int ip6_pkt_prohibit(struct sk_buff *skb);
154 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
155 static int ip6_pkt_blk_hole(struct sk_buff *skb);
156 
157 struct rt6_info ip6_prohibit_entry = {
158 	.u = {
159 		.dst = {
160 			.__refcnt	= ATOMIC_INIT(1),
161 			.__use		= 1,
162 			.obsolete	= -1,
163 			.error		= -EACCES,
164 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
165 			.input		= ip6_pkt_prohibit,
166 			.output		= ip6_pkt_prohibit_out,
167 			.ops		= &ip6_dst_ops,
168 			.path		= (struct dst_entry*)&ip6_prohibit_entry,
169 		}
170 	},
171 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
172 	.rt6i_metric	= ~(u32) 0,
173 	.rt6i_ref	= ATOMIC_INIT(1),
174 };
175 
176 struct rt6_info ip6_blk_hole_entry = {
177 	.u = {
178 		.dst = {
179 			.__refcnt	= ATOMIC_INIT(1),
180 			.__use		= 1,
181 			.obsolete	= -1,
182 			.error		= -EINVAL,
183 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
184 			.input		= ip6_pkt_blk_hole,
185 			.output		= ip6_pkt_blk_hole,
186 			.ops		= &ip6_dst_ops,
187 			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
188 		}
189 	},
190 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
191 	.rt6i_metric	= ~(u32) 0,
192 	.rt6i_ref	= ATOMIC_INIT(1),
193 };
194 
195 #endif
196 
197 /* allocate dst with ip6_dst_ops */
198 static __inline__ struct rt6_info *ip6_dst_alloc(void)
199 {
200 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
201 }
202 
203 static void ip6_dst_destroy(struct dst_entry *dst)
204 {
205 	struct rt6_info *rt = (struct rt6_info *)dst;
206 	struct inet6_dev *idev = rt->rt6i_idev;
207 
208 	if (idev != NULL) {
209 		rt->rt6i_idev = NULL;
210 		in6_dev_put(idev);
211 	}
212 }
213 
214 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
215 			   int how)
216 {
217 	struct rt6_info *rt = (struct rt6_info *)dst;
218 	struct inet6_dev *idev = rt->rt6i_idev;
219 
220 	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
221 		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
222 		if (loopback_idev != NULL) {
223 			rt->rt6i_idev = loopback_idev;
224 			in6_dev_put(idev);
225 		}
226 	}
227 }
228 
229 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230 {
231 	return (rt->rt6i_flags & RTF_EXPIRES &&
232 		time_after(jiffies, rt->rt6i_expires));
233 }
234 
235 static inline int rt6_need_strict(struct in6_addr *daddr)
236 {
237 	return (ipv6_addr_type(daddr) &
238 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239 }
240 
241 /*
242  *	Route lookup. The relevant table->tb6_lock is assumed to be held by the caller.
243  */
244 
245 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
246 						    int oif,
247 						    int strict)
248 {
249 	struct rt6_info *local = NULL;
250 	struct rt6_info *sprt;
251 
252 	if (oif) {
253 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
254 			struct net_device *dev = sprt->rt6i_dev;
255 			if (dev->ifindex == oif)
256 				return sprt;
257 			if (dev->flags & IFF_LOOPBACK) {
258 				if (sprt->rt6i_idev == NULL ||
259 				    sprt->rt6i_idev->dev->ifindex != oif) {
260 					if (strict && oif)
261 						continue;
262 					if (local && (!oif ||
263 						      local->rt6i_idev->dev->ifindex == oif))
264 						continue;
265 				}
266 				local = sprt;
267 			}
268 		}
269 
270 		if (local)
271 			return local;
272 
273 		if (strict)
274 			return &ip6_null_entry;
275 	}
276 	return rt;
277 }
278 
279 #ifdef CONFIG_IPV6_ROUTER_PREF
280 static void rt6_probe(struct rt6_info *rt)
281 {
282 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
283 	/*
284 	 * Okay, this does not seem to be appropriate
285 	 * for now, however, we need to check if it
286 	 * is really so; aka Router Reachability Probing.
287 	 *
288 	 * Router Reachability Probe MUST be rate-limited
289 	 * to no more than one per minute.
290 	 */
291 	if (!neigh || (neigh->nud_state & NUD_VALID))
292 		return;
293 	read_lock_bh(&neigh->lock);
294 	if (!(neigh->nud_state & NUD_VALID) &&
295 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
296 		struct in6_addr mcaddr;
297 		struct in6_addr *target;
298 
299 		neigh->updated = jiffies;
300 		read_unlock_bh(&neigh->lock);
301 
302 		target = (struct in6_addr *)&neigh->primary_key;
303 		addrconf_addr_solict_mult(target, &mcaddr);
304 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
305 	} else
306 		read_unlock_bh(&neigh->lock);
307 }
308 #else
309 static inline void rt6_probe(struct rt6_info *rt)
310 {
311 	return;
312 }
313 #endif
314 
315 /*
316  * Default Router Selection (RFC 2461 6.3.6)
317  */
318 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
319 {
320 	struct net_device *dev = rt->rt6i_dev;
321 	if (!oif || dev->ifindex == oif)
322 		return 2;
323 	if ((dev->flags & IFF_LOOPBACK) &&
324 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
325 		return 1;
326 	return 0;
327 }
328 
329 static inline int rt6_check_neigh(struct rt6_info *rt)
330 {
331 	struct neighbour *neigh = rt->rt6i_nexthop;
332 	int m;
333 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
334 	    !(rt->rt6i_flags & RTF_GATEWAY))
335 		m = 1;
336 	else if (neigh) {
337 		read_lock_bh(&neigh->lock);
338 		if (neigh->nud_state & NUD_VALID)
339 			m = 2;
340 #ifdef CONFIG_IPV6_ROUTER_PREF
341 		else if (neigh->nud_state & NUD_FAILED)
342 			m = 0;
343 #endif
344 		else
345 			m = 1;
346 		read_unlock_bh(&neigh->lock);
347 	} else
348 		m = 0;
349 	return m;
350 }
351 
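/*
 * Score a candidate route for default router selection: interface match
 * from rt6_check_dev(), router preference bits when
 * CONFIG_IPV6_ROUTER_PREF is set, and neighbour reachability from
 * rt6_check_neigh().  Returns -1 when a strict requirement
 * (RT6_LOOKUP_F_IFACE or RT6_LOOKUP_F_REACHABLE) is not met.
 */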
352 static int rt6_score_route(struct rt6_info *rt, int oif,
353 			   int strict)
354 {
355 	int m, n;
356 
357 	m = rt6_check_dev(rt, oif);
358 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
359 		return -1;
360 #ifdef CONFIG_IPV6_ROUTER_PREF
361 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362 #endif
363 	n = rt6_check_neigh(rt);
364 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
365 		return -1;
366 	return m;
367 }
368 
369 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370 				   int *mpri, struct rt6_info *match)
371 {
372 	int m;
373 
374 	if (rt6_check_expired(rt))
375 		goto out;
376 
377 	m = rt6_score_route(rt, oif, strict);
378 	if (m < 0)
379 		goto out;
380 
381 	if (m > *mpri) {
382 		if (strict & RT6_LOOKUP_F_REACHABLE)
383 			rt6_probe(match);
384 		*mpri = m;
385 		match = rt;
386 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
387 		rt6_probe(rt);
388 	}
389 
390 out:
391 	return match;
392 }
393 
394 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395 				     struct rt6_info *rr_head,
396 				     u32 metric, int oif, int strict)
397 {
398 	struct rt6_info *rt, *match;
399 	int mpri = -1;
400 
401 	match = NULL;
402 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
403 	     rt = rt->u.dst.rt6_next)
404 		match = find_match(rt, oif, strict, &mpri, match);
405 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406 	     rt = rt->u.dst.rt6_next)
407 		match = find_match(rt, oif, strict, &mpri, match);
408 
409 	return match;
410 }
411 
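/*
 * Pick the best route among the entries sharing the lowest metric,
 * starting from the round-robin pointer fn->rr_ptr.  If nothing
 * (probably) reachable matched, advance rr_ptr so that the next lookup
 * tries another router of the same metric.
 */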
412 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413 {
414 	struct rt6_info *match, *rt0;
415 
416 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 		  __FUNCTION__, fn->leaf, oif);
418 
419 	rt0 = fn->rr_ptr;
420 	if (!rt0)
421 		fn->rr_ptr = rt0 = fn->leaf;
422 
423 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
424 
425 	if (!match &&
426 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
427 		struct rt6_info *next = rt0->u.dst.rt6_next;
428 
429 		/* no entries matched; do round-robin */
430 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 			next = fn->leaf;
432 
433 		if (next != rt0)
434 			fn->rr_ptr = next;
435 	}
436 
437 	RT6_TRACE("%s() => %p\n",
438 		  __FUNCTION__, match);
439 
440 	return (match ? match : &ip6_null_entry);
441 }
442 
443 #ifdef CONFIG_IPV6_ROUTE_INFO
444 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 		  struct in6_addr *gwaddr)
446 {
447 	struct route_info *rinfo = (struct route_info *) opt;
448 	struct in6_addr prefix_buf, *prefix;
449 	unsigned int pref;
450 	u32 lifetime;
451 	struct rt6_info *rt;
452 
453 	if (len < sizeof(struct route_info)) {
454 		return -EINVAL;
455 	}
456 
457 	/* Sanity check for prefix_len and length */
458 	if (rinfo->length > 3) {
459 		return -EINVAL;
460 	} else if (rinfo->prefix_len > 128) {
461 		return -EINVAL;
462 	} else if (rinfo->prefix_len > 64) {
463 		if (rinfo->length < 2) {
464 			return -EINVAL;
465 		}
466 	} else if (rinfo->prefix_len > 0) {
467 		if (rinfo->length < 1) {
468 			return -EINVAL;
469 		}
470 	}
471 
472 	pref = rinfo->route_pref;
473 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
474 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
475 
476 	lifetime = ntohl(rinfo->lifetime);
477 	if (lifetime == 0xffffffff) {
478 		/* infinity */
479 	} else if (lifetime > 0x7fffffff/HZ) {
480 		/* Avoid arithmetic overflow */
481 		lifetime = 0x7fffffff/HZ - 1;
482 	}
483 
484 	if (rinfo->length == 3)
485 		prefix = (struct in6_addr *)rinfo->prefix;
486 	else {
487 		/* this function is safe */
488 		ipv6_addr_prefix(&prefix_buf,
489 				 (struct in6_addr *)rinfo->prefix,
490 				 rinfo->prefix_len);
491 		prefix = &prefix_buf;
492 	}
493 
494 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
495 
496 	if (rt && !lifetime) {
497 		ip6_del_rt(rt);
498 		rt = NULL;
499 	}
500 
501 	if (!rt && lifetime)
502 		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
503 					pref);
504 	else if (rt)
505 		rt->rt6i_flags = RTF_ROUTEINFO |
506 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
507 
508 	if (rt) {
509 		if (lifetime == 0xffffffff) {
510 			rt->rt6i_flags &= ~RTF_EXPIRES;
511 		} else {
512 			rt->rt6i_expires = jiffies + HZ * lifetime;
513 			rt->rt6i_flags |= RTF_EXPIRES;
514 		}
515 		dst_release(&rt->u.dst);
516 	}
517 	return 0;
518 }
519 #endif
520 
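/*
 * BACKTRACK: if the lookup resolved to ip6_null_entry, walk back up the
 * fib6 tree, descending into source-routing subtrees where present,
 * until a node carrying route info (RTN_RTINFO) is found, then jump to
 * the caller's "restart" label; give up at the tree root.
 */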
521 #define BACKTRACK(saddr) \
522 do { \
523 	if (rt == &ip6_null_entry) { \
524 		struct fib6_node *pn; \
525 		while (1) { \
526 			if (fn->fn_flags & RTN_TL_ROOT) \
527 				goto out; \
528 			pn = fn->parent; \
529 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
530 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
531 			else \
532 				fn = pn; \
533 			if (fn->fn_flags & RTN_RTINFO) \
534 				goto restart; \
535 		} \
536 	} \
537 } while(0)
538 
539 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
540 					     struct flowi *fl, int flags)
541 {
542 	struct fib6_node *fn;
543 	struct rt6_info *rt;
544 
545 	read_lock_bh(&table->tb6_lock);
546 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547 restart:
548 	rt = fn->leaf;
549 	rt = rt6_device_match(rt, fl->oif, flags);
550 	BACKTRACK(&fl->fl6_src);
551 out:
552 	dst_use(&rt->u.dst, jiffies);
553 	read_unlock_bh(&table->tb6_lock);
554 	return rt;
555 
556 }
557 
558 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
559 			    int oif, int strict)
560 {
561 	struct flowi fl = {
562 		.oif = oif,
563 		.nl_u = {
564 			.ip6_u = {
565 				.daddr = *daddr,
566 			},
567 		},
568 	};
569 	struct dst_entry *dst;
570 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571 
572 	if (saddr) {
573 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
574 		flags |= RT6_LOOKUP_F_HAS_SADDR;
575 	}
576 
577 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
578 	if (dst->error == 0)
579 		return (struct rt6_info *) dst;
580 
581 	dst_release(dst);
582 
583 	return NULL;
584 }
585 
586 EXPORT_SYMBOL(rt6_lookup);
587 
588 /* ip6_ins_rt is called with table->tb6_lock NOT held.
589    It takes a new route entry; if the addition fails for any
590    reason, the route is freed. In any case, if the caller does
591    not hold a reference, it may be destroyed.
592  */
593 
594 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 {
596 	int err;
597 	struct fib6_table *table;
598 
599 	table = rt->rt6i_table;
600 	write_lock_bh(&table->tb6_lock);
601 	err = fib6_add(&table->tb6_root, rt, info);
602 	write_unlock_bh(&table->tb6_lock);
603 
604 	return err;
605 }
606 
607 int ip6_ins_rt(struct rt6_info *rt)
608 {
609 	return __ip6_ins_rt(rt, NULL);
610 }
611 
612 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
613 				      struct in6_addr *saddr)
614 {
615 	struct rt6_info *rt;
616 
617 	/*
618 	 *	Clone the route.
619 	 */
620 
621 	rt = ip6_rt_copy(ort);
622 
623 	if (rt) {
624 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
625 			if (rt->rt6i_dst.plen != 128 &&
626 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
627 				rt->rt6i_flags |= RTF_ANYCAST;
628 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
629 		}
630 
631 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
632 		rt->rt6i_dst.plen = 128;
633 		rt->rt6i_flags |= RTF_CACHE;
634 		rt->u.dst.flags |= DST_HOST;
635 
636 #ifdef CONFIG_IPV6_SUBTREES
637 		if (rt->rt6i_src.plen && saddr) {
638 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
639 			rt->rt6i_src.plen = 128;
640 		}
641 #endif
642 
643 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
644 
645 	}
646 
647 	return rt;
648 }
649 
650 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
651 {
652 	struct rt6_info *rt = ip6_rt_copy(ort);
653 	if (rt) {
654 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
655 		rt->rt6i_dst.plen = 128;
656 		rt->rt6i_flags |= RTF_CACHE;
657 		rt->u.dst.flags |= DST_HOST;
658 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
659 	}
660 	return rt;
661 }
662 
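/*
 * Core resolver used for both input and output route lookups.  If the
 * selected route is neither the null entry nor already an RTF_CACHE
 * clone, a per-destination host copy is created (rt6_alloc_cow(), or
 * rt6_alloc_clone() when CLONE_OFFLINK_ROUTE is enabled) and inserted
 * into the table, retrying if a concurrent insert wins the race.
 */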
663 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
664 					    struct flowi *fl, int flags)
665 {
666 	struct fib6_node *fn;
667 	struct rt6_info *rt, *nrt;
668 	int strict = 0;
669 	int attempts = 3;
670 	int err;
671 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
672 
673 	strict |= flags & RT6_LOOKUP_F_IFACE;
674 
675 relookup:
676 	read_lock_bh(&table->tb6_lock);
677 
678 restart_2:
679 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
680 
681 restart:
682 	rt = rt6_select(fn, oif, strict | reachable);
683 	BACKTRACK(&fl->fl6_src);
684 	if (rt == &ip6_null_entry ||
685 	    rt->rt6i_flags & RTF_CACHE)
686 		goto out;
687 
688 	dst_hold(&rt->u.dst);
689 	read_unlock_bh(&table->tb6_lock);
690 
691 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
692 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
693 	else {
694 #if CLONE_OFFLINK_ROUTE
695 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
696 #else
697 		goto out2;
698 #endif
699 	}
700 
701 	dst_release(&rt->u.dst);
702 	rt = nrt ? : &ip6_null_entry;
703 
704 	dst_hold(&rt->u.dst);
705 	if (nrt) {
706 		err = ip6_ins_rt(nrt);
707 		if (!err)
708 			goto out2;
709 	}
710 
711 	if (--attempts <= 0)
712 		goto out2;
713 
714 	/*
715 	 * Race condition! In the gap, when table->tb6_lock was
716 	 * released someone could insert this route.  Relookup.
717 	 */
718 	dst_release(&rt->u.dst);
719 	goto relookup;
720 
721 out:
722 	if (reachable) {
723 		reachable = 0;
724 		goto restart_2;
725 	}
726 	dst_hold(&rt->u.dst);
727 	read_unlock_bh(&table->tb6_lock);
728 out2:
729 	rt->u.dst.lastuse = jiffies;
730 	rt->u.dst.__use++;
731 
732 	return rt;
733 }
734 
735 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
736 					    struct flowi *fl, int flags)
737 {
738 	return ip6_pol_route(table, fl->iif, fl, flags);
739 }
740 
741 void ip6_route_input(struct sk_buff *skb)
742 {
743 	struct ipv6hdr *iph = ipv6_hdr(skb);
744 	int flags = RT6_LOOKUP_F_HAS_SADDR;
745 	struct flowi fl = {
746 		.iif = skb->dev->ifindex,
747 		.nl_u = {
748 			.ip6_u = {
749 				.daddr = iph->daddr,
750 				.saddr = iph->saddr,
751 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
752 			},
753 		},
754 		.mark = skb->mark,
755 		.proto = iph->nexthdr,
756 	};
757 
758 	if (rt6_need_strict(&iph->daddr))
759 		flags |= RT6_LOOKUP_F_IFACE;
760 
761 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
762 }
763 
764 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
765 					     struct flowi *fl, int flags)
766 {
767 	return ip6_pol_route(table, fl->oif, fl, flags);
768 }
769 
770 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
771 {
772 	int flags = 0;
773 
774 	if (rt6_need_strict(&fl->fl6_dst))
775 		flags |= RT6_LOOKUP_F_IFACE;
776 
777 	if (!ipv6_addr_any(&fl->fl6_src))
778 		flags |= RT6_LOOKUP_F_HAS_SADDR;
779 
780 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
781 }
782 
783 EXPORT_SYMBOL(ip6_route_output);
784 
785 static int ip6_blackhole_output(struct sk_buff *skb)
786 {
787 	kfree_skb(skb);
788 	return 0;
789 }
790 
791 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
792 {
793 	struct rt6_info *ort = (struct rt6_info *) *dstp;
794 	struct rt6_info *rt = (struct rt6_info *)
795 		dst_alloc(&ip6_dst_blackhole_ops);
796 	struct dst_entry *new = NULL;
797 
798 	if (rt) {
799 		new = &rt->u.dst;
800 
801 		atomic_set(&new->__refcnt, 1);
802 		new->__use = 1;
803 		new->input = ip6_blackhole_output;
804 		new->output = ip6_blackhole_output;
805 
806 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807 		new->dev = ort->u.dst.dev;
808 		if (new->dev)
809 			dev_hold(new->dev);
810 		rt->rt6i_idev = ort->rt6i_idev;
811 		if (rt->rt6i_idev)
812 			in6_dev_hold(rt->rt6i_idev);
813 		rt->rt6i_expires = 0;
814 
815 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
816 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
817 		rt->rt6i_metric = 0;
818 
819 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
820 #ifdef CONFIG_IPV6_SUBTREES
821 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
822 #endif
823 
824 		dst_free(new);
825 	}
826 
827 	dst_release(*dstp);
828 	*dstp = new;
829 	return (new ? 0 : -ENOMEM);
830 }
831 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
832 
833 /*
834  *	Destination cache support functions
835  */
836 
837 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
838 {
839 	struct rt6_info *rt;
840 
841 	rt = (struct rt6_info *) dst;
842 
843 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
844 		return dst;
845 
846 	return NULL;
847 }
848 
849 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
850 {
851 	struct rt6_info *rt = (struct rt6_info *) dst;
852 
853 	if (rt) {
854 		if (rt->rt6i_flags & RTF_CACHE)
855 			ip6_del_rt(rt);
856 		else
857 			dst_release(dst);
858 	}
859 	return NULL;
860 }
861 
862 static void ip6_link_failure(struct sk_buff *skb)
863 {
864 	struct rt6_info *rt;
865 
866 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
867 
868 	rt = (struct rt6_info *) skb->dst;
869 	if (rt) {
870 		if (rt->rt6i_flags&RTF_CACHE) {
871 			dst_set_expires(&rt->u.dst, 0);
872 			rt->rt6i_flags |= RTF_EXPIRES;
873 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874 			rt->rt6i_node->fn_sernum = -1;
875 	}
876 }
877 
878 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
879 {
880 	struct rt6_info *rt6 = (struct rt6_info*)dst;
881 
882 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883 		rt6->rt6i_flags |= RTF_MODIFIED;
884 		if (mtu < IPV6_MIN_MTU) {
885 			mtu = IPV6_MIN_MTU;
886 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
887 		}
888 		dst->metrics[RTAX_MTU-1] = mtu;
889 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
890 	}
891 }
892 
893 static int ipv6_get_mtu(struct net_device *dev);
894 
895 static inline unsigned int ipv6_advmss(unsigned int mtu)
896 {
897 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898 
899 	if (mtu < ip6_rt_min_advmss)
900 		mtu = ip6_rt_min_advmss;
901 
902 	/*
903 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
906 	 * rely only on pmtu discovery"
907 	 */
908 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
909 		mtu = IPV6_MAXPLEN;
910 	return mtu;
911 }
912 
913 static struct dst_entry *ndisc_dst_gc_list;
914 static DEFINE_SPINLOCK(ndisc_lock);
915 
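/*
 * Allocate a dst entry for packets generated by neighbour discovery.
 * These entries are chained on ndisc_dst_gc_list and reclaimed by
 * ndisc_dst_gc() once their refcount drops to zero.
 */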
916 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
917 				  struct neighbour *neigh,
918 				  struct in6_addr *addr,
919 				  int (*output)(struct sk_buff *))
920 {
921 	struct rt6_info *rt;
922 	struct inet6_dev *idev = in6_dev_get(dev);
923 
924 	if (unlikely(idev == NULL))
925 		return NULL;
926 
927 	rt = ip6_dst_alloc();
928 	if (unlikely(rt == NULL)) {
929 		in6_dev_put(idev);
930 		goto out;
931 	}
932 
933 	dev_hold(dev);
934 	if (neigh)
935 		neigh_hold(neigh);
936 	else
937 		neigh = ndisc_get_neigh(dev, addr);
938 
939 	rt->rt6i_dev	  = dev;
940 	rt->rt6i_idev     = idev;
941 	rt->rt6i_nexthop  = neigh;
942 	atomic_set(&rt->u.dst.__refcnt, 1);
943 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
945 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
946 	rt->u.dst.output  = output;
947 
948 #if 0	/* there's no chance to use these for ndisc */
949 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
950 				? DST_HOST
951 				: 0;
952 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953 	rt->rt6i_dst.plen = 128;
954 #endif
955 
956 	spin_lock_bh(&ndisc_lock);
957 	rt->u.dst.next = ndisc_dst_gc_list;
958 	ndisc_dst_gc_list = &rt->u.dst;
959 	spin_unlock_bh(&ndisc_lock);
960 
961 	fib6_force_start_gc();
962 
963 out:
964 	return &rt->u.dst;
965 }
966 
967 int ndisc_dst_gc(int *more)
968 {
969 	struct dst_entry *dst, *next, **pprev;
970 	int freed;
971 
972 	next = NULL;
973 	freed = 0;
974 
975 	spin_lock_bh(&ndisc_lock);
976 	pprev = &ndisc_dst_gc_list;
977 
978 	while ((dst = *pprev) != NULL) {
979 		if (!atomic_read(&dst->__refcnt)) {
980 			*pprev = dst->next;
981 			dst_free(dst);
982 			freed++;
983 		} else {
984 			pprev = &dst->next;
985 			(*more)++;
986 		}
987 	}
988 
989 	spin_unlock_bh(&ndisc_lock);
990 
991 	return freed;
992 }
993 
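/*
 * dst garbage collection: rate-limited by ip6_rt_gc_min_interval unless
 * the number of cached entries exceeds ip6_rt_max_size; the expiry
 * interval handed to fib6_run_gc() adapts between runs.
 */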
994 static int ip6_dst_gc(void)
995 {
996 	static unsigned expire = 30*HZ;
997 	static unsigned long last_gc;
998 	unsigned long now = jiffies;
999 
1000 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1001 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1002 		goto out;
1003 
1004 	expire++;
1005 	fib6_run_gc(expire);
1006 	last_gc = now;
1007 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1008 		expire = ip6_rt_gc_timeout>>1;
1009 
1010 out:
1011 	expire -= expire>>ip6_rt_gc_elasticity;
1012 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1013 }
1014 
1015 /* Clean the host part of a prefix. Not necessary in a radix tree,
1016    but it results in cleaner routing tables.
1017 
1018    Remove this only once everything is known to work!
1019  */
1020 
1021 static int ipv6_get_mtu(struct net_device *dev)
1022 {
1023 	int mtu = IPV6_MIN_MTU;
1024 	struct inet6_dev *idev;
1025 
1026 	idev = in6_dev_get(dev);
1027 	if (idev) {
1028 		mtu = idev->cnf.mtu6;
1029 		in6_dev_put(idev);
1030 	}
1031 	return mtu;
1032 }
1033 
1034 int ipv6_get_hoplimit(struct net_device *dev)
1035 {
1036 	int hoplimit = ipv6_devconf.hop_limit;
1037 	struct inet6_dev *idev;
1038 
1039 	idev = in6_dev_get(dev);
1040 	if (idev) {
1041 		hoplimit = idev->cnf.hop_limit;
1042 		in6_dev_put(idev);
1043 	}
1044 	return hoplimit;
1045 }
1046 
1047 /*
1048  *	Add a route described by a fib6_config.
1049  */
1050 
1051 int ip6_route_add(struct fib6_config *cfg)
1052 {
1053 	int err;
1054 	struct rt6_info *rt = NULL;
1055 	struct net_device *dev = NULL;
1056 	struct inet6_dev *idev = NULL;
1057 	struct fib6_table *table;
1058 	int addr_type;
1059 
1060 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1061 		return -EINVAL;
1062 #ifndef CONFIG_IPV6_SUBTREES
1063 	if (cfg->fc_src_len)
1064 		return -EINVAL;
1065 #endif
1066 	if (cfg->fc_ifindex) {
1067 		err = -ENODEV;
1068 		dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1069 		if (!dev)
1070 			goto out;
1071 		idev = in6_dev_get(dev);
1072 		if (!idev)
1073 			goto out;
1074 	}
1075 
1076 	if (cfg->fc_metric == 0)
1077 		cfg->fc_metric = IP6_RT_PRIO_USER;
1078 
1079 	table = fib6_new_table(cfg->fc_table);
1080 	if (table == NULL) {
1081 		err = -ENOBUFS;
1082 		goto out;
1083 	}
1084 
1085 	rt = ip6_dst_alloc();
1086 
1087 	if (rt == NULL) {
1088 		err = -ENOMEM;
1089 		goto out;
1090 	}
1091 
1092 	rt->u.dst.obsolete = -1;
1093 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1094 
1095 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1096 		cfg->fc_protocol = RTPROT_BOOT;
1097 	rt->rt6i_protocol = cfg->fc_protocol;
1098 
1099 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1100 
1101 	if (addr_type & IPV6_ADDR_MULTICAST)
1102 		rt->u.dst.input = ip6_mc_input;
1103 	else
1104 		rt->u.dst.input = ip6_forward;
1105 
1106 	rt->u.dst.output = ip6_output;
1107 
1108 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1109 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1110 	if (rt->rt6i_dst.plen == 128)
1111 	       rt->u.dst.flags = DST_HOST;
1112 
1113 #ifdef CONFIG_IPV6_SUBTREES
1114 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1115 	rt->rt6i_src.plen = cfg->fc_src_len;
1116 #endif
1117 
1118 	rt->rt6i_metric = cfg->fc_metric;
1119 
1120 	/* We cannot add true routes via loopback here,
1121 	   they would result in kernel looping; promote them to reject routes
1122 	 */
1123 	if ((cfg->fc_flags & RTF_REJECT) ||
1124 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1125 		/* hold loopback dev/idev if we haven't done so. */
1126 		if (dev != init_net.loopback_dev) {
1127 			if (dev) {
1128 				dev_put(dev);
1129 				in6_dev_put(idev);
1130 			}
1131 			dev = init_net.loopback_dev;
1132 			dev_hold(dev);
1133 			idev = in6_dev_get(dev);
1134 			if (!idev) {
1135 				err = -ENODEV;
1136 				goto out;
1137 			}
1138 		}
1139 		rt->u.dst.output = ip6_pkt_discard_out;
1140 		rt->u.dst.input = ip6_pkt_discard;
1141 		rt->u.dst.error = -ENETUNREACH;
1142 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1143 		goto install_route;
1144 	}
1145 
1146 	if (cfg->fc_flags & RTF_GATEWAY) {
1147 		struct in6_addr *gw_addr;
1148 		int gwa_type;
1149 
1150 		gw_addr = &cfg->fc_gateway;
1151 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1152 		gwa_type = ipv6_addr_type(gw_addr);
1153 
1154 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1155 			struct rt6_info *grt;
1156 
1157 			/* IPv6 strictly inhibits using non-link-local
1158 			   addresses as the nexthop address.
1159 			   Otherwise, a router will not be able to send redirects.
1160 			   It is very good, but in some (rare!) circumstances
1161 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1162 			   some exceptions. --ANK
1163 			 */
1164 			err = -EINVAL;
1165 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1166 				goto out;
1167 
1168 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1169 
1170 			err = -EHOSTUNREACH;
1171 			if (grt == NULL)
1172 				goto out;
1173 			if (dev) {
1174 				if (dev != grt->rt6i_dev) {
1175 					dst_release(&grt->u.dst);
1176 					goto out;
1177 				}
1178 			} else {
1179 				dev = grt->rt6i_dev;
1180 				idev = grt->rt6i_idev;
1181 				dev_hold(dev);
1182 				in6_dev_hold(grt->rt6i_idev);
1183 			}
1184 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1185 				err = 0;
1186 			dst_release(&grt->u.dst);
1187 
1188 			if (err)
1189 				goto out;
1190 		}
1191 		err = -EINVAL;
1192 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1193 			goto out;
1194 	}
1195 
1196 	err = -ENODEV;
1197 	if (dev == NULL)
1198 		goto out;
1199 
1200 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1201 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1202 		if (IS_ERR(rt->rt6i_nexthop)) {
1203 			err = PTR_ERR(rt->rt6i_nexthop);
1204 			rt->rt6i_nexthop = NULL;
1205 			goto out;
1206 		}
1207 	}
1208 
1209 	rt->rt6i_flags = cfg->fc_flags;
1210 
1211 install_route:
1212 	if (cfg->fc_mx) {
1213 		struct nlattr *nla;
1214 		int remaining;
1215 
1216 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1217 			int type = nla_type(nla);
1218 
1219 			if (type) {
1220 				if (type > RTAX_MAX) {
1221 					err = -EINVAL;
1222 					goto out;
1223 				}
1224 
1225 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1226 			}
1227 		}
1228 	}
1229 
1230 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1231 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1232 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1233 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1234 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1235 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1236 	rt->u.dst.dev = dev;
1237 	rt->rt6i_idev = idev;
1238 	rt->rt6i_table = table;
1239 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1240 
1241 out:
1242 	if (dev)
1243 		dev_put(dev);
1244 	if (idev)
1245 		in6_dev_put(idev);
1246 	if (rt)
1247 		dst_free(&rt->u.dst);
1248 	return err;
1249 }
1250 
1251 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1252 {
1253 	int err;
1254 	struct fib6_table *table;
1255 
1256 	if (rt == &ip6_null_entry)
1257 		return -ENOENT;
1258 
1259 	table = rt->rt6i_table;
1260 	write_lock_bh(&table->tb6_lock);
1261 
1262 	err = fib6_del(rt, info);
1263 	dst_release(&rt->u.dst);
1264 
1265 	write_unlock_bh(&table->tb6_lock);
1266 
1267 	return err;
1268 }
1269 
1270 int ip6_del_rt(struct rt6_info *rt)
1271 {
1272 	return __ip6_del_rt(rt, NULL);
1273 }
1274 
1275 static int ip6_route_del(struct fib6_config *cfg)
1276 {
1277 	struct fib6_table *table;
1278 	struct fib6_node *fn;
1279 	struct rt6_info *rt;
1280 	int err = -ESRCH;
1281 
1282 	table = fib6_get_table(cfg->fc_table);
1283 	if (table == NULL)
1284 		return err;
1285 
1286 	read_lock_bh(&table->tb6_lock);
1287 
1288 	fn = fib6_locate(&table->tb6_root,
1289 			 &cfg->fc_dst, cfg->fc_dst_len,
1290 			 &cfg->fc_src, cfg->fc_src_len);
1291 
1292 	if (fn) {
1293 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1294 			if (cfg->fc_ifindex &&
1295 			    (rt->rt6i_dev == NULL ||
1296 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1297 				continue;
1298 			if (cfg->fc_flags & RTF_GATEWAY &&
1299 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1300 				continue;
1301 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1302 				continue;
1303 			dst_hold(&rt->u.dst);
1304 			read_unlock_bh(&table->tb6_lock);
1305 
1306 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1307 		}
1308 	}
1309 	read_unlock_bh(&table->tb6_lock);
1310 
1311 	return err;
1312 }
1313 
1314 /*
1315  *	Handle redirects
1316  */
1317 struct ip6rd_flowi {
1318 	struct flowi fl;
1319 	struct in6_addr gateway;
1320 };
1321 
1322 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1323 					     struct flowi *fl,
1324 					     int flags)
1325 {
1326 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1327 	struct rt6_info *rt;
1328 	struct fib6_node *fn;
1329 
1330 	/*
1331 	 * Get the "current" route for this destination and
1332 	 * check if the redirect has come from an appropriate router.
1333 	 *
1334 	 * RFC 2461 specifies that redirects should only be
1335 	 * accepted if they come from the nexthop to the target.
1336 	 * Due to the way the routes are chosen, this notion
1337 	 * is a bit fuzzy and one might need to check all possible
1338 	 * routes.
1339 	 */
1340 
1341 	read_lock_bh(&table->tb6_lock);
1342 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1343 restart:
1344 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1345 		/*
1346 		 * Current route is on-link; redirect is always invalid.
1347 		 *
1348 		 * It seems the previous statement is not true: it could
1349 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1350 		 * But then the router serving it might decide that we should
1351 		 * know the truth 8)8) --ANK (980726).
1352 		 */
1353 		if (rt6_check_expired(rt))
1354 			continue;
1355 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1356 			continue;
1357 		if (fl->oif != rt->rt6i_dev->ifindex)
1358 			continue;
1359 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1360 			continue;
1361 		break;
1362 	}
1363 
1364 	if (!rt)
1365 		rt = &ip6_null_entry;
1366 	BACKTRACK(&fl->fl6_src);
1367 out:
1368 	dst_hold(&rt->u.dst);
1369 
1370 	read_unlock_bh(&table->tb6_lock);
1371 
1372 	return rt;
1373 };
1374 
1375 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1376 					   struct in6_addr *src,
1377 					   struct in6_addr *gateway,
1378 					   struct net_device *dev)
1379 {
1380 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1381 	struct ip6rd_flowi rdfl = {
1382 		.fl = {
1383 			.oif = dev->ifindex,
1384 			.nl_u = {
1385 				.ip6_u = {
1386 					.daddr = *dest,
1387 					.saddr = *src,
1388 				},
1389 			},
1390 		},
1391 		.gateway = *gateway,
1392 	};
1393 
1394 	if (rt6_need_strict(dest))
1395 		flags |= RT6_LOOKUP_F_IFACE;
1396 
1397 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1398 }
1399 
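/*
 * Process an ICMPv6 Redirect: verify that the advertised router is a
 * valid nexthop for the destination, update the neighbour cache, and
 * install an RTF_DYNAMIC|RTF_CACHE host route via the new gateway.
 */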
1400 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1401 		  struct in6_addr *saddr,
1402 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1403 {
1404 	struct rt6_info *rt, *nrt = NULL;
1405 	struct netevent_redirect netevent;
1406 
1407 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1408 
1409 	if (rt == &ip6_null_entry) {
1410 		if (net_ratelimit())
1411 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1412 			       "for redirect target\n");
1413 		goto out;
1414 	}
1415 
1416 	/*
1417 	 *	We have finally decided to accept it.
1418 	 */
1419 
1420 	neigh_update(neigh, lladdr, NUD_STALE,
1421 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1422 		     NEIGH_UPDATE_F_OVERRIDE|
1423 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1424 				     NEIGH_UPDATE_F_ISROUTER))
1425 		     );
1426 
1427 	/*
1428 	 * Redirect received -> path was valid.
1429 	 * Redirects are sent only in response to data packets,
1430 	 * so this nexthop is apparently reachable. --ANK
1431 	 */
1432 	dst_confirm(&rt->u.dst);
1433 
1434 	/* Duplicate redirect: silently ignore. */
1435 	if (neigh == rt->u.dst.neighbour)
1436 		goto out;
1437 
1438 	nrt = ip6_rt_copy(rt);
1439 	if (nrt == NULL)
1440 		goto out;
1441 
1442 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1443 	if (on_link)
1444 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1445 
1446 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1447 	nrt->rt6i_dst.plen = 128;
1448 	nrt->u.dst.flags |= DST_HOST;
1449 
1450 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1451 	nrt->rt6i_nexthop = neigh_clone(neigh);
1452 	/* Reset pmtu, it may be better */
1453 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1454 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1455 
1456 	if (ip6_ins_rt(nrt))
1457 		goto out;
1458 
1459 	netevent.old = &rt->u.dst;
1460 	netevent.new = &nrt->u.dst;
1461 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1462 
1463 	if (rt->rt6i_flags&RTF_CACHE) {
1464 		ip6_del_rt(rt);
1465 		return;
1466 	}
1467 
1468 out:
1469 	dst_release(&rt->u.dst);
1470 	return;
1471 }
1472 
1473 /*
1474  *	Handle ICMP "packet too big" messages
1475  *	i.e. Path MTU discovery
1476  */
1477 
1478 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1479 			struct net_device *dev, u32 pmtu)
1480 {
1481 	struct rt6_info *rt, *nrt;
1482 	int allfrag = 0;
1483 
1484 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1485 	if (rt == NULL)
1486 		return;
1487 
1488 	if (pmtu >= dst_mtu(&rt->u.dst))
1489 		goto out;
1490 
1491 	if (pmtu < IPV6_MIN_MTU) {
1492 		/*
1493 		 * According to RFC 2460, the PMTU is set to the IPv6 Minimum
1494 		 * Link MTU (1280) and a Fragment header should always be
1495 		 * included once a node receives a Too Big message reporting a
1496 		 * PMTU less than the IPv6 Minimum Link MTU.
1497 		 */
1498 		pmtu = IPV6_MIN_MTU;
1499 		allfrag = 1;
1500 	}
1501 
1502 	/* New mtu received -> path was valid.
1503 	   Too Big messages are sent only in response to data packets,
1504 	   so this nexthop is apparently reachable. --ANK
1505 	 */
1506 	dst_confirm(&rt->u.dst);
1507 
1508 	/* Host route. If it is static, it would be better
1509 	   not to override it but to add a new one, so that
1510 	   when the cache entry expires the old pmtu
1511 	   will return automatically.
1512 	 */
1513 	if (rt->rt6i_flags & RTF_CACHE) {
1514 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1515 		if (allfrag)
1516 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1517 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1518 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1519 		goto out;
1520 	}
1521 
1522 	/* Network route.
1523 	   Two cases are possible:
1524 	   1. It is a connected route. Action: COW.
1525 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1526 	 */
1527 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1528 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1529 	else
1530 		nrt = rt6_alloc_clone(rt, daddr);
1531 
1532 	if (nrt) {
1533 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1534 		if (allfrag)
1535 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1536 
1537 		/* According to RFC 1981, a PMTU increase should not be detected
1538 		 * within 5 minutes; the recommended timer is 10 minutes.
1539 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1540 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1541 		 * and a PMTU increase can again be detected automatically.
1542 		 */
1543 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1544 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1545 
1546 		ip6_ins_rt(nrt);
1547 	}
1548 out:
1549 	dst_release(&rt->u.dst);
1550 }
1551 
1552 /*
1553  *	Misc support functions
1554  */
1555 
1556 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1557 {
1558 	struct rt6_info *rt = ip6_dst_alloc();
1559 
1560 	if (rt) {
1561 		rt->u.dst.input = ort->u.dst.input;
1562 		rt->u.dst.output = ort->u.dst.output;
1563 
1564 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1565 		rt->u.dst.error = ort->u.dst.error;
1566 		rt->u.dst.dev = ort->u.dst.dev;
1567 		if (rt->u.dst.dev)
1568 			dev_hold(rt->u.dst.dev);
1569 		rt->rt6i_idev = ort->rt6i_idev;
1570 		if (rt->rt6i_idev)
1571 			in6_dev_hold(rt->rt6i_idev);
1572 		rt->u.dst.lastuse = jiffies;
1573 		rt->rt6i_expires = 0;
1574 
1575 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1576 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1577 		rt->rt6i_metric = 0;
1578 
1579 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1580 #ifdef CONFIG_IPV6_SUBTREES
1581 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1582 #endif
1583 		rt->rt6i_table = ort->rt6i_table;
1584 	}
1585 	return rt;
1586 }
1587 
1588 #ifdef CONFIG_IPV6_ROUTE_INFO
1589 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1590 					   struct in6_addr *gwaddr, int ifindex)
1591 {
1592 	struct fib6_node *fn;
1593 	struct rt6_info *rt = NULL;
1594 	struct fib6_table *table;
1595 
1596 	table = fib6_get_table(RT6_TABLE_INFO);
1597 	if (table == NULL)
1598 		return NULL;
1599 
1600 	write_lock_bh(&table->tb6_lock);
1601 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1602 	if (!fn)
1603 		goto out;
1604 
1605 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1606 		if (rt->rt6i_dev->ifindex != ifindex)
1607 			continue;
1608 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1609 			continue;
1610 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1611 			continue;
1612 		dst_hold(&rt->u.dst);
1613 		break;
1614 	}
1615 out:
1616 	write_unlock_bh(&table->tb6_lock);
1617 	return rt;
1618 }
1619 
1620 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1621 					   struct in6_addr *gwaddr, int ifindex,
1622 					   unsigned pref)
1623 {
1624 	struct fib6_config cfg = {
1625 		.fc_table	= RT6_TABLE_INFO,
1626 		.fc_metric	= 1024,
1627 		.fc_ifindex	= ifindex,
1628 		.fc_dst_len	= prefixlen,
1629 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1630 				  RTF_UP | RTF_PREF(pref),
1631 	};
1632 
1633 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1634 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1635 
1636 	/* We should treat it as a default route if prefix length is 0. */
1637 	if (!prefixlen)
1638 		cfg.fc_flags |= RTF_DEFAULT;
1639 
1640 	ip6_route_add(&cfg);
1641 
1642 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1643 }
1644 #endif
1645 
1646 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1647 {
1648 	struct rt6_info *rt;
1649 	struct fib6_table *table;
1650 
1651 	table = fib6_get_table(RT6_TABLE_DFLT);
1652 	if (table == NULL)
1653 		return NULL;
1654 
1655 	write_lock_bh(&table->tb6_lock);
1656 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1657 		if (dev == rt->rt6i_dev &&
1658 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1659 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1660 			break;
1661 	}
1662 	if (rt)
1663 		dst_hold(&rt->u.dst);
1664 	write_unlock_bh(&table->tb6_lock);
1665 	return rt;
1666 }
1667 
1668 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1669 				     struct net_device *dev,
1670 				     unsigned int pref)
1671 {
1672 	struct fib6_config cfg = {
1673 		.fc_table	= RT6_TABLE_DFLT,
1674 		.fc_metric	= 1024,
1675 		.fc_ifindex	= dev->ifindex,
1676 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1677 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1678 	};
1679 
1680 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1681 
1682 	ip6_route_add(&cfg);
1683 
1684 	return rt6_get_dflt_router(gwaddr, dev);
1685 }
1686 
1687 void rt6_purge_dflt_routers(void)
1688 {
1689 	struct rt6_info *rt;
1690 	struct fib6_table *table;
1691 
1692 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1693 	table = fib6_get_table(RT6_TABLE_DFLT);
1694 	if (table == NULL)
1695 		return;
1696 
1697 restart:
1698 	read_lock_bh(&table->tb6_lock);
1699 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1700 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1701 			dst_hold(&rt->u.dst);
1702 			read_unlock_bh(&table->tb6_lock);
1703 			ip6_del_rt(rt);
1704 			goto restart;
1705 		}
1706 	}
1707 	read_unlock_bh(&table->tb6_lock);
1708 }
1709 
1710 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1711 				 struct fib6_config *cfg)
1712 {
1713 	memset(cfg, 0, sizeof(*cfg));
1714 
1715 	cfg->fc_table = RT6_TABLE_MAIN;
1716 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1717 	cfg->fc_metric = rtmsg->rtmsg_metric;
1718 	cfg->fc_expires = rtmsg->rtmsg_info;
1719 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1720 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1721 	cfg->fc_flags = rtmsg->rtmsg_flags;
1722 
1723 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1724 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1725 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1726 }
1727 
1728 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1729 {
1730 	struct fib6_config cfg;
1731 	struct in6_rtmsg rtmsg;
1732 	int err;
1733 
1734 	switch(cmd) {
1735 	case SIOCADDRT:		/* Add a route */
1736 	case SIOCDELRT:		/* Delete a route */
1737 		if (!capable(CAP_NET_ADMIN))
1738 			return -EPERM;
1739 		err = copy_from_user(&rtmsg, arg,
1740 				     sizeof(struct in6_rtmsg));
1741 		if (err)
1742 			return -EFAULT;
1743 
1744 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1745 
1746 		rtnl_lock();
1747 		switch (cmd) {
1748 		case SIOCADDRT:
1749 			err = ip6_route_add(&cfg);
1750 			break;
1751 		case SIOCDELRT:
1752 			err = ip6_route_del(&cfg);
1753 			break;
1754 		default:
1755 			err = -EINVAL;
1756 		}
1757 		rtnl_unlock();
1758 
1759 		return err;
1760 	}
1761 
1762 	return -EINVAL;
1763 }
1764 
1765 /*
1766  *	Drop the packet on the floor
1767  */
1768 
1769 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1770 			       int ipstats_mib_noroutes)
1771 {
1772 	int type;
1773 	switch (ipstats_mib_noroutes) {
1774 	case IPSTATS_MIB_INNOROUTES:
1775 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1776 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1777 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1778 			break;
1779 		}
1780 		/* FALLTHROUGH */
1781 	case IPSTATS_MIB_OUTNOROUTES:
1782 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1783 		break;
1784 	}
1785 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1786 	kfree_skb(skb);
1787 	return 0;
1788 }
1789 
1790 static int ip6_pkt_discard(struct sk_buff *skb)
1791 {
1792 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1793 }
1794 
1795 static int ip6_pkt_discard_out(struct sk_buff *skb)
1796 {
1797 	skb->dev = skb->dst->dev;
1798 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1799 }
1800 
1801 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1802 
1803 static int ip6_pkt_prohibit(struct sk_buff *skb)
1804 {
1805 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1806 }
1807 
1808 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1809 {
1810 	skb->dev = skb->dst->dev;
1811 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1812 }
1813 
1814 static int ip6_pkt_blk_hole(struct sk_buff *skb)
1815 {
1816 	kfree_skb(skb);
1817 	return 0;
1818 }
1819 
1820 #endif
1821 
1822 /*
1823  *	Allocate a dst for local (unicast / anycast) address.
1824  */
1825 
1826 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1827 				    const struct in6_addr *addr,
1828 				    int anycast)
1829 {
1830 	struct rt6_info *rt = ip6_dst_alloc();
1831 
1832 	if (rt == NULL)
1833 		return ERR_PTR(-ENOMEM);
1834 
1835 	dev_hold(init_net.loopback_dev);
1836 	in6_dev_hold(idev);
1837 
1838 	rt->u.dst.flags = DST_HOST;
1839 	rt->u.dst.input = ip6_input;
1840 	rt->u.dst.output = ip6_output;
1841 	rt->rt6i_dev = init_net.loopback_dev;
1842 	rt->rt6i_idev = idev;
1843 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1844 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1845 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1846 	rt->u.dst.obsolete = -1;
1847 
1848 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1849 	if (anycast)
1850 		rt->rt6i_flags |= RTF_ANYCAST;
1851 	else
1852 		rt->rt6i_flags |= RTF_LOCAL;
1853 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1854 	if (rt->rt6i_nexthop == NULL) {
1855 		dst_free(&rt->u.dst);
1856 		return ERR_PTR(-ENOMEM);
1857 	}
1858 
1859 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1860 	rt->rt6i_dst.plen = 128;
1861 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1862 
1863 	atomic_set(&rt->u.dst.__refcnt, 1);
1864 
1865 	return rt;
1866 }
1867 
1868 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1869 {
1870 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1871 	    rt != &ip6_null_entry) {
1872 		RT6_TRACE("deleted by ifdown %p\n", rt);
1873 		return -1;
1874 	}
1875 	return 0;
1876 }
1877 
1878 void rt6_ifdown(struct net_device *dev)
1879 {
1880 	fib6_clean_all(fib6_ifdown, 0, dev);
1881 }
1882 
1883 struct rt6_mtu_change_arg
1884 {
1885 	struct net_device *dev;
1886 	unsigned mtu;
1887 };
1888 
1889 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1890 {
1891 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1892 	struct inet6_dev *idev;
1893 
1894 	/* In IPv6, pmtu discovery is not optional,
1895 	   so the RTAX_MTU lock cannot disable it.
1896 	   We still use this lock to block changes
1897 	   caused by addrconf/ndisc.
1898 	*/
1899 
1900 	idev = __in6_dev_get(arg->dev);
1901 	if (idev == NULL)
1902 		return 0;
1903 
1904 	/* For an administrative MTU increase, there is no way to discover
1905 	   an IPv6 PMTU increase, so the PMTU should be updated here.
1906 	   Since RFC 1981 doesn't cover administrative MTU increases,
1907 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
1908 	 */
1909 	/*
1910 	   If the new MTU is less than the route PMTU, the new MTU will be
1911 	   the lowest MTU in the path; update the route PMTU to reflect the
1912 	   decrease. If the new MTU is greater than the route PMTU, and the
1913 	   old MTU was the lowest MTU in the path, update the route PMTU
1914 	   to reflect the increase. In this case, if another node along the
1915 	   path also has the lowest MTU, a Too Big message will lead to
1916 	   PMTU discovery.
1917 	 */
1918 	if (rt->rt6i_dev == arg->dev &&
1919 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1920 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1921 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1922 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1923 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1924 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1925 	}
1926 	return 0;
1927 }
1928 
1929 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1930 {
1931 	struct rt6_mtu_change_arg arg = {
1932 		.dev = dev,
1933 		.mtu = mtu,
1934 	};
1935 
1936 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1937 }
1938 
1939 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1940 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1941 	[RTA_OIF]               = { .type = NLA_U32 },
1942 	[RTA_IIF]		= { .type = NLA_U32 },
1943 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1944 	[RTA_METRICS]           = { .type = NLA_NESTED },
1945 };
1946 
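/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * fib6_config.
 */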
1947 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1948 			      struct fib6_config *cfg)
1949 {
1950 	struct rtmsg *rtm;
1951 	struct nlattr *tb[RTA_MAX+1];
1952 	int err;
1953 
1954 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1955 	if (err < 0)
1956 		goto errout;
1957 
1958 	err = -EINVAL;
1959 	rtm = nlmsg_data(nlh);
1960 	memset(cfg, 0, sizeof(*cfg));
1961 
1962 	cfg->fc_table = rtm->rtm_table;
1963 	cfg->fc_dst_len = rtm->rtm_dst_len;
1964 	cfg->fc_src_len = rtm->rtm_src_len;
1965 	cfg->fc_flags = RTF_UP;
1966 	cfg->fc_protocol = rtm->rtm_protocol;
1967 
1968 	if (rtm->rtm_type == RTN_UNREACHABLE)
1969 		cfg->fc_flags |= RTF_REJECT;
1970 
1971 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1972 	cfg->fc_nlinfo.nlh = nlh;
1973 
1974 	if (tb[RTA_GATEWAY]) {
1975 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1976 		cfg->fc_flags |= RTF_GATEWAY;
1977 	}
1978 
1979 	if (tb[RTA_DST]) {
1980 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1981 
1982 		if (nla_len(tb[RTA_DST]) < plen)
1983 			goto errout;
1984 
1985 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1986 	}
1987 
1988 	if (tb[RTA_SRC]) {
1989 		int plen = (rtm->rtm_src_len + 7) >> 3;
1990 
1991 		if (nla_len(tb[RTA_SRC]) < plen)
1992 			goto errout;
1993 
1994 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1995 	}
1996 
1997 	if (tb[RTA_OIF])
1998 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1999 
2000 	if (tb[RTA_PRIORITY])
2001 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2002 
2003 	if (tb[RTA_METRICS]) {
2004 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2005 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2006 	}
2007 
2008 	if (tb[RTA_TABLE])
2009 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2010 
2011 	err = 0;
2012 errout:
2013 	return err;
2014 }
2015 
2016 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2017 {
2018 	struct fib6_config cfg;
2019 	int err;
2020 
2021 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2022 	if (err < 0)
2023 		return err;
2024 
2025 	return ip6_route_del(&cfg);
2026 }
2027 
2028 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2029 {
2030 	struct fib6_config cfg;
2031 	int err;
2032 
2033 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2034 	if (err < 0)
2035 		return err;
2036 
2037 	return ip6_route_add(&cfg);
2038 }
2039 
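/*
 * Worst-case size of a single route netlink message; used to size the
 * skb allocated in inet6_rt_notify().
 */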
2040 static inline size_t rt6_nlmsg_size(void)
2041 {
2042 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2043 	       + nla_total_size(16) /* RTA_SRC */
2044 	       + nla_total_size(16) /* RTA_DST */
2045 	       + nla_total_size(16) /* RTA_GATEWAY */
2046 	       + nla_total_size(16) /* RTA_PREFSRC */
2047 	       + nla_total_size(4) /* RTA_TABLE */
2048 	       + nla_total_size(4) /* RTA_IIF */
2049 	       + nla_total_size(4) /* RTA_OIF */
2050 	       + nla_total_size(4) /* RTA_PRIORITY */
2051 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2052 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2053 }
2054 
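/*
 * Fill one route message for @rt into @skb.  Returns the message length
 * from nlmsg_end(), 1 when only prefix routes were requested and @rt is
 * not one, or -EMSGSIZE when the message does not fit into @skb.
 */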
2055 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2056 			 struct in6_addr *dst, struct in6_addr *src,
2057 			 int iif, int type, u32 pid, u32 seq,
2058 			 int prefix, unsigned int flags)
2059 {
2060 	struct rtmsg *rtm;
2061 	struct nlmsghdr *nlh;
2062 	long expires;
2063 	u32 table;
2064 
2065 	if (prefix) {	/* user wants prefix routes only */
2066 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2067 			/* success since this is not a prefix route */
2068 			return 1;
2069 		}
2070 	}
2071 
2072 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2073 	if (nlh == NULL)
2074 		return -EMSGSIZE;
2075 
2076 	rtm = nlmsg_data(nlh);
2077 	rtm->rtm_family = AF_INET6;
2078 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2079 	rtm->rtm_src_len = rt->rt6i_src.plen;
2080 	rtm->rtm_tos = 0;
2081 	if (rt->rt6i_table)
2082 		table = rt->rt6i_table->tb6_id;
2083 	else
2084 		table = RT6_TABLE_UNSPEC;
2085 	rtm->rtm_table = table;
2086 	NLA_PUT_U32(skb, RTA_TABLE, table);
2087 	if (rt->rt6i_flags&RTF_REJECT)
2088 		rtm->rtm_type = RTN_UNREACHABLE;
2089 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2090 		rtm->rtm_type = RTN_LOCAL;
2091 	else
2092 		rtm->rtm_type = RTN_UNICAST;
2093 	rtm->rtm_flags = 0;
2094 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2095 	rtm->rtm_protocol = rt->rt6i_protocol;
2096 	if (rt->rt6i_flags&RTF_DYNAMIC)
2097 		rtm->rtm_protocol = RTPROT_REDIRECT;
2098 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2099 		rtm->rtm_protocol = RTPROT_KERNEL;
2100 	else if (rt->rt6i_flags&RTF_DEFAULT)
2101 		rtm->rtm_protocol = RTPROT_RA;
2102 
2103 	if (rt->rt6i_flags&RTF_CACHE)
2104 		rtm->rtm_flags |= RTM_F_CLONED;
2105 
2106 	if (dst) {
2107 		NLA_PUT(skb, RTA_DST, 16, dst);
2108 		rtm->rtm_dst_len = 128;
2109 	} else if (rtm->rtm_dst_len)
2110 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2111 #ifdef CONFIG_IPV6_SUBTREES
2112 	if (src) {
2113 		NLA_PUT(skb, RTA_SRC, 16, src);
2114 		rtm->rtm_src_len = 128;
2115 	} else if (rtm->rtm_src_len)
2116 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2117 #endif
2118 	if (iif)
2119 		NLA_PUT_U32(skb, RTA_IIF, iif);
2120 	else if (dst) {
2121 		struct in6_addr saddr_buf;
2122 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2123 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2124 	}
2125 
2126 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2127 		goto nla_put_failure;
2128 
2129 	if (rt->u.dst.neighbour)
2130 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2131 
2132 	if (rt->u.dst.dev)
2133 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2134 
2135 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2136 
2137 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2138 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2139 			       expires, rt->u.dst.error) < 0)
2140 		goto nla_put_failure;
2141 
2142 	return nlmsg_end(skb, nlh);
2143 
2144 nla_put_failure:
2145 	nlmsg_cancel(skb, nlh);
2146 	return -EMSGSIZE;
2147 }
2148 
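/*
 * Dump callback: emit one RTM_NEWROUTE message per route, honouring the
 * RTM_F_PREFIX filter carried in the original dump request.
 */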
2149 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2150 {
2151 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2152 	int prefix;
2153 
2154 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2155 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2156 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2157 	} else
2158 		prefix = 0;
2159 
2160 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2161 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2162 		     prefix, NLM_F_MULTI);
2163 }
2164 
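/*
 * RTM_GETROUTE handler: look up the route for the flow described by the
 * request and unicast the resulting RTM_NEWROUTE message to the sender.
 */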
2165 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2166 {
2167 	struct nlattr *tb[RTA_MAX+1];
2168 	struct rt6_info *rt;
2169 	struct sk_buff *skb;
2170 	struct rtmsg *rtm;
2171 	struct flowi fl;
2172 	int err, iif = 0;
2173 
2174 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2175 	if (err < 0)
2176 		goto errout;
2177 
2178 	err = -EINVAL;
2179 	memset(&fl, 0, sizeof(fl));
2180 
2181 	if (tb[RTA_SRC]) {
2182 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2183 			goto errout;
2184 
2185 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2186 	}
2187 
2188 	if (tb[RTA_DST]) {
2189 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2190 			goto errout;
2191 
2192 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2193 	}
2194 
2195 	if (tb[RTA_IIF])
2196 		iif = nla_get_u32(tb[RTA_IIF]);
2197 
2198 	if (tb[RTA_OIF])
2199 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2200 
2201 	if (iif) {
2202 		struct net_device *dev;
2203 		dev = __dev_get_by_index(&init_net, iif);
2204 		if (!dev) {
2205 			err = -ENODEV;
2206 			goto errout;
2207 		}
2208 	}
2209 
2210 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2211 	if (skb == NULL) {
2212 		err = -ENOBUFS;
2213 		goto errout;
2214 	}
2215 
2216 	/* Reserve room for dummy headers; this skb can pass
2217 	   through a good chunk of the routing engine.
2218 	 */
2219 	skb_reset_mac_header(skb);
2220 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2221 
2222 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2223 	skb->dst = &rt->u.dst;
2224 
2225 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2226 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2227 			    nlh->nlmsg_seq, 0, 0);
2228 	if (err < 0) {
2229 		kfree_skb(skb);
2230 		goto errout;
2231 	}
2232 
2233 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2234 errout:
2235 	return err;
2236 }
2237 
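/*
 * Broadcast a route change to RTNLGRP_IPV6_ROUTE listeners; on
 * allocation or fill failure the error is reported via rtnl_set_sk_err().
 */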
2238 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2239 {
2240 	struct sk_buff *skb;
2241 	u32 pid = 0, seq = 0;
2242 	struct nlmsghdr *nlh = NULL;
2243 	int err = -ENOBUFS;
2244 
2245 	if (info) {
2246 		pid = info->pid;
2247 		nlh = info->nlh;
2248 		if (nlh)
2249 			seq = nlh->nlmsg_seq;
2250 	}
2251 
2252 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2253 	if (skb == NULL)
2254 		goto errout;
2255 
2256 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2257 	if (err < 0) {
2258 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2259 		WARN_ON(err == -EMSGSIZE);
2260 		kfree_skb(skb);
2261 		goto errout;
2262 	}
2263 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2264 errout:
2265 	if (err < 0)
2266 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2267 }
2268 
2269 /*
2270  *	/proc
2271  */
2272 
2273 #ifdef CONFIG_PROC_FS
2274 
2275 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2276 
2277 struct rt6_proc_arg
2278 {
2279 	char *buffer;
2280 	int offset;
2281 	int length;
2282 	int skip;
2283 	int len;
2284 };
2285 
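/*
 * Emit one /proc/net/ipv6_route line: destination, source (when subtrees
 * are enabled), next hop, metric, refcount, use count, flags and device.
 */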
2286 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2287 {
2288 	struct seq_file *m = p_arg;
2289 
2290 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2291 		   rt->rt6i_dst.plen);
2292 
2293 #ifdef CONFIG_IPV6_SUBTREES
2294 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2295 		   rt->rt6i_src.plen);
2296 #else
2297 	seq_puts(m, "00000000000000000000000000000000 00 ");
2298 #endif
2299 
2300 	if (rt->rt6i_nexthop) {
2301 		seq_printf(m, NIP6_SEQFMT,
2302 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2303 	} else {
2304 		seq_puts(m, "00000000000000000000000000000000");
2305 	}
2306 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2307 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2308 		   rt->u.dst.__use, rt->rt6i_flags,
2309 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2310 	return 0;
2311 }
2312 
2313 static int ipv6_route_show(struct seq_file *m, void *v)
2314 {
2315 	fib6_clean_all(rt6_info_route, 0, m);
2316 	return 0;
2317 }
2318 
2319 static int ipv6_route_open(struct inode *inode, struct file *file)
2320 {
2321 	return single_open(file, ipv6_route_show, NULL);
2322 }
2323 
2324 static const struct file_operations ipv6_route_proc_fops = {
2325 	.owner		= THIS_MODULE,
2326 	.open		= ipv6_route_open,
2327 	.read		= seq_read,
2328 	.llseek		= seq_lseek,
2329 	.release	= single_release,
2330 };
2331 
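/*
 * /proc/net/rt6_stats: FIB node and route entry counters plus the
 * current number of entries in ip6_dst_ops.
 */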
2332 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2333 {
2334 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2335 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2336 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2337 		      rt6_stats.fib_rt_cache,
2338 		      atomic_read(&ip6_dst_ops.entries),
2339 		      rt6_stats.fib_discarded_routes);
2340 
2341 	return 0;
2342 }
2343 
2344 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2345 {
2346 	return single_open(file, rt6_stats_seq_show, NULL);
2347 }
2348 
2349 static const struct file_operations rt6_stats_seq_fops = {
2350 	.owner	 = THIS_MODULE,
2351 	.open	 = rt6_stats_seq_open,
2352 	.read	 = seq_read,
2353 	.llseek	 = seq_lseek,
2354 	.release = single_release,
2355 };
2356 #endif	/* CONFIG_PROC_FS */
2357 
2358 #ifdef CONFIG_SYSCTL
2359 
2360 static int flush_delay;
2361 
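/*
 * Handler for the write-only "flush" sysctl: any write triggers an
 * immediate garbage-collection pass via fib6_run_gc(); reads fail with
 * -EINVAL.
 */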
2362 static
2363 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2364 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2365 {
2366 	if (write) {
2367 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2368 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2369 		return 0;
2370 	} else
2371 		return -EINVAL;
2372 }
2373 
2374 ctl_table ipv6_route_table[] = {
2375 	{
2376 		.procname	=	"flush",
2377 		.data		=	&flush_delay,
2378 		.maxlen		=	sizeof(int),
2379 		.mode		=	0200,
2380 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2381 	},
2382 	{
2383 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2384 		.procname	=	"gc_thresh",
2385 		.data		=	&ip6_dst_ops.gc_thresh,
2386 		.maxlen		=	sizeof(int),
2387 		.mode		=	0644,
2388 		.proc_handler	=	&proc_dointvec,
2389 	},
2390 	{
2391 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2392 		.procname	=	"max_size",
2393 		.data		=	&ip6_rt_max_size,
2394 		.maxlen		=	sizeof(int),
2395 		.mode		=	0644,
2396 		.proc_handler	=	&proc_dointvec,
2397 	},
2398 	{
2399 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 		.procname	=	"gc_min_interval",
2401 		.data		=	&ip6_rt_gc_min_interval,
2402 		.maxlen		=	sizeof(int),
2403 		.mode		=	0644,
2404 		.proc_handler	=	&proc_dointvec_jiffies,
2405 		.strategy	=	&sysctl_jiffies,
2406 	},
2407 	{
2408 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2409 		.procname	=	"gc_timeout",
2410 		.data		=	&ip6_rt_gc_timeout,
2411 		.maxlen		=	sizeof(int),
2412 		.mode		=	0644,
2413 		.proc_handler	=	&proc_dointvec_jiffies,
2414 		.strategy	=	&sysctl_jiffies,
2415 	},
2416 	{
2417 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2418 		.procname	=	"gc_interval",
2419 		.data		=	&ip6_rt_gc_interval,
2420 		.maxlen		=	sizeof(int),
2421 		.mode		=	0644,
2422 		.proc_handler	=	&proc_dointvec_jiffies,
2423 		.strategy	=	&sysctl_jiffies,
2424 	},
2425 	{
2426 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2427 		.procname	=	"gc_elasticity",
2428 		.data		=	&ip6_rt_gc_elasticity,
2429 		.maxlen		=	sizeof(int),
2430 		.mode		=	0644,
2431 		.proc_handler	=	&proc_dointvec_jiffies,
2432 		.strategy	=	&sysctl_jiffies,
2433 	},
2434 	{
2435 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2436 		.procname	=	"mtu_expires",
2437 		.data		=	&ip6_rt_mtu_expires,
2438 		.maxlen		=	sizeof(int),
2439 		.mode		=	0644,
2440 		.proc_handler	=	&proc_dointvec_jiffies,
2441 		.strategy	=	&sysctl_jiffies,
2442 	},
2443 	{
2444 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2445 		.procname	=	"min_adv_mss",
2446 		.data		=	&ip6_rt_min_advmss,
2447 		.maxlen		=	sizeof(int),
2448 		.mode		=	0644,
2449 		.proc_handler	=	&proc_dointvec_jiffies,
2450 		.strategy	=	&sysctl_jiffies,
2451 	},
2452 	{
2453 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 		.procname	=	"gc_min_interval_ms",
2455 		.data		=	&ip6_rt_gc_min_interval,
2456 		.maxlen		=	sizeof(int),
2457 		.mode		=	0644,
2458 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2459 		.strategy	=	&sysctl_ms_jiffies,
2460 	},
2461 	{ .ctl_name = 0 }
2462 };
2463 
2464 #endif
2465 
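/*
 * Create the rt6_info slab cache, initialise the FIB, register the
 * /proc files, the netlink route handlers and, when configured, XFRM
 * and policy-routing support.
 */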
2466 void __init ip6_route_init(void)
2467 {
2468 	ip6_dst_ops.kmem_cachep =
2469 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2470 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2471 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2472 
2473 	fib6_init();
2474 	proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops);
2475 	proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2476 #ifdef CONFIG_XFRM
2477 	xfrm6_init();
2478 #endif
2479 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2480 	fib6_rules_init();
2481 #endif
2482 
2483 	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2484 	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2485 	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2486 }
2487 
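/*
 * Reverse of ip6_route_init(): tear down policy rules, /proc entries and
 * XFRM state, flush the remaining routes and destroy the dst slab cache.
 */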
2488 void ip6_route_cleanup(void)
2489 {
2490 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2491 	fib6_rules_cleanup();
2492 #endif
2493 #ifdef CONFIG_PROC_FS
2494 	proc_net_remove(&init_net, "ipv6_route");
2495 	proc_net_remove(&init_net, "rt6_stats");
2496 #endif
2497 #ifdef CONFIG_XFRM
2498 	xfrm6_fini();
2499 #endif
2500 	rt6_ifdown(NULL);
2501 	fib6_gc_cleanup();
2502 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2503 }
2504