xref: /linux/net/ipv6/route.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: RDBG() and
 * RT6_TRACE() then expand to printk() calls.  At any lower level both
 * macros expand to nothing, so their arguments are never evaluated.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG() takes one parenthesised printk argument list: RDBG(("fmt", args)) */
#define RDBG(x) printk x
/* RT6_TRACE() takes a format string plus arguments, logged at KERN_DEBUG */
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/*
 * When non-zero, the input/output policy lookups below clone routes that
 * already have a nexthop (or are RTF_NONEXTHOP) via rt6_alloc_clone();
 * when zero such routes are returned to the caller as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
78 
/*
 * Route cache tunables.  The time-valued ones are expressed in jiffies.
 */
/* entry count above which ip6_dst_gc() always collects and reports pressure */
static int ip6_rt_max_size = 4096;
/* minimum spacing between collections while the count is within ip6_rt_max_size */
static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry argument to half of this once entries drop below gc_thresh */
static int ip6_rt_gc_timeout = 60*HZ;
/* NOTE(review): not referenced in this part of the file; non-static, so presumably used elsewhere */
int ip6_rt_gc_interval = 30*HZ;
/* shift used by ip6_dst_gc() to decay its expiry argument (expire -= expire >> elasticity) */
static int ip6_rt_gc_elasticity = 9;
/* NOTE(review): presumably the lifetime of learned path MTU values; the consumer is not visible here */
static int ip6_rt_mtu_expires = 10*60*HZ;
/* floor applied by ipv6_advmss(); the 20 and 40 look like TCP and IPv6 header sizes - TODO confirm */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 static struct dst_ops ip6_dst_ops = {
109 	.family			=	AF_INET6,
110 	.protocol		=	__constant_htons(ETH_P_IPV6),
111 	.gc			=	ip6_dst_gc,
112 	.gc_thresh		=	1024,
113 	.check			=	ip6_dst_check,
114 	.destroy		=	ip6_dst_destroy,
115 	.ifdown			=	ip6_dst_ifdown,
116 	.negative_advice	=	ip6_negative_advice,
117 	.link_failure		=	ip6_link_failure,
118 	.update_pmtu		=	ip6_rt_update_pmtu,
119 	.entry_size		=	sizeof(struct rt6_info),
120 };
121 
122 struct rt6_info ip6_null_entry = {
123 	.u = {
124 		.dst = {
125 			.__refcnt	= ATOMIC_INIT(1),
126 			.__use		= 1,
127 			.dev		= &loopback_dev,
128 			.obsolete	= -1,
129 			.error		= -ENETUNREACH,
130 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
131 			.input		= ip6_pkt_discard,
132 			.output		= ip6_pkt_discard_out,
133 			.ops		= &ip6_dst_ops,
134 			.path		= (struct dst_entry*)&ip6_null_entry,
135 		}
136 	},
137 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
138 	.rt6i_metric	= ~(u32) 0,
139 	.rt6i_ref	= ATOMIC_INIT(1),
140 };
141 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the two extra static entries below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static "prohibit" entry: same shape as ip6_null_entry but reports
 * -EACCES and uses the prohibit handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_ref = ATOMIC_INIT(1),
	.rt6i_metric = ~(u32) 0,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.u = {
		.dst = {
			.ops = &ip6_dst_ops,
			.dev = &loopback_dev,
			.path = (struct dst_entry*)&ip6_prohibit_entry,
			.input = ip6_pkt_prohibit,
			.output = ip6_pkt_prohibit_out,
			.error = -EACCES,
			.obsolete = -1,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
		}
	},
};

/*
 * Static "blackhole" entry: reports -EINVAL and uses ip6_pkt_blk_hole
 * for both input and output.
 */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_ref = ATOMIC_INIT(1),
	.rt6i_metric = ~(u32) 0,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.u = {
		.dst = {
			.ops = &ip6_dst_ops,
			.dev = &loopback_dev,
			.path = (struct dst_entry*)&ip6_blk_hole_entry,
			.input = ip6_pkt_blk_hole,
			.output = ip6_pkt_blk_hole,
			.error = -EINVAL,
			.obsolete = -1,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
		}
	},
};

#endif
189 
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195 
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198 	struct rt6_info *rt = (struct rt6_info *)dst;
199 	struct inet6_dev *idev = rt->rt6i_idev;
200 
201 	if (idev != NULL) {
202 		rt->rt6i_idev = NULL;
203 		in6_dev_put(idev);
204 	}
205 }
206 
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 			   int how)
209 {
210 	struct rt6_info *rt = (struct rt6_info *)dst;
211 	struct inet6_dev *idev = rt->rt6i_idev;
212 
213 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 		if (loopback_idev != NULL) {
216 			rt->rt6i_idev = loopback_idev;
217 			in6_dev_put(idev);
218 		}
219 	}
220 }
221 
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 	return (rt->rt6i_flags & RTF_EXPIRES &&
225 		time_after(jiffies, rt->rt6i_expires));
226 }
227 
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 	return (ipv6_addr_type(daddr) &
231 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233 
234 /*
235  *	Route lookup. Any table->tb6_lock is implied.
236  */
237 
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 						    int oif,
240 						    int strict)
241 {
242 	struct rt6_info *local = NULL;
243 	struct rt6_info *sprt;
244 
245 	if (oif) {
246 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
247 			struct net_device *dev = sprt->rt6i_dev;
248 			if (dev->ifindex == oif)
249 				return sprt;
250 			if (dev->flags & IFF_LOOPBACK) {
251 				if (sprt->rt6i_idev == NULL ||
252 				    sprt->rt6i_idev->dev->ifindex != oif) {
253 					if (strict && oif)
254 						continue;
255 					if (local && (!oif ||
256 						      local->rt6i_idev->dev->ifindex == oif))
257 						continue;
258 				}
259 				local = sprt;
260 			}
261 		}
262 
263 		if (local)
264 			return local;
265 
266 		if (strict)
267 			return &ip6_null_entry;
268 	}
269 	return rt;
270 }
271 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's nexthop neighbour is not in
 * a NUD_VALID state and the per-interface rtr_probe_interval has elapsed
 * since the neighbour was last updated, send it a Neighbour Solicitation.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the rate limit is implemented by stamping neigh->updated before
 * the solicitation is sent.  A NULL @rt, or a route without a nexthop, is
 * a no-op.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due = 0;

	/* cheap unlocked test first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* re-check under the neighbour lock and claim this probe slot */
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		due = 1;
	}
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* solicit the neighbour on its solicited-node multicast address */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing does nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
307 
308 /*
309  * Default Router Selection (RFC 2461 6.3.6)
310  */
311 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313 	struct net_device *dev = rt->rt6i_dev;
314 	int ret = 0;
315 
316 	if (!oif)
317 		return 2;
318 	if (dev->flags & IFF_LOOPBACK) {
319 		if (!WARN_ON(rt->rt6i_idev == NULL) &&
320 		    rt->rt6i_idev->dev->ifindex == oif)
321 			ret = 1;
322 		else
323 			return 0;
324 	}
325 	if (dev->ifindex == oif)
326 		return 2;
327 
328 	return ret;
329 }
330 
331 static int inline rt6_check_neigh(struct rt6_info *rt)
332 {
333 	struct neighbour *neigh = rt->rt6i_nexthop;
334 	int m = 0;
335 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
336 	    !(rt->rt6i_flags & RTF_GATEWAY))
337 		m = 1;
338 	else if (neigh) {
339 		read_lock_bh(&neigh->lock);
340 		if (neigh->nud_state & NUD_VALID)
341 			m = 2;
342 		else if (!(neigh->nud_state & NUD_FAILED))
343 			m = 1;
344 		read_unlock_bh(&neigh->lock);
345 	}
346 	return m;
347 }
348 
349 static int rt6_score_route(struct rt6_info *rt, int oif,
350 			   int strict)
351 {
352 	int m, n;
353 
354 	m = rt6_check_dev(rt, oif);
355 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
356 		return -1;
357 #ifdef CONFIG_IPV6_ROUTER_PREF
358 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359 #endif
360 	n = rt6_check_neigh(rt);
361 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
362 		return -1;
363 	return m;
364 }
365 
366 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
367 				   int strict)
368 {
369 	struct rt6_info *match = NULL, *last = NULL;
370 	struct rt6_info *rt, *rt0 = *head;
371 	u32 metric;
372 	int mpri = -1;
373 
374 	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
375 		  __FUNCTION__, head, head ? *head : NULL, oif);
376 
377 	for (rt = rt0, metric = rt0->rt6i_metric;
378 	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
379 	     rt = rt->u.dst.rt6_next) {
380 		int m;
381 
382 		if (rt6_check_expired(rt))
383 			continue;
384 
385 		last = rt;
386 
387 		m = rt6_score_route(rt, oif, strict);
388 		if (m < 0)
389 			continue;
390 
391 		if (m > mpri) {
392 			if (strict & RT6_LOOKUP_F_REACHABLE)
393 				rt6_probe(match);
394 			match = rt;
395 			mpri = m;
396 		} else if (strict & RT6_LOOKUP_F_REACHABLE) {
397 			rt6_probe(rt);
398 		}
399 	}
400 
401 	if (!match &&
402 	    (strict & RT6_LOOKUP_F_REACHABLE) &&
403 	    last && last != rt0) {
404 		/* no entries matched; do round-robin */
405 		static DEFINE_SPINLOCK(lock);
406 		spin_lock(&lock);
407 		*head = rt0->u.dst.rt6_next;
408 		rt0->u.dst.rt6_next = last->u.dst.rt6_next;
409 		last->u.dst.rt6_next = rt0;
410 		spin_unlock(&lock);
411 	}
412 
413 	RT6_TRACE("%s() => %p, score=%d\n",
414 		  __FUNCTION__, match, mpri);
415 
416 	return (match ? match : &ip6_null_entry);
417 }
418 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement.
 *
 * @dev:    device the advertisement arrived on
 * @opt:    start of the option
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Adds, refreshes or (for a zero lifetime) deletes the route-info route
 * for the advertised prefix via @gwaddr on @dev.
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3).
	 * Length counts units of 8 octets including the fixed part of the
	 * option, so a non-zero prefix needs Length >= 2 and a prefix longer
	 * than 64 bits needs Length == 3.  Accepting anything shorter would
	 * make the prefix handling below read past the end of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* full 128-bit prefix present: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copies only prefix_len bits, which the checks above bound */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* existing route: refresh the advertised preference */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
496 
/*
 * BACKTRACK(saddr) - retry a failed lookup further up the FIB tree.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and the labels `restart` and `out`.  It does nothing unless `rt`
 * is &ip6_null_entry.  Otherwise it walks from `fn` towards the root:
 *   - at a node flagged RTN_TL_ROOT it gives up and jumps to `out`;
 *   - if the parent has a subtree other than `fn`, `fn` becomes the
 *     result of looking @saddr up in that subtree, else `fn` becomes the
 *     parent;
 *   - as soon as `fn` carries RTN_RTINFO it jumps to `restart` so the
 *     caller can try the routes attached to that node.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
514 
515 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
516 					     struct flowi *fl, int flags)
517 {
518 	struct fib6_node *fn;
519 	struct rt6_info *rt;
520 
521 	read_lock_bh(&table->tb6_lock);
522 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
523 restart:
524 	rt = fn->leaf;
525 	rt = rt6_device_match(rt, fl->oif, flags);
526 	BACKTRACK(&fl->fl6_src);
527 out:
528 	dst_hold(&rt->u.dst);
529 	read_unlock_bh(&table->tb6_lock);
530 
531 	rt->u.dst.lastuse = jiffies;
532 	rt->u.dst.__use++;
533 
534 	return rt;
535 
536 }
537 
538 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
539 			    int oif, int strict)
540 {
541 	struct flowi fl = {
542 		.oif = oif,
543 		.nl_u = {
544 			.ip6_u = {
545 				.daddr = *daddr,
546 			},
547 		},
548 	};
549 	struct dst_entry *dst;
550 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
551 
552 	if (saddr) {
553 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
554 		flags |= RT6_LOOKUP_F_HAS_SADDR;
555 	}
556 
557 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
558 	if (dst->error == 0)
559 		return (struct rt6_info *) dst;
560 
561 	dst_release(dst);
562 
563 	return NULL;
564 }
565 
566 /* ip6_ins_rt is called with FREE table->tb6_lock.
567    It takes new route entry, the addition fails by any reason the
568    route is freed. In any case, if caller does not hold it, it may
569    be destroyed.
570  */
571 
572 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
573 {
574 	int err;
575 	struct fib6_table *table;
576 
577 	table = rt->rt6i_table;
578 	write_lock_bh(&table->tb6_lock);
579 	err = fib6_add(&table->tb6_root, rt, info);
580 	write_unlock_bh(&table->tb6_lock);
581 
582 	return err;
583 }
584 
585 int ip6_ins_rt(struct rt6_info *rt)
586 {
587 	return __ip6_ins_rt(rt, NULL);
588 }
589 
590 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
591 				      struct in6_addr *saddr)
592 {
593 	struct rt6_info *rt;
594 
595 	/*
596 	 *	Clone the route.
597 	 */
598 
599 	rt = ip6_rt_copy(ort);
600 
601 	if (rt) {
602 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
603 			if (rt->rt6i_dst.plen != 128 &&
604 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
605 				rt->rt6i_flags |= RTF_ANYCAST;
606 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
607 		}
608 
609 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610 		rt->rt6i_dst.plen = 128;
611 		rt->rt6i_flags |= RTF_CACHE;
612 		rt->u.dst.flags |= DST_HOST;
613 
614 #ifdef CONFIG_IPV6_SUBTREES
615 		if (rt->rt6i_src.plen && saddr) {
616 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
617 			rt->rt6i_src.plen = 128;
618 		}
619 #endif
620 
621 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
622 
623 	}
624 
625 	return rt;
626 }
627 
628 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
629 {
630 	struct rt6_info *rt = ip6_rt_copy(ort);
631 	if (rt) {
632 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
633 		rt->rt6i_dst.plen = 128;
634 		rt->rt6i_flags |= RTF_CACHE;
635 		rt->u.dst.flags |= DST_HOST;
636 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
637 	}
638 	return rt;
639 }
640 
641 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
642 					    struct flowi *fl, int flags)
643 {
644 	struct fib6_node *fn;
645 	struct rt6_info *rt, *nrt;
646 	int strict = 0;
647 	int attempts = 3;
648 	int err;
649 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
650 
651 	strict |= flags & RT6_LOOKUP_F_IFACE;
652 
653 relookup:
654 	read_lock_bh(&table->tb6_lock);
655 
656 restart_2:
657 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
658 
659 restart:
660 	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
661 	BACKTRACK(&fl->fl6_src);
662 	if (rt == &ip6_null_entry ||
663 	    rt->rt6i_flags & RTF_CACHE)
664 		goto out;
665 
666 	dst_hold(&rt->u.dst);
667 	read_unlock_bh(&table->tb6_lock);
668 
669 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
670 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
671 	else {
672 #if CLONE_OFFLINK_ROUTE
673 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
674 #else
675 		goto out2;
676 #endif
677 	}
678 
679 	dst_release(&rt->u.dst);
680 	rt = nrt ? : &ip6_null_entry;
681 
682 	dst_hold(&rt->u.dst);
683 	if (nrt) {
684 		err = ip6_ins_rt(nrt);
685 		if (!err)
686 			goto out2;
687 	}
688 
689 	if (--attempts <= 0)
690 		goto out2;
691 
692 	/*
693 	 * Race condition! In the gap, when table->tb6_lock was
694 	 * released someone could insert this route.  Relookup.
695 	 */
696 	dst_release(&rt->u.dst);
697 	goto relookup;
698 
699 out:
700 	if (reachable) {
701 		reachable = 0;
702 		goto restart_2;
703 	}
704 	dst_hold(&rt->u.dst);
705 	read_unlock_bh(&table->tb6_lock);
706 out2:
707 	rt->u.dst.lastuse = jiffies;
708 	rt->u.dst.__use++;
709 
710 	return rt;
711 }
712 
713 void ip6_route_input(struct sk_buff *skb)
714 {
715 	struct ipv6hdr *iph = skb->nh.ipv6h;
716 	int flags = RT6_LOOKUP_F_HAS_SADDR;
717 	struct flowi fl = {
718 		.iif = skb->dev->ifindex,
719 		.nl_u = {
720 			.ip6_u = {
721 				.daddr = iph->daddr,
722 				.saddr = iph->saddr,
723 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
724 			},
725 		},
726 		.mark = skb->mark,
727 		.proto = iph->nexthdr,
728 	};
729 
730 	if (rt6_need_strict(&iph->daddr))
731 		flags |= RT6_LOOKUP_F_IFACE;
732 
733 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
734 }
735 
736 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
737 					     struct flowi *fl, int flags)
738 {
739 	struct fib6_node *fn;
740 	struct rt6_info *rt, *nrt;
741 	int strict = 0;
742 	int attempts = 3;
743 	int err;
744 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
745 
746 	strict |= flags & RT6_LOOKUP_F_IFACE;
747 
748 relookup:
749 	read_lock_bh(&table->tb6_lock);
750 
751 restart_2:
752 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
753 
754 restart:
755 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
756 	BACKTRACK(&fl->fl6_src);
757 	if (rt == &ip6_null_entry ||
758 	    rt->rt6i_flags & RTF_CACHE)
759 		goto out;
760 
761 	dst_hold(&rt->u.dst);
762 	read_unlock_bh(&table->tb6_lock);
763 
764 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
765 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
766 	else {
767 #if CLONE_OFFLINK_ROUTE
768 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
769 #else
770 		goto out2;
771 #endif
772 	}
773 
774 	dst_release(&rt->u.dst);
775 	rt = nrt ? : &ip6_null_entry;
776 
777 	dst_hold(&rt->u.dst);
778 	if (nrt) {
779 		err = ip6_ins_rt(nrt);
780 		if (!err)
781 			goto out2;
782 	}
783 
784 	if (--attempts <= 0)
785 		goto out2;
786 
787 	/*
788 	 * Race condition! In the gap, when table->tb6_lock was
789 	 * released someone could insert this route.  Relookup.
790 	 */
791 	dst_release(&rt->u.dst);
792 	goto relookup;
793 
794 out:
795 	if (reachable) {
796 		reachable = 0;
797 		goto restart_2;
798 	}
799 	dst_hold(&rt->u.dst);
800 	read_unlock_bh(&table->tb6_lock);
801 out2:
802 	rt->u.dst.lastuse = jiffies;
803 	rt->u.dst.__use++;
804 	return rt;
805 }
806 
807 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
808 {
809 	int flags = 0;
810 
811 	if (rt6_need_strict(&fl->fl6_dst))
812 		flags |= RT6_LOOKUP_F_IFACE;
813 
814 	if (!ipv6_addr_any(&fl->fl6_src))
815 		flags |= RT6_LOOKUP_F_HAS_SADDR;
816 
817 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
818 }
819 
820 
821 /*
822  *	Destination cache support functions
823  */
824 
825 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
826 {
827 	struct rt6_info *rt;
828 
829 	rt = (struct rt6_info *) dst;
830 
831 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
832 		return dst;
833 
834 	return NULL;
835 }
836 
837 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
838 {
839 	struct rt6_info *rt = (struct rt6_info *) dst;
840 
841 	if (rt) {
842 		if (rt->rt6i_flags & RTF_CACHE)
843 			ip6_del_rt(rt);
844 		else
845 			dst_release(dst);
846 	}
847 	return NULL;
848 }
849 
850 static void ip6_link_failure(struct sk_buff *skb)
851 {
852 	struct rt6_info *rt;
853 
854 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
855 
856 	rt = (struct rt6_info *) skb->dst;
857 	if (rt) {
858 		if (rt->rt6i_flags&RTF_CACHE) {
859 			dst_set_expires(&rt->u.dst, 0);
860 			rt->rt6i_flags |= RTF_EXPIRES;
861 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
862 			rt->rt6i_node->fn_sernum = -1;
863 	}
864 }
865 
866 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
867 {
868 	struct rt6_info *rt6 = (struct rt6_info*)dst;
869 
870 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
871 		rt6->rt6i_flags |= RTF_MODIFIED;
872 		if (mtu < IPV6_MIN_MTU) {
873 			mtu = IPV6_MIN_MTU;
874 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
875 		}
876 		dst->metrics[RTAX_MTU-1] = mtu;
877 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
878 	}
879 }
880 
881 static int ipv6_get_mtu(struct net_device *dev);
882 
883 static inline unsigned int ipv6_advmss(unsigned int mtu)
884 {
885 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
886 
887 	if (mtu < ip6_rt_min_advmss)
888 		mtu = ip6_rt_min_advmss;
889 
890 	/*
891 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
892 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
893 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
894 	 * rely only on pmtu discovery"
895 	 */
896 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
897 		mtu = IPV6_MAXPLEN;
898 	return mtu;
899 }
900 
901 static struct dst_entry *ndisc_dst_gc_list;
902 static DEFINE_SPINLOCK(ndisc_lock);
903 
904 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
905 				  struct neighbour *neigh,
906 				  struct in6_addr *addr,
907 				  int (*output)(struct sk_buff *))
908 {
909 	struct rt6_info *rt;
910 	struct inet6_dev *idev = in6_dev_get(dev);
911 
912 	if (unlikely(idev == NULL))
913 		return NULL;
914 
915 	rt = ip6_dst_alloc();
916 	if (unlikely(rt == NULL)) {
917 		in6_dev_put(idev);
918 		goto out;
919 	}
920 
921 	dev_hold(dev);
922 	if (neigh)
923 		neigh_hold(neigh);
924 	else
925 		neigh = ndisc_get_neigh(dev, addr);
926 
927 	rt->rt6i_dev	  = dev;
928 	rt->rt6i_idev     = idev;
929 	rt->rt6i_nexthop  = neigh;
930 	atomic_set(&rt->u.dst.__refcnt, 1);
931 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
932 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
933 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
934 	rt->u.dst.output  = output;
935 
936 #if 0	/* there's no chance to use these for ndisc */
937 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
938 				? DST_HOST
939 				: 0;
940 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
941 	rt->rt6i_dst.plen = 128;
942 #endif
943 
944 	spin_lock_bh(&ndisc_lock);
945 	rt->u.dst.next = ndisc_dst_gc_list;
946 	ndisc_dst_gc_list = &rt->u.dst;
947 	spin_unlock_bh(&ndisc_lock);
948 
949 	fib6_force_start_gc();
950 
951 out:
952 	return &rt->u.dst;
953 }
954 
955 int ndisc_dst_gc(int *more)
956 {
957 	struct dst_entry *dst, *next, **pprev;
958 	int freed;
959 
960 	next = NULL;
961 	freed = 0;
962 
963 	spin_lock_bh(&ndisc_lock);
964 	pprev = &ndisc_dst_gc_list;
965 
966 	while ((dst = *pprev) != NULL) {
967 		if (!atomic_read(&dst->__refcnt)) {
968 			*pprev = dst->next;
969 			dst_free(dst);
970 			freed++;
971 		} else {
972 			pprev = &dst->next;
973 			(*more)++;
974 		}
975 	}
976 
977 	spin_unlock_bh(&ndisc_lock);
978 
979 	return freed;
980 }
981 
982 static int ip6_dst_gc(void)
983 {
984 	static unsigned expire = 30*HZ;
985 	static unsigned long last_gc;
986 	unsigned long now = jiffies;
987 
988 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
989 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
990 		goto out;
991 
992 	expire++;
993 	fib6_run_gc(expire);
994 	last_gc = now;
995 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
996 		expire = ip6_rt_gc_timeout>>1;
997 
998 out:
999 	expire -= expire>>ip6_rt_gc_elasticity;
1000 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1001 }
1002 
1003 /* Clean host part of a prefix. Not necessary in radix tree,
1004    but results in cleaner routing tables.
1005 
1006    Remove it only when all the things will work!
1007  */
1008 
1009 static int ipv6_get_mtu(struct net_device *dev)
1010 {
1011 	int mtu = IPV6_MIN_MTU;
1012 	struct inet6_dev *idev;
1013 
1014 	idev = in6_dev_get(dev);
1015 	if (idev) {
1016 		mtu = idev->cnf.mtu6;
1017 		in6_dev_put(idev);
1018 	}
1019 	return mtu;
1020 }
1021 
1022 int ipv6_get_hoplimit(struct net_device *dev)
1023 {
1024 	int hoplimit = ipv6_devconf.hop_limit;
1025 	struct inet6_dev *idev;
1026 
1027 	idev = in6_dev_get(dev);
1028 	if (idev) {
1029 		hoplimit = idev->cnf.hop_limit;
1030 		in6_dev_put(idev);
1031 	}
1032 	return hoplimit;
1033 }
1034 
1035 /*
1036  *
1037  */
1038 
/*
 * Build a route from @cfg and insert it into the FIB.
 *
 * @cfg: route description.  fc_metric and fc_protocol are defaulted in
 *       place when left at 0 / RTPROT_UNSPEC.
 *
 * Returns the result of __ip6_ins_rt() when the route gets as far as
 * insertion, otherwise a negative errno: -EINVAL (bad prefix length,
 * source prefix without subtree support, bad gateway, metric type out of
 * range), -ENODEV (no usable device), -ENOBUFS (no table), -ENOMEM (no
 * route entry), -EHOSTUNREACH (gateway not directly reachable), or the
 * error from the neighbour lookup.
 *
 * The references taken on the device and its inet6_dev are handed over to
 * the new route on the insertion path and dropped at `out` on failure.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* source-specific routes need subtree support */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	/* resolve the requested interface, holding dev and idev */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* default handlers; the reject path below overrides them */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	/* store the destination with its host bits cleared */
	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				/* swap the requested device for loopback */
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		/* the flags requested in cfg are replaced, not merged */
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* the gateway itself must be reachable via an existing route */
			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				/* an explicit device must agree with the gateway's route */
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* no device given: inherit it from the gateway's route */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* accept only if the gateway's own route is not via a gateway */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			/* do not leave an error pointer in the route being freed */
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* copy the metrics supplied as nested netlink attributes */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla->nla_type;

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				/* metrics[] is indexed by attribute type minus one */
				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	/* fill in whatever the caller left unset */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* the route now owns the dev and idev references taken above */
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* failure: undo the references and free the half-built route */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->u.dst);
	return err;
}
1238 
1239 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1240 {
1241 	int err;
1242 	struct fib6_table *table;
1243 
1244 	if (rt == &ip6_null_entry)
1245 		return -ENOENT;
1246 
1247 	table = rt->rt6i_table;
1248 	write_lock_bh(&table->tb6_lock);
1249 
1250 	err = fib6_del(rt, info);
1251 	dst_release(&rt->u.dst);
1252 
1253 	write_unlock_bh(&table->tb6_lock);
1254 
1255 	return err;
1256 }
1257 
1258 int ip6_del_rt(struct rt6_info *rt)
1259 {
1260 	return __ip6_del_rt(rt, NULL);
1261 }
1262 
1263 static int ip6_route_del(struct fib6_config *cfg)
1264 {
1265 	struct fib6_table *table;
1266 	struct fib6_node *fn;
1267 	struct rt6_info *rt;
1268 	int err = -ESRCH;
1269 
1270 	table = fib6_get_table(cfg->fc_table);
1271 	if (table == NULL)
1272 		return err;
1273 
1274 	read_lock_bh(&table->tb6_lock);
1275 
1276 	fn = fib6_locate(&table->tb6_root,
1277 			 &cfg->fc_dst, cfg->fc_dst_len,
1278 			 &cfg->fc_src, cfg->fc_src_len);
1279 
1280 	if (fn) {
1281 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1282 			if (cfg->fc_ifindex &&
1283 			    (rt->rt6i_dev == NULL ||
1284 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1285 				continue;
1286 			if (cfg->fc_flags & RTF_GATEWAY &&
1287 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1288 				continue;
1289 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1290 				continue;
1291 			dst_hold(&rt->u.dst);
1292 			read_unlock_bh(&table->tb6_lock);
1293 
1294 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1295 		}
1296 	}
1297 	read_unlock_bh(&table->tb6_lock);
1298 
1299 	return err;
1300 }
1301 
1302 /*
1303  *	Handle redirects
1304  */
1305 struct ip6rd_flowi {
1306 	struct flowi fl;
1307 	struct in6_addr gateway;
1308 };
1309 
1310 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1311 					     struct flowi *fl,
1312 					     int flags)
1313 {
1314 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1315 	struct rt6_info *rt;
1316 	struct fib6_node *fn;
1317 
1318 	/*
1319 	 * Get the "current" route for this destination and
1320 	 * check if the redirect has come from approriate router.
1321 	 *
1322 	 * RFC 2461 specifies that redirects should only be
1323 	 * accepted if they come from the nexthop to the target.
1324 	 * Due to the way the routes are chosen, this notion
1325 	 * is a bit fuzzy and one might need to check all possible
1326 	 * routes.
1327 	 */
1328 
1329 	read_lock_bh(&table->tb6_lock);
1330 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1331 restart:
1332 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1333 		/*
1334 		 * Current route is on-link; redirect is always invalid.
1335 		 *
1336 		 * Seems, previous statement is not true. It could
1337 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1338 		 * But then router serving it might decide, that we should
1339 		 * know truth 8)8) --ANK (980726).
1340 		 */
1341 		if (rt6_check_expired(rt))
1342 			continue;
1343 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1344 			continue;
1345 		if (fl->oif != rt->rt6i_dev->ifindex)
1346 			continue;
1347 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1348 			continue;
1349 		break;
1350 	}
1351 
1352 	if (!rt)
1353 		rt = &ip6_null_entry;
1354 	BACKTRACK(&fl->fl6_src);
1355 out:
1356 	dst_hold(&rt->u.dst);
1357 
1358 	read_unlock_bh(&table->tb6_lock);
1359 
1360 	return rt;
1361 };
1362 
1363 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1364 					   struct in6_addr *src,
1365 					   struct in6_addr *gateway,
1366 					   struct net_device *dev)
1367 {
1368 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1369 	struct ip6rd_flowi rdfl = {
1370 		.fl = {
1371 			.oif = dev->ifindex,
1372 			.nl_u = {
1373 				.ip6_u = {
1374 					.daddr = *dest,
1375 					.saddr = *src,
1376 				},
1377 			},
1378 		},
1379 		.gateway = *gateway,
1380 	};
1381 
1382 	if (rt6_need_strict(dest))
1383 		flags |= RT6_LOOKUP_F_IFACE;
1384 
1385 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1386 }
1387 
1388 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1389 		  struct in6_addr *saddr,
1390 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1391 {
1392 	struct rt6_info *rt, *nrt = NULL;
1393 	struct netevent_redirect netevent;
1394 
1395 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1396 
1397 	if (rt == &ip6_null_entry) {
1398 		if (net_ratelimit())
1399 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1400 			       "for redirect target\n");
1401 		goto out;
1402 	}
1403 
1404 	/*
1405 	 *	We have finally decided to accept it.
1406 	 */
1407 
1408 	neigh_update(neigh, lladdr, NUD_STALE,
1409 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1410 		     NEIGH_UPDATE_F_OVERRIDE|
1411 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1412 				     NEIGH_UPDATE_F_ISROUTER))
1413 		     );
1414 
1415 	/*
1416 	 * Redirect received -> path was valid.
1417 	 * Look, redirects are sent only in response to data packets,
1418 	 * so that this nexthop apparently is reachable. --ANK
1419 	 */
1420 	dst_confirm(&rt->u.dst);
1421 
1422 	/* Duplicate redirect: silently ignore. */
1423 	if (neigh == rt->u.dst.neighbour)
1424 		goto out;
1425 
1426 	nrt = ip6_rt_copy(rt);
1427 	if (nrt == NULL)
1428 		goto out;
1429 
1430 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1431 	if (on_link)
1432 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1433 
1434 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1435 	nrt->rt6i_dst.plen = 128;
1436 	nrt->u.dst.flags |= DST_HOST;
1437 
1438 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1439 	nrt->rt6i_nexthop = neigh_clone(neigh);
1440 	/* Reset pmtu, it may be better */
1441 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1442 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1443 
1444 	if (ip6_ins_rt(nrt))
1445 		goto out;
1446 
1447 	netevent.old = &rt->u.dst;
1448 	netevent.new = &nrt->u.dst;
1449 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1450 
1451 	if (rt->rt6i_flags&RTF_CACHE) {
1452 		ip6_del_rt(rt);
1453 		return;
1454 	}
1455 
1456 out:
1457 	dst_release(&rt->u.dst);
1458 	return;
1459 }
1460 
1461 /*
1462  *	Handle ICMP "packet too big" messages
1463  *	i.e. Path MTU discovery
1464  */
1465 
1466 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1467 			struct net_device *dev, u32 pmtu)
1468 {
1469 	struct rt6_info *rt, *nrt;
1470 	int allfrag = 0;
1471 
1472 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1473 	if (rt == NULL)
1474 		return;
1475 
1476 	if (pmtu >= dst_mtu(&rt->u.dst))
1477 		goto out;
1478 
1479 	if (pmtu < IPV6_MIN_MTU) {
1480 		/*
1481 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1482 		 * MTU (1280) and a fragment header should always be included
1483 		 * after a node receiving Too Big message reporting PMTU is
1484 		 * less than the IPv6 Minimum Link MTU.
1485 		 */
1486 		pmtu = IPV6_MIN_MTU;
1487 		allfrag = 1;
1488 	}
1489 
1490 	/* New mtu received -> path was valid.
1491 	   They are sent only in response to data packets,
1492 	   so that this nexthop apparently is reachable. --ANK
1493 	 */
1494 	dst_confirm(&rt->u.dst);
1495 
1496 	/* Host route. If it is static, it would be better
1497 	   not to override it, but add new one, so that
1498 	   when cache entry will expire old pmtu
1499 	   would return automatically.
1500 	 */
1501 	if (rt->rt6i_flags & RTF_CACHE) {
1502 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503 		if (allfrag)
1504 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1505 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1506 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1507 		goto out;
1508 	}
1509 
1510 	/* Network route.
1511 	   Two cases are possible:
1512 	   1. It is connected route. Action: COW
1513 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1514 	 */
1515 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1516 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1517 	else
1518 		nrt = rt6_alloc_clone(rt, daddr);
1519 
1520 	if (nrt) {
1521 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1522 		if (allfrag)
1523 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1524 
1525 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1526 		 * happened within 5 mins, the recommended timer is 10 mins.
1527 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1528 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1529 		 * and detecting PMTU increase will be automatically happened.
1530 		 */
1531 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1532 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1533 
1534 		ip6_ins_rt(nrt);
1535 	}
1536 out:
1537 	dst_release(&rt->u.dst);
1538 }
1539 
1540 /*
1541  *	Misc support functions
1542  */
1543 
1544 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1545 {
1546 	struct rt6_info *rt = ip6_dst_alloc();
1547 
1548 	if (rt) {
1549 		rt->u.dst.input = ort->u.dst.input;
1550 		rt->u.dst.output = ort->u.dst.output;
1551 
1552 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1553 		rt->u.dst.error = ort->u.dst.error;
1554 		rt->u.dst.dev = ort->u.dst.dev;
1555 		if (rt->u.dst.dev)
1556 			dev_hold(rt->u.dst.dev);
1557 		rt->rt6i_idev = ort->rt6i_idev;
1558 		if (rt->rt6i_idev)
1559 			in6_dev_hold(rt->rt6i_idev);
1560 		rt->u.dst.lastuse = jiffies;
1561 		rt->rt6i_expires = 0;
1562 
1563 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1564 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1565 		rt->rt6i_metric = 0;
1566 
1567 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1568 #ifdef CONFIG_IPV6_SUBTREES
1569 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1570 #endif
1571 		rt->rt6i_table = ort->rt6i_table;
1572 	}
1573 	return rt;
1574 }
1575 
1576 #ifdef CONFIG_IPV6_ROUTE_INFO
1577 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1578 					   struct in6_addr *gwaddr, int ifindex)
1579 {
1580 	struct fib6_node *fn;
1581 	struct rt6_info *rt = NULL;
1582 	struct fib6_table *table;
1583 
1584 	table = fib6_get_table(RT6_TABLE_INFO);
1585 	if (table == NULL)
1586 		return NULL;
1587 
1588 	write_lock_bh(&table->tb6_lock);
1589 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1590 	if (!fn)
1591 		goto out;
1592 
1593 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1594 		if (rt->rt6i_dev->ifindex != ifindex)
1595 			continue;
1596 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1597 			continue;
1598 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1599 			continue;
1600 		dst_hold(&rt->u.dst);
1601 		break;
1602 	}
1603 out:
1604 	write_unlock_bh(&table->tb6_lock);
1605 	return rt;
1606 }
1607 
1608 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1609 					   struct in6_addr *gwaddr, int ifindex,
1610 					   unsigned pref)
1611 {
1612 	struct fib6_config cfg = {
1613 		.fc_table	= RT6_TABLE_INFO,
1614 		.fc_metric	= 1024,
1615 		.fc_ifindex	= ifindex,
1616 		.fc_dst_len	= prefixlen,
1617 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1618 				  RTF_UP | RTF_PREF(pref),
1619 	};
1620 
1621 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1622 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1623 
1624 	/* We should treat it as a default route if prefix length is 0. */
1625 	if (!prefixlen)
1626 		cfg.fc_flags |= RTF_DEFAULT;
1627 
1628 	ip6_route_add(&cfg);
1629 
1630 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1631 }
1632 #endif
1633 
1634 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1635 {
1636 	struct rt6_info *rt;
1637 	struct fib6_table *table;
1638 
1639 	table = fib6_get_table(RT6_TABLE_DFLT);
1640 	if (table == NULL)
1641 		return NULL;
1642 
1643 	write_lock_bh(&table->tb6_lock);
1644 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1645 		if (dev == rt->rt6i_dev &&
1646 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1647 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1648 			break;
1649 	}
1650 	if (rt)
1651 		dst_hold(&rt->u.dst);
1652 	write_unlock_bh(&table->tb6_lock);
1653 	return rt;
1654 }
1655 
1656 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1657 				     struct net_device *dev,
1658 				     unsigned int pref)
1659 {
1660 	struct fib6_config cfg = {
1661 		.fc_table	= RT6_TABLE_DFLT,
1662 		.fc_metric	= 1024,
1663 		.fc_ifindex	= dev->ifindex,
1664 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1665 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1666 	};
1667 
1668 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1669 
1670 	ip6_route_add(&cfg);
1671 
1672 	return rt6_get_dflt_router(gwaddr, dev);
1673 }
1674 
1675 void rt6_purge_dflt_routers(void)
1676 {
1677 	struct rt6_info *rt;
1678 	struct fib6_table *table;
1679 
1680 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1681 	table = fib6_get_table(RT6_TABLE_DFLT);
1682 	if (table == NULL)
1683 		return;
1684 
1685 restart:
1686 	read_lock_bh(&table->tb6_lock);
1687 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1688 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1689 			dst_hold(&rt->u.dst);
1690 			read_unlock_bh(&table->tb6_lock);
1691 			ip6_del_rt(rt);
1692 			goto restart;
1693 		}
1694 	}
1695 	read_unlock_bh(&table->tb6_lock);
1696 }
1697 
1698 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1699 				 struct fib6_config *cfg)
1700 {
1701 	memset(cfg, 0, sizeof(*cfg));
1702 
1703 	cfg->fc_table = RT6_TABLE_MAIN;
1704 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1705 	cfg->fc_metric = rtmsg->rtmsg_metric;
1706 	cfg->fc_expires = rtmsg->rtmsg_info;
1707 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1708 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1709 	cfg->fc_flags = rtmsg->rtmsg_flags;
1710 
1711 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1712 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1713 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1714 }
1715 
1716 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1717 {
1718 	struct fib6_config cfg;
1719 	struct in6_rtmsg rtmsg;
1720 	int err;
1721 
1722 	switch(cmd) {
1723 	case SIOCADDRT:		/* Add a route */
1724 	case SIOCDELRT:		/* Delete a route */
1725 		if (!capable(CAP_NET_ADMIN))
1726 			return -EPERM;
1727 		err = copy_from_user(&rtmsg, arg,
1728 				     sizeof(struct in6_rtmsg));
1729 		if (err)
1730 			return -EFAULT;
1731 
1732 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1733 
1734 		rtnl_lock();
1735 		switch (cmd) {
1736 		case SIOCADDRT:
1737 			err = ip6_route_add(&cfg);
1738 			break;
1739 		case SIOCDELRT:
1740 			err = ip6_route_del(&cfg);
1741 			break;
1742 		default:
1743 			err = -EINVAL;
1744 		}
1745 		rtnl_unlock();
1746 
1747 		return err;
1748 	};
1749 
1750 	return -EINVAL;
1751 }
1752 
1753 /*
1754  *	Drop the packet on the floor
1755  */
1756 
1757 static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
1758 {
1759 	int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1760 	if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1761 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1762 
1763 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
1764 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1765 	kfree_skb(skb);
1766 	return 0;
1767 }
1768 
1769 static int ip6_pkt_discard(struct sk_buff *skb)
1770 {
1771 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1772 }
1773 
1774 static int ip6_pkt_discard_out(struct sk_buff *skb)
1775 {
1776 	skb->dev = skb->dst->dev;
1777 	return ip6_pkt_discard(skb);
1778 }
1779 
1780 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1781 
1782 static int ip6_pkt_prohibit(struct sk_buff *skb)
1783 {
1784 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1785 }
1786 
1787 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1788 {
1789 	skb->dev = skb->dst->dev;
1790 	return ip6_pkt_prohibit(skb);
1791 }
1792 
/*
 * Black-hole handler: free @skb without sending any ICMPv6 error and
 * without touching any counter.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	int ret = 0;

	kfree_skb(skb);
	return ret;
}
1798 
1799 #endif
1800 
1801 /*
1802  *	Allocate a dst for local (unicast / anycast) address.
1803  */
1804 
1805 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1806 				    const struct in6_addr *addr,
1807 				    int anycast)
1808 {
1809 	struct rt6_info *rt = ip6_dst_alloc();
1810 
1811 	if (rt == NULL)
1812 		return ERR_PTR(-ENOMEM);
1813 
1814 	dev_hold(&loopback_dev);
1815 	in6_dev_hold(idev);
1816 
1817 	rt->u.dst.flags = DST_HOST;
1818 	rt->u.dst.input = ip6_input;
1819 	rt->u.dst.output = ip6_output;
1820 	rt->rt6i_dev = &loopback_dev;
1821 	rt->rt6i_idev = idev;
1822 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1823 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1824 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1825 	rt->u.dst.obsolete = -1;
1826 
1827 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1828 	if (anycast)
1829 		rt->rt6i_flags |= RTF_ANYCAST;
1830 	else
1831 		rt->rt6i_flags |= RTF_LOCAL;
1832 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1833 	if (rt->rt6i_nexthop == NULL) {
1834 		dst_free(&rt->u.dst);
1835 		return ERR_PTR(-ENOMEM);
1836 	}
1837 
1838 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1839 	rt->rt6i_dst.plen = 128;
1840 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1841 
1842 	atomic_set(&rt->u.dst.__refcnt, 1);
1843 
1844 	return rt;
1845 }
1846 
1847 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1848 {
1849 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1850 	    rt != &ip6_null_entry) {
1851 		RT6_TRACE("deleted by ifdown %p\n", rt);
1852 		return -1;
1853 	}
1854 	return 0;
1855 }
1856 
1857 void rt6_ifdown(struct net_device *dev)
1858 {
1859 	fib6_clean_all(fib6_ifdown, 0, dev);
1860 }
1861 
/*
 * Argument handed by rt6_mtu_change() to rt6_mtu_change_route() for every
 * route visited.
 */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
1867 
1868 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1869 {
1870 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1871 	struct inet6_dev *idev;
1872 
1873 	/* In IPv6 pmtu discovery is not optional,
1874 	   so that RTAX_MTU lock cannot disable it.
1875 	   We still use this lock to block changes
1876 	   caused by addrconf/ndisc.
1877 	*/
1878 
1879 	idev = __in6_dev_get(arg->dev);
1880 	if (idev == NULL)
1881 		return 0;
1882 
1883 	/* For administrative MTU increase, there is no way to discover
1884 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1885 	   Since RFC 1981 doesn't include administrative MTU increase
1886 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1887 	 */
1888 	/*
1889 	   If new MTU is less than route PMTU, this new MTU will be the
1890 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1891 	   decreases; if new MTU is greater than route PMTU, and the
1892 	   old MTU is the lowest MTU in the path, update the route PMTU
1893 	   to reflect the increase. In this case if the other nodes' MTU
1894 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1895 	   PMTU discouvery.
1896 	 */
1897 	if (rt->rt6i_dev == arg->dev &&
1898 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1899 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1900 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1901 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1902 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1903 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1904 	return 0;
1905 }
1906 
1907 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1908 {
1909 	struct rt6_mtu_change_arg arg = {
1910 		.dev = dev,
1911 		.mtu = mtu,
1912 	};
1913 
1914 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1915 }
1916 
1917 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1918 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1919 	[RTA_OIF]               = { .type = NLA_U32 },
1920 	[RTA_IIF]		= { .type = NLA_U32 },
1921 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1922 	[RTA_METRICS]           = { .type = NLA_NESTED },
1923 };
1924 
1925 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1926 			      struct fib6_config *cfg)
1927 {
1928 	struct rtmsg *rtm;
1929 	struct nlattr *tb[RTA_MAX+1];
1930 	int err;
1931 
1932 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1933 	if (err < 0)
1934 		goto errout;
1935 
1936 	err = -EINVAL;
1937 	rtm = nlmsg_data(nlh);
1938 	memset(cfg, 0, sizeof(*cfg));
1939 
1940 	cfg->fc_table = rtm->rtm_table;
1941 	cfg->fc_dst_len = rtm->rtm_dst_len;
1942 	cfg->fc_src_len = rtm->rtm_src_len;
1943 	cfg->fc_flags = RTF_UP;
1944 	cfg->fc_protocol = rtm->rtm_protocol;
1945 
1946 	if (rtm->rtm_type == RTN_UNREACHABLE)
1947 		cfg->fc_flags |= RTF_REJECT;
1948 
1949 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1950 	cfg->fc_nlinfo.nlh = nlh;
1951 
1952 	if (tb[RTA_GATEWAY]) {
1953 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1954 		cfg->fc_flags |= RTF_GATEWAY;
1955 	}
1956 
1957 	if (tb[RTA_DST]) {
1958 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1959 
1960 		if (nla_len(tb[RTA_DST]) < plen)
1961 			goto errout;
1962 
1963 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1964 	}
1965 
1966 	if (tb[RTA_SRC]) {
1967 		int plen = (rtm->rtm_src_len + 7) >> 3;
1968 
1969 		if (nla_len(tb[RTA_SRC]) < plen)
1970 			goto errout;
1971 
1972 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1973 	}
1974 
1975 	if (tb[RTA_OIF])
1976 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1977 
1978 	if (tb[RTA_PRIORITY])
1979 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1980 
1981 	if (tb[RTA_METRICS]) {
1982 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1983 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1984 	}
1985 
1986 	if (tb[RTA_TABLE])
1987 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1988 
1989 	err = 0;
1990 errout:
1991 	return err;
1992 }
1993 
1994 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1995 {
1996 	struct fib6_config cfg;
1997 	int err;
1998 
1999 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2000 	if (err < 0)
2001 		return err;
2002 
2003 	return ip6_route_del(&cfg);
2004 }
2005 
2006 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007 {
2008 	struct fib6_config cfg;
2009 	int err;
2010 
2011 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2012 	if (err < 0)
2013 		return err;
2014 
2015 	return ip6_route_add(&cfg);
2016 }
2017 
2018 static inline size_t rt6_nlmsg_size(void)
2019 {
2020 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2021 	       + nla_total_size(16) /* RTA_SRC */
2022 	       + nla_total_size(16) /* RTA_DST */
2023 	       + nla_total_size(16) /* RTA_GATEWAY */
2024 	       + nla_total_size(16) /* RTA_PREFSRC */
2025 	       + nla_total_size(4) /* RTA_TABLE */
2026 	       + nla_total_size(4) /* RTA_IIF */
2027 	       + nla_total_size(4) /* RTA_OIF */
2028 	       + nla_total_size(4) /* RTA_PRIORITY */
2029 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2030 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2031 }
2032 
2033 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2034 			 struct in6_addr *dst, struct in6_addr *src,
2035 			 int iif, int type, u32 pid, u32 seq,
2036 			 int prefix, unsigned int flags)
2037 {
2038 	struct rtmsg *rtm;
2039 	struct nlmsghdr *nlh;
2040 	long expires;
2041 	u32 table;
2042 
2043 	if (prefix) {	/* user wants prefix routes only */
2044 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2045 			/* success since this is not a prefix route */
2046 			return 1;
2047 		}
2048 	}
2049 
2050 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2051 	if (nlh == NULL)
2052 		return -EMSGSIZE;
2053 
2054 	rtm = nlmsg_data(nlh);
2055 	rtm->rtm_family = AF_INET6;
2056 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2057 	rtm->rtm_src_len = rt->rt6i_src.plen;
2058 	rtm->rtm_tos = 0;
2059 	if (rt->rt6i_table)
2060 		table = rt->rt6i_table->tb6_id;
2061 	else
2062 		table = RT6_TABLE_UNSPEC;
2063 	rtm->rtm_table = table;
2064 	NLA_PUT_U32(skb, RTA_TABLE, table);
2065 	if (rt->rt6i_flags&RTF_REJECT)
2066 		rtm->rtm_type = RTN_UNREACHABLE;
2067 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2068 		rtm->rtm_type = RTN_LOCAL;
2069 	else
2070 		rtm->rtm_type = RTN_UNICAST;
2071 	rtm->rtm_flags = 0;
2072 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2073 	rtm->rtm_protocol = rt->rt6i_protocol;
2074 	if (rt->rt6i_flags&RTF_DYNAMIC)
2075 		rtm->rtm_protocol = RTPROT_REDIRECT;
2076 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2077 		rtm->rtm_protocol = RTPROT_KERNEL;
2078 	else if (rt->rt6i_flags&RTF_DEFAULT)
2079 		rtm->rtm_protocol = RTPROT_RA;
2080 
2081 	if (rt->rt6i_flags&RTF_CACHE)
2082 		rtm->rtm_flags |= RTM_F_CLONED;
2083 
2084 	if (dst) {
2085 		NLA_PUT(skb, RTA_DST, 16, dst);
2086 		rtm->rtm_dst_len = 128;
2087 	} else if (rtm->rtm_dst_len)
2088 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2089 #ifdef CONFIG_IPV6_SUBTREES
2090 	if (src) {
2091 		NLA_PUT(skb, RTA_SRC, 16, src);
2092 		rtm->rtm_src_len = 128;
2093 	} else if (rtm->rtm_src_len)
2094 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2095 #endif
2096 	if (iif)
2097 		NLA_PUT_U32(skb, RTA_IIF, iif);
2098 	else if (dst) {
2099 		struct in6_addr saddr_buf;
2100 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2101 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2102 	}
2103 
2104 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2105 		goto nla_put_failure;
2106 
2107 	if (rt->u.dst.neighbour)
2108 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2109 
2110 	if (rt->u.dst.dev)
2111 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2112 
2113 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2114 
2115 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2116 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2117 			       expires, rt->u.dst.error) < 0)
2118 		goto nla_put_failure;
2119 
2120 	return nlmsg_end(skb, nlh);
2121 
2122 nla_put_failure:
2123 	nlmsg_cancel(skb, nlh);
2124 	return -EMSGSIZE;
2125 }
2126 
2127 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2128 {
2129 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2130 	int prefix;
2131 
2132 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2133 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2134 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2135 	} else
2136 		prefix = 0;
2137 
2138 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2139 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2140 		     prefix, NLM_F_MULTI);
2141 }
2142 
2143 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2144 {
2145 	struct nlattr *tb[RTA_MAX+1];
2146 	struct rt6_info *rt;
2147 	struct sk_buff *skb;
2148 	struct rtmsg *rtm;
2149 	struct flowi fl;
2150 	int err, iif = 0;
2151 
2152 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2153 	if (err < 0)
2154 		goto errout;
2155 
2156 	err = -EINVAL;
2157 	memset(&fl, 0, sizeof(fl));
2158 
2159 	if (tb[RTA_SRC]) {
2160 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2161 			goto errout;
2162 
2163 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2164 	}
2165 
2166 	if (tb[RTA_DST]) {
2167 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2168 			goto errout;
2169 
2170 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2171 	}
2172 
2173 	if (tb[RTA_IIF])
2174 		iif = nla_get_u32(tb[RTA_IIF]);
2175 
2176 	if (tb[RTA_OIF])
2177 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2178 
2179 	if (iif) {
2180 		struct net_device *dev;
2181 		dev = __dev_get_by_index(iif);
2182 		if (!dev) {
2183 			err = -ENODEV;
2184 			goto errout;
2185 		}
2186 	}
2187 
2188 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2189 	if (skb == NULL) {
2190 		err = -ENOBUFS;
2191 		goto errout;
2192 	}
2193 
2194 	/* Reserve room for dummy headers, this skb can pass
2195 	   through good chunk of routing engine.
2196 	 */
2197 	skb->mac.raw = skb->data;
2198 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2199 
2200 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2201 	skb->dst = &rt->u.dst;
2202 
2203 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2204 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2205 			    nlh->nlmsg_seq, 0, 0);
2206 	if (err < 0) {
2207 		kfree_skb(skb);
2208 		goto errout;
2209 	}
2210 
2211 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2212 errout:
2213 	return err;
2214 }
2215 
2216 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2217 {
2218 	struct sk_buff *skb;
2219 	u32 pid = 0, seq = 0;
2220 	struct nlmsghdr *nlh = NULL;
2221 	int err = -ENOBUFS;
2222 
2223 	if (info) {
2224 		pid = info->pid;
2225 		nlh = info->nlh;
2226 		if (nlh)
2227 			seq = nlh->nlmsg_seq;
2228 	}
2229 
2230 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2231 	if (skb == NULL)
2232 		goto errout;
2233 
2234 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2235 	if (err < 0) {
2236 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2237 		WARN_ON(err == -EMSGSIZE);
2238 		kfree_skb(skb);
2239 		goto errout;
2240 	}
2241 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2242 errout:
2243 	if (err < 0)
2244 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2245 }
2246 
2247 /*
2248  *	/proc
2249  */
2250 
2251 #ifdef CONFIG_PROC_FS
2252 
/*
 * Record length used by the /proc route listing to convert a byte offset
 * into a number of records to skip (see rt6_info_route()).
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* byte offset requested by the reader */
	int	length;		/* number of bytes requested */
	int	skip;		/* records skipped so far */
	int	len;		/* bytes written to buffer so far */
};
2263 
2264 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2265 {
2266 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2267 
2268 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2269 		arg->skip++;
2270 		return 0;
2271 	}
2272 
2273 	if (arg->len >= arg->length)
2274 		return 0;
2275 
2276 	arg->len += sprintf(arg->buffer + arg->len,
2277 			    NIP6_SEQFMT " %02x ",
2278 			    NIP6(rt->rt6i_dst.addr),
2279 			    rt->rt6i_dst.plen);
2280 
2281 #ifdef CONFIG_IPV6_SUBTREES
2282 	arg->len += sprintf(arg->buffer + arg->len,
2283 			    NIP6_SEQFMT " %02x ",
2284 			    NIP6(rt->rt6i_src.addr),
2285 			    rt->rt6i_src.plen);
2286 #else
2287 	arg->len += sprintf(arg->buffer + arg->len,
2288 			    "00000000000000000000000000000000 00 ");
2289 #endif
2290 
2291 	if (rt->rt6i_nexthop) {
2292 		arg->len += sprintf(arg->buffer + arg->len,
2293 				    NIP6_SEQFMT,
2294 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2295 	} else {
2296 		arg->len += sprintf(arg->buffer + arg->len,
2297 				    "00000000000000000000000000000000");
2298 	}
2299 	arg->len += sprintf(arg->buffer + arg->len,
2300 			    " %08x %08x %08x %08x %8s\n",
2301 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2302 			    rt->u.dst.__use, rt->rt6i_flags,
2303 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2304 	return 0;
2305 }
2306 
2307 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2308 {
2309 	struct rt6_proc_arg arg = {
2310 		.buffer = buffer,
2311 		.offset = offset,
2312 		.length = length,
2313 	};
2314 
2315 	fib6_clean_all(rt6_info_route, 0, &arg);
2316 
2317 	*start = buffer;
2318 	if (offset)
2319 		*start += offset % RT6_INFO_LEN;
2320 
2321 	arg.len -= offset % RT6_INFO_LEN;
2322 
2323 	if (arg.len > length)
2324 		arg.len = length;
2325 	if (arg.len < 0)
2326 		arg.len = 0;
2327 
2328 	return arg.len;
2329 }
2330 
2331 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2332 {
2333 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2334 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2335 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2336 		      rt6_stats.fib_rt_cache,
2337 		      atomic_read(&ip6_dst_ops.entries),
2338 		      rt6_stats.fib_discarded_routes);
2339 
2340 	return 0;
2341 }
2342 
2343 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2344 {
2345 	return single_open(file, rt6_stats_seq_show, NULL);
2346 }
2347 
2348 static const struct file_operations rt6_stats_seq_fops = {
2349 	.owner	 = THIS_MODULE,
2350 	.open	 = rt6_stats_seq_open,
2351 	.read	 = seq_read,
2352 	.llseek	 = seq_lseek,
2353 	.release = single_release,
2354 };
2355 #endif	/* CONFIG_PROC_FS */
2356 
2357 #ifdef CONFIG_SYSCTL
2358 
2359 static int flush_delay;
2360 
2361 static
2362 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2363 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2364 {
2365 	if (write) {
2366 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2367 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2368 		return 0;
2369 	} else
2370 		return -EINVAL;
2371 }
2372 
2373 ctl_table ipv6_route_table[] = {
2374 	{
2375 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2376 		.procname	=	"flush",
2377 		.data		=	&flush_delay,
2378 		.maxlen		=	sizeof(int),
2379 		.mode		=	0200,
2380 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2381 	},
2382 	{
2383 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2384 		.procname	=	"gc_thresh",
2385 		.data		=	&ip6_dst_ops.gc_thresh,
2386 		.maxlen		=	sizeof(int),
2387 		.mode		=	0644,
2388 		.proc_handler	=	&proc_dointvec,
2389 	},
2390 	{
2391 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2392 		.procname	=	"max_size",
2393 		.data		=	&ip6_rt_max_size,
2394 		.maxlen		=	sizeof(int),
2395 		.mode		=	0644,
2396 		.proc_handler	=	&proc_dointvec,
2397 	},
2398 	{
2399 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 		.procname	=	"gc_min_interval",
2401 		.data		=	&ip6_rt_gc_min_interval,
2402 		.maxlen		=	sizeof(int),
2403 		.mode		=	0644,
2404 		.proc_handler	=	&proc_dointvec_jiffies,
2405 		.strategy	=	&sysctl_jiffies,
2406 	},
2407 	{
2408 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2409 		.procname	=	"gc_timeout",
2410 		.data		=	&ip6_rt_gc_timeout,
2411 		.maxlen		=	sizeof(int),
2412 		.mode		=	0644,
2413 		.proc_handler	=	&proc_dointvec_jiffies,
2414 		.strategy	=	&sysctl_jiffies,
2415 	},
2416 	{
2417 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2418 		.procname	=	"gc_interval",
2419 		.data		=	&ip6_rt_gc_interval,
2420 		.maxlen		=	sizeof(int),
2421 		.mode		=	0644,
2422 		.proc_handler	=	&proc_dointvec_jiffies,
2423 		.strategy	=	&sysctl_jiffies,
2424 	},
2425 	{
2426 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2427 		.procname	=	"gc_elasticity",
2428 		.data		=	&ip6_rt_gc_elasticity,
2429 		.maxlen		=	sizeof(int),
2430 		.mode		=	0644,
2431 		.proc_handler	=	&proc_dointvec_jiffies,
2432 		.strategy	=	&sysctl_jiffies,
2433 	},
2434 	{
2435 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2436 		.procname	=	"mtu_expires",
2437 		.data		=	&ip6_rt_mtu_expires,
2438 		.maxlen		=	sizeof(int),
2439 		.mode		=	0644,
2440 		.proc_handler	=	&proc_dointvec_jiffies,
2441 		.strategy	=	&sysctl_jiffies,
2442 	},
2443 	{
2444 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2445 		.procname	=	"min_adv_mss",
2446 		.data		=	&ip6_rt_min_advmss,
2447 		.maxlen		=	sizeof(int),
2448 		.mode		=	0644,
2449 		.proc_handler	=	&proc_dointvec_jiffies,
2450 		.strategy	=	&sysctl_jiffies,
2451 	},
2452 	{
2453 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 		.procname	=	"gc_min_interval_ms",
2455 		.data		=	&ip6_rt_gc_min_interval,
2456 		.maxlen		=	sizeof(int),
2457 		.mode		=	0644,
2458 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2459 		.strategy	=	&sysctl_ms_jiffies,
2460 	},
2461 	{ .ctl_name = 0 }
2462 };
2463 
2464 #endif
2465 
2466 void __init ip6_route_init(void)
2467 {
2468 	struct proc_dir_entry *p;
2469 
2470 	ip6_dst_ops.kmem_cachep =
2471 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2472 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2473 	fib6_init();
2474 #ifdef 	CONFIG_PROC_FS
2475 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2476 	if (p)
2477 		p->owner = THIS_MODULE;
2478 
2479 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2480 #endif
2481 #ifdef CONFIG_XFRM
2482 	xfrm6_init();
2483 #endif
2484 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2485 	fib6_rules_init();
2486 #endif
2487 }
2488 
2489 void ip6_route_cleanup(void)
2490 {
2491 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2492 	fib6_rules_cleanup();
2493 #endif
2494 #ifdef CONFIG_PROC_FS
2495 	proc_net_remove("ipv6_route");
2496 	proc_net_remove("rt6_stats");
2497 #endif
2498 #ifdef CONFIG_XFRM
2499 	xfrm6_fini();
2500 #endif
2501 	rt6_ifdown(NULL);
2502 	fib6_gc_cleanup();
2503 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2504 }
2505