xref: /linux/net/ipv6/route.c (revision f58c91ce82cbb55a48fbc1a0cb7c84c0d0a4e1bd)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/*
 * Result of the next-hop neighbour check used while scoring routes.
 * Negative values are failures: find_match() converts FAIL_SOFT into the
 * lowest valid score and asks for round-robin, and rejects the route on
 * any other negative value.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -2,
	RT6_NUD_FAIL_SOFT	= -1,
	RT6_NUD_SUCCEED		= 1
};
73 
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75 				    const struct in6_addr *dest);
76 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void		ip6_dst_destroy(struct dst_entry *);
81 static void		ip6_dst_ifdown(struct dst_entry *,
82 				       struct net_device *dev, int how);
83 static int		 ip6_dst_gc(struct dst_ops *ops);
84 
85 static int		ip6_pkt_discard(struct sk_buff *skb);
86 static int		ip6_pkt_discard_out(struct sk_buff *skb);
87 static void		ip6_link_failure(struct sk_buff *skb);
88 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
89 					   struct sk_buff *skb, u32 mtu);
90 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
91 					struct sk_buff *skb);
92 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
93 
94 #ifdef CONFIG_IPV6_ROUTE_INFO
95 static struct rt6_info *rt6_add_route_info(struct net *net,
96 					   const struct in6_addr *prefix, int prefixlen,
97 					   const struct in6_addr *gwaddr, int ifindex,
98 					   unsigned int pref);
99 static struct rt6_info *rt6_get_route_info(struct net *net,
100 					   const struct in6_addr *prefix, int prefixlen,
101 					   const struct in6_addr *gwaddr, int ifindex);
102 #endif
103 
104 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
105 {
106 	struct rt6_info *rt = (struct rt6_info *) dst;
107 	struct inet_peer *peer;
108 	u32 *p = NULL;
109 
110 	if (!(rt->dst.flags & DST_HOST))
111 		return NULL;
112 
113 	peer = rt6_get_peer_create(rt);
114 	if (peer) {
115 		u32 *old_p = __DST_METRICS_PTR(old);
116 		unsigned long prev, new;
117 
118 		p = peer->metrics;
119 		if (inet_metrics_new(peer))
120 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121 
122 		new = (unsigned long) p;
123 		prev = cmpxchg(&dst->_metrics, old, new);
124 
125 		if (prev != old) {
126 			p = __DST_METRICS_PTR(prev);
127 			if (prev & DST_METRICS_READ_ONLY)
128 				p = NULL;
129 		}
130 	}
131 	return p;
132 }
133 
134 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
135 					     struct sk_buff *skb,
136 					     const void *daddr)
137 {
138 	struct in6_addr *p = &rt->rt6i_gateway;
139 
140 	if (!ipv6_addr_any(p))
141 		return (const void *) p;
142 	else if (skb)
143 		return &ipv6_hdr(skb)->daddr;
144 	return daddr;
145 }
146 
147 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
148 					  struct sk_buff *skb,
149 					  const void *daddr)
150 {
151 	struct rt6_info *rt = (struct rt6_info *) dst;
152 	struct neighbour *n;
153 
154 	daddr = choose_neigh_daddr(rt, skb, daddr);
155 	n = __ipv6_neigh_lookup(dst->dev, daddr);
156 	if (n)
157 		return n;
158 	return neigh_create(&nd_tbl, daddr, dst->dev);
159 }
160 
161 static struct dst_ops ip6_dst_ops_template = {
162 	.family			=	AF_INET6,
163 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
164 	.gc			=	ip6_dst_gc,
165 	.gc_thresh		=	1024,
166 	.check			=	ip6_dst_check,
167 	.default_advmss		=	ip6_default_advmss,
168 	.mtu			=	ip6_mtu,
169 	.cow_metrics		=	ipv6_cow_metrics,
170 	.destroy		=	ip6_dst_destroy,
171 	.ifdown			=	ip6_dst_ifdown,
172 	.negative_advice	=	ip6_negative_advice,
173 	.link_failure		=	ip6_link_failure,
174 	.update_pmtu		=	ip6_rt_update_pmtu,
175 	.redirect		=	rt6_do_redirect,
176 	.local_out		=	__ip6_local_out,
177 	.neigh_lookup		=	ip6_neigh_lookup,
178 };
179 
180 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
181 {
182 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
183 
184 	return mtu ? : dst->dev->mtu;
185 }
186 
187 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
188 					 struct sk_buff *skb, u32 mtu)
189 {
190 }
191 
/* dst_ops->redirect for blackhole routes: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* Redirects are ignored on blackhole routes. */
}
196 
197 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
198 					 unsigned long old)
199 {
200 	return NULL;
201 }
202 
203 static struct dst_ops ip6_dst_blackhole_ops = {
204 	.family			=	AF_INET6,
205 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
206 	.destroy		=	ip6_dst_destroy,
207 	.check			=	ip6_dst_check,
208 	.mtu			=	ip6_blackhole_mtu,
209 	.default_advmss		=	ip6_default_advmss,
210 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
211 	.redirect		=	ip6_rt_blackhole_redirect,
212 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
213 	.neigh_lookup		=	ip6_neigh_lookup,
214 };
215 
216 static const u32 ip6_template_metrics[RTAX_MAX] = {
217 	[RTAX_HOPLIMIT - 1] = 0,
218 };
219 
220 static const struct rt6_info ip6_null_entry_template = {
221 	.dst = {
222 		.__refcnt	= ATOMIC_INIT(1),
223 		.__use		= 1,
224 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
225 		.error		= -ENETUNREACH,
226 		.input		= ip6_pkt_discard,
227 		.output		= ip6_pkt_discard_out,
228 	},
229 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
230 	.rt6i_protocol  = RTPROT_KERNEL,
231 	.rt6i_metric	= ~(u32) 0,
232 	.rt6i_ref	= ATOMIC_INIT(1),
233 };
234 
235 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
236 
237 static int ip6_pkt_prohibit(struct sk_buff *skb);
238 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
239 
240 static const struct rt6_info ip6_prohibit_entry_template = {
241 	.dst = {
242 		.__refcnt	= ATOMIC_INIT(1),
243 		.__use		= 1,
244 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
245 		.error		= -EACCES,
246 		.input		= ip6_pkt_prohibit,
247 		.output		= ip6_pkt_prohibit_out,
248 	},
249 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
250 	.rt6i_protocol  = RTPROT_KERNEL,
251 	.rt6i_metric	= ~(u32) 0,
252 	.rt6i_ref	= ATOMIC_INIT(1),
253 };
254 
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256 	.dst = {
257 		.__refcnt	= ATOMIC_INIT(1),
258 		.__use		= 1,
259 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
260 		.error		= -EINVAL,
261 		.input		= dst_discard,
262 		.output		= dst_discard,
263 	},
264 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
265 	.rt6i_protocol  = RTPROT_KERNEL,
266 	.rt6i_metric	= ~(u32) 0,
267 	.rt6i_ref	= ATOMIC_INIT(1),
268 };
269 
270 #endif
271 
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 					     struct net_device *dev,
275 					     int flags,
276 					     struct fib6_table *table)
277 {
278 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 					0, DST_OBSOLETE_FORCE_CHK, flags);
280 
281 	if (rt) {
282 		struct dst_entry *dst = &rt->dst;
283 
284 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 		rt->rt6i_genid = rt_genid_ipv6(net);
287 		INIT_LIST_HEAD(&rt->rt6i_siblings);
288 	}
289 	return rt;
290 }
291 
292 static void ip6_dst_destroy(struct dst_entry *dst)
293 {
294 	struct rt6_info *rt = (struct rt6_info *)dst;
295 	struct inet6_dev *idev = rt->rt6i_idev;
296 	struct dst_entry *from = dst->from;
297 
298 	if (!(rt->dst.flags & DST_HOST))
299 		dst_destroy_metrics_generic(dst);
300 
301 	if (idev) {
302 		rt->rt6i_idev = NULL;
303 		in6_dev_put(idev);
304 	}
305 
306 	dst->from = NULL;
307 	dst_release(from);
308 
309 	if (rt6_has_peer(rt)) {
310 		struct inet_peer *peer = rt6_peer_ptr(rt);
311 		inet_putpeer(peer);
312 	}
313 }
314 
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 	struct inet_peer_base *base;
318 	struct inet_peer *peer;
319 
320 	base = inetpeer_base_ptr(rt->_rt6i_peer);
321 	if (!base)
322 		return;
323 
324 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 	if (peer) {
326 		if (!rt6_set_peer(rt, peer))
327 			inet_putpeer(peer);
328 	}
329 }
330 
331 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
332 			   int how)
333 {
334 	struct rt6_info *rt = (struct rt6_info *)dst;
335 	struct inet6_dev *idev = rt->rt6i_idev;
336 	struct net_device *loopback_dev =
337 		dev_net(dev)->loopback_dev;
338 
339 	if (dev != loopback_dev) {
340 		if (idev && idev->dev == dev) {
341 			struct inet6_dev *loopback_idev =
342 				in6_dev_get(loopback_dev);
343 			if (loopback_idev) {
344 				rt->rt6i_idev = loopback_idev;
345 				in6_dev_put(idev);
346 			}
347 		}
348 	}
349 }
350 
351 static bool rt6_check_expired(const struct rt6_info *rt)
352 {
353 	if (rt->rt6i_flags & RTF_EXPIRES) {
354 		if (time_after(jiffies, rt->dst.expires))
355 			return true;
356 	} else if (rt->dst.from) {
357 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
358 	}
359 	return false;
360 }
361 
362 static bool rt6_need_strict(const struct in6_addr *daddr)
363 {
364 	return ipv6_addr_type(daddr) &
365 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
366 }
367 
368 /* Multipath route selection:
369  *   Hash based function using packet header and flowlabel.
370  * Adapted from fib_info_hashfn()
371  */
372 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
373 			       const struct flowi6 *fl6)
374 {
375 	unsigned int val = fl6->flowi6_proto;
376 
377 	val ^= ipv6_addr_hash(&fl6->daddr);
378 	val ^= ipv6_addr_hash(&fl6->saddr);
379 
380 	/* Work only if this not encapsulated */
381 	switch (fl6->flowi6_proto) {
382 	case IPPROTO_UDP:
383 	case IPPROTO_TCP:
384 	case IPPROTO_SCTP:
385 		val ^= (__force u16)fl6->fl6_sport;
386 		val ^= (__force u16)fl6->fl6_dport;
387 		break;
388 
389 	case IPPROTO_ICMPV6:
390 		val ^= (__force u16)fl6->fl6_icmp_type;
391 		val ^= (__force u16)fl6->fl6_icmp_code;
392 		break;
393 	}
394 	/* RFC6438 recommands to use flowlabel */
395 	val ^= (__force u32)fl6->flowlabel;
396 
397 	/* Perhaps, we need to tune, this function? */
398 	val = val ^ (val >> 7) ^ (val >> 12);
399 	return val % candidate_count;
400 }
401 
402 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
403 					     struct flowi6 *fl6, int oif,
404 					     int strict)
405 {
406 	struct rt6_info *sibling, *next_sibling;
407 	int route_choosen;
408 
409 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
410 	/* Don't change the route, if route_choosen == 0
411 	 * (siblings does not include ourself)
412 	 */
413 	if (route_choosen)
414 		list_for_each_entry_safe(sibling, next_sibling,
415 				&match->rt6i_siblings, rt6i_siblings) {
416 			route_choosen--;
417 			if (route_choosen == 0) {
418 				if (rt6_score_route(sibling, oif, strict) < 0)
419 					break;
420 				match = sibling;
421 				break;
422 			}
423 		}
424 	return match;
425 }
426 
427 /*
428  *	Route lookup. Any table->tb6_lock is implied.
429  */
430 
431 static inline struct rt6_info *rt6_device_match(struct net *net,
432 						    struct rt6_info *rt,
433 						    const struct in6_addr *saddr,
434 						    int oif,
435 						    int flags)
436 {
437 	struct rt6_info *local = NULL;
438 	struct rt6_info *sprt;
439 
440 	if (!oif && ipv6_addr_any(saddr))
441 		goto out;
442 
443 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
444 		struct net_device *dev = sprt->dst.dev;
445 
446 		if (oif) {
447 			if (dev->ifindex == oif)
448 				return sprt;
449 			if (dev->flags & IFF_LOOPBACK) {
450 				if (!sprt->rt6i_idev ||
451 				    sprt->rt6i_idev->dev->ifindex != oif) {
452 					if (flags & RT6_LOOKUP_F_IFACE && oif)
453 						continue;
454 					if (local && (!oif ||
455 						      local->rt6i_idev->dev->ifindex == oif))
456 						continue;
457 				}
458 				local = sprt;
459 			}
460 		} else {
461 			if (ipv6_chk_addr(net, saddr, dev,
462 					  flags & RT6_LOOKUP_F_IFACE))
463 				return sprt;
464 		}
465 	}
466 
467 	if (oif) {
468 		if (local)
469 			return local;
470 
471 		if (flags & RT6_LOOKUP_F_IFACE)
472 			return net->ipv6.ip6_null_entry;
473 	}
474 out:
475 	return rt;
476 }
477 
478 #ifdef CONFIG_IPV6_ROUTER_PREF
479 struct __rt6_probe_work {
480 	struct work_struct work;
481 	struct in6_addr target;
482 	struct net_device *dev;
483 };
484 
485 static void rt6_probe_deferred(struct work_struct *w)
486 {
487 	struct in6_addr mcaddr;
488 	struct __rt6_probe_work *work =
489 		container_of(w, struct __rt6_probe_work, work);
490 
491 	addrconf_addr_solict_mult(&work->target, &mcaddr);
492 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
493 	dev_put(work->dev);
494 	kfree(w);
495 }
496 
497 static void rt6_probe(struct rt6_info *rt)
498 {
499 	struct neighbour *neigh;
500 	/*
501 	 * Okay, this does not seem to be appropriate
502 	 * for now, however, we need to check if it
503 	 * is really so; aka Router Reachability Probing.
504 	 *
505 	 * Router Reachability Probe MUST be rate-limited
506 	 * to no more than one per minute.
507 	 */
508 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
509 		return;
510 	rcu_read_lock_bh();
511 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
512 	if (neigh) {
513 		write_lock(&neigh->lock);
514 		if (neigh->nud_state & NUD_VALID)
515 			goto out;
516 	}
517 
518 	if (!neigh ||
519 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
520 		struct __rt6_probe_work *work;
521 
522 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
523 
524 		if (neigh && work)
525 			neigh->updated = jiffies;
526 
527 		if (neigh)
528 			write_unlock(&neigh->lock);
529 
530 		if (work) {
531 			INIT_WORK(&work->work, rt6_probe_deferred);
532 			work->target = rt->rt6i_gateway;
533 			dev_hold(rt->dst.dev);
534 			work->dev = rt->dst.dev;
535 			schedule_work(&work->work);
536 		}
537 	} else {
538 out:
539 		write_unlock(&neigh->lock);
540 	}
541 	rcu_read_unlock_bh();
542 }
543 #else
/* No-op variant used when CONFIG_IPV6_ROUTER_PREF is not set. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to probe */
}
547 #endif
548 
549 /*
550  * Default Router Selection (RFC 2461 6.3.6)
551  */
552 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
553 {
554 	struct net_device *dev = rt->dst.dev;
555 	if (!oif || dev->ifindex == oif)
556 		return 2;
557 	if ((dev->flags & IFF_LOOPBACK) &&
558 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
559 		return 1;
560 	return 0;
561 }
562 
563 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
564 {
565 	struct neighbour *neigh;
566 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
567 
568 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
569 	    !(rt->rt6i_flags & RTF_GATEWAY))
570 		return RT6_NUD_SUCCEED;
571 
572 	rcu_read_lock_bh();
573 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
574 	if (neigh) {
575 		read_lock(&neigh->lock);
576 		if (neigh->nud_state & NUD_VALID)
577 			ret = RT6_NUD_SUCCEED;
578 #ifdef CONFIG_IPV6_ROUTER_PREF
579 		else if (!(neigh->nud_state & NUD_FAILED))
580 			ret = RT6_NUD_SUCCEED;
581 #endif
582 		read_unlock(&neigh->lock);
583 	} else {
584 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
585 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
586 	}
587 	rcu_read_unlock_bh();
588 
589 	return ret;
590 }
591 
592 static int rt6_score_route(struct rt6_info *rt, int oif,
593 			   int strict)
594 {
595 	int m;
596 
597 	m = rt6_check_dev(rt, oif);
598 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
599 		return RT6_NUD_FAIL_HARD;
600 #ifdef CONFIG_IPV6_ROUTER_PREF
601 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
602 #endif
603 	if (strict & RT6_LOOKUP_F_REACHABLE) {
604 		int n = rt6_check_neigh(rt);
605 		if (n < 0)
606 			return n;
607 	}
608 	return m;
609 }
610 
611 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
612 				   int *mpri, struct rt6_info *match,
613 				   bool *do_rr)
614 {
615 	int m;
616 	bool match_do_rr = false;
617 
618 	if (rt6_check_expired(rt))
619 		goto out;
620 
621 	m = rt6_score_route(rt, oif, strict);
622 	if (m == RT6_NUD_FAIL_SOFT) {
623 		match_do_rr = true;
624 		m = 0; /* lowest valid score */
625 	} else if (m < 0) {
626 		goto out;
627 	}
628 
629 	if (strict & RT6_LOOKUP_F_REACHABLE)
630 		rt6_probe(rt);
631 
632 	if (m > *mpri) {
633 		*do_rr = match_do_rr;
634 		*mpri = m;
635 		match = rt;
636 	}
637 out:
638 	return match;
639 }
640 
641 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
642 				     struct rt6_info *rr_head,
643 				     u32 metric, int oif, int strict,
644 				     bool *do_rr)
645 {
646 	struct rt6_info *rt, *match;
647 	int mpri = -1;
648 
649 	match = NULL;
650 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
651 	     rt = rt->dst.rt6_next)
652 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
653 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
654 	     rt = rt->dst.rt6_next)
655 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
656 
657 	return match;
658 }
659 
660 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
661 {
662 	struct rt6_info *match, *rt0;
663 	struct net *net;
664 	bool do_rr = false;
665 
666 	rt0 = fn->rr_ptr;
667 	if (!rt0)
668 		fn->rr_ptr = rt0 = fn->leaf;
669 
670 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
671 			     &do_rr);
672 
673 	if (do_rr) {
674 		struct rt6_info *next = rt0->dst.rt6_next;
675 
676 		/* no entries matched; do round-robin */
677 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
678 			next = fn->leaf;
679 
680 		if (next != rt0)
681 			fn->rr_ptr = next;
682 	}
683 
684 	net = dev_net(rt0->dst.dev);
685 	return match ? match : net->ipv6.ip6_null_entry;
686 }
687 
688 #ifdef CONFIG_IPV6_ROUTE_INFO
689 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
690 		  const struct in6_addr *gwaddr)
691 {
692 	struct net *net = dev_net(dev);
693 	struct route_info *rinfo = (struct route_info *) opt;
694 	struct in6_addr prefix_buf, *prefix;
695 	unsigned int pref;
696 	unsigned long lifetime;
697 	struct rt6_info *rt;
698 
699 	if (len < sizeof(struct route_info)) {
700 		return -EINVAL;
701 	}
702 
703 	/* Sanity check for prefix_len and length */
704 	if (rinfo->length > 3) {
705 		return -EINVAL;
706 	} else if (rinfo->prefix_len > 128) {
707 		return -EINVAL;
708 	} else if (rinfo->prefix_len > 64) {
709 		if (rinfo->length < 2) {
710 			return -EINVAL;
711 		}
712 	} else if (rinfo->prefix_len > 0) {
713 		if (rinfo->length < 1) {
714 			return -EINVAL;
715 		}
716 	}
717 
718 	pref = rinfo->route_pref;
719 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
720 		return -EINVAL;
721 
722 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
723 
724 	if (rinfo->length == 3)
725 		prefix = (struct in6_addr *)rinfo->prefix;
726 	else {
727 		/* this function is safe */
728 		ipv6_addr_prefix(&prefix_buf,
729 				 (struct in6_addr *)rinfo->prefix,
730 				 rinfo->prefix_len);
731 		prefix = &prefix_buf;
732 	}
733 
734 	if (rinfo->prefix_len == 0)
735 		rt = rt6_get_dflt_router(gwaddr, dev);
736 	else
737 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
738 					gwaddr, dev->ifindex);
739 
740 	if (rt && !lifetime) {
741 		ip6_del_rt(rt);
742 		rt = NULL;
743 	}
744 
745 	if (!rt && lifetime)
746 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
747 					pref);
748 	else if (rt)
749 		rt->rt6i_flags = RTF_ROUTEINFO |
750 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
751 
752 	if (rt) {
753 		if (!addrconf_finite_timeout(lifetime))
754 			rt6_clean_expires(rt);
755 		else
756 			rt6_set_expires(rt, jiffies + HZ * lifetime);
757 
758 		ip6_rt_put(rt);
759 	}
760 	return 0;
761 }
762 #endif
763 
/*
 * BACKTRACK() - climb the fib6 tree after a lookup produced the null entry.
 *
 * Expects the caller to have local variables "rt" and "fn" and labels
 * "out" and "restart".  While "rt" is the namespace's null entry, "fn" is
 * moved to its parent, or to the result of a source lookup in the parent's
 * subtree when that subtree exists and is not the node just left.  Jumps
 * to "restart" at the first node carrying RTN_RTINFO and to "out" when the
 * top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
781 
782 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
783 					     struct fib6_table *table,
784 					     struct flowi6 *fl6, int flags)
785 {
786 	struct fib6_node *fn;
787 	struct rt6_info *rt;
788 
789 	read_lock_bh(&table->tb6_lock);
790 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
791 restart:
792 	rt = fn->leaf;
793 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
794 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
795 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
796 	BACKTRACK(net, &fl6->saddr);
797 out:
798 	dst_use(&rt->dst, jiffies);
799 	read_unlock_bh(&table->tb6_lock);
800 	return rt;
801 
802 }
803 
804 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
805 				    int flags)
806 {
807 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
808 }
809 EXPORT_SYMBOL_GPL(ip6_route_lookup);
810 
811 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
812 			    const struct in6_addr *saddr, int oif, int strict)
813 {
814 	struct flowi6 fl6 = {
815 		.flowi6_oif = oif,
816 		.daddr = *daddr,
817 	};
818 	struct dst_entry *dst;
819 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
820 
821 	if (saddr) {
822 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
823 		flags |= RT6_LOOKUP_F_HAS_SADDR;
824 	}
825 
826 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
827 	if (dst->error == 0)
828 		return (struct rt6_info *) dst;
829 
830 	dst_release(dst);
831 
832 	return NULL;
833 }
834 
835 EXPORT_SYMBOL(rt6_lookup);
836 
837 /* ip6_ins_rt is called with table->tb6_lock NOT held (the lock is
838    taken here).  It takes a new route entry; if the addition fails for
839    any reason, the route is freed.  In any case, if the caller does not
840    hold a reference on the route, it may be destroyed.
841  */
842 
843 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
844 {
845 	int err;
846 	struct fib6_table *table;
847 
848 	table = rt->rt6i_table;
849 	write_lock_bh(&table->tb6_lock);
850 	err = fib6_add(&table->tb6_root, rt, info);
851 	write_unlock_bh(&table->tb6_lock);
852 
853 	return err;
854 }
855 
856 int ip6_ins_rt(struct rt6_info *rt)
857 {
858 	struct nl_info info = {
859 		.nl_net = dev_net(rt->dst.dev),
860 	};
861 	return __ip6_ins_rt(rt, &info);
862 }
863 
864 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
865 				      const struct in6_addr *daddr,
866 				      const struct in6_addr *saddr)
867 {
868 	struct rt6_info *rt;
869 
870 	/*
871 	 *	Clone the route.
872 	 */
873 
874 	rt = ip6_rt_copy(ort, daddr);
875 
876 	if (rt) {
877 		if (ort->rt6i_dst.plen != 128 &&
878 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
879 			rt->rt6i_flags |= RTF_ANYCAST;
880 
881 		rt->rt6i_flags |= RTF_CACHE;
882 
883 #ifdef CONFIG_IPV6_SUBTREES
884 		if (rt->rt6i_src.plen && saddr) {
885 			rt->rt6i_src.addr = *saddr;
886 			rt->rt6i_src.plen = 128;
887 		}
888 #endif
889 	}
890 
891 	return rt;
892 }
893 
894 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
895 					const struct in6_addr *daddr)
896 {
897 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
898 
899 	if (rt)
900 		rt->rt6i_flags |= RTF_CACHE;
901 	return rt;
902 }
903 
904 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
905 				      struct flowi6 *fl6, int flags)
906 {
907 	struct fib6_node *fn;
908 	struct rt6_info *rt, *nrt;
909 	int strict = 0;
910 	int attempts = 3;
911 	int err;
912 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
913 
914 	strict |= flags & RT6_LOOKUP_F_IFACE;
915 
916 relookup:
917 	read_lock_bh(&table->tb6_lock);
918 
919 restart_2:
920 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
921 
922 restart:
923 	rt = rt6_select(fn, oif, strict | reachable);
924 	if (rt->rt6i_nsiblings)
925 		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
926 	BACKTRACK(net, &fl6->saddr);
927 	if (rt == net->ipv6.ip6_null_entry ||
928 	    rt->rt6i_flags & RTF_CACHE)
929 		goto out;
930 
931 	dst_hold(&rt->dst);
932 	read_unlock_bh(&table->tb6_lock);
933 
934 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
935 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
936 	else if (!(rt->dst.flags & DST_HOST))
937 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
938 	else
939 		goto out2;
940 
941 	ip6_rt_put(rt);
942 	rt = nrt ? : net->ipv6.ip6_null_entry;
943 
944 	dst_hold(&rt->dst);
945 	if (nrt) {
946 		err = ip6_ins_rt(nrt);
947 		if (!err)
948 			goto out2;
949 	}
950 
951 	if (--attempts <= 0)
952 		goto out2;
953 
954 	/*
955 	 * Race condition! In the gap, when table->tb6_lock was
956 	 * released someone could insert this route.  Relookup.
957 	 */
958 	ip6_rt_put(rt);
959 	goto relookup;
960 
961 out:
962 	if (reachable) {
963 		reachable = 0;
964 		goto restart_2;
965 	}
966 	dst_hold(&rt->dst);
967 	read_unlock_bh(&table->tb6_lock);
968 out2:
969 	rt->dst.lastuse = jiffies;
970 	rt->dst.__use++;
971 
972 	return rt;
973 }
974 
975 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
976 					    struct flowi6 *fl6, int flags)
977 {
978 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
979 }
980 
981 static struct dst_entry *ip6_route_input_lookup(struct net *net,
982 						struct net_device *dev,
983 						struct flowi6 *fl6, int flags)
984 {
985 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
986 		flags |= RT6_LOOKUP_F_IFACE;
987 
988 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
989 }
990 
991 void ip6_route_input(struct sk_buff *skb)
992 {
993 	const struct ipv6hdr *iph = ipv6_hdr(skb);
994 	struct net *net = dev_net(skb->dev);
995 	int flags = RT6_LOOKUP_F_HAS_SADDR;
996 	struct flowi6 fl6 = {
997 		.flowi6_iif = skb->dev->ifindex,
998 		.daddr = iph->daddr,
999 		.saddr = iph->saddr,
1000 		.flowlabel = ip6_flowinfo(iph),
1001 		.flowi6_mark = skb->mark,
1002 		.flowi6_proto = iph->nexthdr,
1003 	};
1004 
1005 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1006 }
1007 
1008 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1009 					     struct flowi6 *fl6, int flags)
1010 {
1011 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1012 }
1013 
1014 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1015 				    struct flowi6 *fl6)
1016 {
1017 	int flags = 0;
1018 
1019 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1020 
1021 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1022 		flags |= RT6_LOOKUP_F_IFACE;
1023 
1024 	if (!ipv6_addr_any(&fl6->saddr))
1025 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1026 	else if (sk)
1027 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1028 
1029 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1030 }
1031 
1032 EXPORT_SYMBOL(ip6_route_output);
1033 
1034 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1035 {
1036 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1037 	struct dst_entry *new = NULL;
1038 
1039 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1040 	if (rt) {
1041 		new = &rt->dst;
1042 
1043 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1044 		rt6_init_peer(rt, net->ipv6.peers);
1045 
1046 		new->__use = 1;
1047 		new->input = dst_discard;
1048 		new->output = dst_discard;
1049 
1050 		if (dst_metrics_read_only(&ort->dst))
1051 			new->_metrics = ort->dst._metrics;
1052 		else
1053 			dst_copy_metrics(new, &ort->dst);
1054 		rt->rt6i_idev = ort->rt6i_idev;
1055 		if (rt->rt6i_idev)
1056 			in6_dev_hold(rt->rt6i_idev);
1057 
1058 		rt->rt6i_gateway = ort->rt6i_gateway;
1059 		rt->rt6i_flags = ort->rt6i_flags;
1060 		rt->rt6i_metric = 0;
1061 
1062 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1063 #ifdef CONFIG_IPV6_SUBTREES
1064 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1065 #endif
1066 
1067 		dst_free(new);
1068 	}
1069 
1070 	dst_release(dst_orig);
1071 	return new ? new : ERR_PTR(-ENOMEM);
1072 }
1073 
1074 /*
1075  *	Destination cache support functions
1076  */
1077 
1078 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1079 {
1080 	struct rt6_info *rt;
1081 
1082 	rt = (struct rt6_info *) dst;
1083 
1084 	/* All IPV6 dsts are created with ->obsolete set to the value
1085 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1086 	 * into this function always.
1087 	 */
1088 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1089 		return NULL;
1090 
1091 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1092 		return NULL;
1093 
1094 	if (rt6_check_expired(rt))
1095 		return NULL;
1096 
1097 	return dst;
1098 }
1099 
1100 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1101 {
1102 	struct rt6_info *rt = (struct rt6_info *) dst;
1103 
1104 	if (rt) {
1105 		if (rt->rt6i_flags & RTF_CACHE) {
1106 			if (rt6_check_expired(rt)) {
1107 				ip6_del_rt(rt);
1108 				dst = NULL;
1109 			}
1110 		} else {
1111 			dst_release(dst);
1112 			dst = NULL;
1113 		}
1114 	}
1115 	return dst;
1116 }
1117 
1118 static void ip6_link_failure(struct sk_buff *skb)
1119 {
1120 	struct rt6_info *rt;
1121 
1122 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1123 
1124 	rt = (struct rt6_info *) skb_dst(skb);
1125 	if (rt) {
1126 		if (rt->rt6i_flags & RTF_CACHE) {
1127 			dst_hold(&rt->dst);
1128 			if (ip6_del_rt(rt))
1129 				dst_free(&rt->dst);
1130 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1131 			rt->rt6i_node->fn_sernum = -1;
1132 		}
1133 	}
1134 }
1135 
1136 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1137 			       struct sk_buff *skb, u32 mtu)
1138 {
1139 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1140 
1141 	dst_confirm(dst);
1142 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1143 		struct net *net = dev_net(dst->dev);
1144 
1145 		rt6->rt6i_flags |= RTF_MODIFIED;
1146 		if (mtu < IPV6_MIN_MTU) {
1147 			u32 features = dst_metric(dst, RTAX_FEATURES);
1148 			mtu = IPV6_MIN_MTU;
1149 			features |= RTAX_FEATURE_ALLFRAG;
1150 			dst_metric_set(dst, RTAX_FEATURES, features);
1151 		}
1152 		dst_metric_set(dst, RTAX_MTU, mtu);
1153 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1154 	}
1155 }
1156 
1157 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1158 		     int oif, u32 mark)
1159 {
1160 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1161 	struct dst_entry *dst;
1162 	struct flowi6 fl6;
1163 
1164 	memset(&fl6, 0, sizeof(fl6));
1165 	fl6.flowi6_oif = oif;
1166 	fl6.flowi6_mark = mark;
1167 	fl6.daddr = iph->daddr;
1168 	fl6.saddr = iph->saddr;
1169 	fl6.flowlabel = ip6_flowinfo(iph);
1170 
1171 	dst = ip6_route_output(net, NULL, &fl6);
1172 	if (!dst->error)
1173 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1174 	dst_release(dst);
1175 }
1176 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1177 
1178 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1179 {
1180 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1181 			sk->sk_bound_dev_if, sk->sk_mark);
1182 }
1183 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1184 
1185 /* Handle redirects */
/*
 * Flow key extended with the address of the router that sent a redirect.
 *
 * ip6_route_redirect() hands &rdfl.fl6 to fib6_rule_lookup() and
 * __ip6_route_redirect() casts that flowi6 pointer back to
 * struct ip6rd_flowi, so @fl6 has to remain the first member.
 */
1186 struct ip6rd_flowi {
	/* regular lookup key; must stay first (see above) */
1187 	struct flowi6 fl6;
	/* address compared against rt6i_gateway of candidate routes */
1188 	struct in6_addr gateway;
1189 };
1190 
1191 static struct rt6_info *__ip6_route_redirect(struct net *net,
1192 					     struct fib6_table *table,
1193 					     struct flowi6 *fl6,
1194 					     int flags)
1195 {
1196 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1197 	struct rt6_info *rt;
1198 	struct fib6_node *fn;
1199 
1200 	/* Get the "current" route for this destination and
1201 	 * check if the redirect has come from approriate router.
1202 	 *
1203 	 * RFC 4861 specifies that redirects should only be
1204 	 * accepted if they come from the nexthop to the target.
1205 	 * Due to the way the routes are chosen, this notion
1206 	 * is a bit fuzzy and one might need to check all possible
1207 	 * routes.
1208 	 */
1209 
1210 	read_lock_bh(&table->tb6_lock);
1211 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1212 restart:
1213 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1214 		if (rt6_check_expired(rt))
1215 			continue;
1216 		if (rt->dst.error)
1217 			break;
1218 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1219 			continue;
1220 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1221 			continue;
1222 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1223 			continue;
1224 		break;
1225 	}
1226 
1227 	if (!rt)
1228 		rt = net->ipv6.ip6_null_entry;
1229 	else if (rt->dst.error) {
1230 		rt = net->ipv6.ip6_null_entry;
1231 		goto out;
1232 	}
1233 	BACKTRACK(net, &fl6->saddr);
1234 out:
1235 	dst_hold(&rt->dst);
1236 
1237 	read_unlock_bh(&table->tb6_lock);
1238 
1239 	return rt;
1240 };
1241 
1242 static struct dst_entry *ip6_route_redirect(struct net *net,
1243 					const struct flowi6 *fl6,
1244 					const struct in6_addr *gateway)
1245 {
1246 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1247 	struct ip6rd_flowi rdfl;
1248 
1249 	rdfl.fl6 = *fl6;
1250 	rdfl.gateway = *gateway;
1251 
1252 	return fib6_rule_lookup(net, &rdfl.fl6,
1253 				flags, __ip6_route_redirect);
1254 }
1255 
1256 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1257 {
1258 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1259 	struct dst_entry *dst;
1260 	struct flowi6 fl6;
1261 
1262 	memset(&fl6, 0, sizeof(fl6));
1263 	fl6.flowi6_oif = oif;
1264 	fl6.flowi6_mark = mark;
1265 	fl6.daddr = iph->daddr;
1266 	fl6.saddr = iph->saddr;
1267 	fl6.flowlabel = ip6_flowinfo(iph);
1268 
1269 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1270 	rt6_do_redirect(dst, NULL, skb);
1271 	dst_release(dst);
1272 }
1273 EXPORT_SYMBOL_GPL(ip6_redirect);
1274 
1275 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1276 			    u32 mark)
1277 {
1278 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1279 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1280 	struct dst_entry *dst;
1281 	struct flowi6 fl6;
1282 
1283 	memset(&fl6, 0, sizeof(fl6));
1284 	fl6.flowi6_oif = oif;
1285 	fl6.flowi6_mark = mark;
1286 	fl6.daddr = msg->dest;
1287 	fl6.saddr = iph->daddr;
1288 
1289 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1290 	rt6_do_redirect(dst, NULL, skb);
1291 	dst_release(dst);
1292 }
1293 
1294 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1295 {
1296 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1297 }
1298 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1299 
1300 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1301 {
1302 	struct net_device *dev = dst->dev;
1303 	unsigned int mtu = dst_mtu(dst);
1304 	struct net *net = dev_net(dev);
1305 
1306 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1307 
1308 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1309 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1310 
1311 	/*
1312 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1313 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1314 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1315 	 * rely only on pmtu discovery"
1316 	 */
1317 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1318 		mtu = IPV6_MAXPLEN;
1319 	return mtu;
1320 }
1321 
1322 static unsigned int ip6_mtu(const struct dst_entry *dst)
1323 {
1324 	struct inet6_dev *idev;
1325 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1326 
1327 	if (mtu)
1328 		return mtu;
1329 
1330 	mtu = IPV6_MIN_MTU;
1331 
1332 	rcu_read_lock();
1333 	idev = __in6_dev_get(dst->dev);
1334 	if (idev)
1335 		mtu = idev->cnf.mtu6;
1336 	rcu_read_unlock();
1337 
1338 	return mtu;
1339 }
1340 
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  Entries are unlinked and freed by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
1341 static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with the _bh variants. */
1342 static DEFINE_SPINLOCK(icmp6_dst_lock);
1343 
1344 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1345 				  struct flowi6 *fl6)
1346 {
1347 	struct dst_entry *dst;
1348 	struct rt6_info *rt;
1349 	struct inet6_dev *idev = in6_dev_get(dev);
1350 	struct net *net = dev_net(dev);
1351 
1352 	if (unlikely(!idev))
1353 		return ERR_PTR(-ENODEV);
1354 
1355 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1356 	if (unlikely(!rt)) {
1357 		in6_dev_put(idev);
1358 		dst = ERR_PTR(-ENOMEM);
1359 		goto out;
1360 	}
1361 
1362 	rt->dst.flags |= DST_HOST;
1363 	rt->dst.output  = ip6_output;
1364 	atomic_set(&rt->dst.__refcnt, 1);
1365 	rt->rt6i_gateway  = fl6->daddr;
1366 	rt->rt6i_dst.addr = fl6->daddr;
1367 	rt->rt6i_dst.plen = 128;
1368 	rt->rt6i_idev     = idev;
1369 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1370 
1371 	spin_lock_bh(&icmp6_dst_lock);
1372 	rt->dst.next = icmp6_dst_gc_list;
1373 	icmp6_dst_gc_list = &rt->dst;
1374 	spin_unlock_bh(&icmp6_dst_lock);
1375 
1376 	fib6_force_start_gc(net);
1377 
1378 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1379 
1380 out:
1381 	return dst;
1382 }
1383 
1384 int icmp6_dst_gc(void)
1385 {
1386 	struct dst_entry *dst, **pprev;
1387 	int more = 0;
1388 
1389 	spin_lock_bh(&icmp6_dst_lock);
1390 	pprev = &icmp6_dst_gc_list;
1391 
1392 	while ((dst = *pprev) != NULL) {
1393 		if (!atomic_read(&dst->__refcnt)) {
1394 			*pprev = dst->next;
1395 			dst_free(dst);
1396 		} else {
1397 			pprev = &dst->next;
1398 			++more;
1399 		}
1400 	}
1401 
1402 	spin_unlock_bh(&icmp6_dst_lock);
1403 
1404 	return more;
1405 }
1406 
1407 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1408 			    void *arg)
1409 {
1410 	struct dst_entry *dst, **pprev;
1411 
1412 	spin_lock_bh(&icmp6_dst_lock);
1413 	pprev = &icmp6_dst_gc_list;
1414 	while ((dst = *pprev) != NULL) {
1415 		struct rt6_info *rt = (struct rt6_info *) dst;
1416 		if (func(rt, arg)) {
1417 			*pprev = dst->next;
1418 			dst_free(dst);
1419 		} else {
1420 			pprev = &dst->next;
1421 		}
1422 	}
1423 	spin_unlock_bh(&icmp6_dst_lock);
1424 }
1425 
1426 static int ip6_dst_gc(struct dst_ops *ops)
1427 {
1428 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1429 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1430 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1431 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1432 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1433 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1434 	int entries;
1435 
1436 	entries = dst_entries_get_fast(ops);
1437 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1438 	    entries <= rt_max_size)
1439 		goto out;
1440 
1441 	net->ipv6.ip6_rt_gc_expire++;
1442 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1443 	entries = dst_entries_get_slow(ops);
1444 	if (entries < ops->gc_thresh)
1445 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1446 out:
1447 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1448 	return entries > rt_max_size;
1449 }
1450 
1451 /*
1452  *
1453  */
1454 
/*
 * Build a route from @cfg and insert it into the FIB.
 *
 * Side effects on @cfg: fc_metric defaults to IP6_RT_PRIO_USER when zero,
 * fc_protocol defaults to RTPROT_BOOT when unspecified, and
 * fc_nlinfo.nl_net is overwritten with the namespace of the final device.
 *
 * On the install path the device and inet6_dev references acquired here
 * are stored in the new route and the result of __ip6_ins_rt() is
 * returned.  On every error path (label "out") those references are
 * dropped, the partially built route is freed and a negative errno is
 * returned.
 */
1455 int ip6_route_add(struct fib6_config *cfg)
1456 {
1457 	int err;
1458 	struct net *net = cfg->fc_nlinfo.nl_net;
1459 	struct rt6_info *rt = NULL;
1460 	struct net_device *dev = NULL;
1461 	struct inet6_dev *idev = NULL;
1462 	struct fib6_table *table;
1463 	int addr_type;
1464 
	/* Prefix lengths are bit counts within a 128-bit address. */
1465 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1466 		return -EINVAL;
1467 #ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
1468 	if (cfg->fc_src_len)
1469 		return -EINVAL;
1470 #endif
	/* An explicit interface pins both dev and idev (references held). */
1471 	if (cfg->fc_ifindex) {
1472 		err = -ENODEV;
1473 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1474 		if (!dev)
1475 			goto out;
1476 		idev = in6_dev_get(dev);
1477 		if (!idev)
1478 			goto out;
1479 	}
1480 
1481 	if (cfg->fc_metric == 0)
1482 		cfg->fc_metric = IP6_RT_PRIO_USER;
1483 
	/*
	 * Netlink requests without NLM_F_CREATE first try an existing
	 * table; a missing table is still created, but with a warning.
	 */
1484 	err = -ENOBUFS;
1485 	if (cfg->fc_nlinfo.nlh &&
1486 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1487 		table = fib6_get_table(net, cfg->fc_table);
1488 		if (!table) {
1489 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1490 			table = fib6_new_table(net, cfg->fc_table);
1491 		}
1492 	} else {
1493 		table = fib6_new_table(net, cfg->fc_table);
1494 	}
1495 
1496 	if (!table)
1497 		goto out;
1498 
1499 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1500 
1501 	if (!rt) {
1502 		err = -ENOMEM;
1503 		goto out;
1504 	}
1505 
1506 	if (cfg->fc_flags & RTF_EXPIRES)
1507 		rt6_set_expires(rt, jiffies +
1508 				clock_t_to_jiffies(cfg->fc_expires));
1509 	else
1510 		rt6_clean_expires(rt);
1511 
1512 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1513 		cfg->fc_protocol = RTPROT_BOOT;
1514 	rt->rt6i_protocol = cfg->fc_protocol;
1515 
1516 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1517 
	/* Pick the input handler from the kind of destination. */
1518 	if (addr_type & IPV6_ADDR_MULTICAST)
1519 		rt->dst.input = ip6_mc_input;
1520 	else if (cfg->fc_flags & RTF_LOCAL)
1521 		rt->dst.input = ip6_input;
1522 	else
1523 		rt->dst.input = ip6_forward;
1524 
1525 	rt->dst.output = ip6_output;
1526 
1527 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1528 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1529 	if (rt->rt6i_dst.plen == 128)
1530 	       rt->dst.flags |= DST_HOST;
1531 
	/* Non-host routes with user metrics get their own metrics array. */
1532 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1533 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1534 		if (!metrics) {
1535 			err = -ENOMEM;
1536 			goto out;
1537 		}
1538 		dst_init_metrics(&rt->dst, metrics, 0);
1539 	}
1540 #ifdef CONFIG_IPV6_SUBTREES
1541 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1542 	rt->rt6i_src.plen = cfg->fc_src_len;
1543 #endif
1544 
1545 	rt->rt6i_metric = cfg->fc_metric;
1546 
1547 	/* We cannot add true routes via loopback here,
1548 	   they would result in kernel looping; promote them to reject routes
1549 	 */
1550 	if ((cfg->fc_flags & RTF_REJECT) ||
1551 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1552 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1553 	     !(cfg->fc_flags & RTF_LOCAL))) {
1554 		/* hold loopback dev/idev if we haven't done so. */
1555 		if (dev != net->loopback_dev) {
1556 			if (dev) {
1557 				dev_put(dev);
1558 				in6_dev_put(idev);
1559 			}
1560 			dev = net->loopback_dev;
1561 			dev_hold(dev);
1562 			idev = in6_dev_get(dev);
1563 			if (!idev) {
1564 				err = -ENODEV;
1565 				goto out;
1566 			}
1567 		}
1568 		rt->dst.output = ip6_pkt_discard_out;
1569 		rt->dst.input = ip6_pkt_discard;
1570 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error stored in the dst. */
1571 		switch (cfg->fc_type) {
1572 		case RTN_BLACKHOLE:
1573 			rt->dst.error = -EINVAL;
1574 			break;
1575 		case RTN_PROHIBIT:
1576 			rt->dst.error = -EACCES;
1577 			break;
1578 		case RTN_THROW:
1579 			rt->dst.error = -EAGAIN;
1580 			break;
1581 		default:
1582 			rt->dst.error = -ENETUNREACH;
1583 			break;
1584 		}
1585 		goto install_route;
1586 	}
1587 
1588 	if (cfg->fc_flags & RTF_GATEWAY) {
1589 		const struct in6_addr *gw_addr;
1590 		int gwa_type;
1591 
1592 		gw_addr = &cfg->fc_gateway;
1593 		rt->rt6i_gateway = *gw_addr;
1594 		gwa_type = ipv6_addr_type(gw_addr);
1595 
1596 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1597 			struct rt6_info *grt;
1598 
1599 			/* IPv6 strictly inhibits using not link-local
1600 			   addresses as nexthop address.
1601 			   Otherwise, router will not able to send redirects.
1602 			   It is very good, but in some (rare!) circumstances
1603 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1604 			   some exceptions. --ANK
1605 			 */
1606 			err = -EINVAL;
1607 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1608 				goto out;
1609 
1610 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1611 
1612 			err = -EHOSTUNREACH;
1613 			if (!grt)
1614 				goto out;
1615 			if (dev) {
1616 				if (dev != grt->dst.dev) {
1617 					ip6_rt_put(grt);
1618 					goto out;
1619 				}
1620 			} else {
				/* No interface given: adopt the one used to reach the gateway. */
1621 				dev = grt->dst.dev;
1622 				idev = grt->rt6i_idev;
1623 				dev_hold(dev);
1624 				in6_dev_hold(grt->rt6i_idev);
1625 			}
			/* Accepted only if the route to the gateway is not itself a gateway route. */
1626 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1627 				err = 0;
1628 			ip6_rt_put(grt);
1629 
1630 			if (err)
1631 				goto out;
1632 		}
1633 		err = -EINVAL;
1634 		if (!dev || (dev->flags & IFF_LOOPBACK))
1635 			goto out;
1636 	}
1637 
1638 	err = -ENODEV;
1639 	if (!dev)
1640 		goto out;
1641 
	/* A preferred source address must be configured on the device. */
1642 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1643 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1644 			err = -EINVAL;
1645 			goto out;
1646 		}
1647 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1648 		rt->rt6i_prefsrc.plen = 128;
1649 	} else
1650 		rt->rt6i_prefsrc.plen = 0;
1651 
1652 	rt->rt6i_flags = cfg->fc_flags;
1653 
1654 install_route:
	/* Copy user-supplied metrics; attribute type 0 is skipped. */
1655 	if (cfg->fc_mx) {
1656 		struct nlattr *nla;
1657 		int remaining;
1658 
1659 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1660 			int type = nla_type(nla);
1661 
1662 			if (type) {
1663 				if (type > RTAX_MAX) {
1664 					err = -EINVAL;
1665 					goto out;
1666 				}
1667 
1668 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1669 			}
1670 		}
1671 	}
1672 
	/* The dev/idev references held above are handed over to the route. */
1673 	rt->dst.dev = dev;
1674 	rt->rt6i_idev = idev;
1675 	rt->rt6i_table = table;
1676 
1677 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1678 
1679 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1680 
1681 out:
1682 	if (dev)
1683 		dev_put(dev);
1684 	if (idev)
1685 		in6_dev_put(idev);
1686 	if (rt)
1687 		dst_free(&rt->dst);
1688 	return err;
1689 }
1690 
1691 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1692 {
1693 	int err;
1694 	struct fib6_table *table;
1695 	struct net *net = dev_net(rt->dst.dev);
1696 
1697 	if (rt == net->ipv6.ip6_null_entry) {
1698 		err = -ENOENT;
1699 		goto out;
1700 	}
1701 
1702 	table = rt->rt6i_table;
1703 	write_lock_bh(&table->tb6_lock);
1704 	err = fib6_del(rt, info);
1705 	write_unlock_bh(&table->tb6_lock);
1706 
1707 out:
1708 	ip6_rt_put(rt);
1709 	return err;
1710 }
1711 
1712 int ip6_del_rt(struct rt6_info *rt)
1713 {
1714 	struct nl_info info = {
1715 		.nl_net = dev_net(rt->dst.dev),
1716 	};
1717 	return __ip6_del_rt(rt, &info);
1718 }
1719 
1720 static int ip6_route_del(struct fib6_config *cfg)
1721 {
1722 	struct fib6_table *table;
1723 	struct fib6_node *fn;
1724 	struct rt6_info *rt;
1725 	int err = -ESRCH;
1726 
1727 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1728 	if (!table)
1729 		return err;
1730 
1731 	read_lock_bh(&table->tb6_lock);
1732 
1733 	fn = fib6_locate(&table->tb6_root,
1734 			 &cfg->fc_dst, cfg->fc_dst_len,
1735 			 &cfg->fc_src, cfg->fc_src_len);
1736 
1737 	if (fn) {
1738 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1739 			if (cfg->fc_ifindex &&
1740 			    (!rt->dst.dev ||
1741 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1742 				continue;
1743 			if (cfg->fc_flags & RTF_GATEWAY &&
1744 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1745 				continue;
1746 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1747 				continue;
1748 			dst_hold(&rt->dst);
1749 			read_unlock_bh(&table->tb6_lock);
1750 
1751 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1752 		}
1753 	}
1754 	read_unlock_bh(&table->tb6_lock);
1755 
1756 	return err;
1757 }
1758 
/*
 * Apply the redirect message carried in @skb to the route @dst.
 *
 * @dst is the route the redirect was validated against; when it is the
 * namespace's null entry the redirect is rejected.  @sk is not used here.
 *
 * After validating the message, the neighbour entry for the redirect
 * target is updated and a host cache route to the redirected destination
 * (a copy of @dst pointing at the target) is inserted.  If @dst itself was
 * a cache route it is deleted afterwards.  All failures are silent apart
 * from rate-limited debug messages.
 */
1759 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1760 {
1761 	struct net *net = dev_net(skb->dev);
1762 	struct netevent_redirect netevent;
1763 	struct rt6_info *rt, *nrt = NULL;
1764 	struct ndisc_options ndopts;
1765 	struct inet6_dev *in6_dev;
1766 	struct neighbour *neigh;
1767 	struct rd_msg *msg;
1768 	int optlen, on_link;
1769 	u8 *lladdr;
1770 
	/* Bytes following the fixed redirect header, i.e. the ND options. */
1771 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1772 	optlen -= sizeof(*msg);
1773 
1774 	if (optlen < 0) {
1775 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1776 		return;
1777 	}
1778 
1779 	msg = (struct rd_msg *)icmp6_hdr(skb);
1780 
1781 	if (ipv6_addr_is_multicast(&msg->dest)) {
1782 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1783 		return;
1784 	}
1785 
	/*
	 * target == dest means the destination is a neighbour (on-link);
	 * otherwise the target is a router and must be link-local unicast.
	 */
1786 	on_link = 0;
1787 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1788 		on_link = 1;
1789 	} else if (ipv6_addr_type(&msg->target) !=
1790 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1791 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1792 		return;
1793 	}
1794 
1795 	in6_dev = __in6_dev_get(skb->dev);
1796 	if (!in6_dev)
1797 		return;
	/* Ignored when forwarding is on or accept_redirects is off. */
1798 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1799 		return;
1800 
1801 	/* RFC2461 8.1:
1802 	 *	The IP source address of the Redirect MUST be the same as the current
1803 	 *	first-hop router for the specified ICMP Destination Address.
1804 	 */
1805 
1806 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1807 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1808 		return;
1809 	}
1810 
	/* Optional target link-layer address option. */
1811 	lladdr = NULL;
1812 	if (ndopts.nd_opts_tgt_lladdr) {
1813 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1814 					     skb->dev);
1815 		if (!lladdr) {
1816 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1817 			return;
1818 		}
1819 	}
1820 
1821 	rt = (struct rt6_info *) dst;
1822 	if (rt == net->ipv6.ip6_null_entry) {
1823 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1824 		return;
1825 	}
1826 
1827 	/* Redirect received -> path was valid.
1828 	 * Look, redirects are sent only in response to data packets,
1829 	 * so that this nexthop apparently is reachable. --ANK
1830 	 */
1831 	dst_confirm(&rt->dst);
1832 
	/* Last argument 1: create the neighbour entry if it does not exist. */
1833 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1834 	if (!neigh)
1835 		return;
1836 
1837 	/*
1838 	 *	We have finally decided to accept it.
1839 	 */
1840 
	/* The router flag is only forced for off-link (router) targets. */
1841 	neigh_update(neigh, lladdr, NUD_STALE,
1842 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1843 		     NEIGH_UPDATE_F_OVERRIDE|
1844 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1845 				     NEIGH_UPDATE_F_ISROUTER))
1846 		     );
1847 
1848 	nrt = ip6_rt_copy(rt, &msg->dest);
1849 	if (!nrt)
1850 		goto out;
1851 
1852 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1853 	if (on_link)
1854 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1855 
1856 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1857 
1858 	if (ip6_ins_rt(nrt))
1859 		goto out;
1860 
	/* Tell netevent listeners about the old and the new route. */
1861 	netevent.old = &rt->dst;
1862 	netevent.new = &nrt->dst;
1863 	netevent.daddr = &msg->dest;
1864 	netevent.neigh = neigh;
1865 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1866 
	/* An old cache route is superseded; take a reference for ip6_del_rt(). */
1867 	if (rt->rt6i_flags & RTF_CACHE) {
1868 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1869 		ip6_del_rt(rt);
1870 	}
1871 
1872 out:
1873 	neigh_release(neigh);
1874 }
1875 
1876 /*
1877  *	Misc support functions
1878  */
1879 
1880 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1881 				    const struct in6_addr *dest)
1882 {
1883 	struct net *net = dev_net(ort->dst.dev);
1884 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1885 					    ort->rt6i_table);
1886 
1887 	if (rt) {
1888 		rt->dst.input = ort->dst.input;
1889 		rt->dst.output = ort->dst.output;
1890 		rt->dst.flags |= DST_HOST;
1891 
1892 		rt->rt6i_dst.addr = *dest;
1893 		rt->rt6i_dst.plen = 128;
1894 		dst_copy_metrics(&rt->dst, &ort->dst);
1895 		rt->dst.error = ort->dst.error;
1896 		rt->rt6i_idev = ort->rt6i_idev;
1897 		if (rt->rt6i_idev)
1898 			in6_dev_hold(rt->rt6i_idev);
1899 		rt->dst.lastuse = jiffies;
1900 
1901 		if (ort->rt6i_flags & RTF_GATEWAY)
1902 			rt->rt6i_gateway = ort->rt6i_gateway;
1903 		else
1904 			rt->rt6i_gateway = *dest;
1905 		rt->rt6i_flags = ort->rt6i_flags;
1906 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1907 		    (RTF_DEFAULT | RTF_ADDRCONF))
1908 			rt6_set_from(rt, ort);
1909 		rt->rt6i_metric = 0;
1910 
1911 #ifdef CONFIG_IPV6_SUBTREES
1912 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1913 #endif
1914 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1915 		rt->rt6i_table = ort->rt6i_table;
1916 	}
1917 	return rt;
1918 }
1919 
1920 #ifdef CONFIG_IPV6_ROUTE_INFO
1921 static struct rt6_info *rt6_get_route_info(struct net *net,
1922 					   const struct in6_addr *prefix, int prefixlen,
1923 					   const struct in6_addr *gwaddr, int ifindex)
1924 {
1925 	struct fib6_node *fn;
1926 	struct rt6_info *rt = NULL;
1927 	struct fib6_table *table;
1928 
1929 	table = fib6_get_table(net, RT6_TABLE_INFO);
1930 	if (!table)
1931 		return NULL;
1932 
1933 	read_lock_bh(&table->tb6_lock);
1934 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1935 	if (!fn)
1936 		goto out;
1937 
1938 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1939 		if (rt->dst.dev->ifindex != ifindex)
1940 			continue;
1941 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1942 			continue;
1943 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1944 			continue;
1945 		dst_hold(&rt->dst);
1946 		break;
1947 	}
1948 out:
1949 	read_unlock_bh(&table->tb6_lock);
1950 	return rt;
1951 }
1952 
1953 static struct rt6_info *rt6_add_route_info(struct net *net,
1954 					   const struct in6_addr *prefix, int prefixlen,
1955 					   const struct in6_addr *gwaddr, int ifindex,
1956 					   unsigned int pref)
1957 {
1958 	struct fib6_config cfg = {
1959 		.fc_table	= RT6_TABLE_INFO,
1960 		.fc_metric	= IP6_RT_PRIO_USER,
1961 		.fc_ifindex	= ifindex,
1962 		.fc_dst_len	= prefixlen,
1963 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1964 				  RTF_UP | RTF_PREF(pref),
1965 		.fc_nlinfo.portid = 0,
1966 		.fc_nlinfo.nlh = NULL,
1967 		.fc_nlinfo.nl_net = net,
1968 	};
1969 
1970 	cfg.fc_dst = *prefix;
1971 	cfg.fc_gateway = *gwaddr;
1972 
1973 	/* We should treat it as a default route if prefix length is 0. */
1974 	if (!prefixlen)
1975 		cfg.fc_flags |= RTF_DEFAULT;
1976 
1977 	ip6_route_add(&cfg);
1978 
1979 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1980 }
1981 #endif
1982 
1983 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1984 {
1985 	struct rt6_info *rt;
1986 	struct fib6_table *table;
1987 
1988 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1989 	if (!table)
1990 		return NULL;
1991 
1992 	read_lock_bh(&table->tb6_lock);
1993 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1994 		if (dev == rt->dst.dev &&
1995 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1996 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1997 			break;
1998 	}
1999 	if (rt)
2000 		dst_hold(&rt->dst);
2001 	read_unlock_bh(&table->tb6_lock);
2002 	return rt;
2003 }
2004 
2005 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2006 				     struct net_device *dev,
2007 				     unsigned int pref)
2008 {
2009 	struct fib6_config cfg = {
2010 		.fc_table	= RT6_TABLE_DFLT,
2011 		.fc_metric	= IP6_RT_PRIO_USER,
2012 		.fc_ifindex	= dev->ifindex,
2013 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2014 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2015 		.fc_nlinfo.portid = 0,
2016 		.fc_nlinfo.nlh = NULL,
2017 		.fc_nlinfo.nl_net = dev_net(dev),
2018 	};
2019 
2020 	cfg.fc_gateway = *gwaddr;
2021 
2022 	ip6_route_add(&cfg);
2023 
2024 	return rt6_get_dflt_router(gwaddr, dev);
2025 }
2026 
2027 void rt6_purge_dflt_routers(struct net *net)
2028 {
2029 	struct rt6_info *rt;
2030 	struct fib6_table *table;
2031 
2032 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2033 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2034 	if (!table)
2035 		return;
2036 
2037 restart:
2038 	read_lock_bh(&table->tb6_lock);
2039 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2040 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2041 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2042 			dst_hold(&rt->dst);
2043 			read_unlock_bh(&table->tb6_lock);
2044 			ip6_del_rt(rt);
2045 			goto restart;
2046 		}
2047 	}
2048 	read_unlock_bh(&table->tb6_lock);
2049 }
2050 
2051 static void rtmsg_to_fib6_config(struct net *net,
2052 				 struct in6_rtmsg *rtmsg,
2053 				 struct fib6_config *cfg)
2054 {
2055 	memset(cfg, 0, sizeof(*cfg));
2056 
2057 	cfg->fc_table = RT6_TABLE_MAIN;
2058 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2059 	cfg->fc_metric = rtmsg->rtmsg_metric;
2060 	cfg->fc_expires = rtmsg->rtmsg_info;
2061 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2062 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2063 	cfg->fc_flags = rtmsg->rtmsg_flags;
2064 
2065 	cfg->fc_nlinfo.nl_net = net;
2066 
2067 	cfg->fc_dst = rtmsg->rtmsg_dst;
2068 	cfg->fc_src = rtmsg->rtmsg_src;
2069 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2070 }
2071 
2072 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2073 {
2074 	struct fib6_config cfg;
2075 	struct in6_rtmsg rtmsg;
2076 	int err;
2077 
2078 	switch(cmd) {
2079 	case SIOCADDRT:		/* Add a route */
2080 	case SIOCDELRT:		/* Delete a route */
2081 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2082 			return -EPERM;
2083 		err = copy_from_user(&rtmsg, arg,
2084 				     sizeof(struct in6_rtmsg));
2085 		if (err)
2086 			return -EFAULT;
2087 
2088 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2089 
2090 		rtnl_lock();
2091 		switch (cmd) {
2092 		case SIOCADDRT:
2093 			err = ip6_route_add(&cfg);
2094 			break;
2095 		case SIOCDELRT:
2096 			err = ip6_route_del(&cfg);
2097 			break;
2098 		default:
2099 			err = -EINVAL;
2100 		}
2101 		rtnl_unlock();
2102 
2103 		return err;
2104 	}
2105 
2106 	return -EINVAL;
2107 }
2108 
2109 /*
2110  *	Drop the packet on the floor
2111  */
2112 
2113 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2114 {
2115 	int type;
2116 	struct dst_entry *dst = skb_dst(skb);
2117 	switch (ipstats_mib_noroutes) {
2118 	case IPSTATS_MIB_INNOROUTES:
2119 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2120 		if (type == IPV6_ADDR_ANY) {
2121 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2122 				      IPSTATS_MIB_INADDRERRORS);
2123 			break;
2124 		}
2125 		/* FALLTHROUGH */
2126 	case IPSTATS_MIB_OUTNOROUTES:
2127 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2128 			      ipstats_mib_noroutes);
2129 		break;
2130 	}
2131 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2132 	kfree_skb(skb);
2133 	return 0;
2134 }
2135 
2136 static int ip6_pkt_discard(struct sk_buff *skb)
2137 {
2138 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2139 }
2140 
2141 static int ip6_pkt_discard_out(struct sk_buff *skb)
2142 {
2143 	skb->dev = skb_dst(skb)->dev;
2144 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2145 }
2146 
2147 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2148 
2149 static int ip6_pkt_prohibit(struct sk_buff *skb)
2150 {
2151 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2152 }
2153 
2154 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2155 {
2156 	skb->dev = skb_dst(skb)->dev;
2157 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2158 }
2159 
2160 #endif
2161 
2162 /*
2163  *	Allocate a dst for local (unicast / anycast) address.
2164  */
2165 
2166 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2167 				    const struct in6_addr *addr,
2168 				    bool anycast)
2169 {
2170 	struct net *net = dev_net(idev->dev);
2171 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2172 
2173 	if (!rt) {
2174 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2175 		return ERR_PTR(-ENOMEM);
2176 	}
2177 
2178 	in6_dev_hold(idev);
2179 
2180 	rt->dst.flags |= DST_HOST;
2181 	rt->dst.input = ip6_input;
2182 	rt->dst.output = ip6_output;
2183 	rt->rt6i_idev = idev;
2184 
2185 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2186 	if (anycast)
2187 		rt->rt6i_flags |= RTF_ANYCAST;
2188 	else
2189 		rt->rt6i_flags |= RTF_LOCAL;
2190 
2191 	rt->rt6i_gateway  = *addr;
2192 	rt->rt6i_dst.addr = *addr;
2193 	rt->rt6i_dst.plen = 128;
2194 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2195 
2196 	atomic_set(&rt->dst.__refcnt, 1);
2197 
2198 	return rt;
2199 }
2200 
2201 int ip6_route_get_saddr(struct net *net,
2202 			struct rt6_info *rt,
2203 			const struct in6_addr *daddr,
2204 			unsigned int prefs,
2205 			struct in6_addr *saddr)
2206 {
2207 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2208 	int err = 0;
2209 	if (rt->rt6i_prefsrc.plen)
2210 		*saddr = rt->rt6i_prefsrc.addr;
2211 	else
2212 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2213 					 daddr, prefs, saddr);
2214 	return err;
2215 }
2216 
2217 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle passed through fib6_clean_all() to
 * fib6_remove_prefsrc().
 */
2218 struct arg_dev_net_ip {
	/* only routes on this device are touched; NULL matches any device */
2219 	struct net_device *dev;
	/* namespace whose ip6_null_entry is skipped */
2220 	struct net *net;
	/* address being removed; compared against each route's prefsrc */
2221 	struct in6_addr *addr;
2222 };
2223 
2224 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2225 {
2226 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2227 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2228 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2229 
2230 	if (((void *)rt->dst.dev == dev || !dev) &&
2231 	    rt != net->ipv6.ip6_null_entry &&
2232 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2233 		/* remove prefsrc entry */
2234 		rt->rt6i_prefsrc.plen = 0;
2235 	}
2236 	return 0;
2237 }
2238 
2239 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2240 {
2241 	struct net *net = dev_net(ifp->idev->dev);
2242 	struct arg_dev_net_ip adni = {
2243 		.dev = ifp->idev->dev,
2244 		.net = net,
2245 		.addr = &ifp->addr,
2246 	};
2247 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2248 }
2249 
/*
 * Argument bundle passed to fib6_ifdown() by rt6_ifdown().
 */
2250 struct arg_dev_net {
	/* device going down; NULL matches routes on any device */
2251 	struct net_device *dev;
	/* namespace whose ip6_null_entry is never selected */
2252 	struct net *net;
2253 };
2254 
2255 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2256 {
2257 	const struct arg_dev_net *adn = arg;
2258 	const struct net_device *dev = adn->dev;
2259 
2260 	if ((rt->dst.dev == dev || !dev) &&
2261 	    rt != adn->net->ipv6.ip6_null_entry)
2262 		return -1;
2263 
2264 	return 0;
2265 }
2266 
2267 void rt6_ifdown(struct net *net, struct net_device *dev)
2268 {
2269 	struct arg_dev_net adn = {
2270 		.dev = dev,
2271 		.net = net,
2272 	};
2273 
2274 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2275 	icmp6_clean_all(fib6_ifdown, &adn);
2276 }
2277 
/*
 * Argument bundle handed to rt6_mtu_change_route() through the opaque
 * walker argument.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2282 
2283 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2284 {
2285 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2286 	struct inet6_dev *idev;
2287 
2288 	/* In IPv6 pmtu discovery is not optional,
2289 	   so that RTAX_MTU lock cannot disable it.
2290 	   We still use this lock to block changes
2291 	   caused by addrconf/ndisc.
2292 	*/
2293 
2294 	idev = __in6_dev_get(arg->dev);
2295 	if (!idev)
2296 		return 0;
2297 
2298 	/* For administrative MTU increase, there is no way to discover
2299 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2300 	   Since RFC 1981 doesn't include administrative MTU increase
2301 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2302 	 */
2303 	/*
2304 	   If new MTU is less than route PMTU, this new MTU will be the
2305 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2306 	   decreases; if new MTU is greater than route PMTU, and the
2307 	   old MTU is the lowest MTU in the path, update the route PMTU
2308 	   to reflect the increase. In this case if the other nodes' MTU
2309 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2310 	   PMTU discouvery.
2311 	 */
2312 	if (rt->dst.dev == arg->dev &&
2313 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2314 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2315 	     (dst_mtu(&rt->dst) < arg->mtu &&
2316 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2317 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2318 	}
2319 	return 0;
2320 }
2321 
2322 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2323 {
2324 	struct rt6_mtu_change_arg arg = {
2325 		.dev = dev,
2326 		.mtu = mtu,
2327 	};
2328 
2329 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2330 }
2331 
2332 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2333 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2334 	[RTA_OIF]               = { .type = NLA_U32 },
2335 	[RTA_IIF]		= { .type = NLA_U32 },
2336 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2337 	[RTA_METRICS]           = { .type = NLA_NESTED },
2338 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2339 };
2340 
2341 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2342 			      struct fib6_config *cfg)
2343 {
2344 	struct rtmsg *rtm;
2345 	struct nlattr *tb[RTA_MAX+1];
2346 	int err;
2347 
2348 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2349 	if (err < 0)
2350 		goto errout;
2351 
2352 	err = -EINVAL;
2353 	rtm = nlmsg_data(nlh);
2354 	memset(cfg, 0, sizeof(*cfg));
2355 
2356 	cfg->fc_table = rtm->rtm_table;
2357 	cfg->fc_dst_len = rtm->rtm_dst_len;
2358 	cfg->fc_src_len = rtm->rtm_src_len;
2359 	cfg->fc_flags = RTF_UP;
2360 	cfg->fc_protocol = rtm->rtm_protocol;
2361 	cfg->fc_type = rtm->rtm_type;
2362 
2363 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2364 	    rtm->rtm_type == RTN_BLACKHOLE ||
2365 	    rtm->rtm_type == RTN_PROHIBIT ||
2366 	    rtm->rtm_type == RTN_THROW)
2367 		cfg->fc_flags |= RTF_REJECT;
2368 
2369 	if (rtm->rtm_type == RTN_LOCAL)
2370 		cfg->fc_flags |= RTF_LOCAL;
2371 
2372 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2373 	cfg->fc_nlinfo.nlh = nlh;
2374 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2375 
2376 	if (tb[RTA_GATEWAY]) {
2377 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2378 		cfg->fc_flags |= RTF_GATEWAY;
2379 	}
2380 
2381 	if (tb[RTA_DST]) {
2382 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2383 
2384 		if (nla_len(tb[RTA_DST]) < plen)
2385 			goto errout;
2386 
2387 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2388 	}
2389 
2390 	if (tb[RTA_SRC]) {
2391 		int plen = (rtm->rtm_src_len + 7) >> 3;
2392 
2393 		if (nla_len(tb[RTA_SRC]) < plen)
2394 			goto errout;
2395 
2396 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2397 	}
2398 
2399 	if (tb[RTA_PREFSRC])
2400 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2401 
2402 	if (tb[RTA_OIF])
2403 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2404 
2405 	if (tb[RTA_PRIORITY])
2406 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2407 
2408 	if (tb[RTA_METRICS]) {
2409 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2410 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2411 	}
2412 
2413 	if (tb[RTA_TABLE])
2414 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2415 
2416 	if (tb[RTA_MULTIPATH]) {
2417 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2418 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2419 	}
2420 
2421 	err = 0;
2422 errout:
2423 	return err;
2424 }
2425 
2426 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2427 {
2428 	struct fib6_config r_cfg;
2429 	struct rtnexthop *rtnh;
2430 	int remaining;
2431 	int attrlen;
2432 	int err = 0, last_err = 0;
2433 
2434 beginning:
2435 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2436 	remaining = cfg->fc_mp_len;
2437 
2438 	/* Parse a Multipath Entry */
2439 	while (rtnh_ok(rtnh, remaining)) {
2440 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2441 		if (rtnh->rtnh_ifindex)
2442 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2443 
2444 		attrlen = rtnh_attrlen(rtnh);
2445 		if (attrlen > 0) {
2446 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2447 
2448 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2449 			if (nla) {
2450 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2451 				r_cfg.fc_flags |= RTF_GATEWAY;
2452 			}
2453 		}
2454 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2455 		if (err) {
2456 			last_err = err;
2457 			/* If we are trying to remove a route, do not stop the
2458 			 * loop when ip6_route_del() fails (because next hop is
2459 			 * already gone), we should try to remove all next hops.
2460 			 */
2461 			if (add) {
2462 				/* If add fails, we should try to delete all
2463 				 * next hops that have been already added.
2464 				 */
2465 				add = 0;
2466 				goto beginning;
2467 			}
2468 		}
2469 		/* Because each route is added like a single route we remove
2470 		 * this flag after the first nexthop (if there is a collision,
2471 		 * we have already fail to add the first nexthop:
2472 		 * fib6_add_rt2node() has reject it).
2473 		 */
2474 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2475 		rtnh = rtnh_next(rtnh, &remaining);
2476 	}
2477 
2478 	return last_err;
2479 }
2480 
2481 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2482 {
2483 	struct fib6_config cfg;
2484 	int err;
2485 
2486 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2487 	if (err < 0)
2488 		return err;
2489 
2490 	if (cfg.fc_mp)
2491 		return ip6_route_multipath(&cfg, 0);
2492 	else
2493 		return ip6_route_del(&cfg);
2494 }
2495 
2496 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2497 {
2498 	struct fib6_config cfg;
2499 	int err;
2500 
2501 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2502 	if (err < 0)
2503 		return err;
2504 
2505 	if (cfg.fc_mp)
2506 		return ip6_route_multipath(&cfg, 1);
2507 	else
2508 		return ip6_route_add(&cfg);
2509 }
2510 
2511 static inline size_t rt6_nlmsg_size(void)
2512 {
2513 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2514 	       + nla_total_size(16) /* RTA_SRC */
2515 	       + nla_total_size(16) /* RTA_DST */
2516 	       + nla_total_size(16) /* RTA_GATEWAY */
2517 	       + nla_total_size(16) /* RTA_PREFSRC */
2518 	       + nla_total_size(4) /* RTA_TABLE */
2519 	       + nla_total_size(4) /* RTA_IIF */
2520 	       + nla_total_size(4) /* RTA_OIF */
2521 	       + nla_total_size(4) /* RTA_PRIORITY */
2522 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2523 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2524 }
2525 
/*
 * Append one netlink route message describing @rt to @skb.
 *
 * @net:    namespace, passed on to ip6mr_get_route() / ip6_route_get_saddr()
 * @skb:    buffer the message is appended to
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of the route's
 *          own destination prefix
 * @src:    likewise for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index; non-zero selects the RTA_IIF / multicast
 *          branch, zero (with @dst set) selects the RTA_PREFSRC lookup
 * @type, @portid, @seq, @flags: passed to nlmsg_put() for the header
 * @prefix: non-zero to emit nothing for routes lacking RTF_PREFIX_RT
 * @nowait: passed to ip6mr_get_route(); also changes how its errors are
 *          treated
 *
 * Returns 1 when the route was skipped by the @prefix filter, the value of
 * nlmsg_end() on success, 0 when a blocking multicast lookup returned 0,
 * and -EMSGSIZE when the header or any attribute did not fit (the partial
 * message is cancelled in the attribute case).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both into the header and into RTA_TABLE. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Derive the route type: reject routes are classified by dst.error. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Route flags may override the stored protocol value. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: an explicit @dst is always reported as a /128. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	/* Source: same scheme as for the destination. */
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/*
		 * Multicast destinations are handed to ip6mr_get_route().
		 * Note the trailing "else" below: it binds to the
		 * nla_put_u32() statement after the #endif.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* other errors are ignored here */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source that would be used. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): when @dst is set, @iif is 0 and the route has a
	 * prefsrc, the branch above already emitted RTA_PREFSRC, so the
	 * attribute appears to be added a second time here - confirm intent.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies, or 0 for routes without RTF_EXPIRES. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Roll the partially built message back out of the skb. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2670 
2671 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2672 {
2673 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2674 	int prefix;
2675 
2676 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2677 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2678 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2679 	} else
2680 		prefix = 0;
2681 
2682 	return rt6_fill_node(arg->net,
2683 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2684 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2685 		     prefix, 0, NLM_F_MULTI);
2686 }
2687 
2688 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2689 {
2690 	struct net *net = sock_net(in_skb->sk);
2691 	struct nlattr *tb[RTA_MAX+1];
2692 	struct rt6_info *rt;
2693 	struct sk_buff *skb;
2694 	struct rtmsg *rtm;
2695 	struct flowi6 fl6;
2696 	int err, iif = 0, oif = 0;
2697 
2698 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2699 	if (err < 0)
2700 		goto errout;
2701 
2702 	err = -EINVAL;
2703 	memset(&fl6, 0, sizeof(fl6));
2704 
2705 	if (tb[RTA_SRC]) {
2706 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2707 			goto errout;
2708 
2709 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2710 	}
2711 
2712 	if (tb[RTA_DST]) {
2713 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2714 			goto errout;
2715 
2716 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2717 	}
2718 
2719 	if (tb[RTA_IIF])
2720 		iif = nla_get_u32(tb[RTA_IIF]);
2721 
2722 	if (tb[RTA_OIF])
2723 		oif = nla_get_u32(tb[RTA_OIF]);
2724 
2725 	if (iif) {
2726 		struct net_device *dev;
2727 		int flags = 0;
2728 
2729 		dev = __dev_get_by_index(net, iif);
2730 		if (!dev) {
2731 			err = -ENODEV;
2732 			goto errout;
2733 		}
2734 
2735 		fl6.flowi6_iif = iif;
2736 
2737 		if (!ipv6_addr_any(&fl6.saddr))
2738 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2739 
2740 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2741 							       flags);
2742 	} else {
2743 		fl6.flowi6_oif = oif;
2744 
2745 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2746 	}
2747 
2748 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2749 	if (!skb) {
2750 		ip6_rt_put(rt);
2751 		err = -ENOBUFS;
2752 		goto errout;
2753 	}
2754 
2755 	/* Reserve room for dummy headers, this skb can pass
2756 	   through good chunk of routing engine.
2757 	 */
2758 	skb_reset_mac_header(skb);
2759 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2760 
2761 	skb_dst_set(skb, &rt->dst);
2762 
2763 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2764 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2765 			    nlh->nlmsg_seq, 0, 0, 0);
2766 	if (err < 0) {
2767 		kfree_skb(skb);
2768 		goto errout;
2769 	}
2770 
2771 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2772 errout:
2773 	return err;
2774 }
2775 
2776 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2777 {
2778 	struct sk_buff *skb;
2779 	struct net *net = info->nl_net;
2780 	u32 seq;
2781 	int err;
2782 
2783 	err = -ENOBUFS;
2784 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2785 
2786 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2787 	if (!skb)
2788 		goto errout;
2789 
2790 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2791 				event, info->portid, seq, 0, 0, 0);
2792 	if (err < 0) {
2793 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2794 		WARN_ON(err == -EMSGSIZE);
2795 		kfree_skb(skb);
2796 		goto errout;
2797 	}
2798 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2799 		    info->nlh, gfp_any());
2800 	return;
2801 errout:
2802 	if (err < 0)
2803 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2804 }
2805 
2806 static int ip6_route_dev_notify(struct notifier_block *this,
2807 				unsigned long event, void *ptr)
2808 {
2809 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2810 	struct net *net = dev_net(dev);
2811 
2812 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2813 		net->ipv6.ip6_null_entry->dst.dev = dev;
2814 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2815 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2816 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2817 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2818 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2819 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2820 #endif
2821 	}
2822 
2823 	return NOTIFY_OK;
2824 }
2825 
2826 /*
2827  *	/proc
2828  */
2829 
2830 #ifdef CONFIG_PROC_FS
2831 
/*
 * File operations for the "ipv6_route" proc entry created in
 * ip6_route_net_init_late(): opened through ipv6_route_open(), read via
 * the seq_file helpers.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2839 
2840 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2841 {
2842 	struct net *net = (struct net *)seq->private;
2843 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2844 		   net->ipv6.rt6_stats->fib_nodes,
2845 		   net->ipv6.rt6_stats->fib_route_nodes,
2846 		   net->ipv6.rt6_stats->fib_rt_alloc,
2847 		   net->ipv6.rt6_stats->fib_rt_entries,
2848 		   net->ipv6.rt6_stats->fib_rt_cache,
2849 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2850 		   net->ipv6.rt6_stats->fib_discarded_routes);
2851 
2852 	return 0;
2853 }
2854 
/*
 * open() handler for the "rt6_stats" proc entry: sets up a single-record,
 * per-namespace seq_file whose content is produced by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2859 
/*
 * File operations for the "rt6_stats" proc entry created in
 * ip6_route_net_init_late().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,	/* pairs with single_open_net() */
};
2867 #endif	/* CONFIG_PROC_FS */
2868 
2869 #ifdef CONFIG_SYSCTL
2870 
2871 static
2872 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2873 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2874 {
2875 	struct net *net;
2876 	int delay;
2877 	if (!write)
2878 		return -EINVAL;
2879 
2880 	net = (struct net *)ctl->extra1;
2881 	delay = net->ipv6.sysctl.flush_delay;
2882 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2883 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2884 	return 0;
2885 }
2886 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The .data pointers below refer to init_net (or the dst_ops template);
 * ipv6_route_sysctl_init() duplicates the table and re-points each entry
 * at the target namespace BY ARRAY INDEX, so the order of entries here
 * must stay in sync with that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger, see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/*
		 * Same variable as "gc_min_interval" above, exposed through
		 * the ms_jiffies handler instead of the jiffies one.
		 */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* empty terminating entry */
};
2960 
2961 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2962 {
2963 	struct ctl_table *table;
2964 
2965 	table = kmemdup(ipv6_route_table_template,
2966 			sizeof(ipv6_route_table_template),
2967 			GFP_KERNEL);
2968 
2969 	if (table) {
2970 		table[0].data = &net->ipv6.sysctl.flush_delay;
2971 		table[0].extra1 = net;
2972 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2973 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2974 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2975 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2976 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2977 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2978 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2979 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2980 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2981 
2982 		/* Don't export sysctls to unprivileged users */
2983 		if (net->user_ns != &init_user_ns)
2984 			table[0].procname = NULL;
2985 	}
2986 
2987 	return table;
2988 }
2989 #endif
2990 
2991 static int __net_init ip6_route_net_init(struct net *net)
2992 {
2993 	int ret = -ENOMEM;
2994 
2995 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2996 	       sizeof(net->ipv6.ip6_dst_ops));
2997 
2998 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2999 		goto out_ip6_dst_ops;
3000 
3001 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3002 					   sizeof(*net->ipv6.ip6_null_entry),
3003 					   GFP_KERNEL);
3004 	if (!net->ipv6.ip6_null_entry)
3005 		goto out_ip6_dst_entries;
3006 	net->ipv6.ip6_null_entry->dst.path =
3007 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3008 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3009 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3010 			 ip6_template_metrics, true);
3011 
3012 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3013 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3014 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3015 					       GFP_KERNEL);
3016 	if (!net->ipv6.ip6_prohibit_entry)
3017 		goto out_ip6_null_entry;
3018 	net->ipv6.ip6_prohibit_entry->dst.path =
3019 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3020 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3021 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3022 			 ip6_template_metrics, true);
3023 
3024 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3025 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3026 					       GFP_KERNEL);
3027 	if (!net->ipv6.ip6_blk_hole_entry)
3028 		goto out_ip6_prohibit_entry;
3029 	net->ipv6.ip6_blk_hole_entry->dst.path =
3030 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3031 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3032 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3033 			 ip6_template_metrics, true);
3034 #endif
3035 
3036 	net->ipv6.sysctl.flush_delay = 0;
3037 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3038 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3039 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3040 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3041 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3042 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3043 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3044 
3045 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3046 
3047 	ret = 0;
3048 out:
3049 	return ret;
3050 
3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052 out_ip6_prohibit_entry:
3053 	kfree(net->ipv6.ip6_prohibit_entry);
3054 out_ip6_null_entry:
3055 	kfree(net->ipv6.ip6_null_entry);
3056 #endif
3057 out_ip6_dst_entries:
3058 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3059 out_ip6_dst_ops:
3060 	goto out;
3061 }
3062 
/*
 * Per-namespace teardown matching ip6_route_net_init(): frees the special
 * route entries duplicated there and destroys the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3072 
/*
 * Late per-namespace init: create the "ipv6_route" and "rt6_stats" proc
 * entries when procfs is configured.  The results of proc_create() are not
 * checked; the function always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3081 
/*
 * Late per-namespace teardown: remove the proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3089 
/*
 * Per-namespace hooks for the core routing state; registered in
 * ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3094 
3095 static int __net_init ipv6_inetpeer_init(struct net *net)
3096 {
3097 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3098 
3099 	if (!bp)
3100 		return -ENOMEM;
3101 	inet_peer_base_init(bp);
3102 	net->ipv6.peers = bp;
3103 	return 0;
3104 }
3105 
/*
 * Per-namespace teardown matching ipv6_inetpeer_init(): detach the peer
 * base from the namespace, invalidate its tree and free it.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	/* Clear the published pointer before the base is torn down. */
	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3114 
/*
 * Per-namespace hooks for the IPv6 inet_peer base; registered in
 * ip6_route_init().
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3119 
/*
 * Per-namespace hooks for the proc entries; registered in ip6_route_init()
 * after fib6, xfrm6 and the fib6 rules have been initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3124 
/*
 * Netdevice notifier that binds the special route entries to a newly
 * registered loopback device (see ip6_route_dev_notify()).
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3129 
3130 int __init ip6_route_init(void)
3131 {
3132 	int ret;
3133 
3134 	ret = -ENOMEM;
3135 	ip6_dst_ops_template.kmem_cachep =
3136 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3137 				  SLAB_HWCACHE_ALIGN, NULL);
3138 	if (!ip6_dst_ops_template.kmem_cachep)
3139 		goto out;
3140 
3141 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3142 	if (ret)
3143 		goto out_kmem_cache;
3144 
3145 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3146 	if (ret)
3147 		goto out_dst_entries;
3148 
3149 	ret = register_pernet_subsys(&ip6_route_net_ops);
3150 	if (ret)
3151 		goto out_register_inetpeer;
3152 
3153 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3154 
3155 	/* Registering of the loopback is done before this portion of code,
3156 	 * the loopback reference in rt6_info will not be taken, do it
3157 	 * manually for init_net */
3158 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3159 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3160   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3161 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3162 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3163 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3164 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3165   #endif
3166 	ret = fib6_init();
3167 	if (ret)
3168 		goto out_register_subsys;
3169 
3170 	ret = xfrm6_init();
3171 	if (ret)
3172 		goto out_fib6_init;
3173 
3174 	ret = fib6_rules_init();
3175 	if (ret)
3176 		goto xfrm6_init;
3177 
3178 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3179 	if (ret)
3180 		goto fib6_rules_init;
3181 
3182 	ret = -ENOBUFS;
3183 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3184 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3185 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3186 		goto out_register_late_subsys;
3187 
3188 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3189 	if (ret)
3190 		goto out_register_late_subsys;
3191 
3192 out:
3193 	return ret;
3194 
3195 out_register_late_subsys:
3196 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3197 fib6_rules_init:
3198 	fib6_rules_cleanup();
3199 xfrm6_init:
3200 	xfrm6_fini();
3201 out_fib6_init:
3202 	fib6_gc_cleanup();
3203 out_register_subsys:
3204 	unregister_pernet_subsys(&ip6_route_net_ops);
3205 out_register_inetpeer:
3206 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3207 out_dst_entries:
3208 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3209 out_kmem_cache:
3210 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3211 	goto out;
3212 }
3213 
/*
 * Tear down everything ip6_route_init() set up: the netdevice notifier,
 * the late per-namespace state, fib6 rules, xfrm6, fib6 GC, the inetpeer
 * and route per-namespace state, the blackhole dst counter and finally the
 * rt6_info slab cache.
 *
 * NOTE(review): the inetpeer ops are unregistered before the route ops
 * here, whereas the error path of ip6_route_init() does it the other way
 * round - confirm that the difference is intentional.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3226