xref: /linux/net/ipv6/route.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable; otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
71 enum rt6_nud_state {
72 	RT6_NUD_FAIL_HARD = -3,
73 	RT6_NUD_FAIL_PROBE = -2,
74 	RT6_NUD_FAIL_DO_RR = -1,
75 	RT6_NUD_SUCCEED = 1
76 };
77 
78 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
79 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
81 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
82 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83 static void		ip6_dst_destroy(struct dst_entry *);
84 static void		ip6_dst_ifdown(struct dst_entry *,
85 				       struct net_device *dev, int how);
86 static int		 ip6_dst_gc(struct dst_ops *ops);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
90 static int		ip6_pkt_prohibit(struct sk_buff *skb);
91 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
92 static void		ip6_link_failure(struct sk_buff *skb);
93 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 					   struct sk_buff *skb, u32 mtu);
95 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 					struct sk_buff *skb);
97 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
98 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct net *net,
102 					   const struct in6_addr *prefix, int prefixlen,
103 					   const struct in6_addr *gwaddr, int ifindex,
104 					   unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net *net,
106 					   const struct in6_addr *prefix, int prefixlen,
107 					   const struct in6_addr *gwaddr, int ifindex);
108 #endif
109 
110 struct uncached_list {
111 	spinlock_t		lock;
112 	struct list_head	head;
113 };
114 
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120 
121 	rt->dst.flags |= DST_NOCACHE;
122 	rt->rt6i_uncached_list = ul;
123 
124 	spin_lock_bh(&ul->lock);
125 	list_add_tail(&rt->rt6i_uncached, &ul->head);
126 	spin_unlock_bh(&ul->lock);
127 }
128 
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131 	if (!list_empty(&rt->rt6i_uncached)) {
132 		struct uncached_list *ul = rt->rt6i_uncached_list;
133 
134 		spin_lock_bh(&ul->lock);
135 		list_del(&rt->rt6i_uncached);
136 		spin_unlock_bh(&ul->lock);
137 	}
138 }
139 
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142 	struct net_device *loopback_dev = net->loopback_dev;
143 	int cpu;
144 
145 	if (dev == loopback_dev)
146 		return;
147 
148 	for_each_possible_cpu(cpu) {
149 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150 		struct rt6_info *rt;
151 
152 		spin_lock_bh(&ul->lock);
153 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154 			struct inet6_dev *rt_idev = rt->rt6i_idev;
155 			struct net_device *rt_dev = rt->dst.dev;
156 
157 			if (rt_idev->dev == dev) {
158 				rt->rt6i_idev = in6_dev_get(loopback_dev);
159 				in6_dev_put(rt_idev);
160 			}
161 
162 			if (rt_dev == dev) {
163 				rt->dst.dev = loopback_dev;
164 				dev_hold(rt->dst.dev);
165 				dev_put(rt_dev);
166 			}
167 		}
168 		spin_unlock_bh(&ul->lock);
169 	}
170 }
171 
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174 	return dst_metrics_write_ptr(rt->dst.from);
175 }
176 
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179 	struct rt6_info *rt = (struct rt6_info *)dst;
180 
181 	if (rt->rt6i_flags & RTF_PCPU)
182 		return rt6_pcpu_cow_metrics(rt);
183 	else if (rt->rt6i_flags & RTF_CACHE)
184 		return NULL;
185 	else
186 		return dst_cow_metrics_generic(dst, old);
187 }
188 
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190 					     struct sk_buff *skb,
191 					     const void *daddr)
192 {
193 	struct in6_addr *p = &rt->rt6i_gateway;
194 
195 	if (!ipv6_addr_any(p))
196 		return (const void *) p;
197 	else if (skb)
198 		return &ipv6_hdr(skb)->daddr;
199 	return daddr;
200 }
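
/* Note the fallback order in choose_neigh_daddr() above: the route's
 * gateway wins, then the destination of the skb being transmitted,
 * then the caller-supplied daddr, so gatewayed routes always resolve
 * the router rather than the final destination.
 */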
201 
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203 					  struct sk_buff *skb,
204 					  const void *daddr)
205 {
206 	struct rt6_info *rt = (struct rt6_info *) dst;
207 	struct neighbour *n;
208 
209 	daddr = choose_neigh_daddr(rt, skb, daddr);
210 	n = __ipv6_neigh_lookup(dst->dev, daddr);
211 	if (n)
212 		return n;
213 	return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215 
216 static struct dst_ops ip6_dst_ops_template = {
217 	.family			=	AF_INET6,
218 	.gc			=	ip6_dst_gc,
219 	.gc_thresh		=	1024,
220 	.check			=	ip6_dst_check,
221 	.default_advmss		=	ip6_default_advmss,
222 	.mtu			=	ip6_mtu,
223 	.cow_metrics		=	ipv6_cow_metrics,
224 	.destroy		=	ip6_dst_destroy,
225 	.ifdown			=	ip6_dst_ifdown,
226 	.negative_advice	=	ip6_negative_advice,
227 	.link_failure		=	ip6_link_failure,
228 	.update_pmtu		=	ip6_rt_update_pmtu,
229 	.redirect		=	rt6_do_redirect,
230 	.local_out		=	__ip6_local_out,
231 	.neigh_lookup		=	ip6_neigh_lookup,
232 };
233 
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237 
238 	return mtu ? : dst->dev->mtu;
239 }
240 
241 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242 					 struct sk_buff *skb, u32 mtu)
243 {
244 }
245 
246 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
247 				      struct sk_buff *skb)
248 {
249 }
250 
251 static struct dst_ops ip6_dst_blackhole_ops = {
252 	.family			=	AF_INET6,
253 	.destroy		=	ip6_dst_destroy,
254 	.check			=	ip6_dst_check,
255 	.mtu			=	ip6_blackhole_mtu,
256 	.default_advmss		=	ip6_default_advmss,
257 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
258 	.redirect		=	ip6_rt_blackhole_redirect,
259 	.cow_metrics		=	dst_cow_metrics_generic,
260 	.neigh_lookup		=	ip6_neigh_lookup,
261 };
262 
263 static const u32 ip6_template_metrics[RTAX_MAX] = {
264 	[RTAX_HOPLIMIT - 1] = 0,
265 };
266 
267 static const struct rt6_info ip6_null_entry_template = {
268 	.dst = {
269 		.__refcnt	= ATOMIC_INIT(1),
270 		.__use		= 1,
271 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
272 		.error		= -ENETUNREACH,
273 		.input		= ip6_pkt_discard,
274 		.output		= ip6_pkt_discard_out,
275 	},
276 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
277 	.rt6i_protocol  = RTPROT_KERNEL,
278 	.rt6i_metric	= ~(u32) 0,
279 	.rt6i_ref	= ATOMIC_INIT(1),
280 };
281 
282 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
283 
284 static const struct rt6_info ip6_prohibit_entry_template = {
285 	.dst = {
286 		.__refcnt	= ATOMIC_INIT(1),
287 		.__use		= 1,
288 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
289 		.error		= -EACCES,
290 		.input		= ip6_pkt_prohibit,
291 		.output		= ip6_pkt_prohibit_out,
292 	},
293 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
294 	.rt6i_protocol  = RTPROT_KERNEL,
295 	.rt6i_metric	= ~(u32) 0,
296 	.rt6i_ref	= ATOMIC_INIT(1),
297 };
298 
299 static const struct rt6_info ip6_blk_hole_entry_template = {
300 	.dst = {
301 		.__refcnt	= ATOMIC_INIT(1),
302 		.__use		= 1,
303 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
304 		.error		= -EINVAL,
305 		.input		= dst_discard,
306 		.output		= dst_discard_sk,
307 	},
308 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
309 	.rt6i_protocol  = RTPROT_KERNEL,
310 	.rt6i_metric	= ~(u32) 0,
311 	.rt6i_ref	= ATOMIC_INIT(1),
312 };
313 
314 #endif
315 
316 static void rt6_info_init(struct rt6_info *rt)
317 {
318 	struct dst_entry *dst = &rt->dst;
319 
320 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321 	INIT_LIST_HEAD(&rt->rt6i_siblings);
322 	INIT_LIST_HEAD(&rt->rt6i_uncached);
323 }
324 
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327 					struct net_device *dev,
328 					int flags)
329 {
330 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331 					0, DST_OBSOLETE_FORCE_CHK, flags);
332 
333 	if (rt)
334 		rt6_info_init(rt);
335 
336 	return rt;
337 }
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p = NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368 	struct rt6_info *rt = (struct rt6_info *)dst;
369 	struct dst_entry *from = dst->from;
370 	struct inet6_dev *idev;
371 
372 	dst_destroy_metrics_generic(dst);
373 	free_percpu(rt->rt6i_pcpu);
374 	rt6_uncached_list_del(rt);
375 
376 	idev = rt->rt6i_idev;
377 	if (idev) {
378 		rt->rt6i_idev = NULL;
379 		in6_dev_put(idev);
380 	}
381 
382 	dst->from = NULL;
383 	dst_release(from);
384 }
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
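
/* RTF_CACHE clones usually carry no RTF_EXPIRES of their own (PMTU
 * learning may set one), so the check recurses through dst.from into
 * the route the clone was created from.
 */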
416 
417 /* Multipath route selection:
418  *   Hash-based function using the packet header and flow label.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Works only if this is not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC 6438 recommends using the flow label */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps we need to tune this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
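
/* Illustrative walk-through (not part of the original source): for a
 * TCP flow the selection above reduces to
 *
 *	val  = IPPROTO_TCP ^ hash(daddr) ^ hash(saddr);
 *	val ^= sport ^ dport ^ flowlabel;
 *	val ^= (val >> 7) ^ (val >> 12);
 *	index = val % candidate_count;
 *
 * so one flow always maps to the same sibling index and stays on a
 * single path.
 */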
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_chosen;
457 
458 	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route if route_chosen == 0
460 	 * (the siblings list does not include ourself)
461 	 */
462 	if (route_chosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_chosen--;
466 			if (route_chosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
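
/* Usage sketch (hypothetical caller, mirroring ip6_pol_route_lookup()
 * below):
 *
 *	rt = rt6_device_match(net, fn->leaf, &fl6->saddr, oif, flags);
 *	if (rt->rt6i_nsiblings)
 *		rt = rt6_multipath_select(rt, fl6, oif, strict);
 *
 * A hashed-to sibling that scores negative is rejected and the
 * original match is kept.
 */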
475 
476 /*
477  *	Route lookup. Holding the relevant table->tb6_lock is implied.
478  */
479 
480 static inline struct rt6_info *rt6_device_match(struct net *net,
481 						    struct rt6_info *rt,
482 						    const struct in6_addr *saddr,
483 						    int oif,
484 						    int flags)
485 {
486 	struct rt6_info *local = NULL;
487 	struct rt6_info *sprt;
488 
489 	if (!oif && ipv6_addr_any(saddr))
490 		goto out;
491 
492 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493 		struct net_device *dev = sprt->dst.dev;
494 
495 		if (oif) {
496 			if (dev->ifindex == oif)
497 				return sprt;
498 			if (dev->flags & IFF_LOOPBACK) {
499 				if (!sprt->rt6i_idev ||
500 				    sprt->rt6i_idev->dev->ifindex != oif) {
501 					if (flags & RT6_LOOKUP_F_IFACE && oif)
502 						continue;
503 					if (local && (!oif ||
504 						      local->rt6i_idev->dev->ifindex == oif))
505 						continue;
506 				}
507 				local = sprt;
508 			}
509 		} else {
510 			if (ipv6_chk_addr(net, saddr, dev,
511 					  flags & RT6_LOOKUP_F_IFACE))
512 				return sprt;
513 		}
514 	}
515 
516 	if (oif) {
517 		if (local)
518 			return local;
519 
520 		if (flags & RT6_LOOKUP_F_IFACE)
521 			return net->ipv6.ip6_null_entry;
522 	}
523 out:
524 	return rt;
525 }
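
/* With RT6_LOOKUP_F_IFACE set, failing to match the requested oif is
 * fatal and the null entry is returned; without it, the head of the
 * leaf chain is returned as a best-effort answer.
 */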
526 
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528 struct __rt6_probe_work {
529 	struct work_struct work;
530 	struct in6_addr target;
531 	struct net_device *dev;
532 };
533 
534 static void rt6_probe_deferred(struct work_struct *w)
535 {
536 	struct in6_addr mcaddr;
537 	struct __rt6_probe_work *work =
538 		container_of(w, struct __rt6_probe_work, work);
539 
540 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
542 	dev_put(work->dev);
543 	kfree(work);
544 }
545 
546 static void rt6_probe(struct rt6_info *rt)
547 {
548 	struct __rt6_probe_work *work;
549 	struct neighbour *neigh;
550 	/*
551 	 * Okay, this does not seem to be appropriate
552 	 * for now; however, we need to check whether it
553 	 * really is so - aka Router Reachability Probing.
554 	 *
555 	 * Router Reachability Probe MUST be rate-limited
556 	 * to no more than one per minute.
557 	 */
558 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
559 		return;
560 	rcu_read_lock_bh();
561 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
562 	if (neigh) {
563 		if (neigh->nud_state & NUD_VALID)
564 			goto out;
565 
566 		work = NULL;
567 		write_lock(&neigh->lock);
568 		if (!(neigh->nud_state & NUD_VALID) &&
569 		    time_after(jiffies,
570 			       neigh->updated +
571 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
572 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 			if (work)
574 				__neigh_set_probe_once(neigh);
575 		}
576 		write_unlock(&neigh->lock);
577 	} else {
578 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
579 	}
580 
581 	if (work) {
582 		INIT_WORK(&work->work, rt6_probe_deferred);
583 		work->target = rt->rt6i_gateway;
584 		dev_hold(rt->dst.dev);
585 		work->dev = rt->dst.dev;
586 		schedule_work(&work->work);
587 	}
588 
589 out:
590 	rcu_read_unlock_bh();
591 }
592 #else
593 static inline void rt6_probe(struct rt6_info *rt)
594 {
595 }
596 #endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
612 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
613 {
614 	struct neighbour *neigh;
615 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
616 
617 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 	    !(rt->rt6i_flags & RTF_GATEWAY))
619 		return RT6_NUD_SUCCEED;
620 
621 	rcu_read_lock_bh();
622 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 	if (neigh) {
624 		read_lock(&neigh->lock);
625 		if (neigh->nud_state & NUD_VALID)
626 			ret = RT6_NUD_SUCCEED;
627 #ifdef CONFIG_IPV6_ROUTER_PREF
628 		else if (!(neigh->nud_state & NUD_FAILED))
629 			ret = RT6_NUD_SUCCEED;
630 		else
631 			ret = RT6_NUD_FAIL_PROBE;
632 #endif
633 		read_unlock(&neigh->lock);
634 	} else {
635 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
636 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
637 	}
638 	rcu_read_unlock_bh();
639 
640 	return ret;
641 }
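
/* A missing neighbour entry is not fatal when router preference is
 * built in (the probe machinery will create one); without
 * CONFIG_IPV6_ROUTER_PREF it yields RT6_NUD_FAIL_DO_RR, which makes
 * find_match() below fall back to round-robin.
 */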
642 
643 static int rt6_score_route(struct rt6_info *rt, int oif,
644 			   int strict)
645 {
646 	int m;
647 
648 	m = rt6_check_dev(rt, oif);
649 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650 		return RT6_NUD_FAIL_HARD;
651 #ifdef CONFIG_IPV6_ROUTER_PREF
652 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653 #endif
654 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655 		int n = rt6_check_neigh(rt);
656 		if (n < 0)
657 			return n;
658 	}
659 	return m;
660 }
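
/* The score keeps the device match in bits 0-1 and, with router
 * preference enabled, the decoded RA preference from bit 2 up, so a
 * more-preferred router always beats a mere interface match and the
 * interface only breaks ties.
 */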
661 
662 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
663 				   int *mpri, struct rt6_info *match,
664 				   bool *do_rr)
665 {
666 	int m;
667 	bool match_do_rr = false;
668 	struct inet6_dev *idev = rt->rt6i_idev;
669 	struct net_device *dev = rt->dst.dev;
670 
671 	if (dev && !netif_carrier_ok(dev) &&
672 	    idev->cnf.ignore_routes_with_linkdown)
673 		goto out;
674 
675 	if (rt6_check_expired(rt))
676 		goto out;
677 
678 	m = rt6_score_route(rt, oif, strict);
679 	if (m == RT6_NUD_FAIL_DO_RR) {
680 		match_do_rr = true;
681 		m = 0; /* lowest valid score */
682 	} else if (m == RT6_NUD_FAIL_HARD) {
683 		goto out;
684 	}
685 
686 	if (strict & RT6_LOOKUP_F_REACHABLE)
687 		rt6_probe(rt);
688 
689 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
690 	if (m > *mpri) {
691 		*do_rr = match_do_rr;
692 		*mpri = m;
693 		match = rt;
694 	}
695 out:
696 	return match;
697 }
698 
699 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 				     struct rt6_info *rr_head,
701 				     u32 metric, int oif, int strict,
702 				     bool *do_rr)
703 {
704 	struct rt6_info *rt, *match, *cont;
705 	int mpri = -1;
706 
707 	match = NULL;
708 	cont = NULL;
709 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 		if (rt->rt6i_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 		if (rt->rt6i_metric != metric) {
720 			cont = rt;
721 			break;
722 		}
723 
724 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
725 	}
726 
727 	if (match || !cont)
728 		return match;
729 
730 	for (rt = cont; rt; rt = rt->dst.rt6_next)
731 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
732 
733 	return match;
734 }
735 
736 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737 {
738 	struct rt6_info *match, *rt0;
739 	struct net *net;
740 	bool do_rr = false;
741 
742 	rt0 = fn->rr_ptr;
743 	if (!rt0)
744 		fn->rr_ptr = rt0 = fn->leaf;
745 
746 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 			     &do_rr);
748 
749 	if (do_rr) {
750 		struct rt6_info *next = rt0->dst.rt6_next;
751 
752 		/* no entries matched; do round-robin */
753 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 			next = fn->leaf;
755 
756 		if (next != rt0)
757 			fn->rr_ptr = next;
758 	}
759 
760 	net = dev_net(rt0->dst.dev);
761 	return match ? match : net->ipv6.ip6_null_entry;
762 }
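
/* fn->rr_ptr remembers where the last scan started; when no candidate
 * was reachable (do_rr), it is advanced to the next route of the same
 * metric so successive lookups round-robin the default router list,
 * as described in the changelog at the top of this file.
 */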
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
769 #ifdef CONFIG_IPV6_ROUTE_INFO
770 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
771 		  const struct in6_addr *gwaddr)
772 {
773 	struct net *net = dev_net(dev);
774 	struct route_info *rinfo = (struct route_info *) opt;
775 	struct in6_addr prefix_buf, *prefix;
776 	unsigned int pref;
777 	unsigned long lifetime;
778 	struct rt6_info *rt;
779 
780 	if (len < sizeof(struct route_info)) {
781 		return -EINVAL;
782 	}
783 
784 	/* Sanity check for prefix_len and length */
785 	if (rinfo->length > 3) {
786 		return -EINVAL;
787 	} else if (rinfo->prefix_len > 128) {
788 		return -EINVAL;
789 	} else if (rinfo->prefix_len > 64) {
790 		if (rinfo->length < 2) {
791 			return -EINVAL;
792 		}
793 	} else if (rinfo->prefix_len > 0) {
794 		if (rinfo->length < 1) {
795 			return -EINVAL;
796 		}
797 	}
798 
799 	pref = rinfo->route_pref;
800 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
801 		return -EINVAL;
802 
803 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
804 
805 	if (rinfo->length == 3)
806 		prefix = (struct in6_addr *)rinfo->prefix;
807 	else {
808 		/* safe: ipv6_addr_prefix() copies only prefix_len bits */
809 		ipv6_addr_prefix(&prefix_buf,
810 				 (struct in6_addr *)rinfo->prefix,
811 				 rinfo->prefix_len);
812 		prefix = &prefix_buf;
813 	}
814 
815 	if (rinfo->prefix_len == 0)
816 		rt = rt6_get_dflt_router(gwaddr, dev);
817 	else
818 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
819 					gwaddr, dev->ifindex);
820 
821 	if (rt && !lifetime) {
822 		ip6_del_rt(rt);
823 		rt = NULL;
824 	}
825 
826 	if (!rt && lifetime)
827 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
828 					pref);
829 	else if (rt)
830 		rt->rt6i_flags = RTF_ROUTEINFO |
831 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
832 
833 	if (rt) {
834 		if (!addrconf_finite_timeout(lifetime))
835 			rt6_clean_expires(rt);
836 		else
837 			rt6_set_expires(rt, jiffies + HZ * lifetime);
838 
839 		ip6_rt_put(rt);
840 	}
841 	return 0;
842 }
843 #endif
844 
845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 					struct in6_addr *saddr)
847 {
848 	struct fib6_node *pn;
849 	while (1) {
850 		if (fn->fn_flags & RTN_TL_ROOT)
851 			return NULL;
852 		pn = fn->parent;
853 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 		else
856 			fn = pn;
857 		if (fn->fn_flags & RTN_RTINFO)
858 			return fn;
859 	}
860 }
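
/* The walk climbs towards the root, re-descending into a parent's
 * source subtree (CONFIG_IPV6_SUBTREES) keyed by saddr when one
 * exists; it returns the first node that carries routes (RTN_RTINFO)
 * and gives up (NULL) at the tree root.
 */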
861 
862 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 					     struct fib6_table *table,
864 					     struct flowi6 *fl6, int flags)
865 {
866 	struct fib6_node *fn;
867 	struct rt6_info *rt;
868 
869 	read_lock_bh(&table->tb6_lock);
870 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871 restart:
872 	rt = fn->leaf;
873 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
874 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
875 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
876 	if (rt == net->ipv6.ip6_null_entry) {
877 		fn = fib6_backtrack(fn, &fl6->saddr);
878 		if (fn)
879 			goto restart;
880 	}
881 	dst_use(&rt->dst, jiffies);
882 	read_unlock_bh(&table->tb6_lock);
883 	return rt;
884 
885 }
886 
887 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
888 				    int flags)
889 {
890 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891 }
892 EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
919 /* ip6_ins_rt is called with table->tb6_lock NOT held (it is taken here).
920    It takes a new route entry; if the addition fails for any reason, the
921    route is freed. In any case, if the caller does not hold a reference
922    to it, it may be destroyed.
923  */
924 
925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
926 			struct mx6_config *mxc)
927 {
928 	int err;
929 	struct fib6_table *table;
930 
931 	table = rt->rt6i_table;
932 	write_lock_bh(&table->tb6_lock);
933 	err = fib6_add(&table->tb6_root, rt, info, mxc);
934 	write_unlock_bh(&table->tb6_lock);
935 
936 	return err;
937 }
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
947 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 					   const struct in6_addr *daddr,
949 					   const struct in6_addr *saddr)
950 {
951 	struct rt6_info *rt;
952 
953 	/*
954 	 *	Clone the route.
955 	 */
956 
957 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
958 		ort = (struct rt6_info *)ort->dst.from;
959 
960 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
961 
962 	if (!rt)
963 		return NULL;
964 
965 	ip6_rt_copy_init(rt, ort);
966 	rt->rt6i_flags |= RTF_CACHE;
967 	rt->rt6i_metric = 0;
968 	rt->dst.flags |= DST_HOST;
969 	rt->rt6i_dst.addr = *daddr;
970 	rt->rt6i_dst.plen = 128;
971 
972 	if (!rt6_is_gw_or_nonexthop(ort)) {
973 		if (ort->rt6i_dst.plen != 128 &&
974 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
975 			rt->rt6i_flags |= RTF_ANYCAST;
976 #ifdef CONFIG_IPV6_SUBTREES
977 		if (rt->rt6i_src.plen && saddr) {
978 			rt->rt6i_src.addr = *saddr;
979 			rt->rt6i_src.plen = 128;
980 		}
981 #endif
982 	}
983 
984 	return rt;
985 }
986 
987 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988 {
989 	struct rt6_info *pcpu_rt;
990 
991 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
992 				  rt->dst.dev, rt->dst.flags);
993 
994 	if (!pcpu_rt)
995 		return NULL;
996 	ip6_rt_copy_init(pcpu_rt, rt);
997 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 	pcpu_rt->rt6i_flags |= RTF_PCPU;
999 	return pcpu_rt;
1000 }
1001 
1002 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1003 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004 {
1005 	struct rt6_info *pcpu_rt, **p;
1006 
1007 	p = this_cpu_ptr(rt->rt6i_pcpu);
1008 	pcpu_rt = *p;
1009 
1010 	if (pcpu_rt) {
1011 		dst_hold(&pcpu_rt->dst);
1012 		rt6_dst_from_metrics_check(pcpu_rt);
1013 	}
1014 	return pcpu_rt;
1015 }
1016 
1017 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1018 {
1019 	struct fib6_table *table = rt->rt6i_table;
1020 	struct rt6_info *pcpu_rt, *prev, **p;
1021 
1022 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1023 	if (!pcpu_rt) {
1024 		struct net *net = dev_net(rt->dst.dev);
1025 
1026 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1027 		return net->ipv6.ip6_null_entry;
1028 	}
1029 
1030 	read_lock_bh(&table->tb6_lock);
1031 	if (rt->rt6i_pcpu) {
1032 		p = this_cpu_ptr(rt->rt6i_pcpu);
1033 		prev = cmpxchg(p, NULL, pcpu_rt);
1034 		if (prev) {
1035 			/* If someone did it before us, return prev instead */
1036 			dst_destroy(&pcpu_rt->dst);
1037 			pcpu_rt = prev;
1038 		}
1039 	} else {
1040 		/* rt has been removed from the fib6 tree
1041 		 * before we have a chance to acquire the read_lock.
1042 		 * In this case, don't bother to create a pcpu rt
1043 		 * since rt is going away anyway.  The next
1044 		 * dst_check() will trigger a re-lookup.
1045 		 */
1046 		dst_destroy(&pcpu_rt->dst);
1047 		pcpu_rt = rt;
1048 	}
1049 	dst_hold(&pcpu_rt->dst);
1050 	rt6_dst_from_metrics_check(pcpu_rt);
1051 	read_unlock_bh(&table->tb6_lock);
1052 	return pcpu_rt;
1053 }
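
/* The per-cpu slot is claimed with cmpxchg(): a loser of the race
 * destroys its freshly built clone and adopts the winner's copy.
 * Re-taking tb6_lock for read keeps the rt->rt6i_pcpu test above
 * stable against concurrent removal of rt from the fib6 tree.
 */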
1054 
1055 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1056 				      struct flowi6 *fl6, int flags)
1057 {
1058 	struct fib6_node *fn, *saved_fn;
1059 	struct rt6_info *rt;
1060 	int strict = 0;
1061 
1062 	strict |= flags & RT6_LOOKUP_F_IFACE;
1063 	if (net->ipv6.devconf_all->forwarding == 0)
1064 		strict |= RT6_LOOKUP_F_REACHABLE;
1065 
1066 	read_lock_bh(&table->tb6_lock);
1067 
1068 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1069 	saved_fn = fn;
1070 
1071 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1072 		oif = 0;
1073 
1074 redo_rt6_select:
1075 	rt = rt6_select(fn, oif, strict);
1076 	if (rt->rt6i_nsiblings)
1077 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1078 	if (rt == net->ipv6.ip6_null_entry) {
1079 		fn = fib6_backtrack(fn, &fl6->saddr);
1080 		if (fn)
1081 			goto redo_rt6_select;
1082 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1083 			/* also consider unreachable route */
1084 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1085 			fn = saved_fn;
1086 			goto redo_rt6_select;
1087 		}
1088 	}
1089 
1090 
1091 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1092 		dst_use(&rt->dst, jiffies);
1093 		read_unlock_bh(&table->tb6_lock);
1094 
1095 		rt6_dst_from_metrics_check(rt);
1096 		return rt;
1097 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1098 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1099 		/* Create an RTF_CACHE clone which will not be
1100 		 * owned by the fib6 tree.  It is for the special case where
1101 		 * the daddr in the skb during the neighbor look-up is different
1102 		 * from the fl6->daddr used to look up the route here.
1103 		 */
1104 
1105 		struct rt6_info *uncached_rt;
1106 
1107 		dst_use(&rt->dst, jiffies);
1108 		read_unlock_bh(&table->tb6_lock);
1109 
1110 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1111 		dst_release(&rt->dst);
1112 
1113 		if (uncached_rt)
1114 			rt6_uncached_list_add(uncached_rt);
1115 		else
1116 			uncached_rt = net->ipv6.ip6_null_entry;
1117 
1118 		dst_hold(&uncached_rt->dst);
1119 		return uncached_rt;
1120 
1121 	} else {
1122 		/* Get a percpu copy */
1123 
1124 		struct rt6_info *pcpu_rt;
1125 
1126 		rt->dst.lastuse = jiffies;
1127 		rt->dst.__use++;
1128 		pcpu_rt = rt6_get_pcpu_route(rt);
1129 
1130 		if (pcpu_rt) {
1131 			read_unlock_bh(&table->tb6_lock);
1132 		} else {
1133 			/* We have to do the read_unlock first
1134 			 * because rt6_make_pcpu_route() may trigger
1135 			 * ip6_dst_gc() which will take the write_lock.
1136 			 */
1137 			dst_hold(&rt->dst);
1138 			read_unlock_bh(&table->tb6_lock);
1139 			pcpu_rt = rt6_make_pcpu_route(rt);
1140 			dst_release(&rt->dst);
1141 		}
1142 
1143 		return pcpu_rt;
1144 
1145 	}
1146 }
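
/* Three ways out of ip6_pol_route(), in the order tested above: the
 * fib6 entry itself (the null entry or an RTF_CACHE clone already in
 * the tree), an uncached clone for FLOWI_FLAG_KNOWN_NH, or the
 * per-cpu copy handed to ordinary lookups.
 */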
1147 
1148 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1149 					    struct flowi6 *fl6, int flags)
1150 {
1151 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1152 }
1153 
1154 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1155 						struct net_device *dev,
1156 						struct flowi6 *fl6, int flags)
1157 {
1158 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1159 		flags |= RT6_LOOKUP_F_IFACE;
1160 
1161 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1162 }
1163 
1164 void ip6_route_input(struct sk_buff *skb)
1165 {
1166 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1167 	struct net *net = dev_net(skb->dev);
1168 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1169 	struct ip_tunnel_info *tun_info;
1170 	struct flowi6 fl6 = {
1171 		.flowi6_iif = skb->dev->ifindex,
1172 		.daddr = iph->daddr,
1173 		.saddr = iph->saddr,
1174 		.flowlabel = ip6_flowinfo(iph),
1175 		.flowi6_mark = skb->mark,
1176 		.flowi6_proto = iph->nexthdr,
1177 	};
1178 
1179 	tun_info = skb_tunnel_info(skb);
1180 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1181 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1182 	skb_dst_drop(skb);
1183 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1184 }
1185 
1186 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1187 					     struct flowi6 *fl6, int flags)
1188 {
1189 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1190 }
1191 
1192 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1193 				    struct flowi6 *fl6)
1194 {
1195 	int flags = 0;
1196 	bool any_src;
1197 
1198 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1199 
1200 	any_src = ipv6_addr_any(&fl6->saddr);
1201 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1202 	    (fl6->flowi6_oif && any_src))
1203 		flags |= RT6_LOOKUP_F_IFACE;
1204 
1205 	if (!any_src)
1206 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1207 	else if (sk)
1208 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1209 
1210 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1211 }
1212 EXPORT_SYMBOL(ip6_route_output);
1213 
1214 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1215 {
1216 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1217 	struct dst_entry *new = NULL;
1218 
1219 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1220 	if (rt) {
1221 		rt6_info_init(rt);
1222 
1223 		new = &rt->dst;
1224 		new->__use = 1;
1225 		new->input = dst_discard;
1226 		new->output = dst_discard_sk;
1227 
1228 		dst_copy_metrics(new, &ort->dst);
1229 		rt->rt6i_idev = ort->rt6i_idev;
1230 		if (rt->rt6i_idev)
1231 			in6_dev_hold(rt->rt6i_idev);
1232 
1233 		rt->rt6i_gateway = ort->rt6i_gateway;
1234 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1235 		rt->rt6i_metric = 0;
1236 
1237 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1238 #ifdef CONFIG_IPV6_SUBTREES
1239 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1240 #endif
1241 
1242 		dst_free(new);
1243 	}
1244 
1245 	dst_release(dst_orig);
1246 	return new ? new : ERR_PTR(-ENOMEM);
1247 }
1248 
1249 /*
1250  *	Destination cache support functions
1251  */
1252 
1253 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1254 {
1255 	if (rt->dst.from &&
1256 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1257 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1258 }
1259 
1260 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1261 {
1262 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1263 		return NULL;
1264 
1265 	if (rt6_check_expired(rt))
1266 		return NULL;
1267 
1268 	return &rt->dst;
1269 }
1270 
1271 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1272 {
1273 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 		return &rt->dst;
1276 	else
1277 		return NULL;
1278 }
1279 
1280 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281 {
1282 	struct rt6_info *rt;
1283 
1284 	rt = (struct rt6_info *) dst;
1285 
1286 	/* All IPV6 dsts are created with ->obsolete set to the value
1287 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 	 * into this function always.
1289 	 */
1290 
1291 	rt6_dst_from_metrics_check(rt);
1292 
1293 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1294 		return rt6_dst_from_check(rt, cookie);
1295 	else
1296 		return rt6_check(rt, cookie);
1297 }
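
/* A cached dst stays valid only while its fib6 node's fn_sernum still
 * equals the cookie taken at lookup time; any tree change bumps the
 * sernum, making rt6_check() fail and forcing callers to re-look-up
 * the route.
 */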
1298 
1299 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1300 {
1301 	struct rt6_info *rt = (struct rt6_info *) dst;
1302 
1303 	if (rt) {
1304 		if (rt->rt6i_flags & RTF_CACHE) {
1305 			if (rt6_check_expired(rt)) {
1306 				ip6_del_rt(rt);
1307 				dst = NULL;
1308 			}
1309 		} else {
1310 			dst_release(dst);
1311 			dst = NULL;
1312 		}
1313 	}
1314 	return dst;
1315 }
1316 
1317 static void ip6_link_failure(struct sk_buff *skb)
1318 {
1319 	struct rt6_info *rt;
1320 
1321 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1322 
1323 	rt = (struct rt6_info *) skb_dst(skb);
1324 	if (rt) {
1325 		if (rt->rt6i_flags & RTF_CACHE) {
1326 			dst_hold(&rt->dst);
1327 			ip6_del_rt(rt);
1328 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1329 			rt->rt6i_node->fn_sernum = -1;
1330 		}
1331 	}
1332 }
1333 
1334 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1335 {
1336 	struct net *net = dev_net(rt->dst.dev);
1337 
1338 	rt->rt6i_flags |= RTF_MODIFIED;
1339 	rt->rt6i_pmtu = mtu;
1340 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1341 }
1342 
1343 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1344 				 const struct ipv6hdr *iph, u32 mtu)
1345 {
1346 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1347 
1348 	if (rt6->rt6i_flags & RTF_LOCAL)
1349 		return;
1350 
1351 	dst_confirm(dst);
1352 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1353 	if (mtu >= dst_mtu(dst))
1354 		return;
1355 
1356 	if (rt6->rt6i_flags & RTF_CACHE) {
1357 		rt6_do_update_pmtu(rt6, mtu);
1358 	} else {
1359 		const struct in6_addr *daddr, *saddr;
1360 		struct rt6_info *nrt6;
1361 
1362 		if (iph) {
1363 			daddr = &iph->daddr;
1364 			saddr = &iph->saddr;
1365 		} else if (sk) {
1366 			daddr = &sk->sk_v6_daddr;
1367 			saddr = &inet6_sk(sk)->saddr;
1368 		} else {
1369 			return;
1370 		}
1371 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1372 		if (nrt6) {
1373 			rt6_do_update_pmtu(nrt6, mtu);
1374 
1375 			/* ip6_ins_rt(nrt6) will bump the
1376 			 * rt6->rt6i_node->fn_sernum
1377 			 * which will fail the next rt6_check() and
1378 			 * invalidate the sk->sk_dst_cache.
1379 			 */
1380 			ip6_ins_rt(nrt6);
1381 		}
1382 	}
1383 }
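
/* A learned PMTU is never written to a shared fib6 entry: non-cache
 * routes are first cloned into an RTF_CACHE entry, so the reduced MTU
 * (which expires after ip6_rt_mtu_expires) is seen only for the
 * affected destination.
 */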
1384 
1385 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1386 			       struct sk_buff *skb, u32 mtu)
1387 {
1388 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1389 }
1390 
1391 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1392 		     int oif, u32 mark)
1393 {
1394 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1395 	struct dst_entry *dst;
1396 	struct flowi6 fl6;
1397 
1398 	memset(&fl6, 0, sizeof(fl6));
1399 	fl6.flowi6_oif = oif;
1400 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1401 	fl6.daddr = iph->daddr;
1402 	fl6.saddr = iph->saddr;
1403 	fl6.flowlabel = ip6_flowinfo(iph);
1404 
1405 	dst = ip6_route_output(net, NULL, &fl6);
1406 	if (!dst->error)
1407 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1408 	dst_release(dst);
1409 }
1410 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1411 
1412 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1413 {
1414 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1415 			sk->sk_bound_dev_if, sk->sk_mark);
1416 }
1417 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1418 
1419 /* Handle redirects */
1420 struct ip6rd_flowi {
1421 	struct flowi6 fl6;
1422 	struct in6_addr gateway;
1423 };
1424 
1425 static struct rt6_info *__ip6_route_redirect(struct net *net,
1426 					     struct fib6_table *table,
1427 					     struct flowi6 *fl6,
1428 					     int flags)
1429 {
1430 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1431 	struct rt6_info *rt;
1432 	struct fib6_node *fn;
1433 
1434 	/* Get the "current" route for this destination and
1435 	 * check if the redirect has come from the appropriate router.
1436 	 *
1437 	 * RFC 4861 specifies that redirects should only be
1438 	 * accepted if they come from the nexthop to the target.
1439 	 * Due to the way the routes are chosen, this notion
1440 	 * is a bit fuzzy and one might need to check all possible
1441 	 * routes.
1442 	 */
1443 
1444 	read_lock_bh(&table->tb6_lock);
1445 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1446 restart:
1447 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1448 		if (rt6_check_expired(rt))
1449 			continue;
1450 		if (rt->dst.error)
1451 			break;
1452 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1453 			continue;
1454 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1455 			continue;
1456 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1457 			continue;
1458 		break;
1459 	}
1460 
1461 	if (!rt)
1462 		rt = net->ipv6.ip6_null_entry;
1463 	else if (rt->dst.error) {
1464 		rt = net->ipv6.ip6_null_entry;
1465 		goto out;
1466 	}
1467 
1468 	if (rt == net->ipv6.ip6_null_entry) {
1469 		fn = fib6_backtrack(fn, &fl6->saddr);
1470 		if (fn)
1471 			goto restart;
1472 	}
1473 
1474 out:
1475 	dst_hold(&rt->dst);
1476 
1477 	read_unlock_bh(&table->tb6_lock);
1478 
1479 	return rt;
1480 };
1481 
1482 static struct dst_entry *ip6_route_redirect(struct net *net,
1483 					const struct flowi6 *fl6,
1484 					const struct in6_addr *gateway)
1485 {
1486 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1487 	struct ip6rd_flowi rdfl;
1488 
1489 	rdfl.fl6 = *fl6;
1490 	rdfl.gateway = *gateway;
1491 
1492 	return fib6_rule_lookup(net, &rdfl.fl6,
1493 				flags, __ip6_route_redirect);
1494 }
1495 
1496 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1497 {
1498 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1499 	struct dst_entry *dst;
1500 	struct flowi6 fl6;
1501 
1502 	memset(&fl6, 0, sizeof(fl6));
1503 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1504 	fl6.flowi6_oif = oif;
1505 	fl6.flowi6_mark = mark;
1506 	fl6.daddr = iph->daddr;
1507 	fl6.saddr = iph->saddr;
1508 	fl6.flowlabel = ip6_flowinfo(iph);
1509 
1510 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1511 	rt6_do_redirect(dst, NULL, skb);
1512 	dst_release(dst);
1513 }
1514 EXPORT_SYMBOL_GPL(ip6_redirect);
1515 
1516 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1517 			    u32 mark)
1518 {
1519 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1520 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1521 	struct dst_entry *dst;
1522 	struct flowi6 fl6;
1523 
1524 	memset(&fl6, 0, sizeof(fl6));
1525 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1526 	fl6.flowi6_oif = oif;
1527 	fl6.flowi6_mark = mark;
1528 	fl6.daddr = msg->dest;
1529 	fl6.saddr = iph->daddr;
1530 
1531 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1532 	rt6_do_redirect(dst, NULL, skb);
1533 	dst_release(dst);
1534 }
1535 
1536 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1537 {
1538 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1539 }
1540 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1541 
1542 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1543 {
1544 	struct net_device *dev = dst->dev;
1545 	unsigned int mtu = dst_mtu(dst);
1546 	struct net *net = dev_net(dev);
1547 
1548 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1549 
1550 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1551 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1552 
1553 	/*
1554 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1555 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1556 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1557 	 * rely only on pmtu discovery"
1558 	 */
1559 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1560 		mtu = IPV6_MAXPLEN;
1561 	return mtu;
1562 }
1563 
1564 static unsigned int ip6_mtu(const struct dst_entry *dst)
1565 {
1566 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1567 	unsigned int mtu = rt->rt6i_pmtu;
1568 	struct inet6_dev *idev;
1569 
1570 	if (mtu)
1571 		goto out;
1572 
1573 	mtu = dst_metric_raw(dst, RTAX_MTU);
1574 	if (mtu)
1575 		goto out;
1576 
1577 	mtu = IPV6_MIN_MTU;
1578 
1579 	rcu_read_lock();
1580 	idev = __in6_dev_get(dst->dev);
1581 	if (idev)
1582 		mtu = idev->cnf.mtu6;
1583 	rcu_read_unlock();
1584 
1585 out:
1586 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1587 }
1588 
1589 static struct dst_entry *icmp6_dst_gc_list;
1590 static DEFINE_SPINLOCK(icmp6_dst_lock);
1591 
1592 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1593 				  struct flowi6 *fl6)
1594 {
1595 	struct dst_entry *dst;
1596 	struct rt6_info *rt;
1597 	struct inet6_dev *idev = in6_dev_get(dev);
1598 	struct net *net = dev_net(dev);
1599 
1600 	if (unlikely(!idev))
1601 		return ERR_PTR(-ENODEV);
1602 
1603 	rt = ip6_dst_alloc(net, dev, 0);
1604 	if (unlikely(!rt)) {
1605 		in6_dev_put(idev);
1606 		dst = ERR_PTR(-ENOMEM);
1607 		goto out;
1608 	}
1609 
1610 	rt->dst.flags |= DST_HOST;
1611 	rt->dst.output  = ip6_output;
1612 	atomic_set(&rt->dst.__refcnt, 1);
1613 	rt->rt6i_gateway  = fl6->daddr;
1614 	rt->rt6i_dst.addr = fl6->daddr;
1615 	rt->rt6i_dst.plen = 128;
1616 	rt->rt6i_idev     = idev;
1617 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1618 
1619 	spin_lock_bh(&icmp6_dst_lock);
1620 	rt->dst.next = icmp6_dst_gc_list;
1621 	icmp6_dst_gc_list = &rt->dst;
1622 	spin_unlock_bh(&icmp6_dst_lock);
1623 
1624 	fib6_force_start_gc(net);
1625 
1626 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1627 
1628 out:
1629 	return dst;
1630 }
1631 
1632 int icmp6_dst_gc(void)
1633 {
1634 	struct dst_entry *dst, **pprev;
1635 	int more = 0;
1636 
1637 	spin_lock_bh(&icmp6_dst_lock);
1638 	pprev = &icmp6_dst_gc_list;
1639 
1640 	while ((dst = *pprev) != NULL) {
1641 		if (!atomic_read(&dst->__refcnt)) {
1642 			*pprev = dst->next;
1643 			dst_free(dst);
1644 		} else {
1645 			pprev = &dst->next;
1646 			++more;
1647 		}
1648 	}
1649 
1650 	spin_unlock_bh(&icmp6_dst_lock);
1651 
1652 	return more;
1653 }
1654 
1655 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1656 			    void *arg)
1657 {
1658 	struct dst_entry *dst, **pprev;
1659 
1660 	spin_lock_bh(&icmp6_dst_lock);
1661 	pprev = &icmp6_dst_gc_list;
1662 	while ((dst = *pprev) != NULL) {
1663 		struct rt6_info *rt = (struct rt6_info *) dst;
1664 		if (func(rt, arg)) {
1665 			*pprev = dst->next;
1666 			dst_free(dst);
1667 		} else {
1668 			pprev = &dst->next;
1669 		}
1670 	}
1671 	spin_unlock_bh(&icmp6_dst_lock);
1672 }
1673 
1674 static int ip6_dst_gc(struct dst_ops *ops)
1675 {
1676 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1677 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1678 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1679 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1680 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1681 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1682 	int entries;
1683 
1684 	entries = dst_entries_get_fast(ops);
1685 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1686 	    entries <= rt_max_size)
1687 		goto out;
1688 
1689 	net->ipv6.ip6_rt_gc_expire++;
1690 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1691 	entries = dst_entries_get_slow(ops);
1692 	if (entries < ops->gc_thresh)
1693 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1694 out:
1695 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1696 	return entries > rt_max_size;
1697 }
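
/* ip6_rt_gc_expire ratchets up on every forced pass, decays by
 * 1/2^elasticity on the way out, and is reset to half of
 * ip6_rt_gc_timeout once the table drops below gc_thresh; it is fed
 * to fib6_run_gc() as the expiry argument.
 */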
1698 
1699 static int ip6_convert_metrics(struct mx6_config *mxc,
1700 			       const struct fib6_config *cfg)
1701 {
1702 	bool ecn_ca = false;
1703 	struct nlattr *nla;
1704 	int remaining;
1705 	u32 *mp;
1706 
1707 	if (!cfg->fc_mx)
1708 		return 0;
1709 
1710 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1711 	if (unlikely(!mp))
1712 		return -ENOMEM;
1713 
1714 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1715 		int type = nla_type(nla);
1716 		u32 val;
1717 
1718 		if (!type)
1719 			continue;
1720 		if (unlikely(type > RTAX_MAX))
1721 			goto err;
1722 
1723 		if (type == RTAX_CC_ALGO) {
1724 			char tmp[TCP_CA_NAME_MAX];
1725 
1726 			nla_strlcpy(tmp, nla, sizeof(tmp));
1727 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1728 			if (val == TCP_CA_UNSPEC)
1729 				goto err;
1730 		} else {
1731 			val = nla_get_u32(nla);
1732 		}
1733 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1734 			goto err;
1735 
1736 		mp[type - 1] = val;
1737 		__set_bit(type - 1, mxc->mx_valid);
1738 	}
1739 
1740 	if (ecn_ca) {
1741 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1742 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1743 	}
1744 
1745 	mxc->mx = mp;
1746 	return 0;
1747  err:
1748 	kfree(mp);
1749 	return -EINVAL;
1750 }
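
/* Illustrative example (not from the original source): a netlink
 * RTAX_MTU attribute carrying 1400 ends up as
 *
 *	mp[RTAX_MTU - 1] = 1400;
 *	__set_bit(RTAX_MTU - 1, mxc->mx_valid);
 *
 * and only the slots flagged in mx_valid are later applied to the
 * route's metrics.
 */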
1751 
1752 int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1753 {
1754 	int err;
1755 	struct net *net = cfg->fc_nlinfo.nl_net;
1756 	struct rt6_info *rt = NULL;
1757 	struct net_device *dev = NULL;
1758 	struct inet6_dev *idev = NULL;
1759 	struct fib6_table *table;
1760 	int addr_type;
1761 
1762 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1763 		return -EINVAL;
1764 #ifndef CONFIG_IPV6_SUBTREES
1765 	if (cfg->fc_src_len)
1766 		return -EINVAL;
1767 #endif
1768 	if (cfg->fc_ifindex) {
1769 		err = -ENODEV;
1770 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1771 		if (!dev)
1772 			goto out;
1773 		idev = in6_dev_get(dev);
1774 		if (!idev)
1775 			goto out;
1776 	}
1777 
1778 	if (cfg->fc_metric == 0)
1779 		cfg->fc_metric = IP6_RT_PRIO_USER;
1780 
1781 	err = -ENOBUFS;
1782 	if (cfg->fc_nlinfo.nlh &&
1783 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1784 		table = fib6_get_table(net, cfg->fc_table);
1785 		if (!table) {
1786 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1787 			table = fib6_new_table(net, cfg->fc_table);
1788 		}
1789 	} else {
1790 		table = fib6_new_table(net, cfg->fc_table);
1791 	}
1792 
1793 	if (!table)
1794 		goto out;
1795 
1796 	rt = ip6_dst_alloc(net, NULL,
1797 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1798 
1799 	if (!rt) {
1800 		err = -ENOMEM;
1801 		goto out;
1802 	}
1803 
1804 	if (cfg->fc_flags & RTF_EXPIRES)
1805 		rt6_set_expires(rt, jiffies +
1806 				clock_t_to_jiffies(cfg->fc_expires));
1807 	else
1808 		rt6_clean_expires(rt);
1809 
1810 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1811 		cfg->fc_protocol = RTPROT_BOOT;
1812 	rt->rt6i_protocol = cfg->fc_protocol;
1813 
1814 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1815 
1816 	if (addr_type & IPV6_ADDR_MULTICAST)
1817 		rt->dst.input = ip6_mc_input;
1818 	else if (cfg->fc_flags & RTF_LOCAL)
1819 		rt->dst.input = ip6_input;
1820 	else
1821 		rt->dst.input = ip6_forward;
1822 
1823 	rt->dst.output = ip6_output;
1824 
1825 	if (cfg->fc_encap) {
1826 		struct lwtunnel_state *lwtstate;
1827 
1828 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1829 					   cfg->fc_encap, AF_INET6, cfg,
1830 					   &lwtstate);
1831 		if (err)
1832 			goto out;
1833 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1834 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1835 			rt->dst.lwtstate->orig_output = rt->dst.output;
1836 			rt->dst.output = lwtunnel_output;
1837 		}
1838 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1839 			rt->dst.lwtstate->orig_input = rt->dst.input;
1840 			rt->dst.input = lwtunnel_input;
1841 		}
1842 	}
1843 
1844 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1845 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1846 	if (rt->rt6i_dst.plen == 128)
1847 		rt->dst.flags |= DST_HOST;
1848 
1849 #ifdef CONFIG_IPV6_SUBTREES
1850 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1851 	rt->rt6i_src.plen = cfg->fc_src_len;
1852 #endif
1853 
1854 	rt->rt6i_metric = cfg->fc_metric;
1855 
1856 	/* We cannot add true routes via loopback here;
1857 	   they would result in kernel looping. Promote them to reject routes.
1858 	 */
1859 	if ((cfg->fc_flags & RTF_REJECT) ||
1860 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1861 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1862 	     !(cfg->fc_flags & RTF_LOCAL))) {
1863 		/* hold loopback dev/idev if we haven't done so. */
1864 		if (dev != net->loopback_dev) {
1865 			if (dev) {
1866 				dev_put(dev);
1867 				in6_dev_put(idev);
1868 			}
1869 			dev = net->loopback_dev;
1870 			dev_hold(dev);
1871 			idev = in6_dev_get(dev);
1872 			if (!idev) {
1873 				err = -ENODEV;
1874 				goto out;
1875 			}
1876 		}
1877 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1878 		switch (cfg->fc_type) {
1879 		case RTN_BLACKHOLE:
1880 			rt->dst.error = -EINVAL;
1881 			rt->dst.output = dst_discard_sk;
1882 			rt->dst.input = dst_discard;
1883 			break;
1884 		case RTN_PROHIBIT:
1885 			rt->dst.error = -EACCES;
1886 			rt->dst.output = ip6_pkt_prohibit_out;
1887 			rt->dst.input = ip6_pkt_prohibit;
1888 			break;
1889 		case RTN_THROW:
1890 		case RTN_UNREACHABLE:
1891 		default:
1892 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1893 					: (cfg->fc_type == RTN_UNREACHABLE)
1894 					? -EHOSTUNREACH : -ENETUNREACH;
1895 			rt->dst.output = ip6_pkt_discard_out;
1896 			rt->dst.input = ip6_pkt_discard;
1897 			break;
1898 		}
1899 		goto install_route;
1900 	}
1901 
1902 	if (cfg->fc_flags & RTF_GATEWAY) {
1903 		const struct in6_addr *gw_addr;
1904 		int gwa_type;
1905 
1906 		gw_addr = &cfg->fc_gateway;
1907 		gwa_type = ipv6_addr_type(gw_addr);
1908 
1909 		/* if gw_addr is local we will fail to detect this in case
1910 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1911 		 * will return the already-added prefix route via the interface
1912 		 * that the prefix route was assigned to, which might be non-loopback.
1913 		 */
1914 		err = -EINVAL;
1915 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1916 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1917 					    dev : NULL, 0, 0))
1918 			goto out;
1919 
1920 		rt->rt6i_gateway = *gw_addr;
1921 
1922 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1923 			struct rt6_info *grt;
1924 
1925 			/* IPv6 strictly forbids using non-link-local
1926 			   addresses as the nexthop address.
1927 			   Otherwise, the router will not be able to send redirects.
1928 			   It is very good, but in some (rare!) circumstances
1929 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1930 			   some exceptions. --ANK
1931 			 */
1932 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1933 				goto out;
1934 
1935 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1936 
1937 			err = -EHOSTUNREACH;
1938 			if (!grt)
1939 				goto out;
1940 			if (dev) {
1941 				if (dev != grt->dst.dev) {
1942 					ip6_rt_put(grt);
1943 					goto out;
1944 				}
1945 			} else {
1946 				dev = grt->dst.dev;
1947 				idev = grt->rt6i_idev;
1948 				dev_hold(dev);
1949 				in6_dev_hold(grt->rt6i_idev);
1950 			}
1951 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1952 				err = 0;
1953 			ip6_rt_put(grt);
1954 
1955 			if (err)
1956 				goto out;
1957 		}
1958 		err = -EINVAL;
1959 		if (!dev || (dev->flags & IFF_LOOPBACK))
1960 			goto out;
1961 	}
1962 
1963 	err = -ENODEV;
1964 	if (!dev)
1965 		goto out;
1966 
1967 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1968 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1969 			err = -EINVAL;
1970 			goto out;
1971 		}
1972 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1973 		rt->rt6i_prefsrc.plen = 128;
1974 	} else
1975 		rt->rt6i_prefsrc.plen = 0;
1976 
1977 	rt->rt6i_flags = cfg->fc_flags;
1978 
1979 install_route:
1980 	rt->dst.dev = dev;
1981 	rt->rt6i_idev = idev;
1982 	rt->rt6i_table = table;
1983 
1984 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1985 
1986 	*rt_ret = rt;
1987 
1988 	return 0;
1989 out:
1990 	if (dev)
1991 		dev_put(dev);
1992 	if (idev)
1993 		in6_dev_put(idev);
1994 	if (rt)
1995 		dst_free(&rt->dst);
1996 
1997 	*rt_ret = NULL;
1998 
1999 	return err;
2000 }
2001 
2002 int ip6_route_add(struct fib6_config *cfg)
2003 {
2004 	struct mx6_config mxc = { .mx = NULL, };
2005 	struct rt6_info *rt = NULL;
2006 	int err;
2007 
2008 	err = ip6_route_info_create(cfg, &rt);
2009 	if (err)
2010 		goto out;
2011 
2012 	err = ip6_convert_metrics(&mxc, cfg);
2013 	if (err)
2014 		goto out;
2015 
2016 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2017 
2018 	kfree(mxc.mx);
2019 
2020 	return err;
2021 out:
2022 	if (rt)
2023 		dst_free(&rt->dst);
2024 
2025 	return err;
2026 }
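
/* Illustrative sketch (not a call site in this file): a caller holding the
 * RTNL lock could add a static route by filling a fib6_config and calling
 * ip6_route_add(). The ifindex, prefix length, and netns below are
 * hypothetical values chosen for illustration; cfg.fc_dst would be filled
 * with the destination prefix before the call.
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= 2,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP,
 *		.fc_nlinfo.nl_net = &init_net,
 *	};
 *	int err = ip6_route_add(&cfg);
 */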
2027 
2028 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2029 {
2030 	int err;
2031 	struct fib6_table *table;
2032 	struct net *net = dev_net(rt->dst.dev);
2033 
2034 	if (rt == net->ipv6.ip6_null_entry ||
2035 	    rt->dst.flags & DST_NOCACHE) {
2036 		err = -ENOENT;
2037 		goto out;
2038 	}
2039 
2040 	table = rt->rt6i_table;
2041 	write_lock_bh(&table->tb6_lock);
2042 	err = fib6_del(rt, info);
2043 	write_unlock_bh(&table->tb6_lock);
2044 
2045 out:
2046 	ip6_rt_put(rt);
2047 	return err;
2048 }
2049 
2050 int ip6_del_rt(struct rt6_info *rt)
2051 {
2052 	struct nl_info info = {
2053 		.nl_net = dev_net(rt->dst.dev),
2054 	};
2055 	return __ip6_del_rt(rt, &info);
2056 }
2057 
2058 static int ip6_route_del(struct fib6_config *cfg)
2059 {
2060 	struct fib6_table *table;
2061 	struct fib6_node *fn;
2062 	struct rt6_info *rt;
2063 	int err = -ESRCH;
2064 
2065 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2066 	if (!table)
2067 		return err;
2068 
2069 	read_lock_bh(&table->tb6_lock);
2070 
2071 	fn = fib6_locate(&table->tb6_root,
2072 			 &cfg->fc_dst, cfg->fc_dst_len,
2073 			 &cfg->fc_src, cfg->fc_src_len);
2074 
2075 	if (fn) {
2076 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2077 			if ((rt->rt6i_flags & RTF_CACHE) &&
2078 			    !(cfg->fc_flags & RTF_CACHE))
2079 				continue;
2080 			if (cfg->fc_ifindex &&
2081 			    (!rt->dst.dev ||
2082 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2083 				continue;
2084 			if (cfg->fc_flags & RTF_GATEWAY &&
2085 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2086 				continue;
2087 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2088 				continue;
2089 			dst_hold(&rt->dst);
2090 			read_unlock_bh(&table->tb6_lock);
2091 
2092 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2093 		}
2094 	}
2095 	read_unlock_bh(&table->tb6_lock);
2096 
2097 	return err;
2098 }
2099 
2100 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2101 {
2102 	struct net *net = dev_net(skb->dev);
2103 	struct netevent_redirect netevent;
2104 	struct rt6_info *rt, *nrt = NULL;
2105 	struct ndisc_options ndopts;
2106 	struct inet6_dev *in6_dev;
2107 	struct neighbour *neigh;
2108 	struct rd_msg *msg;
2109 	int optlen, on_link;
2110 	u8 *lladdr;
2111 
2112 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2113 	optlen -= sizeof(*msg);
2114 
2115 	if (optlen < 0) {
2116 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2117 		return;
2118 	}
2119 
2120 	msg = (struct rd_msg *)icmp6_hdr(skb);
2121 
2122 	if (ipv6_addr_is_multicast(&msg->dest)) {
2123 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2124 		return;
2125 	}
2126 
2127 	on_link = 0;
2128 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2129 		on_link = 1;
2130 	} else if (ipv6_addr_type(&msg->target) !=
2131 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2132 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2133 		return;
2134 	}
2135 
2136 	in6_dev = __in6_dev_get(skb->dev);
2137 	if (!in6_dev)
2138 		return;
2139 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2140 		return;
2141 
2142 	/* RFC2461 8.1:
2143 	 *	The IP source address of the Redirect MUST be the same as the current
2144 	 *	first-hop router for the specified ICMP Destination Address.
2145 	 */
2146 
2147 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2148 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2149 		return;
2150 	}
2151 
2152 	lladdr = NULL;
2153 	if (ndopts.nd_opts_tgt_lladdr) {
2154 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2155 					     skb->dev);
2156 		if (!lladdr) {
2157 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2158 			return;
2159 		}
2160 	}
2161 
2162 	rt = (struct rt6_info *) dst;
2163 	if (rt == net->ipv6.ip6_null_entry) {
2164 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2165 		return;
2166 	}
2167 
2168 	/* Redirect received -> path was valid.
2169 	 * Redirects are sent only in response to data packets,
2170 	 * so this nexthop is apparently reachable. --ANK
2171 	 */
2172 	dst_confirm(&rt->dst);
2173 
2174 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2175 	if (!neigh)
2176 		return;
2177 
2178 	/*
2179 	 *	We have finally decided to accept it.
2180 	 */
2181 
2182 	neigh_update(neigh, lladdr, NUD_STALE,
2183 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2184 		     NEIGH_UPDATE_F_OVERRIDE|
2185 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2186 				     NEIGH_UPDATE_F_ISROUTER))
2187 		     );
2188 
2189 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2190 	if (!nrt)
2191 		goto out;
2192 
2193 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2194 	if (on_link)
2195 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2196 
2197 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2198 
2199 	if (ip6_ins_rt(nrt))
2200 		goto out;
2201 
2202 	netevent.old = &rt->dst;
2203 	netevent.new = &nrt->dst;
2204 	netevent.daddr = &msg->dest;
2205 	netevent.neigh = neigh;
2206 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2207 
2208 	if (rt->rt6i_flags & RTF_CACHE) {
2209 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2210 		ip6_del_rt(rt);
2211 	}
2212 
2213 out:
2214 	neigh_release(neigh);
2215 }
2216 
2217 /*
2218  *	Misc support functions
2219  */
2220 
2221 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2222 {
2223 	BUG_ON(from->dst.from);
2224 
2225 	rt->rt6i_flags &= ~RTF_EXPIRES;
2226 	dst_hold(&from->dst);
2227 	rt->dst.from = &from->dst;
2228 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2229 }
2230 
2231 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2232 {
2233 	rt->dst.input = ort->dst.input;
2234 	rt->dst.output = ort->dst.output;
2235 	rt->rt6i_dst = ort->rt6i_dst;
2236 	rt->dst.error = ort->dst.error;
2237 	rt->rt6i_idev = ort->rt6i_idev;
2238 	if (rt->rt6i_idev)
2239 		in6_dev_hold(rt->rt6i_idev);
2240 	rt->dst.lastuse = jiffies;
2241 	rt->rt6i_gateway = ort->rt6i_gateway;
2242 	rt->rt6i_flags = ort->rt6i_flags;
2243 	rt6_set_from(rt, ort);
2244 	rt->rt6i_metric = ort->rt6i_metric;
2245 #ifdef CONFIG_IPV6_SUBTREES
2246 	rt->rt6i_src = ort->rt6i_src;
2247 #endif
2248 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2249 	rt->rt6i_table = ort->rt6i_table;
2250 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2251 }
2252 
2253 #ifdef CONFIG_IPV6_ROUTE_INFO
2254 static struct rt6_info *rt6_get_route_info(struct net *net,
2255 					   const struct in6_addr *prefix, int prefixlen,
2256 					   const struct in6_addr *gwaddr, int ifindex)
2257 {
2258 	struct fib6_node *fn;
2259 	struct rt6_info *rt = NULL;
2260 	struct fib6_table *table;
2261 
2262 	table = fib6_get_table(net, RT6_TABLE_INFO);
2263 	if (!table)
2264 		return NULL;
2265 
2266 	read_lock_bh(&table->tb6_lock);
2267 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2268 	if (!fn)
2269 		goto out;
2270 
2271 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2272 		if (rt->dst.dev->ifindex != ifindex)
2273 			continue;
2274 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2275 			continue;
2276 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2277 			continue;
2278 		dst_hold(&rt->dst);
2279 		break;
2280 	}
2281 out:
2282 	read_unlock_bh(&table->tb6_lock);
2283 	return rt;
2284 }
2285 
2286 static struct rt6_info *rt6_add_route_info(struct net *net,
2287 					   const struct in6_addr *prefix, int prefixlen,
2288 					   const struct in6_addr *gwaddr, int ifindex,
2289 					   unsigned int pref)
2290 {
2291 	struct fib6_config cfg = {
2292 		.fc_table	= RT6_TABLE_INFO,
2293 		.fc_metric	= IP6_RT_PRIO_USER,
2294 		.fc_ifindex	= ifindex,
2295 		.fc_dst_len	= prefixlen,
2296 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2297 				  RTF_UP | RTF_PREF(pref),
2298 		.fc_nlinfo.portid = 0,
2299 		.fc_nlinfo.nlh = NULL,
2300 		.fc_nlinfo.nl_net = net,
2301 	};
2302 
2303 	cfg.fc_dst = *prefix;
2304 	cfg.fc_gateway = *gwaddr;
2305 
2306 	/* We should treat it as a default route if prefix length is 0. */
2307 	if (!prefixlen)
2308 		cfg.fc_flags |= RTF_DEFAULT;
2309 
2310 	ip6_route_add(&cfg);
2311 
2312 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2313 }
2314 #endif
2315 
2316 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2317 {
2318 	struct rt6_info *rt;
2319 	struct fib6_table *table;
2320 
2321 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2322 	if (!table)
2323 		return NULL;
2324 
2325 	read_lock_bh(&table->tb6_lock);
2326 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2327 		if (dev == rt->dst.dev &&
2328 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2329 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2330 			break;
2331 	}
2332 	if (rt)
2333 		dst_hold(&rt->dst);
2334 	read_unlock_bh(&table->tb6_lock);
2335 	return rt;
2336 }
2337 
2338 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2339 				     struct net_device *dev,
2340 				     unsigned int pref)
2341 {
2342 	struct fib6_config cfg = {
2343 		.fc_table	= RT6_TABLE_DFLT,
2344 		.fc_metric	= IP6_RT_PRIO_USER,
2345 		.fc_ifindex	= dev->ifindex,
2346 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2347 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2348 		.fc_nlinfo.portid = 0,
2349 		.fc_nlinfo.nlh = NULL,
2350 		.fc_nlinfo.nl_net = dev_net(dev),
2351 	};
2352 
2353 	cfg.fc_gateway = *gwaddr;
2354 
2355 	ip6_route_add(&cfg);
2356 
2357 	return rt6_get_dflt_router(gwaddr, dev);
2358 }
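
/* Simplified sketch of the intended caller (ndisc_router_discovery() in
 * net/ipv6/ndisc.c): on receipt of a router advertisement, ndisc looks the
 * router up and, if it is new and advertises a non-zero lifetime, adds it,
 * roughly as
 *
 *	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
 *	if (!rt && lifetime)
 *		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr,
 *					 skb->dev, pref);
 */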
2359 
2360 void rt6_purge_dflt_routers(struct net *net)
2361 {
2362 	struct rt6_info *rt;
2363 	struct fib6_table *table;
2364 
2365 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2366 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2367 	if (!table)
2368 		return;
2369 
2370 restart:
2371 	read_lock_bh(&table->tb6_lock);
2372 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2373 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2374 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2375 			dst_hold(&rt->dst);
2376 			read_unlock_bh(&table->tb6_lock);
2377 			ip6_del_rt(rt);
2378 			goto restart;
2379 		}
2380 	}
2381 	read_unlock_bh(&table->tb6_lock);
2382 }
2383 
2384 static void rtmsg_to_fib6_config(struct net *net,
2385 				 struct in6_rtmsg *rtmsg,
2386 				 struct fib6_config *cfg)
2387 {
2388 	memset(cfg, 0, sizeof(*cfg));
2389 
2390 	cfg->fc_table = RT6_TABLE_MAIN;
2391 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2392 	cfg->fc_metric = rtmsg->rtmsg_metric;
2393 	cfg->fc_expires = rtmsg->rtmsg_info;
2394 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2395 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2396 	cfg->fc_flags = rtmsg->rtmsg_flags;
2397 
2398 	cfg->fc_nlinfo.nl_net = net;
2399 
2400 	cfg->fc_dst = rtmsg->rtmsg_dst;
2401 	cfg->fc_src = rtmsg->rtmsg_src;
2402 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2403 }
2404 
2405 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2406 {
2407 	struct fib6_config cfg;
2408 	struct in6_rtmsg rtmsg;
2409 	int err;
2410 
2411 	switch (cmd) {
2412 	case SIOCADDRT:		/* Add a route */
2413 	case SIOCDELRT:		/* Delete a route */
2414 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2415 			return -EPERM;
2416 		err = copy_from_user(&rtmsg, arg,
2417 				     sizeof(struct in6_rtmsg));
2418 		if (err)
2419 			return -EFAULT;
2420 
2421 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2422 
2423 		rtnl_lock();
2424 		switch (cmd) {
2425 		case SIOCADDRT:
2426 			err = ip6_route_add(&cfg);
2427 			break;
2428 		case SIOCDELRT:
2429 			err = ip6_route_del(&cfg);
2430 			break;
2431 		default:
2432 			err = -EINVAL;
2433 		}
2434 		rtnl_unlock();
2435 
2436 		return err;
2437 	}
2438 
2439 	return -EINVAL;
2440 }
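
/* Userspace sketch under stated assumptions: these legacy ioctls are what
 * net-tools' "route -A inet6 add" uses, with struct in6_rtmsg from
 * <linux/ipv6_route.h>. The prefix and device name are illustrative only.
 *
 *	struct in6_rtmsg rtmsg;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	memset(&rtmsg, 0, sizeof(rtmsg));
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 32;
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	if (ioctl(fd, SIOCADDRT, &rtmsg) < 0)
 *		perror("SIOCADDRT");
 */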
2441 
2442 /*
2443  *	Drop the packet on the floor
2444  */
2445 
2446 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2447 {
2448 	int type;
2449 	struct dst_entry *dst = skb_dst(skb);
2450 	switch (ipstats_mib_noroutes) {
2451 	case IPSTATS_MIB_INNOROUTES:
2452 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2453 		if (type == IPV6_ADDR_ANY) {
2454 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2455 				      IPSTATS_MIB_INADDRERRORS);
2456 			break;
2457 		}
2458 		/* FALLTHROUGH */
2459 	case IPSTATS_MIB_OUTNOROUTES:
2460 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2461 			      ipstats_mib_noroutes);
2462 		break;
2463 	}
2464 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2465 	kfree_skb(skb);
2466 	return 0;
2467 }
2468 
2469 static int ip6_pkt_discard(struct sk_buff *skb)
2470 {
2471 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2472 }
2473 
2474 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2475 {
2476 	skb->dev = skb_dst(skb)->dev;
2477 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2478 }
2479 
2480 static int ip6_pkt_prohibit(struct sk_buff *skb)
2481 {
2482 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2483 }
2484 
2485 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2486 {
2487 	skb->dev = skb_dst(skb)->dev;
2488 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2489 }
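
/* For reference, the reject route types configured in
 * ip6_route_info_create() map onto the handlers above as follows (this
 * only summarizes the switch there; it adds no behaviour):
 *
 *	RTN_BLACKHOLE	-> dst_discard / dst_discard_sk,	-EINVAL
 *	RTN_PROHIBIT	-> ip6_pkt_prohibit(_out),		-EACCES
 *	RTN_THROW	-> ip6_pkt_discard(_out),		-EAGAIN
 *	RTN_UNREACHABLE	-> ip6_pkt_discard(_out),		-EHOSTUNREACH
 *	other reject	-> ip6_pkt_discard(_out),		-ENETUNREACH
 */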
2490 
2491 /*
2492  *	Allocate a dst for local (unicast / anycast) address.
2493  */
2494 
2495 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2496 				    const struct in6_addr *addr,
2497 				    bool anycast)
2498 {
2499 	struct net *net = dev_net(idev->dev);
2500 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2501 					    DST_NOCOUNT);
2502 	if (!rt)
2503 		return ERR_PTR(-ENOMEM);
2504 
2505 	in6_dev_hold(idev);
2506 
2507 	rt->dst.flags |= DST_HOST;
2508 	rt->dst.input = ip6_input;
2509 	rt->dst.output = ip6_output;
2510 	rt->rt6i_idev = idev;
2511 
2512 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2513 	if (anycast)
2514 		rt->rt6i_flags |= RTF_ANYCAST;
2515 	else
2516 		rt->rt6i_flags |= RTF_LOCAL;
2517 
2518 	rt->rt6i_gateway  = *addr;
2519 	rt->rt6i_dst.addr = *addr;
2520 	rt->rt6i_dst.plen = 128;
2521 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2522 	rt->dst.flags |= DST_NOCACHE;
2523 
2524 	atomic_set(&rt->dst.__refcnt, 1);
2525 
2526 	return rt;
2527 }
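
/* Condensed sketch of the expected use; the real call sites live in
 * net/ipv6/addrconf.c and split allocation and insertion across the
 * address life cycle. Every local or anycast address ends up backed by
 * one of these /128 routes, roughly as
 *
 *	rt = addrconf_dst_alloc(idev, &addr, false);
 *	if (!IS_ERR(rt))
 *		ip6_ins_rt(rt);
 */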
2528 
2529 int ip6_route_get_saddr(struct net *net,
2530 			struct rt6_info *rt,
2531 			const struct in6_addr *daddr,
2532 			unsigned int prefs,
2533 			struct in6_addr *saddr)
2534 {
2535 	struct inet6_dev *idev =
2536 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2537 	int err = 0;
2538 	if (rt && rt->rt6i_prefsrc.plen)
2539 		*saddr = rt->rt6i_prefsrc.addr;
2540 	else
2541 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2542 					 daddr, prefs, saddr);
2543 	return err;
2544 }
2545 
2546 /* remove a deleted IP address from prefsrc entries */
2547 struct arg_dev_net_ip {
2548 	struct net_device *dev;
2549 	struct net *net;
2550 	struct in6_addr *addr;
2551 };
2552 
2553 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2554 {
2555 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2556 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2557 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2558 
2559 	if (((void *)rt->dst.dev == dev || !dev) &&
2560 	    rt != net->ipv6.ip6_null_entry &&
2561 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2562 		/* remove prefsrc entry */
2563 		rt->rt6i_prefsrc.plen = 0;
2564 	}
2565 	return 0;
2566 }
2567 
2568 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2569 {
2570 	struct net *net = dev_net(ifp->idev->dev);
2571 	struct arg_dev_net_ip adni = {
2572 		.dev = ifp->idev->dev,
2573 		.net = net,
2574 		.addr = &ifp->addr,
2575 	};
2576 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2577 }
2578 
2579 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2580 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2581 
2582 /* Remove routers and update dst entries when a gateway turns into a host. */
2583 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2584 {
2585 	struct in6_addr *gateway = (struct in6_addr *)arg;
2586 
2587 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2588 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2589 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2590 		return -1;
2591 	}
2592 	return 0;
2593 }
2594 
2595 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2596 {
2597 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2598 }
2599 
2600 struct arg_dev_net {
2601 	struct net_device *dev;
2602 	struct net *net;
2603 };
2604 
2605 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2606 {
2607 	const struct arg_dev_net *adn = arg;
2608 	const struct net_device *dev = adn->dev;
2609 
2610 	if ((rt->dst.dev == dev || !dev) &&
2611 	    rt != adn->net->ipv6.ip6_null_entry)
2612 		return -1;
2613 
2614 	return 0;
2615 }
2616 
2617 void rt6_ifdown(struct net *net, struct net_device *dev)
2618 {
2619 	struct arg_dev_net adn = {
2620 		.dev = dev,
2621 		.net = net,
2622 	};
2623 
2624 	fib6_clean_all(net, fib6_ifdown, &adn);
2625 	icmp6_clean_all(fib6_ifdown, &adn);
2626 	if (dev)
2627 		rt6_uncached_list_flush_dev(net, dev);
2628 }
2629 
2630 struct rt6_mtu_change_arg {
2631 	struct net_device *dev;
2632 	unsigned int mtu;
2633 };
2634 
2635 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2636 {
2637 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2638 	struct inet6_dev *idev;
2639 
2640 	/* In IPv6, PMTU discovery is not optional,
2641 	   so the RTAX_MTU lock cannot disable it.
2642 	   We still use this lock to block changes
2643 	   caused by addrconf/ndisc.
2644 	*/
2645 
2646 	idev = __in6_dev_get(arg->dev);
2647 	if (!idev)
2648 		return 0;
2649 
2650 	/* On an administrative MTU increase, there is no way to discover
2651 	   an IPv6 PMTU increase, so the PMTU must be updated here.
2652 	   Since RFC 1981 doesn't cover administrative MTU increases,
2653 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2654 	 */
2655 	/*
2656 	   If the new MTU is less than the route PMTU, the new MTU will be
2657 	   the lowest MTU in the path; update the route PMTU to reflect the
2658 	   decrease. If the new MTU is greater than the route PMTU, and the
2659 	   old MTU was the lowest MTU in the path, update the route PMTU to
2660 	   reflect the increase. In that case, if another node's MTU is now
2661 	   the lowest in the path, a Packet Too Big message will trigger
2662 	   PMTU discovery again.
2663 	 */
2664 	if (rt->dst.dev == arg->dev &&
2665 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2666 		if (rt->rt6i_flags & RTF_CACHE) {
2667 			/* For RTF_CACHE with rt6i_pmtu == 0
2668 			 * (i.e. a redirected route),
2669 			 * the metrics of its rt->dst.from have already
2670 			 * been updated.
2671 			 */
2672 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2673 				rt->rt6i_pmtu = arg->mtu;
2674 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2675 			   (dst_mtu(&rt->dst) < arg->mtu &&
2676 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2677 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2678 		}
2679 	}
2680 	return 0;
2681 }
2682 
2683 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2684 {
2685 	struct rt6_mtu_change_arg arg = {
2686 		.dev = dev,
2687 		.mtu = mtu,
2688 	};
2689 
2690 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2691 }
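
/* Worked example with hypothetical numbers, assuming idev->cnf.mtu6 still
 * holds the old device MTU while this walk runs (the addrconf notifier
 * updates cnf.mtu6 only after calling rt6_mtu_change()): for an unlocked,
 * non-RTF_CACHE route on the device whose RTAX_MTU is 1280 because the
 * old device MTU was 1280, raising the device MTU to 1500 rewrites
 * RTAX_MTU to 1500, since dst_mtu() == cnf.mtu6; lowering the device MTU
 * below a route's MTU always rewrites RTAX_MTU downwards.
 */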
2692 
2693 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2694 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2695 	[RTA_OIF]               = { .type = NLA_U32 },
2696 	[RTA_IIF]		= { .type = NLA_U32 },
2697 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2698 	[RTA_METRICS]           = { .type = NLA_NESTED },
2699 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2700 	[RTA_PREF]              = { .type = NLA_U8 },
2701 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2702 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2703 };
2704 
2705 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2706 			      struct fib6_config *cfg)
2707 {
2708 	struct rtmsg *rtm;
2709 	struct nlattr *tb[RTA_MAX+1];
2710 	unsigned int pref;
2711 	int err;
2712 
2713 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2714 	if (err < 0)
2715 		goto errout;
2716 
2717 	err = -EINVAL;
2718 	rtm = nlmsg_data(nlh);
2719 	memset(cfg, 0, sizeof(*cfg));
2720 
2721 	cfg->fc_table = rtm->rtm_table;
2722 	cfg->fc_dst_len = rtm->rtm_dst_len;
2723 	cfg->fc_src_len = rtm->rtm_src_len;
2724 	cfg->fc_flags = RTF_UP;
2725 	cfg->fc_protocol = rtm->rtm_protocol;
2726 	cfg->fc_type = rtm->rtm_type;
2727 
2728 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2729 	    rtm->rtm_type == RTN_BLACKHOLE ||
2730 	    rtm->rtm_type == RTN_PROHIBIT ||
2731 	    rtm->rtm_type == RTN_THROW)
2732 		cfg->fc_flags |= RTF_REJECT;
2733 
2734 	if (rtm->rtm_type == RTN_LOCAL)
2735 		cfg->fc_flags |= RTF_LOCAL;
2736 
2737 	if (rtm->rtm_flags & RTM_F_CLONED)
2738 		cfg->fc_flags |= RTF_CACHE;
2739 
2740 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2741 	cfg->fc_nlinfo.nlh = nlh;
2742 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2743 
2744 	if (tb[RTA_GATEWAY]) {
2745 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2746 		cfg->fc_flags |= RTF_GATEWAY;
2747 	}
2748 
2749 	if (tb[RTA_DST]) {
2750 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2751 
2752 		if (nla_len(tb[RTA_DST]) < plen)
2753 			goto errout;
2754 
2755 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2756 	}
2757 
2758 	if (tb[RTA_SRC]) {
2759 		int plen = (rtm->rtm_src_len + 7) >> 3;
2760 
2761 		if (nla_len(tb[RTA_SRC]) < plen)
2762 			goto errout;
2763 
2764 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2765 	}
2766 
2767 	if (tb[RTA_PREFSRC])
2768 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2769 
2770 	if (tb[RTA_OIF])
2771 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2772 
2773 	if (tb[RTA_PRIORITY])
2774 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2775 
2776 	if (tb[RTA_METRICS]) {
2777 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2778 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2779 	}
2780 
2781 	if (tb[RTA_TABLE])
2782 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2783 
2784 	if (tb[RTA_MULTIPATH]) {
2785 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2786 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2787 	}
2788 
2789 	if (tb[RTA_PREF]) {
2790 		pref = nla_get_u8(tb[RTA_PREF]);
2791 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2792 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2793 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2794 		cfg->fc_flags |= RTF_PREF(pref);
2795 	}
2796 
2797 	if (tb[RTA_ENCAP])
2798 		cfg->fc_encap = tb[RTA_ENCAP];
2799 
2800 	if (tb[RTA_ENCAP_TYPE])
2801 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2802 
2803 	err = 0;
2804 errout:
2805 	return err;
2806 }
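
/* For orientation: a typical RTM_NEWROUTE request parsed above is what
 * "ip -6 route add 2001:db8::/32 via fe80::1 dev eth0" generates (the
 * addresses and device are illustrative): rtm_dst_len = 32 plus RTA_DST,
 * RTA_GATEWAY and RTA_OIF attributes.
 */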
2807 
2808 struct rt6_nh {
2809 	struct rt6_info *rt6_info;
2810 	struct fib6_config r_cfg;
2811 	struct mx6_config mxc;
2812 	struct list_head next;
2813 };
2814 
2815 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2816 {
2817 	struct rt6_nh *nh;
2818 
2819 	list_for_each_entry(nh, rt6_nh_list, next) {
2820 		pr_warn("multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2821 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2822 		        nh->r_cfg.fc_ifindex);
2823 	}
2824 }
2825 
2826 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2827 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2828 {
2829 	struct rt6_nh *nh;
2830 	struct rt6_info *rtnh;
2831 	int err = -EEXIST;
2832 
2833 	list_for_each_entry(nh, rt6_nh_list, next) {
2834 		/* check if rt6_info already exists */
2835 		rtnh = nh->rt6_info;
2836 
2837 		if (rtnh->dst.dev == rt->dst.dev &&
2838 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2839 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2840 				    &rt->rt6i_gateway))
2841 			return err;
2842 	}
2843 
2844 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2845 	if (!nh)
2846 		return -ENOMEM;
2847 	nh->rt6_info = rt;
2848 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2849 	if (err) {
2850 		kfree(nh);
2851 		return err;
2852 	}
2853 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2854 	list_add_tail(&nh->next, rt6_nh_list);
2855 
2856 	return 0;
2857 }
2858 
2859 static int ip6_route_multipath_add(struct fib6_config *cfg)
2860 {
2861 	struct fib6_config r_cfg;
2862 	struct rtnexthop *rtnh;
2863 	struct rt6_info *rt;
2864 	struct rt6_nh *err_nh;
2865 	struct rt6_nh *nh, *nh_safe;
2866 	int remaining;
2867 	int attrlen;
2868 	int err = 1;
2869 	int nhn = 0;
2870 	int replace = (cfg->fc_nlinfo.nlh &&
2871 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2872 	LIST_HEAD(rt6_nh_list);
2873 
2874 	remaining = cfg->fc_mp_len;
2875 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2876 
2877 	/* Parse a multipath entry and build a list (rt6_nh_list) with
2878 	 * one rt6_info struct per nexthop
2879 	 */
2880 	while (rtnh_ok(rtnh, remaining)) {
2881 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2882 		if (rtnh->rtnh_ifindex)
2883 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2884 
2885 		attrlen = rtnh_attrlen(rtnh);
2886 		if (attrlen > 0) {
2887 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2888 
2889 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2890 			if (nla) {
2891 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2892 				r_cfg.fc_flags |= RTF_GATEWAY;
2893 			}
2894 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2895 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2896 			if (nla)
2897 				r_cfg.fc_encap_type = nla_get_u16(nla);
2898 		}
2899 
2900 		err = ip6_route_info_create(&r_cfg, &rt);
2901 		if (err)
2902 			goto cleanup;
2903 
2904 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2905 		if (err) {
2906 			dst_free(&rt->dst);
2907 			goto cleanup;
2908 		}
2909 
2910 		rtnh = rtnh_next(rtnh, &remaining);
2911 	}
2912 
2913 	err_nh = NULL;
2914 	list_for_each_entry(nh, &rt6_nh_list, next) {
2915 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2916 		/* nh->rt6_info is used or freed at this point, reset to NULL */
2917 		nh->rt6_info = NULL;
2918 		if (err) {
2919 			if (replace && nhn)
2920 				ip6_print_replace_route_err(&rt6_nh_list);
2921 			err_nh = nh;
2922 			goto add_errout;
2923 		}
2924 
2925 		/* Because each route is added as if it were a single route, we
2926 		 * drop these flags after the first nexthop: if there is a
2927 		 * collision, we have already failed to add the first nexthop
2928 		 * (fib6_add_rt2node() has rejected it); when replacing, the
2929 		 * old nexthops have been replaced by the first new one, and
2930 		 * the rest should be added alongside it.
2931 		 */
2932 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2933 						     NLM_F_REPLACE);
2934 		nhn++;
2935 	}
2936 
2937 	goto cleanup;
2938 
2939 add_errout:
2940 	/* Delete routes that were already added */
2941 	list_for_each_entry(nh, &rt6_nh_list, next) {
2942 		if (err_nh == nh)
2943 			break;
2944 		ip6_route_del(&nh->r_cfg);
2945 	}
2946 
2947 cleanup:
2948 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2949 		if (nh->rt6_info)
2950 			dst_free(&nh->rt6_info->dst);
2951 		kfree(nh->mxc.mx);
2952 		list_del(&nh->next);
2953 		kfree(nh);
2954 	}
2955 
2956 	return err;
2957 }
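
/* Illustrative example: "ip -6 route add 2001:db8::/32 nexthop via
 * fe80::1 dev eth0 nexthop via fe80::2 dev eth1" (addresses and devices
 * hypothetical) arrives as one RTM_NEWROUTE carrying an RTA_MULTIPATH
 * attribute with two rtnexthop entries; the loop above turns them into
 * two rt6_info routes that are inserted one by one.
 */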
2958 
2959 static int ip6_route_multipath_del(struct fib6_config *cfg)
2960 {
2961 	struct fib6_config r_cfg;
2962 	struct rtnexthop *rtnh;
2963 	int remaining;
2964 	int attrlen;
2965 	int err = 1, last_err = 0;
2966 
2967 	remaining = cfg->fc_mp_len;
2968 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2969 
2970 	/* Parse a Multipath Entry */
2971 	while (rtnh_ok(rtnh, remaining)) {
2972 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2973 		if (rtnh->rtnh_ifindex)
2974 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2975 
2976 		attrlen = rtnh_attrlen(rtnh);
2977 		if (attrlen > 0) {
2978 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2979 
2980 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2981 			if (nla) {
2982 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2983 				r_cfg.fc_flags |= RTF_GATEWAY;
2984 			}
2985 		}
2986 		err = ip6_route_del(&r_cfg);
2987 		if (err)
2988 			last_err = err;
2989 
2990 		rtnh = rtnh_next(rtnh, &remaining);
2991 	}
2992 
2993 	return last_err;
2994 }
2995 
2996 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2997 {
2998 	struct fib6_config cfg;
2999 	int err;
3000 
3001 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3002 	if (err < 0)
3003 		return err;
3004 
3005 	if (cfg.fc_mp)
3006 		return ip6_route_multipath_del(&cfg);
3007 	else
3008 		return ip6_route_del(&cfg);
3009 }
3010 
3011 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3012 {
3013 	struct fib6_config cfg;
3014 	int err;
3015 
3016 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3017 	if (err < 0)
3018 		return err;
3019 
3020 	if (cfg.fc_mp)
3021 		return ip6_route_multipath_add(&cfg);
3022 	else
3023 		return ip6_route_add(&cfg);
3024 }
3025 
3026 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3027 {
3028 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3029 	       + nla_total_size(16) /* RTA_SRC */
3030 	       + nla_total_size(16) /* RTA_DST */
3031 	       + nla_total_size(16) /* RTA_GATEWAY */
3032 	       + nla_total_size(16) /* RTA_PREFSRC */
3033 	       + nla_total_size(4) /* RTA_TABLE */
3034 	       + nla_total_size(4) /* RTA_IIF */
3035 	       + nla_total_size(4) /* RTA_OIF */
3036 	       + nla_total_size(4) /* RTA_PRIORITY */
3037 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3038 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3039 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3040 	       + nla_total_size(1) /* RTA_PREF */
3041 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3042 }
3043 
3044 static int rt6_fill_node(struct net *net,
3045 			 struct sk_buff *skb, struct rt6_info *rt,
3046 			 struct in6_addr *dst, struct in6_addr *src,
3047 			 int iif, int type, u32 portid, u32 seq,
3048 			 int prefix, int nowait, unsigned int flags)
3049 {
3050 	u32 metrics[RTAX_MAX];
3051 	struct rtmsg *rtm;
3052 	struct nlmsghdr *nlh;
3053 	long expires;
3054 	u32 table;
3055 
3056 	if (prefix) {	/* user wants prefix routes only */
3057 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3058 			/* success since this is not a prefix route */
3059 			return 1;
3060 		}
3061 	}
3062 
3063 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3064 	if (!nlh)
3065 		return -EMSGSIZE;
3066 
3067 	rtm = nlmsg_data(nlh);
3068 	rtm->rtm_family = AF_INET6;
3069 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3070 	rtm->rtm_src_len = rt->rt6i_src.plen;
3071 	rtm->rtm_tos = 0;
3072 	if (rt->rt6i_table)
3073 		table = rt->rt6i_table->tb6_id;
3074 	else
3075 		table = RT6_TABLE_UNSPEC;
3076 	rtm->rtm_table = table;
3077 	if (nla_put_u32(skb, RTA_TABLE, table))
3078 		goto nla_put_failure;
3079 	if (rt->rt6i_flags & RTF_REJECT) {
3080 		switch (rt->dst.error) {
3081 		case -EINVAL:
3082 			rtm->rtm_type = RTN_BLACKHOLE;
3083 			break;
3084 		case -EACCES:
3085 			rtm->rtm_type = RTN_PROHIBIT;
3086 			break;
3087 		case -EAGAIN:
3088 			rtm->rtm_type = RTN_THROW;
3089 			break;
3090 		default:
3091 			rtm->rtm_type = RTN_UNREACHABLE;
3092 			break;
3093 		}
3094 	}
3095 	else if (rt->rt6i_flags & RTF_LOCAL)
3096 		rtm->rtm_type = RTN_LOCAL;
3097 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3098 		rtm->rtm_type = RTN_LOCAL;
3099 	else
3100 		rtm->rtm_type = RTN_UNICAST;
3101 	rtm->rtm_flags = 0;
3102 	if (!netif_carrier_ok(rt->dst.dev)) {
3103 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3104 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3105 			rtm->rtm_flags |= RTNH_F_DEAD;
3106 	}
3107 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3108 	rtm->rtm_protocol = rt->rt6i_protocol;
3109 	if (rt->rt6i_flags & RTF_DYNAMIC)
3110 		rtm->rtm_protocol = RTPROT_REDIRECT;
3111 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3112 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3113 			rtm->rtm_protocol = RTPROT_RA;
3114 		else
3115 			rtm->rtm_protocol = RTPROT_KERNEL;
3116 	}
3117 
3118 	if (rt->rt6i_flags & RTF_CACHE)
3119 		rtm->rtm_flags |= RTM_F_CLONED;
3120 
3121 	if (dst) {
3122 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3123 			goto nla_put_failure;
3124 		rtm->rtm_dst_len = 128;
3125 	} else if (rtm->rtm_dst_len)
3126 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3127 			goto nla_put_failure;
3128 #ifdef CONFIG_IPV6_SUBTREES
3129 	if (src) {
3130 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3131 			goto nla_put_failure;
3132 		rtm->rtm_src_len = 128;
3133 	} else if (rtm->rtm_src_len &&
3134 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3135 		goto nla_put_failure;
3136 #endif
3137 	if (iif) {
3138 #ifdef CONFIG_IPV6_MROUTE
3139 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3140 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3141 			if (err <= 0) {
3142 				if (!nowait) {
3143 					if (err == 0)
3144 						return 0;
3145 					goto nla_put_failure;
3146 				} else {
3147 					if (err == -EMSGSIZE)
3148 						goto nla_put_failure;
3149 				}
3150 			}
3151 		} else
3152 #endif
3153 			if (nla_put_u32(skb, RTA_IIF, iif))
3154 				goto nla_put_failure;
3155 	} else if (dst) {
3156 		struct in6_addr saddr_buf;
3157 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3158 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3159 			goto nla_put_failure;
3160 	}
3161 
3162 	if (rt->rt6i_prefsrc.plen) {
3163 		struct in6_addr saddr_buf;
3164 		saddr_buf = rt->rt6i_prefsrc.addr;
3165 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3166 			goto nla_put_failure;
3167 	}
3168 
3169 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3170 	if (rt->rt6i_pmtu)
3171 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3172 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3173 		goto nla_put_failure;
3174 
3175 	if (rt->rt6i_flags & RTF_GATEWAY) {
3176 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3177 			goto nla_put_failure;
3178 	}
3179 
3180 	if (rt->dst.dev &&
3181 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3182 		goto nla_put_failure;
3183 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3184 		goto nla_put_failure;
3185 
3186 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3187 
3188 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3189 		goto nla_put_failure;
3190 
3191 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3192 		goto nla_put_failure;
3193 
3194 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3195 
3196 	nlmsg_end(skb, nlh);
3197 	return 0;
3198 
3199 nla_put_failure:
3200 	nlmsg_cancel(skb, nlh);
3201 	return -EMSGSIZE;
3202 }
3203 
3204 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3205 {
3206 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3207 	int prefix;
3208 
3209 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3210 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3211 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3212 	} else
3213 		prefix = 0;
3214 
3215 	return rt6_fill_node(arg->net,
3216 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3217 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3218 		     prefix, 0, NLM_F_MULTI);
3219 }
3220 
3221 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3222 {
3223 	struct net *net = sock_net(in_skb->sk);
3224 	struct nlattr *tb[RTA_MAX+1];
3225 	struct rt6_info *rt;
3226 	struct sk_buff *skb;
3227 	struct rtmsg *rtm;
3228 	struct flowi6 fl6;
3229 	int err, iif = 0, oif = 0;
3230 
3231 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3232 	if (err < 0)
3233 		goto errout;
3234 
3235 	err = -EINVAL;
3236 	memset(&fl6, 0, sizeof(fl6));
3237 
3238 	if (tb[RTA_SRC]) {
3239 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3240 			goto errout;
3241 
3242 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3243 	}
3244 
3245 	if (tb[RTA_DST]) {
3246 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3247 			goto errout;
3248 
3249 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3250 	}
3251 
3252 	if (tb[RTA_IIF])
3253 		iif = nla_get_u32(tb[RTA_IIF]);
3254 
3255 	if (tb[RTA_OIF])
3256 		oif = nla_get_u32(tb[RTA_OIF]);
3257 
3258 	if (tb[RTA_MARK])
3259 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3260 
3261 	if (iif) {
3262 		struct net_device *dev;
3263 		int flags = 0;
3264 
3265 		dev = __dev_get_by_index(net, iif);
3266 		if (!dev) {
3267 			err = -ENODEV;
3268 			goto errout;
3269 		}
3270 
3271 		fl6.flowi6_iif = iif;
3272 
3273 		if (!ipv6_addr_any(&fl6.saddr))
3274 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3275 
3276 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3277 							       flags);
3278 	} else {
3279 		fl6.flowi6_oif = oif;
3280 
3281 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3282 	}
3283 
3284 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3285 	if (!skb) {
3286 		ip6_rt_put(rt);
3287 		err = -ENOBUFS;
3288 		goto errout;
3289 	}
3290 
3291 	/* Reserve room for dummy headers; this skb can pass
3292 	   through a good chunk of the routing engine.
3293 	 */
3294 	skb_reset_mac_header(skb);
3295 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3296 
3297 	skb_dst_set(skb, &rt->dst);
3298 
3299 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3300 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3301 			    nlh->nlmsg_seq, 0, 0, 0);
3302 	if (err < 0) {
3303 		kfree_skb(skb);
3304 		goto errout;
3305 	}
3306 
3307 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3308 errout:
3309 	return err;
3310 }
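
/* For orientation: "ip -6 route get 2001:db8::1" (address illustrative)
 * issues the RTM_GETROUTE request handled above with an RTA_DST attribute;
 * the reply is a single RTM_NEWROUTE message built by rt6_fill_node().
 */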
3311 
3312 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3313 		     unsigned int nlm_flags)
3314 {
3315 	struct sk_buff *skb;
3316 	struct net *net = info->nl_net;
3317 	u32 seq;
3318 	int err;
3319 
3320 	err = -ENOBUFS;
3321 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3322 
3323 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3324 	if (!skb)
3325 		goto errout;
3326 
3327 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3328 				event, info->portid, seq, 0, 0, nlm_flags);
3329 	if (err < 0) {
3330 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3331 		WARN_ON(err == -EMSGSIZE);
3332 		kfree_skb(skb);
3333 		goto errout;
3334 	}
3335 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3336 		    info->nlh, gfp_any());
3337 	return;
3338 errout:
3339 	if (err < 0)
3340 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3341 }
3342 
3343 static int ip6_route_dev_notify(struct notifier_block *this,
3344 				unsigned long event, void *ptr)
3345 {
3346 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3347 	struct net *net = dev_net(dev);
3348 
3349 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3350 		net->ipv6.ip6_null_entry->dst.dev = dev;
3351 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3352 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3353 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3354 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3355 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3356 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3357 #endif
3358 	}
3359 
3360 	return NOTIFY_OK;
3361 }
3362 
3363 /*
3364  *	/proc
3365  */
3366 
3367 #ifdef CONFIG_PROC_FS
3368 
3369 static const struct file_operations ipv6_route_proc_fops = {
3370 	.owner		= THIS_MODULE,
3371 	.open		= ipv6_route_open,
3372 	.read		= seq_read,
3373 	.llseek		= seq_lseek,
3374 	.release	= seq_release_net,
3375 };
3376 
3377 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3378 {
3379 	struct net *net = (struct net *)seq->private;
3380 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3381 		   net->ipv6.rt6_stats->fib_nodes,
3382 		   net->ipv6.rt6_stats->fib_route_nodes,
3383 		   net->ipv6.rt6_stats->fib_rt_alloc,
3384 		   net->ipv6.rt6_stats->fib_rt_entries,
3385 		   net->ipv6.rt6_stats->fib_rt_cache,
3386 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3387 		   net->ipv6.rt6_stats->fib_discarded_routes);
3388 
3389 	return 0;
3390 }
3391 
3392 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3393 {
3394 	return single_open_net(inode, file, rt6_stats_seq_show);
3395 }
3396 
3397 static const struct file_operations rt6_stats_seq_fops = {
3398 	.owner	 = THIS_MODULE,
3399 	.open	 = rt6_stats_seq_open,
3400 	.read	 = seq_read,
3401 	.llseek	 = seq_lseek,
3402 	.release = single_release_net,
3403 };
3404 #endif	/* CONFIG_PROC_FS */
3405 
3406 #ifdef CONFIG_SYSCTL
3407 
3408 static
3409 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3410 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3411 {
3412 	struct net *net;
3413 	int delay;
3414 	if (!write)
3415 		return -EINVAL;
3416 
3417 	net = (struct net *)ctl->extra1;
3418 	delay = net->ipv6.sysctl.flush_delay;
3419 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3420 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3421 	return 0;
3422 }
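
/* Usage sketch: the "flush" sysctl below is write-only (mode 0200);
 * writing any integer triggers an immediate garbage-collection pass over
 * cached routes, e.g. from a shell:
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 */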
3423 
3424 struct ctl_table ipv6_route_table_template[] = {
3425 	{
3426 		.procname	=	"flush",
3427 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3428 		.maxlen		=	sizeof(int),
3429 		.mode		=	0200,
3430 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3431 	},
3432 	{
3433 		.procname	=	"gc_thresh",
3434 		.data		=	&ip6_dst_ops_template.gc_thresh,
3435 		.maxlen		=	sizeof(int),
3436 		.mode		=	0644,
3437 		.proc_handler	=	proc_dointvec,
3438 	},
3439 	{
3440 		.procname	=	"max_size",
3441 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3442 		.maxlen		=	sizeof(int),
3443 		.mode		=	0644,
3444 		.proc_handler	=	proc_dointvec,
3445 	},
3446 	{
3447 		.procname	=	"gc_min_interval",
3448 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3449 		.maxlen		=	sizeof(int),
3450 		.mode		=	0644,
3451 		.proc_handler	=	proc_dointvec_jiffies,
3452 	},
3453 	{
3454 		.procname	=	"gc_timeout",
3455 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3456 		.maxlen		=	sizeof(int),
3457 		.mode		=	0644,
3458 		.proc_handler	=	proc_dointvec_jiffies,
3459 	},
3460 	{
3461 		.procname	=	"gc_interval",
3462 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3463 		.maxlen		=	sizeof(int),
3464 		.mode		=	0644,
3465 		.proc_handler	=	proc_dointvec_jiffies,
3466 	},
3467 	{
3468 		.procname	=	"gc_elasticity",
3469 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3470 		.maxlen		=	sizeof(int),
3471 		.mode		=	0644,
3472 		.proc_handler	=	proc_dointvec,
3473 	},
3474 	{
3475 		.procname	=	"mtu_expires",
3476 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3477 		.maxlen		=	sizeof(int),
3478 		.mode		=	0644,
3479 		.proc_handler	=	proc_dointvec_jiffies,
3480 	},
3481 	{
3482 		.procname	=	"min_adv_mss",
3483 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3484 		.maxlen		=	sizeof(int),
3485 		.mode		=	0644,
3486 		.proc_handler	=	proc_dointvec,
3487 	},
3488 	{
3489 		.procname	=	"gc_min_interval_ms",
3490 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3491 		.maxlen		=	sizeof(int),
3492 		.mode		=	0644,
3493 		.proc_handler	=	proc_dointvec_ms_jiffies,
3494 	},
3495 	{ }
3496 };
3497 
3498 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3499 {
3500 	struct ctl_table *table;
3501 
3502 	table = kmemdup(ipv6_route_table_template,
3503 			sizeof(ipv6_route_table_template),
3504 			GFP_KERNEL);
3505 
3506 	if (table) {
3507 		table[0].data = &net->ipv6.sysctl.flush_delay;
3508 		table[0].extra1 = net;
3509 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3510 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3511 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3512 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3513 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3514 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3515 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3516 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3517 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3518 
3519 		/* Don't export sysctls to unprivileged users */
3520 		if (net->user_ns != &init_user_ns)
3521 			table[0].procname = NULL;
3522 	}
3523 
3524 	return table;
3525 }
3526 #endif
3527 
3528 static int __net_init ip6_route_net_init(struct net *net)
3529 {
3530 	int ret = -ENOMEM;
3531 
3532 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3533 	       sizeof(net->ipv6.ip6_dst_ops));
3534 
3535 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3536 		goto out_ip6_dst_ops;
3537 
3538 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3539 					   sizeof(*net->ipv6.ip6_null_entry),
3540 					   GFP_KERNEL);
3541 	if (!net->ipv6.ip6_null_entry)
3542 		goto out_ip6_dst_entries;
3543 	net->ipv6.ip6_null_entry->dst.path =
3544 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3545 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3546 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3547 			 ip6_template_metrics, true);
3548 
3549 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3550 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3551 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3552 					       GFP_KERNEL);
3553 	if (!net->ipv6.ip6_prohibit_entry)
3554 		goto out_ip6_null_entry;
3555 	net->ipv6.ip6_prohibit_entry->dst.path =
3556 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3557 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3558 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3559 			 ip6_template_metrics, true);
3560 
3561 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3562 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3563 					       GFP_KERNEL);
3564 	if (!net->ipv6.ip6_blk_hole_entry)
3565 		goto out_ip6_prohibit_entry;
3566 	net->ipv6.ip6_blk_hole_entry->dst.path =
3567 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3568 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3569 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3570 			 ip6_template_metrics, true);
3571 #endif
3572 
3573 	net->ipv6.sysctl.flush_delay = 0;
3574 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3575 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3576 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3577 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3578 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3579 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3580 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3581 
3582 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3583 
3584 	ret = 0;
3585 out:
3586 	return ret;
3587 
3588 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3589 out_ip6_prohibit_entry:
3590 	kfree(net->ipv6.ip6_prohibit_entry);
3591 out_ip6_null_entry:
3592 	kfree(net->ipv6.ip6_null_entry);
3593 #endif
3594 out_ip6_dst_entries:
3595 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3596 out_ip6_dst_ops:
3597 	goto out;
3598 }
3599 
3600 static void __net_exit ip6_route_net_exit(struct net *net)
3601 {
3602 	kfree(net->ipv6.ip6_null_entry);
3603 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3604 	kfree(net->ipv6.ip6_prohibit_entry);
3605 	kfree(net->ipv6.ip6_blk_hole_entry);
3606 #endif
3607 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3608 }
3609 
3610 static int __net_init ip6_route_net_init_late(struct net *net)
3611 {
3612 #ifdef CONFIG_PROC_FS
3613 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3614 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3615 #endif
3616 	return 0;
3617 }
3618 
3619 static void __net_exit ip6_route_net_exit_late(struct net *net)
3620 {
3621 #ifdef CONFIG_PROC_FS
3622 	remove_proc_entry("ipv6_route", net->proc_net);
3623 	remove_proc_entry("rt6_stats", net->proc_net);
3624 #endif
3625 }
3626 
3627 static struct pernet_operations ip6_route_net_ops = {
3628 	.init = ip6_route_net_init,
3629 	.exit = ip6_route_net_exit,
3630 };
3631 
3632 static int __net_init ipv6_inetpeer_init(struct net *net)
3633 {
3634 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3635 
3636 	if (!bp)
3637 		return -ENOMEM;
3638 	inet_peer_base_init(bp);
3639 	net->ipv6.peers = bp;
3640 	return 0;
3641 }
3642 
3643 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3644 {
3645 	struct inet_peer_base *bp = net->ipv6.peers;
3646 
3647 	net->ipv6.peers = NULL;
3648 	inetpeer_invalidate_tree(bp);
3649 	kfree(bp);
3650 }
3651 
3652 static struct pernet_operations ipv6_inetpeer_ops = {
3653 	.init	=	ipv6_inetpeer_init,
3654 	.exit	=	ipv6_inetpeer_exit,
3655 };
3656 
3657 static struct pernet_operations ip6_route_net_late_ops = {
3658 	.init = ip6_route_net_init_late,
3659 	.exit = ip6_route_net_exit_late,
3660 };
3661 
3662 static struct notifier_block ip6_route_dev_notifier = {
3663 	.notifier_call = ip6_route_dev_notify,
3664 	.priority = 0,
3665 };
3666 
3667 int __init ip6_route_init(void)
3668 {
3669 	int ret;
3670 	int cpu;
3671 
3672 	ret = -ENOMEM;
3673 	ip6_dst_ops_template.kmem_cachep =
3674 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3675 				  SLAB_HWCACHE_ALIGN, NULL);
3676 	if (!ip6_dst_ops_template.kmem_cachep)
3677 		goto out;
3678 
3679 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3680 	if (ret)
3681 		goto out_kmem_cache;
3682 
3683 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3684 	if (ret)
3685 		goto out_dst_entries;
3686 
3687 	ret = register_pernet_subsys(&ip6_route_net_ops);
3688 	if (ret)
3689 		goto out_register_inetpeer;
3690 
3691 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3692 
3693 	/* The loopback device is registered before this code runs, so the
3694 	 * loopback reference in rt6_info is not taken automatically; take
3695 	 * it manually for init_net */
3696 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3697 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3698 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3699 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3700 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3701 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3702 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3703 #endif
3704 	ret = fib6_init();
3705 	if (ret)
3706 		goto out_register_subsys;
3707 
3708 	ret = xfrm6_init();
3709 	if (ret)
3710 		goto out_fib6_init;
3711 
3712 	ret = fib6_rules_init();
3713 	if (ret)
3714 		goto xfrm6_init;
3715 
3716 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3717 	if (ret)
3718 		goto fib6_rules_init;
3719 
3720 	ret = -ENOBUFS;
3721 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3722 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3723 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3724 		goto out_register_late_subsys;
3725 
3726 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3727 	if (ret)
3728 		goto out_register_late_subsys;
3729 
3730 	for_each_possible_cpu(cpu) {
3731 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3732 
3733 		INIT_LIST_HEAD(&ul->head);
3734 		spin_lock_init(&ul->lock);
3735 	}
3736 
3737 out:
3738 	return ret;
3739 
3740 out_register_late_subsys:
3741 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3742 fib6_rules_init:
3743 	fib6_rules_cleanup();
3744 xfrm6_init:
3745 	xfrm6_fini();
3746 out_fib6_init:
3747 	fib6_gc_cleanup();
3748 out_register_subsys:
3749 	unregister_pernet_subsys(&ip6_route_net_ops);
3750 out_register_inetpeer:
3751 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3752 out_dst_entries:
3753 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3754 out_kmem_cache:
3755 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3756 	goto out;
3757 }
3758 
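/* The error labels in ip6_route_init() unwind in strict reverse order of
 * initialization; ip6_route_cleanup() below tears the same pieces down in
 * the same order.
 */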
3759 void ip6_route_cleanup(void)
3760 {
3761 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3762 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3763 	fib6_rules_cleanup();
3764 	xfrm6_fini();
3765 	fib6_gc_cleanup();
3766 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3767 	unregister_pernet_subsys(&ip6_route_net_ops);
3768 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3769 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3770 }
3771