xref: /linux/net/ipv6/route.c (revision 9cfc5c90ad38c8fc11bfd39de42a107da00871ba)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 
66 #include <asm/uaccess.h>
67 
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71 
/*
 * Result of evaluating a route's next hop, as produced by
 * rt6_check_neigh() and passed through rt6_score_route().
 * Negative values are failures; find_match() handles each differently.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route entirely */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED (CONFIG_IPV6_ROUTER_PREF only) */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores the route 0 and requests round-robin */
	RT6_NUD_SUCCEED = 1
};
78 
/*
 * Forward declarations of static helpers that are referenced (for example
 * by the dst_ops tables and route templates) before their definitions.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

/* Route Information option helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
110 
/*
 * A list of rt6_info entries linked through their rt6i_uncached member.
 * One instance exists per CPU (rt6_uncached_list).
 */
struct uncached_list {
	spinlock_t		lock;	/* held (with BHs disabled) around every list access in this file */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117 
118 static void rt6_uncached_list_add(struct rt6_info *rt)
119 {
120 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121 
122 	rt->dst.flags |= DST_NOCACHE;
123 	rt->rt6i_uncached_list = ul;
124 
125 	spin_lock_bh(&ul->lock);
126 	list_add_tail(&rt->rt6i_uncached, &ul->head);
127 	spin_unlock_bh(&ul->lock);
128 }
129 
130 static void rt6_uncached_list_del(struct rt6_info *rt)
131 {
132 	if (!list_empty(&rt->rt6i_uncached)) {
133 		struct uncached_list *ul = rt->rt6i_uncached_list;
134 
135 		spin_lock_bh(&ul->lock);
136 		list_del(&rt->rt6i_uncached);
137 		spin_unlock_bh(&ul->lock);
138 	}
139 }
140 
141 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142 {
143 	struct net_device *loopback_dev = net->loopback_dev;
144 	int cpu;
145 
146 	if (dev == loopback_dev)
147 		return;
148 
149 	for_each_possible_cpu(cpu) {
150 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
151 		struct rt6_info *rt;
152 
153 		spin_lock_bh(&ul->lock);
154 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
155 			struct inet6_dev *rt_idev = rt->rt6i_idev;
156 			struct net_device *rt_dev = rt->dst.dev;
157 
158 			if (rt_idev->dev == dev) {
159 				rt->rt6i_idev = in6_dev_get(loopback_dev);
160 				in6_dev_put(rt_idev);
161 			}
162 
163 			if (rt_dev == dev) {
164 				rt->dst.dev = loopback_dev;
165 				dev_hold(rt->dst.dev);
166 				dev_put(rt_dev);
167 			}
168 		}
169 		spin_unlock_bh(&ul->lock);
170 	}
171 }
172 
173 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
174 {
175 	return dst_metrics_write_ptr(rt->dst.from);
176 }
177 
178 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179 {
180 	struct rt6_info *rt = (struct rt6_info *)dst;
181 
182 	if (rt->rt6i_flags & RTF_PCPU)
183 		return rt6_pcpu_cow_metrics(rt);
184 	else if (rt->rt6i_flags & RTF_CACHE)
185 		return NULL;
186 	else
187 		return dst_cow_metrics_generic(dst, old);
188 }
189 
190 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191 					     struct sk_buff *skb,
192 					     const void *daddr)
193 {
194 	struct in6_addr *p = &rt->rt6i_gateway;
195 
196 	if (!ipv6_addr_any(p))
197 		return (const void *) p;
198 	else if (skb)
199 		return &ipv6_hdr(skb)->daddr;
200 	return daddr;
201 }
202 
203 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204 					  struct sk_buff *skb,
205 					  const void *daddr)
206 {
207 	struct rt6_info *rt = (struct rt6_info *) dst;
208 	struct neighbour *n;
209 
210 	daddr = choose_neigh_daddr(rt, skb, daddr);
211 	n = __ipv6_neigh_lookup(dst->dev, daddr);
212 	if (n)
213 		return n;
214 	return neigh_create(&nd_tbl, daddr, dst->dev);
215 }
216 
/*
 * Template dst_ops wiring up the IPv6 route callbacks of this file.
 * NOTE(review): presumably copied into each namespace's
 * net->ipv6.ip6_dst_ops (the ops __ip6_dst_alloc() allocates from) at
 * namespace init - the copy is not visible in this part of the file.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
234 
235 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
236 {
237 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238 
239 	return mtu ? : dst->dev->mtu;
240 }
241 
/* dst_ops->update_pmtu handler for blackhole routes: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
246 
/* dst_ops->redirect handler for blackhole routes: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
251 
/*
 * dst_ops for blackhole routes.  Shares destroy/check/advmss/neigh_lookup
 * with the regular IPv6 ops, but PMTU updates and redirects are no-ops and
 * metrics use the generic copy-on-write helper.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
263 
/*
 * Constant metrics array of RTAX_MAX entries.  Only the hop-limit slot is
 * spelled out (as 0); the remaining entries are implicitly zero as well.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
267 
/*
 * Template for a reject route that reports -ENETUNREACH and hands packets
 * to the ip6_pkt_discard*() handlers.
 * NOTE(review): presumably the source of net->ipv6.ip6_null_entry, which
 * the lookup code in this file returns when nothing matches - confirm at
 * the namespace init code.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
	.rt6i_ref	= ATOMIC_INIT(1),
};
282 
283 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
284 
/*
 * Template for a reject route that reports -EACCES and hands packets to
 * the ip6_pkt_prohibit*() handlers.  Only built with
 * CONFIG_IPV6_MULTIPLE_TABLES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
	.rt6i_ref	= ATOMIC_INIT(1),
};
299 
/*
 * Template for a reject route that reports -EINVAL and drops packets via
 * the generic dst_discard handlers.  Only built with
 * CONFIG_IPV6_MULTIPLE_TABLES.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
	.rt6i_ref	= ATOMIC_INIT(1),
};
314 
315 #endif
316 
317 static void rt6_info_init(struct rt6_info *rt)
318 {
319 	struct dst_entry *dst = &rt->dst;
320 
321 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322 	INIT_LIST_HEAD(&rt->rt6i_siblings);
323 	INIT_LIST_HEAD(&rt->rt6i_uncached);
324 }
325 
326 /* allocate dst with ip6_dst_ops */
327 static struct rt6_info *__ip6_dst_alloc(struct net *net,
328 					struct net_device *dev,
329 					int flags)
330 {
331 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
332 					0, DST_OBSOLETE_FORCE_CHK, flags);
333 
334 	if (rt)
335 		rt6_info_init(rt);
336 
337 	return rt;
338 }
339 
340 static struct rt6_info *ip6_dst_alloc(struct net *net,
341 				      struct net_device *dev,
342 				      int flags)
343 {
344 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
345 
346 	if (rt) {
347 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348 		if (rt->rt6i_pcpu) {
349 			int cpu;
350 
351 			for_each_possible_cpu(cpu) {
352 				struct rt6_info **p;
353 
354 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355 				/* no one shares rt */
356 				*p =  NULL;
357 			}
358 		} else {
359 			dst_destroy((struct dst_entry *)rt);
360 			return NULL;
361 		}
362 	}
363 
364 	return rt;
365 }
366 
/*
 * dst_ops->destroy handler: release everything an rt6_info owns - its
 * metrics, the per-cpu pointer array, its uncached-list membership, the
 * inet6_dev reference and the reference on dst->from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		/* clear the pointer before dropping the reference */
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* 'from' was sampled at entry; clear the field, then drop the ref */
	dst->from = NULL;
	dst_release(from);
}
386 
387 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 			   int how)
389 {
390 	struct rt6_info *rt = (struct rt6_info *)dst;
391 	struct inet6_dev *idev = rt->rt6i_idev;
392 	struct net_device *loopback_dev =
393 		dev_net(dev)->loopback_dev;
394 
395 	if (dev != loopback_dev) {
396 		if (idev && idev->dev == dev) {
397 			struct inet6_dev *loopback_idev =
398 				in6_dev_get(loopback_dev);
399 			if (loopback_idev) {
400 				rt->rt6i_idev = loopback_idev;
401 				in6_dev_put(idev);
402 			}
403 		}
404 	}
405 }
406 
407 static bool rt6_check_expired(const struct rt6_info *rt)
408 {
409 	if (rt->rt6i_flags & RTF_EXPIRES) {
410 		if (time_after(jiffies, rt->dst.expires))
411 			return true;
412 	} else if (rt->dst.from) {
413 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
414 	}
415 	return false;
416 }
417 
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto an index in [0, candidate_count).
 *   @candidate_count must be non-zero.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int bucket = get_hash_from_flowi6(fl6) % candidate_count;

	return bucket;
}
427 
428 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
429 					     struct flowi6 *fl6, int oif,
430 					     int strict)
431 {
432 	struct rt6_info *sibling, *next_sibling;
433 	int route_choosen;
434 
435 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
436 	/* Don't change the route, if route_choosen == 0
437 	 * (siblings does not include ourself)
438 	 */
439 	if (route_choosen)
440 		list_for_each_entry_safe(sibling, next_sibling,
441 				&match->rt6i_siblings, rt6i_siblings) {
442 			route_choosen--;
443 			if (route_choosen == 0) {
444 				if (rt6_score_route(sibling, oif, strict) < 0)
445 					break;
446 				match = sibling;
447 				break;
448 			}
449 		}
450 	return match;
451 }
452 
453 /*
454  *	Route lookup. Any table->tb6_lock is implied.
455  */
456 
/*
 * Walk the rt6_next chain starting at @rt and pick the entry that fits the
 * requested output interface @oif or, when @oif is 0, the source address
 * @saddr.
 *
 * Returns:
 *  - @rt itself when neither @oif nor a specific @saddr is given, or when
 *    nothing better is found and RT6_LOOKUP_F_IFACE is not requested;
 *  - the first entry whose device has ifindex @oif (oif case) or for which
 *    ipv6_chk_addr() accepts @saddr on its device (saddr case);
 *  - a remembered loopback-device entry as fallback (oif case);
 *  - net->ipv6.ip6_null_entry when @oif is set, RT6_LOOKUP_F_IFACE is
 *    requested and nothing matched.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to match against */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* loopback route whose inet6_dev is not on @oif */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					/* keep an earlier candidate that is on @oif */
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
503 
504 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe, queued by rt6_probe() and consumed (and freed)
 * by rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* device to send on; a reference is held while queued */
};
510 
511 static void rt6_probe_deferred(struct work_struct *w)
512 {
513 	struct in6_addr mcaddr;
514 	struct __rt6_probe_work *work =
515 		container_of(w, struct __rt6_probe_work, work);
516 
517 	addrconf_addr_solict_mult(&work->target, &mcaddr);
518 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
519 	dev_put(work->dev);
520 	kfree(work);
521 }
522 
/*
 * Queue a reachability probe for the gateway of @rt, if one is warranted.
 *
 * Nothing happens for a NULL @rt, a route without RTF_GATEWAY, or a
 * gateway whose neighbour entry is NUD_VALID.  With an existing, not
 * valid neighbour entry a probe is only queued once more than
 * rtr_probe_interval has passed since neigh->updated; with no neighbour
 * entry at all a probe is always attempted.  The solicitation itself is
 * sent later from a workqueue by rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		/* unlocked fast path; re-checked under neigh->lock below */
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			/* atomic allocation: we are inside rcu_read_lock_bh() and hold neigh->lock */
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* reference is dropped by rt6_probe_deferred() */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
569 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
573 #endif
574 
575 /*
576  * Default Router Selection (RFC 2461 6.3.6)
577  */
578 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
579 {
580 	struct net_device *dev = rt->dst.dev;
581 	if (!oif || dev->ifindex == oif)
582 		return 2;
583 	if ((dev->flags & IFF_LOOPBACK) &&
584 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
585 		return 1;
586 	return 0;
587 }
588 
/*
 * Classify the reachability of the gateway of @rt.
 *
 * Routes flagged RTF_NONEXTHOP or lacking RTF_GATEWAY always succeed.
 * Otherwise the gateway's neighbour entry is looked up on rt->dst.dev:
 *  - NUD_VALID entry:                       RT6_NUD_SUCCEED
 *  - other entry, CONFIG_IPV6_ROUTER_PREF:  RT6_NUD_SUCCEED unless the
 *                                           state is NUD_FAILED, which
 *                                           gives RT6_NUD_FAIL_PROBE
 *  - other entry, no ROUTER_PREF:           RT6_NUD_FAIL_HARD (initial value)
 *  - no entry:                              RT6_NUD_SUCCEED with
 *                                           ROUTER_PREF, otherwise
 *                                           RT6_NUD_FAIL_DO_RR
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* these branches extend the if above when ROUTER_PREF is on */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
619 
620 static int rt6_score_route(struct rt6_info *rt, int oif,
621 			   int strict)
622 {
623 	int m;
624 
625 	m = rt6_check_dev(rt, oif);
626 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
627 		return RT6_NUD_FAIL_HARD;
628 #ifdef CONFIG_IPV6_ROUTER_PREF
629 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
630 #endif
631 	if (strict & RT6_LOOKUP_F_REACHABLE) {
632 		int n = rt6_check_neigh(rt);
633 		if (n < 0)
634 			return n;
635 	}
636 	return m;
637 }
638 
639 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
640 				   int *mpri, struct rt6_info *match,
641 				   bool *do_rr)
642 {
643 	int m;
644 	bool match_do_rr = false;
645 	struct inet6_dev *idev = rt->rt6i_idev;
646 	struct net_device *dev = rt->dst.dev;
647 
648 	if (dev && !netif_carrier_ok(dev) &&
649 	    idev->cnf.ignore_routes_with_linkdown)
650 		goto out;
651 
652 	if (rt6_check_expired(rt))
653 		goto out;
654 
655 	m = rt6_score_route(rt, oif, strict);
656 	if (m == RT6_NUD_FAIL_DO_RR) {
657 		match_do_rr = true;
658 		m = 0; /* lowest valid score */
659 	} else if (m == RT6_NUD_FAIL_HARD) {
660 		goto out;
661 	}
662 
663 	if (strict & RT6_LOOKUP_F_REACHABLE)
664 		rt6_probe(rt);
665 
666 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
667 	if (m > *mpri) {
668 		*do_rr = match_do_rr;
669 		*mpri = m;
670 		match = rt;
671 	}
672 out:
673 	return match;
674 }
675 
676 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
677 				     struct rt6_info *rr_head,
678 				     u32 metric, int oif, int strict,
679 				     bool *do_rr)
680 {
681 	struct rt6_info *rt, *match, *cont;
682 	int mpri = -1;
683 
684 	match = NULL;
685 	cont = NULL;
686 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
687 		if (rt->rt6i_metric != metric) {
688 			cont = rt;
689 			break;
690 		}
691 
692 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
693 	}
694 
695 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
696 		if (rt->rt6i_metric != metric) {
697 			cont = rt;
698 			break;
699 		}
700 
701 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
702 	}
703 
704 	if (match || !cont)
705 		return match;
706 
707 	for (rt = cont; rt; rt = rt->dst.rt6_next)
708 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
709 
710 	return match;
711 }
712 
713 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
714 {
715 	struct rt6_info *match, *rt0;
716 	struct net *net;
717 	bool do_rr = false;
718 
719 	rt0 = fn->rr_ptr;
720 	if (!rt0)
721 		fn->rr_ptr = rt0 = fn->leaf;
722 
723 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
724 			     &do_rr);
725 
726 	if (do_rr) {
727 		struct rt6_info *next = rt0->dst.rt6_next;
728 
729 		/* no entries matched; do round-robin */
730 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
731 			next = fn->leaf;
732 
733 		if (next != rt0)
734 			fn->rr_ptr = next;
735 	}
736 
737 	net = dev_net(rt0->dst.dev);
738 	return match ? match : net->ipv6.ip6_null_entry;
739 }
740 
741 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
742 {
743 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
744 }
745 
746 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router @gwaddr.
 *
 * @opt/@len: the raw option and its length in bytes.
 *
 * The option is validated, then the matching route (the default router
 * entry for prefix length 0, otherwise the route-info route for the
 * prefix) is deleted when the lifetime is 0, created when it does not
 * exist yet, or has its preference refreshed.  Finally the route's expiry
 * is set from the lifetime.
 *
 * Returns 0 on success (including when adding the route failed) and
 * -EINVAL for a malformed option or an invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	/*
	 * rinfo->length must be big enough for prefix_len bits of prefix
	 * (presumably counted in 8-octet units as in RFC 4191 - confirm).
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* length 3: use the prefix field in place; else build it in prefix_buf */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* existing route: mark as route-info and replace its preference bits */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
820 #endif
821 
822 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
823 					struct in6_addr *saddr)
824 {
825 	struct fib6_node *pn;
826 	while (1) {
827 		if (fn->fn_flags & RTN_TL_ROOT)
828 			return NULL;
829 		pn = fn->parent;
830 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
831 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
832 		else
833 			fn = pn;
834 		if (fn->fn_flags & RTN_RTINFO)
835 			return fn;
836 	}
837 }
838 
839 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
840 					     struct fib6_table *table,
841 					     struct flowi6 *fl6, int flags)
842 {
843 	struct fib6_node *fn;
844 	struct rt6_info *rt;
845 
846 	read_lock_bh(&table->tb6_lock);
847 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
848 restart:
849 	rt = fn->leaf;
850 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
851 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
852 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
853 	if (rt == net->ipv6.ip6_null_entry) {
854 		fn = fib6_backtrack(fn, &fl6->saddr);
855 		if (fn)
856 			goto restart;
857 	}
858 	dst_use(&rt->dst, jiffies);
859 	read_unlock_bh(&table->tb6_lock);
860 	return rt;
861 
862 }
863 
864 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
865 				    int flags)
866 {
867 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
868 }
869 EXPORT_SYMBOL_GPL(ip6_route_lookup);
870 
871 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
872 			    const struct in6_addr *saddr, int oif, int strict)
873 {
874 	struct flowi6 fl6 = {
875 		.flowi6_oif = oif,
876 		.daddr = *daddr,
877 	};
878 	struct dst_entry *dst;
879 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
880 
881 	if (saddr) {
882 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
883 		flags |= RT6_LOOKUP_F_HAS_SADDR;
884 	}
885 
886 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
887 	if (dst->error == 0)
888 		return (struct rt6_info *) dst;
889 
890 	dst_release(dst);
891 
892 	return NULL;
893 }
894 EXPORT_SYMBOL(rt6_lookup);
895 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold the
   route, it may be destroyed.
 */
901 
902 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
903 			struct mx6_config *mxc)
904 {
905 	int err;
906 	struct fib6_table *table;
907 
908 	table = rt->rt6i_table;
909 	write_lock_bh(&table->tb6_lock);
910 	err = fib6_add(&table->tb6_root, rt, info, mxc);
911 	write_unlock_bh(&table->tb6_lock);
912 
913 	return err;
914 }
915 
916 int ip6_ins_rt(struct rt6_info *rt)
917 {
918 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
919 	struct mx6_config mxc = { .mx = NULL, };
920 
921 	return __ip6_ins_rt(rt, &info, &mxc);
922 }
923 
924 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
925 					   const struct in6_addr *daddr,
926 					   const struct in6_addr *saddr)
927 {
928 	struct rt6_info *rt;
929 
930 	/*
931 	 *	Clone the route.
932 	 */
933 
934 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
935 		ort = (struct rt6_info *)ort->dst.from;
936 
937 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
938 
939 	if (!rt)
940 		return NULL;
941 
942 	ip6_rt_copy_init(rt, ort);
943 	rt->rt6i_flags |= RTF_CACHE;
944 	rt->rt6i_metric = 0;
945 	rt->dst.flags |= DST_HOST;
946 	rt->rt6i_dst.addr = *daddr;
947 	rt->rt6i_dst.plen = 128;
948 
949 	if (!rt6_is_gw_or_nonexthop(ort)) {
950 		if (ort->rt6i_dst.plen != 128 &&
951 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
952 			rt->rt6i_flags |= RTF_ANYCAST;
953 #ifdef CONFIG_IPV6_SUBTREES
954 		if (rt->rt6i_src.plen && saddr) {
955 			rt->rt6i_src.addr = *saddr;
956 			rt->rt6i_src.plen = 128;
957 		}
958 #endif
959 	}
960 
961 	return rt;
962 }
963 
964 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
965 {
966 	struct rt6_info *pcpu_rt;
967 
968 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
969 				  rt->dst.dev, rt->dst.flags);
970 
971 	if (!pcpu_rt)
972 		return NULL;
973 	ip6_rt_copy_init(pcpu_rt, rt);
974 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
975 	pcpu_rt->rt6i_flags |= RTF_PCPU;
976 	return pcpu_rt;
977 }
978 
979 /* It should be called with read_lock_bh(&tb6_lock) acquired */
980 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
981 {
982 	struct rt6_info *pcpu_rt, **p;
983 
984 	p = this_cpu_ptr(rt->rt6i_pcpu);
985 	pcpu_rt = *p;
986 
987 	if (pcpu_rt) {
988 		dst_hold(&pcpu_rt->dst);
989 		rt6_dst_from_metrics_check(pcpu_rt);
990 	}
991 	return pcpu_rt;
992 }
993 
/*
 * Allocate a per-cpu copy of @rt and install it in this CPU's rt6i_pcpu
 * slot.  Takes the table's read lock itself.
 *
 * Returns, with a reference held, one of:
 *  - the newly installed copy;
 *  - the copy already present in the slot (the new one is destroyed);
 *  - @rt itself, when rt->rt6i_pcpu is no longer set;
 *  - the namespace's ip6_null_entry, when the allocation failed.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		/* install only if the slot is still empty */
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1031 
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * @oif:   interface index to match (forced to 0 when the flow carries
 *         FLOWI_FLAG_SKIP_NH_OIF).
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is carried over
 *         into the strictness used for selection.  RT6_LOOKUP_F_REACHABLE
 *         is added when forwarding is disabled in devconf_all.
 *
 * Selection backtracks up the tree while only the null entry is found; if
 * that fails with RT6_LOOKUP_F_REACHABLE set, the search is repeated from
 * the original node without the reachability requirement.
 *
 * The result is one of:
 *  - the null entry or an RTF_CACHE route, returned directly;
 *  - a fresh uncached RTF_CACHE clone, for FLOWI_FLAG_KNOWN_NH flows on a
 *    non-gateway route (the null entry if the clone cannot be allocated);
 *  - this CPU's per-cpu copy of the selected route, created on demand.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* remembered so the relaxed retry below can start over */
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* dst_use() is called before unlocking; rt is released after cloning */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			/* hold rt across the unlocked window */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1124 
1125 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1126 					    struct flowi6 *fl6, int flags)
1127 {
1128 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1129 }
1130 
1131 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1132 						struct net_device *dev,
1133 						struct flowi6 *fl6, int flags)
1134 {
1135 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1136 		flags |= RT6_LOOKUP_F_IFACE;
1137 
1138 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1139 }
1140 
1141 void ip6_route_input(struct sk_buff *skb)
1142 {
1143 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1144 	struct net *net = dev_net(skb->dev);
1145 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1146 	struct ip_tunnel_info *tun_info;
1147 	struct flowi6 fl6 = {
1148 		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1149 		.daddr = iph->daddr,
1150 		.saddr = iph->saddr,
1151 		.flowlabel = ip6_flowinfo(iph),
1152 		.flowi6_mark = skb->mark,
1153 		.flowi6_proto = iph->nexthdr,
1154 	};
1155 
1156 	tun_info = skb_tunnel_info(skb);
1157 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1158 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1159 	skb_dst_drop(skb);
1160 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1161 }
1162 
1163 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1164 					     struct flowi6 *fl6, int flags)
1165 {
1166 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1167 }
1168 
1169 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1170 				    struct flowi6 *fl6)
1171 {
1172 	struct dst_entry *dst;
1173 	int flags = 0;
1174 	bool any_src;
1175 
1176 	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1177 	if (dst)
1178 		return dst;
1179 
1180 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1181 
1182 	any_src = ipv6_addr_any(&fl6->saddr);
1183 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1184 	    (fl6->flowi6_oif && any_src))
1185 		flags |= RT6_LOOKUP_F_IFACE;
1186 
1187 	if (!any_src)
1188 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1189 	else if (sk)
1190 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1191 
1192 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1193 }
1194 EXPORT_SYMBOL(ip6_route_output);
1195 
1196 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1197 {
1198 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1199 	struct dst_entry *new = NULL;
1200 
1201 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1202 	if (rt) {
1203 		rt6_info_init(rt);
1204 
1205 		new = &rt->dst;
1206 		new->__use = 1;
1207 		new->input = dst_discard;
1208 		new->output = dst_discard_out;
1209 
1210 		dst_copy_metrics(new, &ort->dst);
1211 		rt->rt6i_idev = ort->rt6i_idev;
1212 		if (rt->rt6i_idev)
1213 			in6_dev_hold(rt->rt6i_idev);
1214 
1215 		rt->rt6i_gateway = ort->rt6i_gateway;
1216 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1217 		rt->rt6i_metric = 0;
1218 
1219 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1220 #ifdef CONFIG_IPV6_SUBTREES
1221 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1222 #endif
1223 
1224 		dst_free(new);
1225 	}
1226 
1227 	dst_release(dst_orig);
1228 	return new ? new : ERR_PTR(-ENOMEM);
1229 }
1230 
1231 /*
1232  *	Destination cache support functions
1233  */
1234 
1235 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1236 {
1237 	if (rt->dst.from &&
1238 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1239 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1240 }
1241 
1242 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1243 {
1244 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1245 		return NULL;
1246 
1247 	if (rt6_check_expired(rt))
1248 		return NULL;
1249 
1250 	return &rt->dst;
1251 }
1252 
1253 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254 {
1255 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257 		return &rt->dst;
1258 	else
1259 		return NULL;
1260 }
1261 
1262 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1263 {
1264 	struct rt6_info *rt;
1265 
1266 	rt = (struct rt6_info *) dst;
1267 
1268 	/* All IPV6 dsts are created with ->obsolete set to the value
1269 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1270 	 * into this function always.
1271 	 */
1272 
1273 	rt6_dst_from_metrics_check(rt);
1274 
1275 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1276 		return rt6_dst_from_check(rt, cookie);
1277 	else
1278 		return rt6_check(rt, cookie);
1279 }
1280 
1281 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1282 {
1283 	struct rt6_info *rt = (struct rt6_info *) dst;
1284 
1285 	if (rt) {
1286 		if (rt->rt6i_flags & RTF_CACHE) {
1287 			if (rt6_check_expired(rt)) {
1288 				ip6_del_rt(rt);
1289 				dst = NULL;
1290 			}
1291 		} else {
1292 			dst_release(dst);
1293 			dst = NULL;
1294 		}
1295 	}
1296 	return dst;
1297 }
1298 
1299 static void ip6_link_failure(struct sk_buff *skb)
1300 {
1301 	struct rt6_info *rt;
1302 
1303 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1304 
1305 	rt = (struct rt6_info *) skb_dst(skb);
1306 	if (rt) {
1307 		if (rt->rt6i_flags & RTF_CACHE) {
1308 			dst_hold(&rt->dst);
1309 			ip6_del_rt(rt);
1310 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1311 			rt->rt6i_node->fn_sernum = -1;
1312 		}
1313 	}
1314 }
1315 
1316 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1317 {
1318 	struct net *net = dev_net(rt->dst.dev);
1319 
1320 	rt->rt6i_flags |= RTF_MODIFIED;
1321 	rt->rt6i_pmtu = mtu;
1322 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323 }
1324 
1325 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326 				 const struct ipv6hdr *iph, u32 mtu)
1327 {
1328 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1329 
1330 	if (rt6->rt6i_flags & RTF_LOCAL)
1331 		return;
1332 
1333 	dst_confirm(dst);
1334 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1335 	if (mtu >= dst_mtu(dst))
1336 		return;
1337 
1338 	if (rt6->rt6i_flags & RTF_CACHE) {
1339 		rt6_do_update_pmtu(rt6, mtu);
1340 	} else {
1341 		const struct in6_addr *daddr, *saddr;
1342 		struct rt6_info *nrt6;
1343 
1344 		if (iph) {
1345 			daddr = &iph->daddr;
1346 			saddr = &iph->saddr;
1347 		} else if (sk) {
1348 			daddr = &sk->sk_v6_daddr;
1349 			saddr = &inet6_sk(sk)->saddr;
1350 		} else {
1351 			return;
1352 		}
1353 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1354 		if (nrt6) {
1355 			rt6_do_update_pmtu(nrt6, mtu);
1356 
1357 			/* ip6_ins_rt(nrt6) will bump the
1358 			 * rt6->rt6i_node->fn_sernum
1359 			 * which will fail the next rt6_check() and
1360 			 * invalidate the sk->sk_dst_cache.
1361 			 */
1362 			ip6_ins_rt(nrt6);
1363 		}
1364 	}
1365 }
1366 
1367 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1368 			       struct sk_buff *skb, u32 mtu)
1369 {
1370 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1371 }
1372 
1373 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1374 		     int oif, u32 mark)
1375 {
1376 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1377 	struct dst_entry *dst;
1378 	struct flowi6 fl6;
1379 
1380 	memset(&fl6, 0, sizeof(fl6));
1381 	fl6.flowi6_oif = oif;
1382 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1383 	fl6.daddr = iph->daddr;
1384 	fl6.saddr = iph->saddr;
1385 	fl6.flowlabel = ip6_flowinfo(iph);
1386 
1387 	dst = ip6_route_output(net, NULL, &fl6);
1388 	if (!dst->error)
1389 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1390 	dst_release(dst);
1391 }
1392 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1393 
1394 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1395 {
1396 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1397 			sk->sk_bound_dev_if, sk->sk_mark);
1398 }
1399 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1400 
1401 /* Handle redirects: flow key used for redirect lookups.
 * __ip6_route_redirect() is handed a pointer to the embedded flowi6 and
 * casts it back to this type, so fl6 has to stay the first member.
 */
1402 struct ip6rd_flowi {
1403 	struct flowi6 fl6;		/* flow being looked up */
1404 	struct in6_addr gateway;	/* compared with each candidate's rt6i_gateway */
1405 };
1406 
1407 static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 					     struct fib6_table *table,
1409 					     struct flowi6 *fl6,
1410 					     int flags)
1411 {
1412 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1413 	struct rt6_info *rt;
1414 	struct fib6_node *fn;
1415 
1416 	/* Get the "current" route for this destination and
1417 	 * check if the redirect has come from approriate router.
1418 	 *
1419 	 * RFC 4861 specifies that redirects should only be
1420 	 * accepted if they come from the nexthop to the target.
1421 	 * Due to the way the routes are chosen, this notion
1422 	 * is a bit fuzzy and one might need to check all possible
1423 	 * routes.
1424 	 */
1425 
1426 	read_lock_bh(&table->tb6_lock);
1427 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1428 restart:
1429 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1430 		if (rt6_check_expired(rt))
1431 			continue;
1432 		if (rt->dst.error)
1433 			break;
1434 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 			continue;
1436 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1437 			continue;
1438 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1439 			continue;
1440 		break;
1441 	}
1442 
1443 	if (!rt)
1444 		rt = net->ipv6.ip6_null_entry;
1445 	else if (rt->dst.error) {
1446 		rt = net->ipv6.ip6_null_entry;
1447 		goto out;
1448 	}
1449 
1450 	if (rt == net->ipv6.ip6_null_entry) {
1451 		fn = fib6_backtrack(fn, &fl6->saddr);
1452 		if (fn)
1453 			goto restart;
1454 	}
1455 
1456 out:
1457 	dst_hold(&rt->dst);
1458 
1459 	read_unlock_bh(&table->tb6_lock);
1460 
1461 	return rt;
1462 };
1463 
1464 static struct dst_entry *ip6_route_redirect(struct net *net,
1465 					const struct flowi6 *fl6,
1466 					const struct in6_addr *gateway)
1467 {
1468 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1469 	struct ip6rd_flowi rdfl;
1470 
1471 	rdfl.fl6 = *fl6;
1472 	rdfl.gateway = *gateway;
1473 
1474 	return fib6_rule_lookup(net, &rdfl.fl6,
1475 				flags, __ip6_route_redirect);
1476 }
1477 
1478 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1479 {
1480 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1481 	struct dst_entry *dst;
1482 	struct flowi6 fl6;
1483 
1484 	memset(&fl6, 0, sizeof(fl6));
1485 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1486 	fl6.flowi6_oif = oif;
1487 	fl6.flowi6_mark = mark;
1488 	fl6.daddr = iph->daddr;
1489 	fl6.saddr = iph->saddr;
1490 	fl6.flowlabel = ip6_flowinfo(iph);
1491 
1492 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1493 	rt6_do_redirect(dst, NULL, skb);
1494 	dst_release(dst);
1495 }
1496 EXPORT_SYMBOL_GPL(ip6_redirect);
1497 
1498 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1499 			    u32 mark)
1500 {
1501 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1502 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1503 	struct dst_entry *dst;
1504 	struct flowi6 fl6;
1505 
1506 	memset(&fl6, 0, sizeof(fl6));
1507 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1508 	fl6.flowi6_oif = oif;
1509 	fl6.flowi6_mark = mark;
1510 	fl6.daddr = msg->dest;
1511 	fl6.saddr = iph->daddr;
1512 
1513 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1514 	rt6_do_redirect(dst, NULL, skb);
1515 	dst_release(dst);
1516 }
1517 
1518 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1519 {
1520 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1521 }
1522 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1523 
1524 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1525 {
1526 	struct net_device *dev = dst->dev;
1527 	unsigned int mtu = dst_mtu(dst);
1528 	struct net *net = dev_net(dev);
1529 
1530 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1531 
1532 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1533 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1534 
1535 	/*
1536 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1537 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1538 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1539 	 * rely only on pmtu discovery"
1540 	 */
1541 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1542 		mtu = IPV6_MAXPLEN;
1543 	return mtu;
1544 }
1545 
1546 static unsigned int ip6_mtu(const struct dst_entry *dst)
1547 {
1548 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1549 	unsigned int mtu = rt->rt6i_pmtu;
1550 	struct inet6_dev *idev;
1551 
1552 	if (mtu)
1553 		goto out;
1554 
1555 	mtu = dst_metric_raw(dst, RTAX_MTU);
1556 	if (mtu)
1557 		goto out;
1558 
1559 	mtu = IPV6_MIN_MTU;
1560 
1561 	rcu_read_lock();
1562 	idev = __in6_dev_get(dst->dev);
1563 	if (idev)
1564 		mtu = idev->cnf.mtu6;
1565 	rcu_read_unlock();
1566 
1567 out:
1568 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1569 }
1570 
/* Singly linked list (chained through dst.next) of the dsts created by
 * icmp6_dst_alloc().  icmp6_dst_lock is held around every insertion,
 * walk and removal.
 */
1571 static struct dst_entry *icmp6_dst_gc_list;
1572 static DEFINE_SPINLOCK(icmp6_dst_lock);
1573 
1574 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1575 				  struct flowi6 *fl6)
1576 {
1577 	struct dst_entry *dst;
1578 	struct rt6_info *rt;
1579 	struct inet6_dev *idev = in6_dev_get(dev);
1580 	struct net *net = dev_net(dev);
1581 
1582 	if (unlikely(!idev))
1583 		return ERR_PTR(-ENODEV);
1584 
1585 	rt = ip6_dst_alloc(net, dev, 0);
1586 	if (unlikely(!rt)) {
1587 		in6_dev_put(idev);
1588 		dst = ERR_PTR(-ENOMEM);
1589 		goto out;
1590 	}
1591 
1592 	rt->dst.flags |= DST_HOST;
1593 	rt->dst.output  = ip6_output;
1594 	atomic_set(&rt->dst.__refcnt, 1);
1595 	rt->rt6i_gateway  = fl6->daddr;
1596 	rt->rt6i_dst.addr = fl6->daddr;
1597 	rt->rt6i_dst.plen = 128;
1598 	rt->rt6i_idev     = idev;
1599 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1600 
1601 	spin_lock_bh(&icmp6_dst_lock);
1602 	rt->dst.next = icmp6_dst_gc_list;
1603 	icmp6_dst_gc_list = &rt->dst;
1604 	spin_unlock_bh(&icmp6_dst_lock);
1605 
1606 	fib6_force_start_gc(net);
1607 
1608 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1609 
1610 out:
1611 	return dst;
1612 }
1613 
1614 int icmp6_dst_gc(void)
1615 {
1616 	struct dst_entry *dst, **pprev;
1617 	int more = 0;
1618 
1619 	spin_lock_bh(&icmp6_dst_lock);
1620 	pprev = &icmp6_dst_gc_list;
1621 
1622 	while ((dst = *pprev) != NULL) {
1623 		if (!atomic_read(&dst->__refcnt)) {
1624 			*pprev = dst->next;
1625 			dst_free(dst);
1626 		} else {
1627 			pprev = &dst->next;
1628 			++more;
1629 		}
1630 	}
1631 
1632 	spin_unlock_bh(&icmp6_dst_lock);
1633 
1634 	return more;
1635 }
1636 
1637 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1638 			    void *arg)
1639 {
1640 	struct dst_entry *dst, **pprev;
1641 
1642 	spin_lock_bh(&icmp6_dst_lock);
1643 	pprev = &icmp6_dst_gc_list;
1644 	while ((dst = *pprev) != NULL) {
1645 		struct rt6_info *rt = (struct rt6_info *) dst;
1646 		if (func(rt, arg)) {
1647 			*pprev = dst->next;
1648 			dst_free(dst);
1649 		} else {
1650 			pprev = &dst->next;
1651 		}
1652 	}
1653 	spin_unlock_bh(&icmp6_dst_lock);
1654 }
1655 
1656 static int ip6_dst_gc(struct dst_ops *ops)
1657 {
1658 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1659 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1660 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1661 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1662 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1663 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1664 	int entries;
1665 
1666 	entries = dst_entries_get_fast(ops);
1667 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1668 	    entries <= rt_max_size)
1669 		goto out;
1670 
1671 	net->ipv6.ip6_rt_gc_expire++;
1672 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1673 	entries = dst_entries_get_slow(ops);
1674 	if (entries < ops->gc_thresh)
1675 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1676 out:
1677 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1678 	return entries > rt_max_size;
1679 }
1680 
1681 static int ip6_convert_metrics(struct mx6_config *mxc,
1682 			       const struct fib6_config *cfg)
1683 {
1684 	bool ecn_ca = false;
1685 	struct nlattr *nla;
1686 	int remaining;
1687 	u32 *mp;
1688 
1689 	if (!cfg->fc_mx)
1690 		return 0;
1691 
1692 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1693 	if (unlikely(!mp))
1694 		return -ENOMEM;
1695 
1696 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1697 		int type = nla_type(nla);
1698 		u32 val;
1699 
1700 		if (!type)
1701 			continue;
1702 		if (unlikely(type > RTAX_MAX))
1703 			goto err;
1704 
1705 		if (type == RTAX_CC_ALGO) {
1706 			char tmp[TCP_CA_NAME_MAX];
1707 
1708 			nla_strlcpy(tmp, nla, sizeof(tmp));
1709 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1710 			if (val == TCP_CA_UNSPEC)
1711 				goto err;
1712 		} else {
1713 			val = nla_get_u32(nla);
1714 		}
1715 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1716 			goto err;
1717 
1718 		mp[type - 1] = val;
1719 		__set_bit(type - 1, mxc->mx_valid);
1720 	}
1721 
1722 	if (ecn_ca) {
1723 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1724 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1725 	}
1726 
1727 	mxc->mx = mp;
1728 	return 0;
1729  err:
1730 	kfree(mp);
1731 	return -EINVAL;
1732 }
1733 
/* Build a new rt6_info from @cfg; the route is returned, not inserted.
 *
 * Side effects on @cfg: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and on success
 * fc_nlinfo.nl_net is set to the namespace of the chosen device.
 *
 * Returns the new route on success; the device and inet6_dev references
 * taken here are stored in it.  On failure those references are dropped,
 * a partially built route is freed and ERR_PTR(err) is returned.
 */
1734 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1735 {
1736 	struct net *net = cfg->fc_nlinfo.nl_net;
1737 	struct rt6_info *rt = NULL;
1738 	struct net_device *dev = NULL;
1739 	struct inet6_dev *idev = NULL;
1740 	struct fib6_table *table;
1741 	int addr_type;
1742 	int err = -EINVAL;
1743 
1744 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1745 		goto out;
1746 #ifndef CONFIG_IPV6_SUBTREES
1747 	if (cfg->fc_src_len)
1748 		goto out;
1749 #endif
1750 	if (cfg->fc_ifindex) {
1751 		err = -ENODEV;
1752 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1753 		if (!dev)
1754 			goto out;
1755 		idev = in6_dev_get(dev);
1756 		if (!idev)
1757 			goto out;
1758 	}
1759 
1760 	if (cfg->fc_metric == 0)
1761 		cfg->fc_metric = IP6_RT_PRIO_USER;
1762 
	/* A netlink request without NLM_F_CREATE first tries the existing
	 * table; if there is none, a warning is logged and the table is
	 * created anyway.
	 */
1763 	err = -ENOBUFS;
1764 	if (cfg->fc_nlinfo.nlh &&
1765 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1766 		table = fib6_get_table(net, cfg->fc_table);
1767 		if (!table) {
1768 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1769 			table = fib6_new_table(net, cfg->fc_table);
1770 		}
1771 	} else {
1772 		table = fib6_new_table(net, cfg->fc_table);
1773 	}
1774 
1775 	if (!table)
1776 		goto out;
1777 
1778 	rt = ip6_dst_alloc(net, NULL,
1779 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1780 
1781 	if (!rt) {
1782 		err = -ENOMEM;
1783 		goto out;
1784 	}
1785 
1786 	if (cfg->fc_flags & RTF_EXPIRES)
1787 		rt6_set_expires(rt, jiffies +
1788 				clock_t_to_jiffies(cfg->fc_expires));
1789 	else
1790 		rt6_clean_expires(rt);
1791 
1792 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1793 		cfg->fc_protocol = RTPROT_BOOT;
1794 	rt->rt6i_protocol = cfg->fc_protocol;
1795 
1796 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1797 
	/* Input handler: multicast destination, local delivery, or forward. */
1798 	if (addr_type & IPV6_ADDR_MULTICAST)
1799 		rt->dst.input = ip6_mc_input;
1800 	else if (cfg->fc_flags & RTF_LOCAL)
1801 		rt->dst.input = ip6_input;
1802 	else
1803 		rt->dst.input = ip6_forward;
1804 
1805 	rt->dst.output = ip6_output;
1806 
	/* Lightweight tunnel encap: the handlers chosen above are saved in
	 * the tunnel state and replaced by the lwtunnel ones where the
	 * state asks for redirection.
	 */
1807 	if (cfg->fc_encap) {
1808 		struct lwtunnel_state *lwtstate;
1809 
1810 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1811 					   cfg->fc_encap, AF_INET6, cfg,
1812 					   &lwtstate);
1813 		if (err)
1814 			goto out;
1815 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1816 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1817 			rt->dst.lwtstate->orig_output = rt->dst.output;
1818 			rt->dst.output = lwtunnel_output;
1819 		}
1820 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1821 			rt->dst.lwtstate->orig_input = rt->dst.input;
1822 			rt->dst.input = lwtunnel_input;
1823 		}
1824 	}
1825 
1826 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1827 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1828 	if (rt->rt6i_dst.plen == 128)
1829 		rt->dst.flags |= DST_HOST;
1830 
1831 #ifdef CONFIG_IPV6_SUBTREES
1832 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1833 	rt->rt6i_src.plen = cfg->fc_src_len;
1834 #endif
1835 
1836 	rt->rt6i_metric = cfg->fc_metric;
1837 
1838 	/* We cannot add true routes via loopback here,
1839 	   they would result in kernel looping; promote them to reject routes
1840 	 */
1841 	if ((cfg->fc_flags & RTF_REJECT) ||
1842 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1843 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1844 	     !(cfg->fc_flags & RTF_LOCAL))) {
1845 		/* hold loopback dev/idev if we haven't done so. */
1846 		if (dev != net->loopback_dev) {
1847 			if (dev) {
1848 				dev_put(dev);
1849 				in6_dev_put(idev);
1850 			}
1851 			dev = net->loopback_dev;
1852 			dev_hold(dev);
1853 			idev = in6_dev_get(dev);
1854 			if (!idev) {
1855 				err = -ENODEV;
1856 				goto out;
1857 			}
1858 		}
		/* Reject routes carry only these two flags; the error code
		 * and handlers depend on the requested route type.
		 */
1859 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1860 		switch (cfg->fc_type) {
1861 		case RTN_BLACKHOLE:
1862 			rt->dst.error = -EINVAL;
1863 			rt->dst.output = dst_discard_out;
1864 			rt->dst.input = dst_discard;
1865 			break;
1866 		case RTN_PROHIBIT:
1867 			rt->dst.error = -EACCES;
1868 			rt->dst.output = ip6_pkt_prohibit_out;
1869 			rt->dst.input = ip6_pkt_prohibit;
1870 			break;
1871 		case RTN_THROW:
1872 		case RTN_UNREACHABLE:
1873 		default:
1874 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1875 					: (cfg->fc_type == RTN_UNREACHABLE)
1876 					? -EHOSTUNREACH : -ENETUNREACH;
1877 			rt->dst.output = ip6_pkt_discard_out;
1878 			rt->dst.input = ip6_pkt_discard;
1879 			break;
1880 		}
1881 		goto install_route;
1882 	}
1883 
1884 	if (cfg->fc_flags & RTF_GATEWAY) {
1885 		const struct in6_addr *gw_addr;
1886 		int gwa_type;
1887 
1888 		gw_addr = &cfg->fc_gateway;
1889 		gwa_type = ipv6_addr_type(gw_addr);
1890 
1891 		/* if gw_addr is local we will fail to detect this in case
1892 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1893 		 * will return already-added prefix route via interface that
1894 		 * prefix route was assigned to, which might be non-loopback.
1895 		 */
1896 		err = -EINVAL;
1897 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1898 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1899 					    dev : NULL, 0, 0))
1900 			goto out;
1901 
1902 		rt->rt6i_gateway = *gw_addr;
1903 
1904 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1905 			struct rt6_info *grt;
1906 
1907 			/* IPv6 strictly inhibits using not link-local
1908 			   addresses as nexthop address.
1909 			   Otherwise, router will not able to send redirects.
1910 			   It is very good, but in some (rare!) circumstances
1911 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1912 			   some exceptions. --ANK
1913 			 */
1914 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1915 				goto out;
1916 
1917 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1918 
1919 			err = -EHOSTUNREACH;
1920 			if (!grt)
1921 				goto out;
1922 			if (dev) {
1923 				if (dev != grt->dst.dev) {
1924 					ip6_rt_put(grt);
1925 					goto out;
1926 				}
1927 			} else {
				/* No device given: adopt the one the gateway
				 * is reached through and take references.
				 */
1928 				dev = grt->dst.dev;
1929 				idev = grt->rt6i_idev;
1930 				dev_hold(dev);
1931 				in6_dev_hold(grt->rt6i_idev);
1932 			}
			/* The gateway is accepted only if the route to it is
			 * not itself a gateway route.
			 */
1933 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1934 				err = 0;
1935 			ip6_rt_put(grt);
1936 
1937 			if (err)
1938 				goto out;
1939 		}
1940 		err = -EINVAL;
1941 		if (!dev || (dev->flags & IFF_LOOPBACK))
1942 			goto out;
1943 	}
1944 
1945 	err = -ENODEV;
1946 	if (!dev)
1947 		goto out;
1948 
1949 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1950 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1951 			err = -EINVAL;
1952 			goto out;
1953 		}
1954 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1955 		rt->rt6i_prefsrc.plen = 128;
1956 	} else
1957 		rt->rt6i_prefsrc.plen = 0;
1958 
1959 	rt->rt6i_flags = cfg->fc_flags;
1960 
1961 install_route:
	/* The dev/idev references held here are handed over to the route. */
1962 	rt->dst.dev = dev;
1963 	rt->rt6i_idev = idev;
1964 	rt->rt6i_table = table;
1965 
1966 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1967 
1968 	return rt;
1969 out:
1970 	if (dev)
1971 		dev_put(dev);
1972 	if (idev)
1973 		in6_dev_put(idev);
1974 	if (rt)
1975 		dst_free(&rt->dst);
1976 
1977 	return ERR_PTR(err);
1978 }
1979 
1980 int ip6_route_add(struct fib6_config *cfg)
1981 {
1982 	struct mx6_config mxc = { .mx = NULL, };
1983 	struct rt6_info *rt;
1984 	int err;
1985 
1986 	rt = ip6_route_info_create(cfg);
1987 	if (IS_ERR(rt)) {
1988 		err = PTR_ERR(rt);
1989 		rt = NULL;
1990 		goto out;
1991 	}
1992 
1993 	err = ip6_convert_metrics(&mxc, cfg);
1994 	if (err)
1995 		goto out;
1996 
1997 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1998 
1999 	kfree(mxc.mx);
2000 
2001 	return err;
2002 out:
2003 	if (rt)
2004 		dst_free(&rt->dst);
2005 
2006 	return err;
2007 }
2008 
2009 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2010 {
2011 	int err;
2012 	struct fib6_table *table;
2013 	struct net *net = dev_net(rt->dst.dev);
2014 
2015 	if (rt == net->ipv6.ip6_null_entry ||
2016 	    rt->dst.flags & DST_NOCACHE) {
2017 		err = -ENOENT;
2018 		goto out;
2019 	}
2020 
2021 	table = rt->rt6i_table;
2022 	write_lock_bh(&table->tb6_lock);
2023 	err = fib6_del(rt, info);
2024 	write_unlock_bh(&table->tb6_lock);
2025 
2026 out:
2027 	ip6_rt_put(rt);
2028 	return err;
2029 }
2030 
2031 int ip6_del_rt(struct rt6_info *rt)
2032 {
2033 	struct nl_info info = {
2034 		.nl_net = dev_net(rt->dst.dev),
2035 	};
2036 	return __ip6_del_rt(rt, &info);
2037 }
2038 
2039 static int ip6_route_del(struct fib6_config *cfg)
2040 {
2041 	struct fib6_table *table;
2042 	struct fib6_node *fn;
2043 	struct rt6_info *rt;
2044 	int err = -ESRCH;
2045 
2046 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2047 	if (!table)
2048 		return err;
2049 
2050 	read_lock_bh(&table->tb6_lock);
2051 
2052 	fn = fib6_locate(&table->tb6_root,
2053 			 &cfg->fc_dst, cfg->fc_dst_len,
2054 			 &cfg->fc_src, cfg->fc_src_len);
2055 
2056 	if (fn) {
2057 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2058 			if ((rt->rt6i_flags & RTF_CACHE) &&
2059 			    !(cfg->fc_flags & RTF_CACHE))
2060 				continue;
2061 			if (cfg->fc_ifindex &&
2062 			    (!rt->dst.dev ||
2063 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2064 				continue;
2065 			if (cfg->fc_flags & RTF_GATEWAY &&
2066 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2067 				continue;
2068 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2069 				continue;
2070 			dst_hold(&rt->dst);
2071 			read_unlock_bh(&table->tb6_lock);
2072 
2073 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2074 		}
2075 	}
2076 	read_unlock_bh(&table->tb6_lock);
2077 
2078 	return err;
2079 }
2080 
/* Act on the ICMPv6 Redirect message carried in @skb.
 *
 * @dst: route found for the redirected flow; a route flagged RTF_REJECT
 *       means the sender is not an acceptable nexthop
 * @sk:  not referenced in this function
 * @skb: packet whose ICMPv6 header is the redirect message
 *
 * After validating the message, the neighbour entry for the target is
 * updated, an RTF_CACHE clone of @dst pointing at the target is
 * inserted, netevent listeners are notified, and if @dst was itself a
 * cache entry it is deleted.  Every failure is a silent return.
 */
2081 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2082 {
2083 	struct netevent_redirect netevent;
2084 	struct rt6_info *rt, *nrt = NULL;
2085 	struct ndisc_options ndopts;
2086 	struct inet6_dev *in6_dev;
2087 	struct neighbour *neigh;
2088 	struct rd_msg *msg;
2089 	int optlen, on_link;
2090 	u8 *lladdr;
2091 
	/* Bytes following the fixed redirect header are ND options. */
2092 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2093 	optlen -= sizeof(*msg);
2094 
2095 	if (optlen < 0) {
2096 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2097 		return;
2098 	}
2099 
2100 	msg = (struct rd_msg *)icmp6_hdr(skb);
2101 
2102 	if (ipv6_addr_is_multicast(&msg->dest)) {
2103 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2104 		return;
2105 	}
2106 
	/* Target equal to destination means the destination is on-link;
	 * otherwise the target has to be a link-local unicast address.
	 */
2107 	on_link = 0;
2108 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2109 		on_link = 1;
2110 	} else if (ipv6_addr_type(&msg->target) !=
2111 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2112 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2113 		return;
2114 	}
2115 
2116 	in6_dev = __in6_dev_get(skb->dev);
2117 	if (!in6_dev)
2118 		return;
	/* Ignored when forwarding is on or accept_redirects is off. */
2119 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2120 		return;
2121 
2122 	/* RFC2461 8.1:
2123 	 *	The IP source address of the Redirect MUST be the same as the current
2124 	 *	first-hop router for the specified ICMP Destination Address.
2125 	 */
2126 
2127 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2128 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2129 		return;
2130 	}
2131 
2132 	lladdr = NULL;
2133 	if (ndopts.nd_opts_tgt_lladdr) {
2134 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2135 					     skb->dev);
2136 		if (!lladdr) {
2137 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2138 			return;
2139 		}
2140 	}
2141 
2142 	rt = (struct rt6_info *) dst;
2143 	if (rt->rt6i_flags & RTF_REJECT) {
2144 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2145 		return;
2146 	}
2147 
2148 	/* Redirect received -> path was valid.
2149 	 * Look, redirects are sent only in response to data packets,
2150 	 * so that this nexthop apparently is reachable. --ANK
2151 	 */
2152 	dst_confirm(&rt->dst);
2153 
2154 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2155 	if (!neigh)
2156 		return;
2157 
2158 	/*
2159 	 *	We have finally decided to accept it.
2160 	 */
2161 
2162 	neigh_update(neigh, lladdr, NUD_STALE,
2163 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2164 		     NEIGH_UPDATE_F_OVERRIDE|
2165 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2166 				     NEIGH_UPDATE_F_ISROUTER))
2167 		     );
2168 
2169 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2170 	if (!nrt)
2171 		goto out;
2172 
	/* The clone's flags are replaced wholesale; RTF_GATEWAY is dropped
	 * again for an on-link destination.
	 */
2173 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2174 	if (on_link)
2175 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2176 
2177 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2178 
2179 	if (ip6_ins_rt(nrt))
2180 		goto out;
2181 
2182 	netevent.old = &rt->dst;
2183 	netevent.new = &nrt->dst;
2184 	netevent.daddr = &msg->dest;
2185 	netevent.neigh = neigh;
2186 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2187 
	/* An old cache entry is removed; the extra reference taken by
	 * dst_clone() is passed to ip6_del_rt().
	 */
2188 	if (rt->rt6i_flags & RTF_CACHE) {
2189 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2190 		ip6_del_rt(rt);
2191 	}
2192 
2193 out:
2194 	neigh_release(neigh);
2195 }
2196 
2197 /*
2198  *	Misc support functions
2199  */
2200 
2201 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2202 {
2203 	BUG_ON(from->dst.from);
2204 
2205 	rt->rt6i_flags &= ~RTF_EXPIRES;
2206 	dst_hold(&from->dst);
2207 	rt->dst.from = &from->dst;
2208 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2209 }
2210 
2211 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2212 {
2213 	rt->dst.input = ort->dst.input;
2214 	rt->dst.output = ort->dst.output;
2215 	rt->rt6i_dst = ort->rt6i_dst;
2216 	rt->dst.error = ort->dst.error;
2217 	rt->rt6i_idev = ort->rt6i_idev;
2218 	if (rt->rt6i_idev)
2219 		in6_dev_hold(rt->rt6i_idev);
2220 	rt->dst.lastuse = jiffies;
2221 	rt->rt6i_gateway = ort->rt6i_gateway;
2222 	rt->rt6i_flags = ort->rt6i_flags;
2223 	rt6_set_from(rt, ort);
2224 	rt->rt6i_metric = ort->rt6i_metric;
2225 #ifdef CONFIG_IPV6_SUBTREES
2226 	rt->rt6i_src = ort->rt6i_src;
2227 #endif
2228 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2229 	rt->rt6i_table = ort->rt6i_table;
2230 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2231 }
2232 
2233 #ifdef CONFIG_IPV6_ROUTE_INFO
2234 static struct rt6_info *rt6_get_route_info(struct net *net,
2235 					   const struct in6_addr *prefix, int prefixlen,
2236 					   const struct in6_addr *gwaddr, int ifindex)
2237 {
2238 	struct fib6_node *fn;
2239 	struct rt6_info *rt = NULL;
2240 	struct fib6_table *table;
2241 
2242 	table = fib6_get_table(net, RT6_TABLE_INFO);
2243 	if (!table)
2244 		return NULL;
2245 
2246 	read_lock_bh(&table->tb6_lock);
2247 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2248 	if (!fn)
2249 		goto out;
2250 
2251 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2252 		if (rt->dst.dev->ifindex != ifindex)
2253 			continue;
2254 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2255 			continue;
2256 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2257 			continue;
2258 		dst_hold(&rt->dst);
2259 		break;
2260 	}
2261 out:
2262 	read_unlock_bh(&table->tb6_lock);
2263 	return rt;
2264 }
2265 
2266 static struct rt6_info *rt6_add_route_info(struct net *net,
2267 					   const struct in6_addr *prefix, int prefixlen,
2268 					   const struct in6_addr *gwaddr, int ifindex,
2269 					   unsigned int pref)
2270 {
2271 	struct fib6_config cfg = {
2272 		.fc_metric	= IP6_RT_PRIO_USER,
2273 		.fc_ifindex	= ifindex,
2274 		.fc_dst_len	= prefixlen,
2275 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2276 				  RTF_UP | RTF_PREF(pref),
2277 		.fc_nlinfo.portid = 0,
2278 		.fc_nlinfo.nlh = NULL,
2279 		.fc_nlinfo.nl_net = net,
2280 	};
2281 
2282 	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2283 	cfg.fc_dst = *prefix;
2284 	cfg.fc_gateway = *gwaddr;
2285 
2286 	/* We should treat it as a default route if prefix length is 0. */
2287 	if (!prefixlen)
2288 		cfg.fc_flags |= RTF_DEFAULT;
2289 
2290 	ip6_route_add(&cfg);
2291 
2292 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2293 }
2294 #endif
2295 
2296 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2297 {
2298 	struct rt6_info *rt;
2299 	struct fib6_table *table;
2300 
2301 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2302 	if (!table)
2303 		return NULL;
2304 
2305 	read_lock_bh(&table->tb6_lock);
2306 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2307 		if (dev == rt->dst.dev &&
2308 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2309 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2310 			break;
2311 	}
2312 	if (rt)
2313 		dst_hold(&rt->dst);
2314 	read_unlock_bh(&table->tb6_lock);
2315 	return rt;
2316 }
2317 
2318 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2319 				     struct net_device *dev,
2320 				     unsigned int pref)
2321 {
2322 	struct fib6_config cfg = {
2323 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2324 		.fc_metric	= IP6_RT_PRIO_USER,
2325 		.fc_ifindex	= dev->ifindex,
2326 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2327 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2328 		.fc_nlinfo.portid = 0,
2329 		.fc_nlinfo.nlh = NULL,
2330 		.fc_nlinfo.nl_net = dev_net(dev),
2331 	};
2332 
2333 	cfg.fc_gateway = *gwaddr;
2334 
2335 	ip6_route_add(&cfg);
2336 
2337 	return rt6_get_dflt_router(gwaddr, dev);
2338 }
2339 
2340 void rt6_purge_dflt_routers(struct net *net)
2341 {
2342 	struct rt6_info *rt;
2343 	struct fib6_table *table;
2344 
2345 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2346 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2347 	if (!table)
2348 		return;
2349 
2350 restart:
2351 	read_lock_bh(&table->tb6_lock);
2352 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2353 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2354 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2355 			dst_hold(&rt->dst);
2356 			read_unlock_bh(&table->tb6_lock);
2357 			ip6_del_rt(rt);
2358 			goto restart;
2359 		}
2360 	}
2361 	read_unlock_bh(&table->tb6_lock);
2362 }
2363 
2364 static void rtmsg_to_fib6_config(struct net *net,
2365 				 struct in6_rtmsg *rtmsg,
2366 				 struct fib6_config *cfg)
2367 {
2368 	memset(cfg, 0, sizeof(*cfg));
2369 
2370 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2371 			 : RT6_TABLE_MAIN;
2372 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2373 	cfg->fc_metric = rtmsg->rtmsg_metric;
2374 	cfg->fc_expires = rtmsg->rtmsg_info;
2375 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2376 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2377 	cfg->fc_flags = rtmsg->rtmsg_flags;
2378 
2379 	cfg->fc_nlinfo.nl_net = net;
2380 
2381 	cfg->fc_dst = rtmsg->rtmsg_dst;
2382 	cfg->fc_src = rtmsg->rtmsg_src;
2383 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2384 }
2385 
2386 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2387 {
2388 	struct fib6_config cfg;
2389 	struct in6_rtmsg rtmsg;
2390 	int err;
2391 
2392 	switch (cmd) {
2393 	case SIOCADDRT:		/* Add a route */
2394 	case SIOCDELRT:		/* Delete a route */
2395 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2396 			return -EPERM;
2397 		err = copy_from_user(&rtmsg, arg,
2398 				     sizeof(struct in6_rtmsg));
2399 		if (err)
2400 			return -EFAULT;
2401 
2402 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2403 
2404 		rtnl_lock();
2405 		switch (cmd) {
2406 		case SIOCADDRT:
2407 			err = ip6_route_add(&cfg);
2408 			break;
2409 		case SIOCDELRT:
2410 			err = ip6_route_del(&cfg);
2411 			break;
2412 		default:
2413 			err = -EINVAL;
2414 		}
2415 		rtnl_unlock();
2416 
2417 		return err;
2418 	}
2419 
2420 	return -EINVAL;
2421 }
2422 
2423 /*
2424  *	Drop the packet on the floor
2425  */
2426 
2427 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2428 {
2429 	int type;
2430 	struct dst_entry *dst = skb_dst(skb);
2431 	switch (ipstats_mib_noroutes) {
2432 	case IPSTATS_MIB_INNOROUTES:
2433 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2434 		if (type == IPV6_ADDR_ANY) {
2435 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2436 				      IPSTATS_MIB_INADDRERRORS);
2437 			break;
2438 		}
2439 		/* FALLTHROUGH */
2440 	case IPSTATS_MIB_OUTNOROUTES:
2441 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2442 			      ipstats_mib_noroutes);
2443 		break;
2444 	}
2445 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2446 	kfree_skb(skb);
2447 	return 0;
2448 }
2449 
2450 static int ip6_pkt_discard(struct sk_buff *skb)
2451 {
2452 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2453 }
2454 
2455 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2456 {
2457 	skb->dev = skb_dst(skb)->dev;
2458 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2459 }
2460 
2461 static int ip6_pkt_prohibit(struct sk_buff *skb)
2462 {
2463 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2464 }
2465 
2466 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2467 {
2468 	skb->dev = skb_dst(skb)->dev;
2469 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2470 }
2471 
2472 /*
2473  *	Allocate a dst for local (unicast / anycast) address.
2474  */
2475 
2476 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2477 				    const struct in6_addr *addr,
2478 				    bool anycast)
2479 {
2480 	u32 tb_id;
2481 	struct net *net = dev_net(idev->dev);
2482 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2483 					    DST_NOCOUNT);
2484 	if (!rt)
2485 		return ERR_PTR(-ENOMEM);
2486 
2487 	in6_dev_hold(idev);
2488 
2489 	rt->dst.flags |= DST_HOST;
2490 	rt->dst.input = ip6_input;
2491 	rt->dst.output = ip6_output;
2492 	rt->rt6i_idev = idev;
2493 
2494 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2495 	if (anycast)
2496 		rt->rt6i_flags |= RTF_ANYCAST;
2497 	else
2498 		rt->rt6i_flags |= RTF_LOCAL;
2499 
2500 	rt->rt6i_gateway  = *addr;
2501 	rt->rt6i_dst.addr = *addr;
2502 	rt->rt6i_dst.plen = 128;
2503 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2504 	rt->rt6i_table = fib6_get_table(net, tb_id);
2505 	rt->dst.flags |= DST_NOCACHE;
2506 
2507 	atomic_set(&rt->dst.__refcnt, 1);
2508 
2509 	return rt;
2510 }
2511 
2512 int ip6_route_get_saddr(struct net *net,
2513 			struct rt6_info *rt,
2514 			const struct in6_addr *daddr,
2515 			unsigned int prefs,
2516 			struct in6_addr *saddr)
2517 {
2518 	struct inet6_dev *idev =
2519 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2520 	int err = 0;
2521 	if (rt && rt->rt6i_prefsrc.plen)
2522 		*saddr = rt->rt6i_prefsrc.addr;
2523 	else
2524 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2525 					 daddr, prefs, saddr);
2526 	return err;
2527 }
2528 
2529 /* remove deleted ip from prefsrc entries */
2530 struct arg_dev_net_ip {
2531 	struct net_device *dev;
2532 	struct net *net;
2533 	struct in6_addr *addr;
2534 };
2535 
2536 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2537 {
2538 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2539 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2540 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2541 
2542 	if (((void *)rt->dst.dev == dev || !dev) &&
2543 	    rt != net->ipv6.ip6_null_entry &&
2544 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2545 		/* remove prefsrc entry */
2546 		rt->rt6i_prefsrc.plen = 0;
2547 	}
2548 	return 0;
2549 }
2550 
2551 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2552 {
2553 	struct net *net = dev_net(ifp->idev->dev);
2554 	struct arg_dev_net_ip adni = {
2555 		.dev = ifp->idev->dev,
2556 		.net = net,
2557 		.addr = &ifp->addr,
2558 	};
2559 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2560 }
2561 
2562 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2563 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2564 
2565 /* Remove routers and update dst entries when gateway turn into host. */
2566 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2567 {
2568 	struct in6_addr *gateway = (struct in6_addr *)arg;
2569 
2570 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2571 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2572 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2573 		return -1;
2574 	}
2575 	return 0;
2576 }
2577 
2578 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2579 {
2580 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2581 }
2582 
2583 struct arg_dev_net {
2584 	struct net_device *dev;
2585 	struct net *net;
2586 };
2587 
2588 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2589 {
2590 	const struct arg_dev_net *adn = arg;
2591 	const struct net_device *dev = adn->dev;
2592 
2593 	if ((rt->dst.dev == dev || !dev) &&
2594 	    rt != adn->net->ipv6.ip6_null_entry)
2595 		return -1;
2596 
2597 	return 0;
2598 }
2599 
2600 void rt6_ifdown(struct net *net, struct net_device *dev)
2601 {
2602 	struct arg_dev_net adn = {
2603 		.dev = dev,
2604 		.net = net,
2605 	};
2606 
2607 	fib6_clean_all(net, fib6_ifdown, &adn);
2608 	icmp6_clean_all(fib6_ifdown, &adn);
2609 	if (dev)
2610 		rt6_uncached_list_flush_dev(net, dev);
2611 }
2612 
2613 struct rt6_mtu_change_arg {
2614 	struct net_device *dev;
2615 	unsigned int mtu;
2616 };
2617 
2618 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2619 {
2620 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2621 	struct inet6_dev *idev;
2622 
2623 	/* In IPv6 pmtu discovery is not optional,
2624 	   so that RTAX_MTU lock cannot disable it.
2625 	   We still use this lock to block changes
2626 	   caused by addrconf/ndisc.
2627 	*/
2628 
2629 	idev = __in6_dev_get(arg->dev);
2630 	if (!idev)
2631 		return 0;
2632 
2633 	/* For administrative MTU increase, there is no way to discover
2634 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2635 	   Since RFC 1981 doesn't include administrative MTU increase
2636 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2637 	 */
2638 	/*
2639 	   If new MTU is less than route PMTU, this new MTU will be the
2640 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2641 	   decreases; if new MTU is greater than route PMTU, and the
2642 	   old MTU is the lowest MTU in the path, update the route PMTU
2643 	   to reflect the increase. In this case if the other nodes' MTU
2644 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2645 	   PMTU discouvery.
2646 	 */
2647 	if (rt->dst.dev == arg->dev &&
2648 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2649 		if (rt->rt6i_flags & RTF_CACHE) {
2650 			/* For RTF_CACHE with rt6i_pmtu == 0
2651 			 * (i.e. a redirected route),
2652 			 * the metrics of its rt->dst.from has already
2653 			 * been updated.
2654 			 */
2655 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2656 				rt->rt6i_pmtu = arg->mtu;
2657 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2658 			   (dst_mtu(&rt->dst) < arg->mtu &&
2659 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2660 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2661 		}
2662 	}
2663 	return 0;
2664 }
2665 
2666 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2667 {
2668 	struct rt6_mtu_change_arg arg = {
2669 		.dev = dev,
2670 		.mtu = mtu,
2671 	};
2672 
2673 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2674 }
2675 
2676 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2677 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2678 	[RTA_OIF]               = { .type = NLA_U32 },
2679 	[RTA_IIF]		= { .type = NLA_U32 },
2680 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2681 	[RTA_METRICS]           = { .type = NLA_NESTED },
2682 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2683 	[RTA_PREF]              = { .type = NLA_U8 },
2684 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2685 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2686 };
2687 
2688 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2689 			      struct fib6_config *cfg)
2690 {
2691 	struct rtmsg *rtm;
2692 	struct nlattr *tb[RTA_MAX+1];
2693 	unsigned int pref;
2694 	int err;
2695 
2696 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2697 	if (err < 0)
2698 		goto errout;
2699 
2700 	err = -EINVAL;
2701 	rtm = nlmsg_data(nlh);
2702 	memset(cfg, 0, sizeof(*cfg));
2703 
2704 	cfg->fc_table = rtm->rtm_table;
2705 	cfg->fc_dst_len = rtm->rtm_dst_len;
2706 	cfg->fc_src_len = rtm->rtm_src_len;
2707 	cfg->fc_flags = RTF_UP;
2708 	cfg->fc_protocol = rtm->rtm_protocol;
2709 	cfg->fc_type = rtm->rtm_type;
2710 
2711 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2712 	    rtm->rtm_type == RTN_BLACKHOLE ||
2713 	    rtm->rtm_type == RTN_PROHIBIT ||
2714 	    rtm->rtm_type == RTN_THROW)
2715 		cfg->fc_flags |= RTF_REJECT;
2716 
2717 	if (rtm->rtm_type == RTN_LOCAL)
2718 		cfg->fc_flags |= RTF_LOCAL;
2719 
2720 	if (rtm->rtm_flags & RTM_F_CLONED)
2721 		cfg->fc_flags |= RTF_CACHE;
2722 
2723 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2724 	cfg->fc_nlinfo.nlh = nlh;
2725 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2726 
2727 	if (tb[RTA_GATEWAY]) {
2728 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2729 		cfg->fc_flags |= RTF_GATEWAY;
2730 	}
2731 
2732 	if (tb[RTA_DST]) {
2733 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2734 
2735 		if (nla_len(tb[RTA_DST]) < plen)
2736 			goto errout;
2737 
2738 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2739 	}
2740 
2741 	if (tb[RTA_SRC]) {
2742 		int plen = (rtm->rtm_src_len + 7) >> 3;
2743 
2744 		if (nla_len(tb[RTA_SRC]) < plen)
2745 			goto errout;
2746 
2747 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2748 	}
2749 
2750 	if (tb[RTA_PREFSRC])
2751 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2752 
2753 	if (tb[RTA_OIF])
2754 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2755 
2756 	if (tb[RTA_PRIORITY])
2757 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2758 
2759 	if (tb[RTA_METRICS]) {
2760 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2761 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2762 	}
2763 
2764 	if (tb[RTA_TABLE])
2765 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2766 
2767 	if (tb[RTA_MULTIPATH]) {
2768 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2769 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2770 	}
2771 
2772 	if (tb[RTA_PREF]) {
2773 		pref = nla_get_u8(tb[RTA_PREF]);
2774 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2775 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2776 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2777 		cfg->fc_flags |= RTF_PREF(pref);
2778 	}
2779 
2780 	if (tb[RTA_ENCAP])
2781 		cfg->fc_encap = tb[RTA_ENCAP];
2782 
2783 	if (tb[RTA_ENCAP_TYPE])
2784 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2785 
2786 	err = 0;
2787 errout:
2788 	return err;
2789 }
2790 
2791 struct rt6_nh {
2792 	struct rt6_info *rt6_info;
2793 	struct fib6_config r_cfg;
2794 	struct mx6_config mxc;
2795 	struct list_head next;
2796 };
2797 
2798 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2799 {
2800 	struct rt6_nh *nh;
2801 
2802 	list_for_each_entry(nh, rt6_nh_list, next) {
2803 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2804 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2805 		        nh->r_cfg.fc_ifindex);
2806 	}
2807 }
2808 
2809 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2810 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2811 {
2812 	struct rt6_nh *nh;
2813 	struct rt6_info *rtnh;
2814 	int err = -EEXIST;
2815 
2816 	list_for_each_entry(nh, rt6_nh_list, next) {
2817 		/* check if rt6_info already exists */
2818 		rtnh = nh->rt6_info;
2819 
2820 		if (rtnh->dst.dev == rt->dst.dev &&
2821 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2822 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2823 				    &rt->rt6i_gateway))
2824 			return err;
2825 	}
2826 
2827 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2828 	if (!nh)
2829 		return -ENOMEM;
2830 	nh->rt6_info = rt;
2831 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2832 	if (err) {
2833 		kfree(nh);
2834 		return err;
2835 	}
2836 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2837 	list_add_tail(&nh->next, rt6_nh_list);
2838 
2839 	return 0;
2840 }
2841 
2842 static int ip6_route_multipath_add(struct fib6_config *cfg)
2843 {
2844 	struct fib6_config r_cfg;
2845 	struct rtnexthop *rtnh;
2846 	struct rt6_info *rt;
2847 	struct rt6_nh *err_nh;
2848 	struct rt6_nh *nh, *nh_safe;
2849 	int remaining;
2850 	int attrlen;
2851 	int err = 1;
2852 	int nhn = 0;
2853 	int replace = (cfg->fc_nlinfo.nlh &&
2854 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2855 	LIST_HEAD(rt6_nh_list);
2856 
2857 	remaining = cfg->fc_mp_len;
2858 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2859 
2860 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2861 	 * rt6_info structs per nexthop
2862 	 */
2863 	while (rtnh_ok(rtnh, remaining)) {
2864 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2865 		if (rtnh->rtnh_ifindex)
2866 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2867 
2868 		attrlen = rtnh_attrlen(rtnh);
2869 		if (attrlen > 0) {
2870 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2871 
2872 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2873 			if (nla) {
2874 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2875 				r_cfg.fc_flags |= RTF_GATEWAY;
2876 			}
2877 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2878 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2879 			if (nla)
2880 				r_cfg.fc_encap_type = nla_get_u16(nla);
2881 		}
2882 
2883 		rt = ip6_route_info_create(&r_cfg);
2884 		if (IS_ERR(rt)) {
2885 			err = PTR_ERR(rt);
2886 			rt = NULL;
2887 			goto cleanup;
2888 		}
2889 
2890 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2891 		if (err) {
2892 			dst_free(&rt->dst);
2893 			goto cleanup;
2894 		}
2895 
2896 		rtnh = rtnh_next(rtnh, &remaining);
2897 	}
2898 
2899 	err_nh = NULL;
2900 	list_for_each_entry(nh, &rt6_nh_list, next) {
2901 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2902 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2903 		nh->rt6_info = NULL;
2904 		if (err) {
2905 			if (replace && nhn)
2906 				ip6_print_replace_route_err(&rt6_nh_list);
2907 			err_nh = nh;
2908 			goto add_errout;
2909 		}
2910 
2911 		/* Because each route is added like a single route we remove
2912 		 * these flags after the first nexthop: if there is a collision,
2913 		 * we have already failed to add the first nexthop:
2914 		 * fib6_add_rt2node() has rejected it; when replacing, old
2915 		 * nexthops have been replaced by first new, the rest should
2916 		 * be added to it.
2917 		 */
2918 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2919 						     NLM_F_REPLACE);
2920 		nhn++;
2921 	}
2922 
2923 	goto cleanup;
2924 
2925 add_errout:
2926 	/* Delete routes that were already added */
2927 	list_for_each_entry(nh, &rt6_nh_list, next) {
2928 		if (err_nh == nh)
2929 			break;
2930 		ip6_route_del(&nh->r_cfg);
2931 	}
2932 
2933 cleanup:
2934 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2935 		if (nh->rt6_info)
2936 			dst_free(&nh->rt6_info->dst);
2937 		kfree(nh->mxc.mx);
2938 		list_del(&nh->next);
2939 		kfree(nh);
2940 	}
2941 
2942 	return err;
2943 }
2944 
2945 static int ip6_route_multipath_del(struct fib6_config *cfg)
2946 {
2947 	struct fib6_config r_cfg;
2948 	struct rtnexthop *rtnh;
2949 	int remaining;
2950 	int attrlen;
2951 	int err = 1, last_err = 0;
2952 
2953 	remaining = cfg->fc_mp_len;
2954 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2955 
2956 	/* Parse a Multipath Entry */
2957 	while (rtnh_ok(rtnh, remaining)) {
2958 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2959 		if (rtnh->rtnh_ifindex)
2960 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2961 
2962 		attrlen = rtnh_attrlen(rtnh);
2963 		if (attrlen > 0) {
2964 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2965 
2966 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2967 			if (nla) {
2968 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2969 				r_cfg.fc_flags |= RTF_GATEWAY;
2970 			}
2971 		}
2972 		err = ip6_route_del(&r_cfg);
2973 		if (err)
2974 			last_err = err;
2975 
2976 		rtnh = rtnh_next(rtnh, &remaining);
2977 	}
2978 
2979 	return last_err;
2980 }
2981 
2982 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2983 {
2984 	struct fib6_config cfg;
2985 	int err;
2986 
2987 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2988 	if (err < 0)
2989 		return err;
2990 
2991 	if (cfg.fc_mp)
2992 		return ip6_route_multipath_del(&cfg);
2993 	else
2994 		return ip6_route_del(&cfg);
2995 }
2996 
2997 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2998 {
2999 	struct fib6_config cfg;
3000 	int err;
3001 
3002 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3003 	if (err < 0)
3004 		return err;
3005 
3006 	if (cfg.fc_mp)
3007 		return ip6_route_multipath_add(&cfg);
3008 	else
3009 		return ip6_route_add(&cfg);
3010 }
3011 
3012 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3013 {
3014 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3015 	       + nla_total_size(16) /* RTA_SRC */
3016 	       + nla_total_size(16) /* RTA_DST */
3017 	       + nla_total_size(16) /* RTA_GATEWAY */
3018 	       + nla_total_size(16) /* RTA_PREFSRC */
3019 	       + nla_total_size(4) /* RTA_TABLE */
3020 	       + nla_total_size(4) /* RTA_IIF */
3021 	       + nla_total_size(4) /* RTA_OIF */
3022 	       + nla_total_size(4) /* RTA_PRIORITY */
3023 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3024 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3025 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3026 	       + nla_total_size(1) /* RTA_PREF */
3027 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3028 }
3029 
3030 static int rt6_fill_node(struct net *net,
3031 			 struct sk_buff *skb, struct rt6_info *rt,
3032 			 struct in6_addr *dst, struct in6_addr *src,
3033 			 int iif, int type, u32 portid, u32 seq,
3034 			 int prefix, int nowait, unsigned int flags)
3035 {
3036 	u32 metrics[RTAX_MAX];
3037 	struct rtmsg *rtm;
3038 	struct nlmsghdr *nlh;
3039 	long expires;
3040 	u32 table;
3041 
3042 	if (prefix) {	/* user wants prefix routes only */
3043 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3044 			/* success since this is not a prefix route */
3045 			return 1;
3046 		}
3047 	}
3048 
3049 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3050 	if (!nlh)
3051 		return -EMSGSIZE;
3052 
3053 	rtm = nlmsg_data(nlh);
3054 	rtm->rtm_family = AF_INET6;
3055 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3056 	rtm->rtm_src_len = rt->rt6i_src.plen;
3057 	rtm->rtm_tos = 0;
3058 	if (rt->rt6i_table)
3059 		table = rt->rt6i_table->tb6_id;
3060 	else
3061 		table = RT6_TABLE_UNSPEC;
3062 	rtm->rtm_table = table;
3063 	if (nla_put_u32(skb, RTA_TABLE, table))
3064 		goto nla_put_failure;
3065 	if (rt->rt6i_flags & RTF_REJECT) {
3066 		switch (rt->dst.error) {
3067 		case -EINVAL:
3068 			rtm->rtm_type = RTN_BLACKHOLE;
3069 			break;
3070 		case -EACCES:
3071 			rtm->rtm_type = RTN_PROHIBIT;
3072 			break;
3073 		case -EAGAIN:
3074 			rtm->rtm_type = RTN_THROW;
3075 			break;
3076 		default:
3077 			rtm->rtm_type = RTN_UNREACHABLE;
3078 			break;
3079 		}
3080 	}
3081 	else if (rt->rt6i_flags & RTF_LOCAL)
3082 		rtm->rtm_type = RTN_LOCAL;
3083 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3084 		rtm->rtm_type = RTN_LOCAL;
3085 	else
3086 		rtm->rtm_type = RTN_UNICAST;
3087 	rtm->rtm_flags = 0;
3088 	if (!netif_carrier_ok(rt->dst.dev)) {
3089 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3090 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3091 			rtm->rtm_flags |= RTNH_F_DEAD;
3092 	}
3093 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3094 	rtm->rtm_protocol = rt->rt6i_protocol;
3095 	if (rt->rt6i_flags & RTF_DYNAMIC)
3096 		rtm->rtm_protocol = RTPROT_REDIRECT;
3097 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3098 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3099 			rtm->rtm_protocol = RTPROT_RA;
3100 		else
3101 			rtm->rtm_protocol = RTPROT_KERNEL;
3102 	}
3103 
3104 	if (rt->rt6i_flags & RTF_CACHE)
3105 		rtm->rtm_flags |= RTM_F_CLONED;
3106 
3107 	if (dst) {
3108 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3109 			goto nla_put_failure;
3110 		rtm->rtm_dst_len = 128;
3111 	} else if (rtm->rtm_dst_len)
3112 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3113 			goto nla_put_failure;
3114 #ifdef CONFIG_IPV6_SUBTREES
3115 	if (src) {
3116 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3117 			goto nla_put_failure;
3118 		rtm->rtm_src_len = 128;
3119 	} else if (rtm->rtm_src_len &&
3120 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3121 		goto nla_put_failure;
3122 #endif
3123 	if (iif) {
3124 #ifdef CONFIG_IPV6_MROUTE
3125 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3126 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3127 			if (err <= 0) {
3128 				if (!nowait) {
3129 					if (err == 0)
3130 						return 0;
3131 					goto nla_put_failure;
3132 				} else {
3133 					if (err == -EMSGSIZE)
3134 						goto nla_put_failure;
3135 				}
3136 			}
3137 		} else
3138 #endif
3139 			if (nla_put_u32(skb, RTA_IIF, iif))
3140 				goto nla_put_failure;
3141 	} else if (dst) {
3142 		struct in6_addr saddr_buf;
3143 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3144 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3145 			goto nla_put_failure;
3146 	}
3147 
3148 	if (rt->rt6i_prefsrc.plen) {
3149 		struct in6_addr saddr_buf;
3150 		saddr_buf = rt->rt6i_prefsrc.addr;
3151 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3152 			goto nla_put_failure;
3153 	}
3154 
3155 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3156 	if (rt->rt6i_pmtu)
3157 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3158 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3159 		goto nla_put_failure;
3160 
3161 	if (rt->rt6i_flags & RTF_GATEWAY) {
3162 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3163 			goto nla_put_failure;
3164 	}
3165 
3166 	if (rt->dst.dev &&
3167 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3168 		goto nla_put_failure;
3169 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3170 		goto nla_put_failure;
3171 
3172 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3173 
3174 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3175 		goto nla_put_failure;
3176 
3177 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3178 		goto nla_put_failure;
3179 
3180 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3181 
3182 	nlmsg_end(skb, nlh);
3183 	return 0;
3184 
3185 nla_put_failure:
3186 	nlmsg_cancel(skb, nlh);
3187 	return -EMSGSIZE;
3188 }
3189 
3190 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3191 {
3192 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3193 	int prefix;
3194 
3195 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3196 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3197 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3198 	} else
3199 		prefix = 0;
3200 
3201 	return rt6_fill_node(arg->net,
3202 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3203 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3204 		     prefix, 0, NLM_F_MULTI);
3205 }
3206 
3207 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3208 {
3209 	struct net *net = sock_net(in_skb->sk);
3210 	struct nlattr *tb[RTA_MAX+1];
3211 	struct rt6_info *rt;
3212 	struct sk_buff *skb;
3213 	struct rtmsg *rtm;
3214 	struct flowi6 fl6;
3215 	int err, iif = 0, oif = 0;
3216 
3217 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3218 	if (err < 0)
3219 		goto errout;
3220 
3221 	err = -EINVAL;
3222 	memset(&fl6, 0, sizeof(fl6));
3223 
3224 	if (tb[RTA_SRC]) {
3225 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3226 			goto errout;
3227 
3228 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3229 	}
3230 
3231 	if (tb[RTA_DST]) {
3232 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3233 			goto errout;
3234 
3235 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3236 	}
3237 
3238 	if (tb[RTA_IIF])
3239 		iif = nla_get_u32(tb[RTA_IIF]);
3240 
3241 	if (tb[RTA_OIF])
3242 		oif = nla_get_u32(tb[RTA_OIF]);
3243 
3244 	if (tb[RTA_MARK])
3245 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3246 
3247 	if (iif) {
3248 		struct net_device *dev;
3249 		int flags = 0;
3250 
3251 		dev = __dev_get_by_index(net, iif);
3252 		if (!dev) {
3253 			err = -ENODEV;
3254 			goto errout;
3255 		}
3256 
3257 		fl6.flowi6_iif = iif;
3258 
3259 		if (!ipv6_addr_any(&fl6.saddr))
3260 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3261 
3262 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3263 							       flags);
3264 	} else {
3265 		fl6.flowi6_oif = oif;
3266 
3267 		if (netif_index_is_l3_master(net, oif)) {
3268 			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3269 					   FLOWI_FLAG_SKIP_NH_OIF;
3270 		}
3271 
3272 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3273 	}
3274 
3275 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3276 	if (!skb) {
3277 		ip6_rt_put(rt);
3278 		err = -ENOBUFS;
3279 		goto errout;
3280 	}
3281 
3282 	/* Reserve room for dummy headers, this skb can pass
3283 	   through good chunk of routing engine.
3284 	 */
3285 	skb_reset_mac_header(skb);
3286 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3287 
3288 	skb_dst_set(skb, &rt->dst);
3289 
3290 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3291 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3292 			    nlh->nlmsg_seq, 0, 0, 0);
3293 	if (err < 0) {
3294 		kfree_skb(skb);
3295 		goto errout;
3296 	}
3297 
3298 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3299 errout:
3300 	return err;
3301 }
3302 
3303 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3304 		     unsigned int nlm_flags)
3305 {
3306 	struct sk_buff *skb;
3307 	struct net *net = info->nl_net;
3308 	u32 seq;
3309 	int err;
3310 
3311 	err = -ENOBUFS;
3312 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3313 
3314 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3315 	if (!skb)
3316 		goto errout;
3317 
3318 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3319 				event, info->portid, seq, 0, 0, nlm_flags);
3320 	if (err < 0) {
3321 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3322 		WARN_ON(err == -EMSGSIZE);
3323 		kfree_skb(skb);
3324 		goto errout;
3325 	}
3326 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3327 		    info->nlh, gfp_any());
3328 	return;
3329 errout:
3330 	if (err < 0)
3331 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3332 }
3333 
3334 static int ip6_route_dev_notify(struct notifier_block *this,
3335 				unsigned long event, void *ptr)
3336 {
3337 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3338 	struct net *net = dev_net(dev);
3339 
3340 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3341 		net->ipv6.ip6_null_entry->dst.dev = dev;
3342 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3343 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3344 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3345 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3346 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3347 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3348 #endif
3349 	}
3350 
3351 	return NOTIFY_OK;
3352 }
3353 
3354 /*
3355  *	/proc
3356  */
3357 
3358 #ifdef CONFIG_PROC_FS
3359 
3360 static const struct file_operations ipv6_route_proc_fops = {
3361 	.owner		= THIS_MODULE,
3362 	.open		= ipv6_route_open,
3363 	.read		= seq_read,
3364 	.llseek		= seq_lseek,
3365 	.release	= seq_release_net,
3366 };
3367 
3368 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3369 {
3370 	struct net *net = (struct net *)seq->private;
3371 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3372 		   net->ipv6.rt6_stats->fib_nodes,
3373 		   net->ipv6.rt6_stats->fib_route_nodes,
3374 		   net->ipv6.rt6_stats->fib_rt_alloc,
3375 		   net->ipv6.rt6_stats->fib_rt_entries,
3376 		   net->ipv6.rt6_stats->fib_rt_cache,
3377 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3378 		   net->ipv6.rt6_stats->fib_discarded_routes);
3379 
3380 	return 0;
3381 }
3382 
3383 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3384 {
3385 	return single_open_net(inode, file, rt6_stats_seq_show);
3386 }
3387 
3388 static const struct file_operations rt6_stats_seq_fops = {
3389 	.owner	 = THIS_MODULE,
3390 	.open	 = rt6_stats_seq_open,
3391 	.read	 = seq_read,
3392 	.llseek	 = seq_lseek,
3393 	.release = single_release_net,
3394 };
3395 #endif	/* CONFIG_PROC_FS */
3396 
3397 #ifdef CONFIG_SYSCTL
3398 
3399 static
3400 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3401 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3402 {
3403 	struct net *net;
3404 	int delay;
3405 	if (!write)
3406 		return -EINVAL;
3407 
3408 	net = (struct net *)ctl->extra1;
3409 	delay = net->ipv6.sysctl.flush_delay;
3410 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3411 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3412 	return 0;
3413 }
3414 
3415 struct ctl_table ipv6_route_table_template[] = {
3416 	{
3417 		.procname	=	"flush",
3418 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3419 		.maxlen		=	sizeof(int),
3420 		.mode		=	0200,
3421 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3422 	},
3423 	{
3424 		.procname	=	"gc_thresh",
3425 		.data		=	&ip6_dst_ops_template.gc_thresh,
3426 		.maxlen		=	sizeof(int),
3427 		.mode		=	0644,
3428 		.proc_handler	=	proc_dointvec,
3429 	},
3430 	{
3431 		.procname	=	"max_size",
3432 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3433 		.maxlen		=	sizeof(int),
3434 		.mode		=	0644,
3435 		.proc_handler	=	proc_dointvec,
3436 	},
3437 	{
3438 		.procname	=	"gc_min_interval",
3439 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3440 		.maxlen		=	sizeof(int),
3441 		.mode		=	0644,
3442 		.proc_handler	=	proc_dointvec_jiffies,
3443 	},
3444 	{
3445 		.procname	=	"gc_timeout",
3446 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3447 		.maxlen		=	sizeof(int),
3448 		.mode		=	0644,
3449 		.proc_handler	=	proc_dointvec_jiffies,
3450 	},
3451 	{
3452 		.procname	=	"gc_interval",
3453 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3454 		.maxlen		=	sizeof(int),
3455 		.mode		=	0644,
3456 		.proc_handler	=	proc_dointvec_jiffies,
3457 	},
3458 	{
3459 		.procname	=	"gc_elasticity",
3460 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3461 		.maxlen		=	sizeof(int),
3462 		.mode		=	0644,
3463 		.proc_handler	=	proc_dointvec,
3464 	},
3465 	{
3466 		.procname	=	"mtu_expires",
3467 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3468 		.maxlen		=	sizeof(int),
3469 		.mode		=	0644,
3470 		.proc_handler	=	proc_dointvec_jiffies,
3471 	},
3472 	{
3473 		.procname	=	"min_adv_mss",
3474 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3475 		.maxlen		=	sizeof(int),
3476 		.mode		=	0644,
3477 		.proc_handler	=	proc_dointvec,
3478 	},
3479 	{
3480 		.procname	=	"gc_min_interval_ms",
3481 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3482 		.maxlen		=	sizeof(int),
3483 		.mode		=	0644,
3484 		.proc_handler	=	proc_dointvec_ms_jiffies,
3485 	},
3486 	{ }
3487 };
3488 
3489 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3490 {
3491 	struct ctl_table *table;
3492 
3493 	table = kmemdup(ipv6_route_table_template,
3494 			sizeof(ipv6_route_table_template),
3495 			GFP_KERNEL);
3496 
3497 	if (table) {
3498 		table[0].data = &net->ipv6.sysctl.flush_delay;
3499 		table[0].extra1 = net;
3500 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3501 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3502 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3503 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3504 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3505 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3506 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3507 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3508 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3509 
3510 		/* Don't export sysctls to unprivileged users */
3511 		if (net->user_ns != &init_user_ns)
3512 			table[0].procname = NULL;
3513 	}
3514 
3515 	return table;
3516 }
3517 #endif
3518 
3519 static int __net_init ip6_route_net_init(struct net *net)
3520 {
3521 	int ret = -ENOMEM;
3522 
3523 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3524 	       sizeof(net->ipv6.ip6_dst_ops));
3525 
3526 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3527 		goto out_ip6_dst_ops;
3528 
3529 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3530 					   sizeof(*net->ipv6.ip6_null_entry),
3531 					   GFP_KERNEL);
3532 	if (!net->ipv6.ip6_null_entry)
3533 		goto out_ip6_dst_entries;
3534 	net->ipv6.ip6_null_entry->dst.path =
3535 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3536 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3537 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3538 			 ip6_template_metrics, true);
3539 
3540 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3541 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3542 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3543 					       GFP_KERNEL);
3544 	if (!net->ipv6.ip6_prohibit_entry)
3545 		goto out_ip6_null_entry;
3546 	net->ipv6.ip6_prohibit_entry->dst.path =
3547 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3548 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3549 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3550 			 ip6_template_metrics, true);
3551 
3552 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3553 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3554 					       GFP_KERNEL);
3555 	if (!net->ipv6.ip6_blk_hole_entry)
3556 		goto out_ip6_prohibit_entry;
3557 	net->ipv6.ip6_blk_hole_entry->dst.path =
3558 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3559 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3560 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3561 			 ip6_template_metrics, true);
3562 #endif
3563 
3564 	net->ipv6.sysctl.flush_delay = 0;
3565 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3566 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3567 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3568 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3569 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3570 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3571 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3572 
3573 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3574 
3575 	ret = 0;
3576 out:
3577 	return ret;
3578 
3579 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3580 out_ip6_prohibit_entry:
3581 	kfree(net->ipv6.ip6_prohibit_entry);
3582 out_ip6_null_entry:
3583 	kfree(net->ipv6.ip6_null_entry);
3584 #endif
3585 out_ip6_dst_entries:
3586 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3587 out_ip6_dst_ops:
3588 	goto out;
3589 }
3590 
3591 static void __net_exit ip6_route_net_exit(struct net *net)
3592 {
3593 	kfree(net->ipv6.ip6_null_entry);
3594 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3595 	kfree(net->ipv6.ip6_prohibit_entry);
3596 	kfree(net->ipv6.ip6_blk_hole_entry);
3597 #endif
3598 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3599 }
3600 
3601 static int __net_init ip6_route_net_init_late(struct net *net)
3602 {
3603 #ifdef CONFIG_PROC_FS
3604 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3605 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3606 #endif
3607 	return 0;
3608 }
3609 
3610 static void __net_exit ip6_route_net_exit_late(struct net *net)
3611 {
3612 #ifdef CONFIG_PROC_FS
3613 	remove_proc_entry("ipv6_route", net->proc_net);
3614 	remove_proc_entry("rt6_stats", net->proc_net);
3615 #endif
3616 }
3617 
3618 static struct pernet_operations ip6_route_net_ops = {
3619 	.init = ip6_route_net_init,
3620 	.exit = ip6_route_net_exit,
3621 };
3622 
3623 static int __net_init ipv6_inetpeer_init(struct net *net)
3624 {
3625 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3626 
3627 	if (!bp)
3628 		return -ENOMEM;
3629 	inet_peer_base_init(bp);
3630 	net->ipv6.peers = bp;
3631 	return 0;
3632 }
3633 
3634 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3635 {
3636 	struct inet_peer_base *bp = net->ipv6.peers;
3637 
3638 	net->ipv6.peers = NULL;
3639 	inetpeer_invalidate_tree(bp);
3640 	kfree(bp);
3641 }
3642 
3643 static struct pernet_operations ipv6_inetpeer_ops = {
3644 	.init	=	ipv6_inetpeer_init,
3645 	.exit	=	ipv6_inetpeer_exit,
3646 };
3647 
3648 static struct pernet_operations ip6_route_net_late_ops = {
3649 	.init = ip6_route_net_init_late,
3650 	.exit = ip6_route_net_exit_late,
3651 };
3652 
3653 static struct notifier_block ip6_route_dev_notifier = {
3654 	.notifier_call = ip6_route_dev_notify,
3655 	.priority = 0,
3656 };
3657 
3658 int __init ip6_route_init(void)
3659 {
3660 	int ret;
3661 	int cpu;
3662 
3663 	ret = -ENOMEM;
3664 	ip6_dst_ops_template.kmem_cachep =
3665 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3666 				  SLAB_HWCACHE_ALIGN, NULL);
3667 	if (!ip6_dst_ops_template.kmem_cachep)
3668 		goto out;
3669 
3670 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3671 	if (ret)
3672 		goto out_kmem_cache;
3673 
3674 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3675 	if (ret)
3676 		goto out_dst_entries;
3677 
3678 	ret = register_pernet_subsys(&ip6_route_net_ops);
3679 	if (ret)
3680 		goto out_register_inetpeer;
3681 
3682 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3683 
3684 	/* Registering of the loopback is done before this portion of code,
3685 	 * the loopback reference in rt6_info will not be taken, do it
3686 	 * manually for init_net */
3687 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3688 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3689   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3690 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3691 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3692 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3693 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3694   #endif
3695 	ret = fib6_init();
3696 	if (ret)
3697 		goto out_register_subsys;
3698 
3699 	ret = xfrm6_init();
3700 	if (ret)
3701 		goto out_fib6_init;
3702 
3703 	ret = fib6_rules_init();
3704 	if (ret)
3705 		goto xfrm6_init;
3706 
3707 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3708 	if (ret)
3709 		goto fib6_rules_init;
3710 
3711 	ret = -ENOBUFS;
3712 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3713 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3714 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3715 		goto out_register_late_subsys;
3716 
3717 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3718 	if (ret)
3719 		goto out_register_late_subsys;
3720 
3721 	for_each_possible_cpu(cpu) {
3722 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3723 
3724 		INIT_LIST_HEAD(&ul->head);
3725 		spin_lock_init(&ul->lock);
3726 	}
3727 
3728 out:
3729 	return ret;
3730 
3731 out_register_late_subsys:
3732 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3733 fib6_rules_init:
3734 	fib6_rules_cleanup();
3735 xfrm6_init:
3736 	xfrm6_fini();
3737 out_fib6_init:
3738 	fib6_gc_cleanup();
3739 out_register_subsys:
3740 	unregister_pernet_subsys(&ip6_route_net_ops);
3741 out_register_inetpeer:
3742 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3743 out_dst_entries:
3744 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3745 out_kmem_cache:
3746 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3747 	goto out;
3748 }
3749 
3750 void ip6_route_cleanup(void)
3751 {
3752 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3753 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3754 	fib6_rules_cleanup();
3755 	xfrm6_fini();
3756 	fib6_gc_cleanup();
3757 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3758 	unregister_pernet_subsys(&ip6_route_net_ops);
3759 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3760 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3761 }
3762