xref: /linux/net/ipv6/route.c (revision 02bafd96f3a5d8e610b19033ffec55b92459aaae)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <asm/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/*
 * Neighbour-reachability verdicts produced by rt6_check_neigh() and passed
 * on by rt6_score_route().  All failure codes are negative so callers can
 * test "< 0"; the single success code is positive.
 */
73 enum rt6_nud_state {
74 	RT6_NUD_FAIL_HARD = -3,	/* find_match() drops the route */
75 	RT6_NUD_FAIL_PROBE = -2,	/* neighbour exists but is NUD_FAILED */
76 	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores 0 and requests round-robin */
77 	RT6_NUD_SUCCEED = 1
78 };
79 
/*
 * Forward declarations of file-local helpers and dst_ops callbacks that are
 * referenced (for instance by ip6_dst_ops_template) before they are defined.
 */
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
/* Packet handlers installed as dst.input / dst.output of the reject entries. */
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 
102 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); only declared with CONFIG_IPV6_ROUTE_INFO. */
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104 					   const struct in6_addr *prefix, int prefixlen,
105 					   const struct in6_addr *gwaddr, int ifindex,
106 					   unsigned int pref);
107 static struct rt6_info *rt6_get_route_info(struct net *net,
108 					   const struct in6_addr *prefix, int prefixlen,
109 					   const struct in6_addr *gwaddr, int ifindex);
110 #endif
111 
/*
 * List of routes that were added through rt6_uncached_list_add(), i.e.
 * routes flagged DST_NOCACHE.  One instance exists per CPU; @lock guards
 * @head.
 */
112 struct uncached_list {
113 	spinlock_t		lock;
114 	struct list_head	head;
115 };
116 
/* Per-CPU instances of the list above. */
117 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118 
119 static void rt6_uncached_list_add(struct rt6_info *rt)
120 {
121 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122 
123 	rt->dst.flags |= DST_NOCACHE;
124 	rt->rt6i_uncached_list = ul;
125 
126 	spin_lock_bh(&ul->lock);
127 	list_add_tail(&rt->rt6i_uncached, &ul->head);
128 	spin_unlock_bh(&ul->lock);
129 }
130 
131 static void rt6_uncached_list_del(struct rt6_info *rt)
132 {
133 	if (!list_empty(&rt->rt6i_uncached)) {
134 		struct uncached_list *ul = rt->rt6i_uncached_list;
135 
136 		spin_lock_bh(&ul->lock);
137 		list_del(&rt->rt6i_uncached);
138 		spin_unlock_bh(&ul->lock);
139 	}
140 }
141 
/*
 * Walk the uncached-route list of every possible CPU and re-point each
 * route that still references @dev (through rt6i_idev or dst.dev) at
 * @net's loopback device, moving the held references along with it.
 * Nothing is done when @dev is the loopback device itself.
 */
142 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143 {
144 	struct net_device *loopback_dev = net->loopback_dev;
145 	int cpu;
146 
147 	if (dev == loopback_dev)
148 		return;
149 
150 	for_each_possible_cpu(cpu) {
151 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 		struct rt6_info *rt;
153 
154 		spin_lock_bh(&ul->lock);
155 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 			struct inet6_dev *rt_idev = rt->rt6i_idev;
157 			struct net_device *rt_dev = rt->dst.dev;
158 
			/* Swap the inet6_dev reference: take loopback's, drop the old one.
			 * NOTE(review): the in6_dev_get() result is stored unchecked,
			 * whereas ip6_dst_ifdown() tests it for NULL - confirm it cannot
			 * fail on this path.
			 */
159 			if (rt_idev->dev == dev) {
160 				rt->rt6i_idev = in6_dev_get(loopback_dev);
161 				in6_dev_put(rt_idev);
162 			}
163 
			/* Same for the net_device reference held through dst.dev. */
164 			if (rt_dev == dev) {
165 				rt->dst.dev = loopback_dev;
166 				dev_hold(rt->dst.dev);
167 				dev_put(rt_dev);
168 			}
169 		}
170 		spin_unlock_bh(&ul->lock);
171 	}
172 }
173 
/*
 * cow_metrics helper for RTF_PCPU routes (see ipv6_cow_metrics()): the
 * writable metrics pointer is obtained for rt->dst.from, not for @rt itself.
 */
174 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175 {
176 	return dst_metrics_write_ptr(rt->dst.from);
177 }
178 
179 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180 {
181 	struct rt6_info *rt = (struct rt6_info *)dst;
182 
183 	if (rt->rt6i_flags & RTF_PCPU)
184 		return rt6_pcpu_cow_metrics(rt);
185 	else if (rt->rt6i_flags & RTF_CACHE)
186 		return NULL;
187 	else
188 		return dst_cow_metrics_generic(dst, old);
189 }
190 
191 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 					     struct sk_buff *skb,
193 					     const void *daddr)
194 {
195 	struct in6_addr *p = &rt->rt6i_gateway;
196 
197 	if (!ipv6_addr_any(p))
198 		return (const void *) p;
199 	else if (skb)
200 		return &ipv6_hdr(skb)->daddr;
201 	return daddr;
202 }
203 
204 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 					  struct sk_buff *skb,
206 					  const void *daddr)
207 {
208 	struct rt6_info *rt = (struct rt6_info *) dst;
209 	struct neighbour *n;
210 
211 	daddr = choose_neigh_daddr(rt, skb, daddr);
212 	n = __ipv6_neigh_lookup(dst->dev, daddr);
213 	if (n)
214 		return n;
215 	return neigh_create(&nd_tbl, daddr, dst->dev);
216 }
217 
/*
 * dst_ops template for regular IPv6 routes; it wires the ip6_* callbacks
 * declared at the top of this file.  Routes themselves are allocated from
 * net->ipv6.ip6_dst_ops (see __ip6_dst_alloc()).
 */
218 static struct dst_ops ip6_dst_ops_template = {
219 	.family			=	AF_INET6,
220 	.gc			=	ip6_dst_gc,
221 	.gc_thresh		=	1024,
222 	.check			=	ip6_dst_check,
223 	.default_advmss		=	ip6_default_advmss,
224 	.mtu			=	ip6_mtu,
225 	.cow_metrics		=	ipv6_cow_metrics,
226 	.destroy		=	ip6_dst_destroy,
227 	.ifdown			=	ip6_dst_ifdown,
228 	.negative_advice	=	ip6_negative_advice,
229 	.link_failure		=	ip6_link_failure,
230 	.update_pmtu		=	ip6_rt_update_pmtu,
231 	.redirect		=	rt6_do_redirect,
232 	.local_out		=	__ip6_local_out,
233 	.neigh_lookup		=	ip6_neigh_lookup,
234 };
235 
236 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
237 {
238 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239 
240 	return mtu ? : dst->dev->mtu;
241 }
242 
/* update_pmtu callback of ip6_dst_blackhole_ops: deliberately does nothing. */
243 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 					 struct sk_buff *skb, u32 mtu)
245 {
246 }
247 
/* redirect callback of ip6_dst_blackhole_ops: deliberately does nothing. */
248 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
249 				      struct sk_buff *skb)
250 {
251 }
252 
/*
 * dst_ops for blackhole routes.  Compared with ip6_dst_ops_template it has
 * no gc/ifdown/negative_advice/link_failure/local_out hooks, uses no-op
 * update_pmtu/redirect handlers and the generic cow_metrics helper.
 */
253 static struct dst_ops ip6_dst_blackhole_ops = {
254 	.family			=	AF_INET6,
255 	.destroy		=	ip6_dst_destroy,
256 	.check			=	ip6_dst_check,
257 	.mtu			=	ip6_blackhole_mtu,
258 	.default_advmss		=	ip6_default_advmss,
259 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
260 	.redirect		=	ip6_rt_blackhole_redirect,
261 	.cow_metrics		=	dst_cow_metrics_generic,
262 	.neigh_lookup		=	ip6_neigh_lookup,
263 };
264 
/*
 * Read-only metrics array with every entry zero; the designated
 * initialiser merely names the hop-limit slot explicitly.
 */
265 static const u32 ip6_template_metrics[RTAX_MAX] = {
266 	[RTAX_HOPLIMIT - 1] = 0,
267 };
268 
/*
 * Template for the "no route" entry: a reject route without next hop whose
 * dst reports -ENETUNREACH and hands packets to ip6_pkt_discard() /
 * ip6_pkt_discard_out().  It starts with one dst reference and one
 * rt6i_ref, and carries the largest possible metric.
 */
269 static const struct rt6_info ip6_null_entry_template = {
270 	.dst = {
271 		.__refcnt	= ATOMIC_INIT(1),
272 		.__use		= 1,
273 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
274 		.error		= -ENETUNREACH,
275 		.input		= ip6_pkt_discard,
276 		.output		= ip6_pkt_discard_out,
277 	},
278 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
279 	.rt6i_protocol  = RTPROT_KERNEL,
280 	.rt6i_metric	= ~(u32) 0,
281 	.rt6i_ref	= ATOMIC_INIT(1),
282 };
283 
284 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
285 
/*
 * Template for the "prohibit" entry (CONFIG_IPV6_MULTIPLE_TABLES only):
 * identical to the null entry except that the dst error is -EACCES and
 * packets go to ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
286 static const struct rt6_info ip6_prohibit_entry_template = {
287 	.dst = {
288 		.__refcnt	= ATOMIC_INIT(1),
289 		.__use		= 1,
290 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
291 		.error		= -EACCES,
292 		.input		= ip6_pkt_prohibit,
293 		.output		= ip6_pkt_prohibit_out,
294 	},
295 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
296 	.rt6i_protocol  = RTPROT_KERNEL,
297 	.rt6i_metric	= ~(u32) 0,
298 	.rt6i_ref	= ATOMIC_INIT(1),
299 };
300 
/*
 * Template for the "blackhole" entry (CONFIG_IPV6_MULTIPLE_TABLES only):
 * dst error is -EINVAL and packets are passed to the generic
 * dst_discard() / dst_discard_out() handlers.
 */
301 static const struct rt6_info ip6_blk_hole_entry_template = {
302 	.dst = {
303 		.__refcnt	= ATOMIC_INIT(1),
304 		.__use		= 1,
305 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
306 		.error		= -EINVAL,
307 		.input		= dst_discard,
308 		.output		= dst_discard_out,
309 	},
310 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
311 	.rt6i_protocol  = RTPROT_KERNEL,
312 	.rt6i_metric	= ~(u32) 0,
313 	.rt6i_ref	= ATOMIC_INIT(1),
314 };
315 
316 #endif
317 
/*
 * Initialise the rt6_info-specific part of a freshly allocated route: zero
 * the sizeof(*rt) - sizeof(*dst) bytes that start right after the embedded
 * dst_entry, then set up the two list heads.
 * NOTE(review): this relies on dst being the first member of struct
 * rt6_info, as the casts elsewhere in this file also assume.
 */
318 static void rt6_info_init(struct rt6_info *rt)
319 {
320 	struct dst_entry *dst = &rt->dst;
321 
322 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 	INIT_LIST_HEAD(&rt->rt6i_siblings);
324 	INIT_LIST_HEAD(&rt->rt6i_uncached);
325 }
326 
327 /* allocate dst with ip6_dst_ops */
328 static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 					struct net_device *dev,
330 					int flags)
331 {
332 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
333 					0, DST_OBSOLETE_FORCE_CHK, flags);
334 
335 	if (rt)
336 		rt6_info_init(rt);
337 
338 	return rt;
339 }
340 
341 struct rt6_info *ip6_dst_alloc(struct net *net,
342 			       struct net_device *dev,
343 			       int flags)
344 {
345 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
346 
347 	if (rt) {
348 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 		if (rt->rt6i_pcpu) {
350 			int cpu;
351 
352 			for_each_possible_cpu(cpu) {
353 				struct rt6_info **p;
354 
355 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 				/* no one shares rt */
357 				*p =  NULL;
358 			}
359 		} else {
360 			dst_destroy((struct dst_entry *)rt);
361 			return NULL;
362 		}
363 	}
364 
365 	return rt;
366 }
367 EXPORT_SYMBOL(ip6_dst_alloc);
368 
/*
 * dst_ops->destroy callback: release everything a route owns - its metrics,
 * the per-cpu slot array, its uncached-list membership, the inet6_dev
 * reference and finally the reference on dst->from.
 */
369 static void ip6_dst_destroy(struct dst_entry *dst)
370 {
371 	struct rt6_info *rt = (struct rt6_info *)dst;
	/* Sampled up front; dst->from is cleared before the reference is dropped. */
372 	struct dst_entry *from = dst->from;
373 	struct inet6_dev *idev;
374 
375 	dst_destroy_metrics_generic(dst);
376 	free_percpu(rt->rt6i_pcpu);
377 	rt6_uncached_list_del(rt);
378 
379 	idev = rt->rt6i_idev;
380 	if (idev) {
381 		rt->rt6i_idev = NULL;
382 		in6_dev_put(idev);
383 	}
384 
385 	dst->from = NULL;
386 	dst_release(from);
387 }
388 
389 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390 			   int how)
391 {
392 	struct rt6_info *rt = (struct rt6_info *)dst;
393 	struct inet6_dev *idev = rt->rt6i_idev;
394 	struct net_device *loopback_dev =
395 		dev_net(dev)->loopback_dev;
396 
397 	if (dev != loopback_dev) {
398 		if (idev && idev->dev == dev) {
399 			struct inet6_dev *loopback_idev =
400 				in6_dev_get(loopback_dev);
401 			if (loopback_idev) {
402 				rt->rt6i_idev = loopback_idev;
403 				in6_dev_put(idev);
404 			}
405 		}
406 	}
407 }
408 
409 static bool __rt6_check_expired(const struct rt6_info *rt)
410 {
411 	if (rt->rt6i_flags & RTF_EXPIRES)
412 		return time_after(jiffies, rt->dst.expires);
413 	else
414 		return false;
415 }
416 
417 static bool rt6_check_expired(const struct rt6_info *rt)
418 {
419 	if (rt->rt6i_flags & RTF_EXPIRES) {
420 		if (time_after(jiffies, rt->dst.expires))
421 			return true;
422 	} else if (rt->dst.from) {
423 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
424 	}
425 	return false;
426 }
427 
428 /* Multipath route selection:
429  *   Hash based function using packet header and flowlabel.
430  * Adapted from fib_info_hashfn()
431  */
/*
 * Returns an index in [0, candidate_count).  @candidate_count must be
 * non-zero; the caller in this file passes rt6i_nsiblings + 1.
 */
432 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
433 			       const struct flowi6 *fl6)
434 {
435 	return get_hash_from_flowi6(fl6) % candidate_count;
436 }
437 
438 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
439 					     struct flowi6 *fl6, int oif,
440 					     int strict)
441 {
442 	struct rt6_info *sibling, *next_sibling;
443 	int route_choosen;
444 
445 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
446 	/* Don't change the route, if route_choosen == 0
447 	 * (siblings does not include ourself)
448 	 */
449 	if (route_choosen)
450 		list_for_each_entry_safe(sibling, next_sibling,
451 				&match->rt6i_siblings, rt6i_siblings) {
452 			route_choosen--;
453 			if (route_choosen == 0) {
454 				if (rt6_score_route(sibling, oif, strict) < 0)
455 					break;
456 				match = sibling;
457 				break;
458 			}
459 		}
460 	return match;
461 }
462 
463 /*
464  *	Route lookup. Any table->tb6_lock is implied.
465  */
466 
/*
 * Pick, from the list starting at @rt (linked through dst.rt6_next), the
 * entry that fits the requested output interface @oif or, when @oif is 0,
 * the source address @saddr.
 *
 *  - Neither @oif nor a specific @saddr given: @rt is returned unchanged.
 *  - @oif given: the first entry whose device has that ifindex wins;
 *    loopback entries may be remembered in "local" as a fallback.
 *  - @oif not given: the first entry for which ipv6_chk_addr() accepts
 *    @saddr on the entry's device wins.
 *
 * If the walk finds nothing and @oif was given, "local" is returned when
 * set, otherwise ip6_null_entry under RT6_LOOKUP_F_IFACE.  In every other
 * case the original @rt is returned.
 */
467 static inline struct rt6_info *rt6_device_match(struct net *net,
468 						    struct rt6_info *rt,
469 						    const struct in6_addr *saddr,
470 						    int oif,
471 						    int flags)
472 {
473 	struct rt6_info *local = NULL;
474 	struct rt6_info *sprt;
475 
476 	if (!oif && ipv6_addr_any(saddr))
477 		goto out;
478 
479 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
480 		struct net_device *dev = sprt->dst.dev;
481 
482 		if (oif) {
483 			if (dev->ifindex == oif)
484 				return sprt;
485 			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route whose inet6_dev is not on @oif: skipped
				 * under RT6_LOOKUP_F_IFACE, and it never replaces a
				 * "local" candidate that is on @oif.
				 */
486 				if (!sprt->rt6i_idev ||
487 				    sprt->rt6i_idev->dev->ifindex != oif) {
488 					if (flags & RT6_LOOKUP_F_IFACE)
489 						continue;
490 					if (local &&
491 					    local->rt6i_idev->dev->ifindex == oif)
492 						continue;
493 				}
494 				local = sprt;
495 			}
496 		} else {
497 			if (ipv6_chk_addr(net, saddr, dev,
498 					  flags & RT6_LOOKUP_F_IFACE))
499 				return sprt;
500 		}
501 	}
502 
503 	if (oif) {
504 		if (local)
505 			return local;
506 
507 		if (flags & RT6_LOOKUP_F_IFACE)
508 			return net->ipv6.ip6_null_entry;
509 	}
510 out:
511 	return rt;
512 }
513 
514 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router-probe request: rt6_probe() fills one in and
 * rt6_probe_deferred() consumes and frees it.
 */
515 struct __rt6_probe_work {
516 	struct work_struct work;
517 	struct in6_addr target;	/* gateway address to solicit */
518 	struct net_device *dev;	/* held via dev_hold() until the work has run */
519 };
520 
/*
 * Work handler queued by rt6_probe(): send a neighbour solicitation for
 * work->target to its solicited-node multicast address on work->dev, then
 * drop the device reference and free the work item.
 */
521 static void rt6_probe_deferred(struct work_struct *w)
522 {
523 	struct in6_addr mcaddr;
524 	struct __rt6_probe_work *work =
525 		container_of(w, struct __rt6_probe_work, work);
526 
527 	addrconf_addr_solict_mult(&work->target, &mcaddr);
528 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
529 	dev_put(work->dev);
530 	kfree(work);
531 }
532 
/*
 * Queue a reachability probe for @rt's gateway.
 *
 * Does nothing for a NULL route or one without RTF_GATEWAY.  A probe is
 * scheduled (through rt6_probe_deferred()) when no neighbour entry exists
 * for the gateway, or when the entry is not NUD_VALID and was last updated
 * more than rtr_probe_interval ago.  Allocation failure silently skips the
 * probe.
 */
533 static void rt6_probe(struct rt6_info *rt)
534 {
535 	struct __rt6_probe_work *work;
536 	struct neighbour *neigh;
537 	/*
538 	 * Okay, this does not seem to be appropriate
539 	 * for now, however, we need to check if it
540 	 * is really so; aka Router Reachability Probing.
541 	 *
542 	 * Router Reachability Probe MUST be rate-limited
543 	 * to no more than one per minute.
544 	 */
545 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
546 		return;
547 	rcu_read_lock_bh();
548 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
549 	if (neigh) {
		/* Unlocked fast path; the state is re-checked under neigh->lock below. */
550 		if (neigh->nud_state & NUD_VALID)
551 			goto out;
552 
553 		work = NULL;
554 		write_lock(&neigh->lock);
555 		if (!(neigh->nud_state & NUD_VALID) &&
556 		    time_after(jiffies,
557 			       neigh->updated +
558 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
559 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
560 			if (work)
561 				__neigh_set_probe_once(neigh);
562 		}
563 		write_unlock(&neigh->lock);
564 	} else {
		/* No neighbour entry at all: probe without the interval check. */
565 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
566 	}
567 
568 	if (work) {
569 		INIT_WORK(&work->work, rt6_probe_deferred);
570 		work->target = rt->rt6i_gateway;
		/* Reference is released by rt6_probe_deferred(). */
571 		dev_hold(rt->dst.dev);
572 		work->dev = rt->dst.dev;
573 		schedule_work(&work->work);
574 	}
575 
576 out:
577 	rcu_read_unlock_bh();
578 }
579 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
580 static inline void rt6_probe(struct rt6_info *rt)
581 {
582 }
583 #endif
584 
585 /*
586  * Default Router Selection (RFC 2461 6.3.6)
587  */
588 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
589 {
590 	struct net_device *dev = rt->dst.dev;
591 	if (!oif || dev->ifindex == oif)
592 		return 2;
593 	if ((dev->flags & IFF_LOOPBACK) &&
594 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
595 		return 1;
596 	return 0;
597 }
598 
/*
 * Judge the reachability of @rt's gateway from its neighbour entry.
 *
 * Routes flagged RTF_NONEXTHOP, or lacking RTF_GATEWAY, always succeed.
 * Otherwise:
 *  - neighbour entry in a NUD_VALID state: RT6_NUD_SUCCEED;
 *  - entry present but not valid: with CONFIG_IPV6_ROUTER_PREF,
 *    RT6_NUD_FAIL_PROBE if NUD_FAILED else RT6_NUD_SUCCEED; without it the
 *    initial RT6_NUD_FAIL_HARD stands;
 *  - no entry: RT6_NUD_SUCCEED with CONFIG_IPV6_ROUTER_PREF, otherwise
 *    RT6_NUD_FAIL_DO_RR.
 */
599 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
600 {
601 	struct neighbour *neigh;
602 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
603 
604 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
605 	    !(rt->rt6i_flags & RTF_GATEWAY))
606 		return RT6_NUD_SUCCEED;
607 
608 	rcu_read_lock_bh();
609 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
610 	if (neigh) {
611 		read_lock(&neigh->lock);
612 		if (neigh->nud_state & NUD_VALID)
613 			ret = RT6_NUD_SUCCEED;
614 #ifdef CONFIG_IPV6_ROUTER_PREF
615 		else if (!(neigh->nud_state & NUD_FAILED))
616 			ret = RT6_NUD_SUCCEED;
617 		else
618 			ret = RT6_NUD_FAIL_PROBE;
619 #endif
620 		read_unlock(&neigh->lock);
621 	} else {
622 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
623 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
624 	}
625 	rcu_read_unlock_bh();
626 
627 	return ret;
628 }
629 
/*
 * Compute a selection score for @rt.
 *
 * Returns a negative enum rt6_nud_state value when the route must not be
 * used as is: RT6_NUD_FAIL_HARD if the device does not match under
 * RT6_LOOKUP_F_IFACE, or whatever negative value rt6_check_neigh() reports
 * under RT6_LOOKUP_F_REACHABLE.  Otherwise returns the score: the
 * rt6_check_dev() result (0..2), with the decoded router preference OR-ed
 * in above it (shifted left by 2) when CONFIG_IPV6_ROUTER_PREF is enabled.
 */
630 static int rt6_score_route(struct rt6_info *rt, int oif,
631 			   int strict)
632 {
633 	int m;
634 
635 	m = rt6_check_dev(rt, oif);
636 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
637 		return RT6_NUD_FAIL_HARD;
638 #ifdef CONFIG_IPV6_ROUTER_PREF
639 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
640 #endif
641 	if (strict & RT6_LOOKUP_F_REACHABLE) {
642 		int n = rt6_check_neigh(rt);
643 		if (n < 0)
644 			return n;
645 	}
646 	return m;
647 }
648 
/*
 * Consider @rt as a replacement for the current best route @match.
 *
 * @mpri holds the best score so far and is updated, together with @do_rr,
 * whenever @rt scores strictly higher.  Routes are skipped when their
 * device has no carrier and ignore_routes_with_linkdown is set, when they
 * are expired, or when rt6_score_route() reports RT6_NUD_FAIL_HARD.
 * Under RT6_LOOKUP_F_REACHABLE every surviving candidate is also passed to
 * rt6_probe().
 *
 * Returns the (possibly unchanged) best match.
 */
649 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
650 				   int *mpri, struct rt6_info *match,
651 				   bool *do_rr)
652 {
653 	int m;
654 	bool match_do_rr = false;
655 	struct inet6_dev *idev = rt->rt6i_idev;
656 	struct net_device *dev = rt->dst.dev;
657 
658 	if (dev && !netif_carrier_ok(dev) &&
659 	    idev->cnf.ignore_routes_with_linkdown)
660 		goto out;
661 
662 	if (rt6_check_expired(rt))
663 		goto out;
664 
665 	m = rt6_score_route(rt, oif, strict);
666 	if (m == RT6_NUD_FAIL_DO_RR) {
667 		match_do_rr = true;
668 		m = 0; /* lowest valid score */
669 	} else if (m == RT6_NUD_FAIL_HARD) {
670 		goto out;
671 	}
672 
673 	if (strict & RT6_LOOKUP_F_REACHABLE)
674 		rt6_probe(rt);
675 
676 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	/* ... which is -2 and therefore never beats the initial *mpri of -1
	 * that find_rr_leaf() starts from.
	 */
677 	if (m > *mpri) {
678 		*do_rr = match_do_rr;
679 		*mpri = m;
680 		match = rt;
681 	}
682 out:
683 	return match;
684 }
685 
/*
 * Find the best route among @fn's leaves, scanning in round-robin order.
 *
 * Pass 1 runs from @rr_head to the end of the run of routes whose metric
 * equals @metric; pass 2 covers the routes from fn->leaf up to (excluding)
 * @rr_head.  Either pass stops at the first route with a different metric
 * and remembers it in "cont".  Only if neither pass produced a match is
 * the remainder of the list, starting at "cont", scanned as well.
 *
 * Returns the best match or NULL; *@do_rr is set by find_match().
 */
686 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
687 				     struct rt6_info *rr_head,
688 				     u32 metric, int oif, int strict,
689 				     bool *do_rr)
690 {
691 	struct rt6_info *rt, *match, *cont;
692 	int mpri = -1;
693 
694 	match = NULL;
695 	cont = NULL;
696 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
697 		if (rt->rt6i_metric != metric) {
698 			cont = rt;
699 			break;
700 		}
701 
702 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
703 	}
704 
705 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
706 		if (rt->rt6i_metric != metric) {
707 			cont = rt;
708 			break;
709 		}
710 
711 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 	}
713 
714 	if (match || !cont)
715 		return match;
716 
717 	for (rt = cont; rt; rt = rt->dst.rt6_next)
718 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
719 
720 	return match;
721 }
722 
/*
 * Select the route to use from node @fn.
 *
 * The search starts at fn->rr_ptr (initialised to fn->leaf on first use).
 * When find_rr_leaf() asks for round-robin, rr_ptr is advanced to the next
 * route of the same metric, wrapping to fn->leaf.  Returns the match, or
 * the namespace's ip6_null_entry when there is none.
 */
723 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
724 {
725 	struct rt6_info *match, *rt0;
726 	struct net *net;
727 	bool do_rr = false;
728 
729 	rt0 = fn->rr_ptr;
730 	if (!rt0)
731 		fn->rr_ptr = rt0 = fn->leaf;
732 
733 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
734 			     &do_rr);
735 
736 	if (do_rr) {
737 		struct rt6_info *next = rt0->dst.rt6_next;
738 
739 		/* no entries matched; do round-robin */
740 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
741 			next = fn->leaf;
742 
743 		if (next != rt0)
744 			fn->rr_ptr = next;
745 	}
746 
747 	net = dev_net(rt0->dst.dev);
748 	return match ? match : net->ipv6.ip6_null_entry;
749 }
750 
751 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
752 {
753 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
754 }
755 
756 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option (@opt, @len bytes, laid out as struct
 * route_info) that arrived on @dev from the router @gwaddr.
 *
 * After validation, the matching route is looked up (the default router
 * for a zero-length prefix, a route-info route otherwise).  A lifetime of
 * zero deletes an existing route; a non-zero lifetime creates a missing
 * route or refreshes the preference bits of an existing one, and then
 * (re)arms or clears its expiry.
 *
 * Returns 0 on success (including "nothing to do") and -EINVAL when the
 * option is malformed or carries the invalid preference value.
 */
757 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
758 		  const struct in6_addr *gwaddr)
759 {
760 	struct net *net = dev_net(dev);
761 	struct route_info *rinfo = (struct route_info *) opt;
762 	struct in6_addr prefix_buf, *prefix;
763 	unsigned int pref;
764 	unsigned long lifetime;
765 	struct rt6_info *rt;
766 
	/* NOTE(review): @len is int and is compared against a size_t, so a
	 * negative @len would convert to a large unsigned value and pass
	 * this check - confirm callers never hand in a negative length.
	 */
767 	if (len < sizeof(struct route_info)) {
768 		return -EINVAL;
769 	}
770 
771 	/* Sanity check for prefix_len and length */
772 	if (rinfo->length > 3) {
773 		return -EINVAL;
774 	} else if (rinfo->prefix_len > 128) {
775 		return -EINVAL;
776 	} else if (rinfo->prefix_len > 64) {
777 		if (rinfo->length < 2) {
778 			return -EINVAL;
779 		}
780 	} else if (rinfo->prefix_len > 0) {
781 		if (rinfo->length < 1) {
782 			return -EINVAL;
783 		}
784 	}
785 
786 	pref = rinfo->route_pref;
787 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
788 		return -EINVAL;
789 
790 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
791 
	/* With length == 3 the prefix field is used in place; otherwise the
	 * first prefix_len bits are copied into prefix_buf by
	 * ipv6_addr_prefix().
	 */
792 	if (rinfo->length == 3)
793 		prefix = (struct in6_addr *)rinfo->prefix;
794 	else {
795 		/* this function is safe */
796 		ipv6_addr_prefix(&prefix_buf,
797 				 (struct in6_addr *)rinfo->prefix,
798 				 rinfo->prefix_len);
799 		prefix = &prefix_buf;
800 	}
801 
802 	if (rinfo->prefix_len == 0)
803 		rt = rt6_get_dflt_router(gwaddr, dev);
804 	else
805 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
806 					gwaddr, dev->ifindex);
807 
	/* Zero lifetime: withdraw the route if we have one. */
808 	if (rt && !lifetime) {
809 		ip6_del_rt(rt);
810 		rt = NULL;
811 	}
812 
813 	if (!rt && lifetime)
814 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
815 					pref);
816 	else if (rt)
817 		rt->rt6i_flags = RTF_ROUTEINFO |
818 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
819 
820 	if (rt) {
821 		if (!addrconf_finite_timeout(lifetime))
822 			rt6_clean_expires(rt);
823 		else
824 			rt6_set_expires(rt, jiffies + HZ * lifetime);
825 
826 		ip6_rt_put(rt);
827 	}
828 	return 0;
829 }
830 #endif
831 
/*
 * Climb from @fn towards the root until a node flagged RTN_RTINFO is
 * found and return it.  Returns NULL once a node flagged RTN_TL_ROOT is
 * reached.  When the parent owns a subtree that is not the node we came
 * from, the climb continues with the result of looking up @saddr in that
 * subtree instead of with the parent itself.
 */
832 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
833 					struct in6_addr *saddr)
834 {
835 	struct fib6_node *pn;
836 	while (1) {
837 		if (fn->fn_flags & RTN_TL_ROOT)
838 			return NULL;
839 		pn = fn->parent;
840 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
841 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
842 		else
843 			fn = pn;
844 		if (fn->fn_flags & RTN_RTINFO)
845 			return fn;
846 	}
847 }
848 
/*
 * Plain lookup of @fl6 in @table (used as the fib6_rule_lookup() callback
 * by ip6_route_lookup() and rt6_lookup()).
 *
 * Under the table read lock: find the node for daddr/saddr, pick a leaf
 * with rt6_device_match(), apply multipath selection when the route has
 * siblings and no output interface was requested, and backtrack while the
 * result is ip6_null_entry.  The returned route has been passed through
 * dst_use().
 */
849 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
850 					     struct fib6_table *table,
851 					     struct flowi6 *fl6, int flags)
852 {
853 	struct fib6_node *fn;
854 	struct rt6_info *rt;
855 
856 	read_lock_bh(&table->tb6_lock);
857 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
858 restart:
859 	rt = fn->leaf;
860 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
861 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
862 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
863 	if (rt == net->ipv6.ip6_null_entry) {
864 		fn = fib6_backtrack(fn, &fl6->saddr);
865 		if (fn)
866 			goto restart;
867 	}
868 	dst_use(&rt->dst, jiffies);
869 	read_unlock_bh(&table->tb6_lock);
870 
871 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
872 
873 	return rt;
874 
875 }
876 
/*
 * Exported (GPL) entry point: run fib6_rule_lookup() for @fl6 with
 * ip6_pol_route_lookup() as the per-table lookup function and return the
 * resulting dst.
 */
877 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
878 				    int flags)
879 {
880 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
881 }
882 EXPORT_SYMBOL_GPL(ip6_route_lookup);
883 
884 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
885 			    const struct in6_addr *saddr, int oif, int strict)
886 {
887 	struct flowi6 fl6 = {
888 		.flowi6_oif = oif,
889 		.daddr = *daddr,
890 	};
891 	struct dst_entry *dst;
892 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
893 
894 	if (saddr) {
895 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
896 		flags |= RT6_LOOKUP_F_HAS_SADDR;
897 	}
898 
899 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
900 	if (dst->error == 0)
901 		return (struct rt6_info *) dst;
902 
903 	dst_release(dst);
904 
905 	return NULL;
906 }
907 EXPORT_SYMBOL(rt6_lookup);
908 
909 /* ip6_ins_rt is called with FREE table->tb6_lock.
910    It takes new route entry, the addition fails by any reason the
911    route is freed. In any case, if caller does not hold it, it may
912    be destroyed.
913  */
914 
/*
 * Insert @rt into its own table (rt->rt6i_table) with fib6_add(), holding
 * the table write lock for the duration of the call.  Returns fib6_add()'s
 * result.
 */
915 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
916 			struct mx6_config *mxc)
917 {
918 	int err;
919 	struct fib6_table *table;
920 
921 	table = rt->rt6i_table;
922 	write_lock_bh(&table->tb6_lock);
923 	err = fib6_add(&table->tb6_root, rt, info, mxc);
924 	write_unlock_bh(&table->tb6_lock);
925 
926 	return err;
927 }
928 
/*
 * Insert @rt with default parameters: an nl_info that only names the
 * network namespace of the route's device, and no extra metrics.
 */
929 int ip6_ins_rt(struct rt6_info *rt)
930 {
931 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
932 	struct mx6_config mxc = { .mx = NULL, };
933 
934 	return __ip6_ins_rt(rt, &info, &mxc);
935 }
936 
/*
 * Create an RTF_CACHE host route (/128) for @daddr as a clone of @ort.
 *
 * When @ort is itself an RTF_CACHE or RTF_PCPU route, the clone is made
 * from ort->dst.from instead.  The clone gets metric 0 and DST_HOST.  For
 * originals that are neither gateway nor RTF_NONEXTHOP routes, the clone
 * is additionally marked RTF_ANYCAST when @daddr equals the original's
 * non-/128 prefix address, and (with CONFIG_IPV6_SUBTREES) its source key
 * is narrowed to @saddr/128 if a source prefix is set and @saddr is given.
 *
 * Returns the new route or NULL on allocation failure.
 */
937 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
938 					   const struct in6_addr *daddr,
939 					   const struct in6_addr *saddr)
940 {
941 	struct rt6_info *rt;
942 
943 	/*
944 	 *	Clone the route.
945 	 */
946 
947 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
948 		ort = (struct rt6_info *)ort->dst.from;
949 
950 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
951 
952 	if (!rt)
953 		return NULL;
954 
955 	ip6_rt_copy_init(rt, ort);
956 	rt->rt6i_flags |= RTF_CACHE;
957 	rt->rt6i_metric = 0;
958 	rt->dst.flags |= DST_HOST;
959 	rt->rt6i_dst.addr = *daddr;
960 	rt->rt6i_dst.plen = 128;
961 
962 	if (!rt6_is_gw_or_nonexthop(ort)) {
963 		if (ort->rt6i_dst.plen != 128 &&
964 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
965 			rt->rt6i_flags |= RTF_ANYCAST;
966 #ifdef CONFIG_IPV6_SUBTREES
967 		if (rt->rt6i_src.plen && saddr) {
968 			rt->rt6i_src.addr = *saddr;
969 			rt->rt6i_src.plen = 128;
970 		}
971 #endif
972 	}
973 
974 	return rt;
975 }
976 
/*
 * Allocate a per-cpu copy of @rt: same device and dst flags, initialised
 * through ip6_rt_copy_init(), same protocol, and marked RTF_PCPU.
 * Returns NULL on allocation failure.
 */
977 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
978 {
979 	struct rt6_info *pcpu_rt;
980 
981 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
982 				  rt->dst.dev, rt->dst.flags);
983 
984 	if (!pcpu_rt)
985 		return NULL;
986 	ip6_rt_copy_init(pcpu_rt, rt);
987 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
988 	pcpu_rt->rt6i_flags |= RTF_PCPU;
989 	return pcpu_rt;
990 }
991 
992 /* It should be called with read_lock_bh(&tb6_lock) acquired */
993 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
994 {
995 	struct rt6_info *pcpu_rt, **p;
996 
997 	p = this_cpu_ptr(rt->rt6i_pcpu);
998 	pcpu_rt = *p;
999 
1000 	if (pcpu_rt) {
1001 		dst_hold(&pcpu_rt->dst);
1002 		rt6_dst_from_metrics_check(pcpu_rt);
1003 	}
1004 	return pcpu_rt;
1005 }
1006 
/*
 * Create and install this CPU's copy of @rt and return it with a
 * reference held.
 *
 * Outcomes:
 *  - allocation failure: ip6_null_entry (with a reference) is returned;
 *  - the per-cpu slot was still empty: the new copy is installed;
 *  - somebody else installed a copy first: the new one is destroyed and
 *    the existing copy is returned;
 *  - @rt has no per-cpu array any more: the new copy is destroyed and @rt
 *    itself is returned.
 *
 * The table read lock is taken internally, after the allocation.
 */
1007 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008 {
1009 	struct fib6_table *table = rt->rt6i_table;
1010 	struct rt6_info *pcpu_rt, *prev, **p;
1011 
1012 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013 	if (!pcpu_rt) {
1014 		struct net *net = dev_net(rt->dst.dev);
1015 
1016 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1017 		return net->ipv6.ip6_null_entry;
1018 	}
1019 
1020 	read_lock_bh(&table->tb6_lock);
1021 	if (rt->rt6i_pcpu) {
1022 		p = this_cpu_ptr(rt->rt6i_pcpu);
		/* Install only if the slot is still NULL. */
1023 		prev = cmpxchg(p, NULL, pcpu_rt);
1024 		if (prev) {
1025 			/* If someone did it before us, return prev instead */
1026 			dst_destroy(&pcpu_rt->dst);
1027 			pcpu_rt = prev;
1028 		}
1029 	} else {
1030 		/* rt has been removed from the fib6 tree
1031 		 * before we have a chance to acquire the read_lock.
1032 		 * In this case, don't bother to create a pcpu rt
1033 		 * since rt is going away anyway.  The next
1034 		 * dst_check() will trigger a re-lookup.
1035 		 */
1036 		dst_destroy(&pcpu_rt->dst);
1037 		pcpu_rt = rt;
1038 	}
1039 	dst_hold(&pcpu_rt->dst);
1040 	rt6_dst_from_metrics_check(pcpu_rt);
1041 	read_unlock_bh(&table->tb6_lock);
1042 	return pcpu_rt;
1043 }
1044 
/*
 * Policy-routing lookup of @fl6 in @table for interface @oif (ignored when
 * the flow carries FLOWI_FLAG_SKIP_NH_OIF).
 *
 * Selection: rt6_select() plus multipath selection, backtracking through
 * the tree while the result is ip6_null_entry.  RT6_LOOKUP_F_REACHABLE is
 * applied only when forwarding is disabled in devconf_all; if that strict
 * pass finds nothing, the search restarts from the original node without
 * the reachability requirement.
 *
 * What is returned (always with a reference held):
 *  - ip6_null_entry or an RTF_CACHE route: the route itself;
 *  - FLOWI_FLAG_KNOWN_NH on a non-gateway route: a fresh RTF_CACHE clone
 *    kept on the uncached list (ip6_null_entry if cloning fails);
 *  - otherwise: the per-cpu copy of the route, created on demand.
 */
1045 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1046 			       int oif, struct flowi6 *fl6, int flags)
1047 {
1048 	struct fib6_node *fn, *saved_fn;
1049 	struct rt6_info *rt;
1050 	int strict = 0;
1051 
1052 	strict |= flags & RT6_LOOKUP_F_IFACE;
1053 	if (net->ipv6.devconf_all->forwarding == 0)
1054 		strict |= RT6_LOOKUP_F_REACHABLE;
1055 
1056 	read_lock_bh(&table->tb6_lock);
1057 
1058 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* Remembered so the relaxed (non-REACHABLE) retry can start over here. */
1059 	saved_fn = fn;
1060 
1061 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062 		oif = 0;
1063 
1064 redo_rt6_select:
1065 	rt = rt6_select(fn, oif, strict);
1066 	if (rt->rt6i_nsiblings)
1067 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1068 	if (rt == net->ipv6.ip6_null_entry) {
1069 		fn = fib6_backtrack(fn, &fl6->saddr);
1070 		if (fn)
1071 			goto redo_rt6_select;
1072 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073 			/* also consider unreachable route */
1074 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1075 			fn = saved_fn;
1076 			goto redo_rt6_select;
1077 		}
1078 	}
1079 
1080 
1081 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082 		dst_use(&rt->dst, jiffies);
1083 		read_unlock_bh(&table->tb6_lock);
1084 
1085 		rt6_dst_from_metrics_check(rt);
1086 
1087 		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088 		return rt;
1089 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1091 		/* Create a RTF_CACHE clone which will not be
1092 		 * owned by the fib6 tree.  It is for the special case where
1093 		 * the daddr in the skb during the neighbor look-up is different
1094 		 * from the fl6->daddr used to look-up route here.
1095 		 */
1096 
1097 		struct rt6_info *uncached_rt;
1098 
		/* dst_use() keeps rt referenced across the unlock; that
		 * reference is released once the clone has been made.
		 */
1099 		dst_use(&rt->dst, jiffies);
1100 		read_unlock_bh(&table->tb6_lock);
1101 
1102 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103 		dst_release(&rt->dst);
1104 
1105 		if (uncached_rt)
1106 			rt6_uncached_list_add(uncached_rt);
1107 		else
1108 			uncached_rt = net->ipv6.ip6_null_entry;
1109 
1110 		dst_hold(&uncached_rt->dst);
1111 
1112 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113 		return uncached_rt;
1114 
1115 	} else {
1116 		/* Get a percpu copy */
1117 
1118 		struct rt6_info *pcpu_rt;
1119 
		/* Usage accounting on the tree route itself; no reference is taken here. */
1120 		rt->dst.lastuse = jiffies;
1121 		rt->dst.__use++;
1122 		pcpu_rt = rt6_get_pcpu_route(rt);
1123 
1124 		if (pcpu_rt) {
1125 			read_unlock_bh(&table->tb6_lock);
1126 		} else {
1127 			/* We have to do the read_unlock first
1128 			 * because rt6_make_pcpu_route() may trigger
1129 			 * ip6_dst_gc() which will take the write_lock.
1130 			 */
1131 			dst_hold(&rt->dst);
1132 			read_unlock_bh(&table->tb6_lock);
1133 			pcpu_rt = rt6_make_pcpu_route(rt);
1134 			dst_release(&rt->dst);
1135 		}
1136 
1137 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138 		return pcpu_rt;
1139 
1140 	}
1141 }
1142 EXPORT_SYMBOL_GPL(ip6_pol_route);
1143 
1144 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1145 					    struct flowi6 *fl6, int flags)
1146 {
1147 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1148 }
1149 
1150 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1151 						struct net_device *dev,
1152 						struct flowi6 *fl6, int flags)
1153 {
1154 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1155 		flags |= RT6_LOOKUP_F_IFACE;
1156 
1157 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1158 }
1159 
1160 void ip6_route_input(struct sk_buff *skb)
1161 {
1162 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1163 	struct net *net = dev_net(skb->dev);
1164 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1165 	struct ip_tunnel_info *tun_info;
1166 	struct flowi6 fl6 = {
1167 		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1168 		.daddr = iph->daddr,
1169 		.saddr = iph->saddr,
1170 		.flowlabel = ip6_flowinfo(iph),
1171 		.flowi6_mark = skb->mark,
1172 		.flowi6_proto = iph->nexthdr,
1173 	};
1174 
1175 	tun_info = skb_tunnel_info(skb);
1176 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1177 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1178 	skb_dst_drop(skb);
1179 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1180 }
1181 
1182 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1183 					     struct flowi6 *fl6, int flags)
1184 {
1185 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1186 }
1187 
1188 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1189 					 struct flowi6 *fl6, int flags)
1190 {
1191 	struct dst_entry *dst;
1192 	bool any_src;
1193 
1194 	dst = l3mdev_get_rt6_dst(net, fl6);
1195 	if (dst)
1196 		return dst;
1197 
1198 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1199 
1200 	any_src = ipv6_addr_any(&fl6->saddr);
1201 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1202 	    (fl6->flowi6_oif && any_src))
1203 		flags |= RT6_LOOKUP_F_IFACE;
1204 
1205 	if (!any_src)
1206 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1207 	else if (sk)
1208 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1209 
1210 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1211 }
1212 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1213 
1214 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1215 {
1216 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1217 	struct dst_entry *new = NULL;
1218 
1219 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1220 	if (rt) {
1221 		rt6_info_init(rt);
1222 
1223 		new = &rt->dst;
1224 		new->__use = 1;
1225 		new->input = dst_discard;
1226 		new->output = dst_discard_out;
1227 
1228 		dst_copy_metrics(new, &ort->dst);
1229 		rt->rt6i_idev = ort->rt6i_idev;
1230 		if (rt->rt6i_idev)
1231 			in6_dev_hold(rt->rt6i_idev);
1232 
1233 		rt->rt6i_gateway = ort->rt6i_gateway;
1234 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1235 		rt->rt6i_metric = 0;
1236 
1237 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1238 #ifdef CONFIG_IPV6_SUBTREES
1239 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1240 #endif
1241 
1242 		dst_free(new);
1243 	}
1244 
1245 	dst_release(dst_orig);
1246 	return new ? new : ERR_PTR(-ENOMEM);
1247 }
1248 
1249 /*
1250  *	Destination cache support functions
1251  */
1252 
1253 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1254 {
1255 	if (rt->dst.from &&
1256 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1257 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1258 }
1259 
1260 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1261 {
1262 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1263 		return NULL;
1264 
1265 	if (rt6_check_expired(rt))
1266 		return NULL;
1267 
1268 	return &rt->dst;
1269 }
1270 
1271 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1272 {
1273 	if (!__rt6_check_expired(rt) &&
1274 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1275 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1276 		return &rt->dst;
1277 	else
1278 		return NULL;
1279 }
1280 
1281 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1282 {
1283 	struct rt6_info *rt;
1284 
1285 	rt = (struct rt6_info *) dst;
1286 
1287 	/* All IPV6 dsts are created with ->obsolete set to the value
1288 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1289 	 * into this function always.
1290 	 */
1291 
1292 	rt6_dst_from_metrics_check(rt);
1293 
1294 	if (rt->rt6i_flags & RTF_PCPU ||
1295 	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1296 		return rt6_dst_from_check(rt, cookie);
1297 	else
1298 		return rt6_check(rt, cookie);
1299 }
1300 
1301 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1302 {
1303 	struct rt6_info *rt = (struct rt6_info *) dst;
1304 
1305 	if (rt) {
1306 		if (rt->rt6i_flags & RTF_CACHE) {
1307 			if (rt6_check_expired(rt)) {
1308 				ip6_del_rt(rt);
1309 				dst = NULL;
1310 			}
1311 		} else {
1312 			dst_release(dst);
1313 			dst = NULL;
1314 		}
1315 	}
1316 	return dst;
1317 }
1318 
1319 static void ip6_link_failure(struct sk_buff *skb)
1320 {
1321 	struct rt6_info *rt;
1322 
1323 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1324 
1325 	rt = (struct rt6_info *) skb_dst(skb);
1326 	if (rt) {
1327 		if (rt->rt6i_flags & RTF_CACHE) {
1328 			dst_hold(&rt->dst);
1329 			ip6_del_rt(rt);
1330 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1331 			rt->rt6i_node->fn_sernum = -1;
1332 		}
1333 	}
1334 }
1335 
1336 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1337 {
1338 	struct net *net = dev_net(rt->dst.dev);
1339 
1340 	rt->rt6i_flags |= RTF_MODIFIED;
1341 	rt->rt6i_pmtu = mtu;
1342 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1343 }
1344 
1345 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1346 {
1347 	return !(rt->rt6i_flags & RTF_CACHE) &&
1348 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1349 }
1350 
1351 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1352 				 const struct ipv6hdr *iph, u32 mtu)
1353 {
1354 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1355 
1356 	if (rt6->rt6i_flags & RTF_LOCAL)
1357 		return;
1358 
1359 	dst_confirm(dst);
1360 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1361 	if (mtu >= dst_mtu(dst))
1362 		return;
1363 
1364 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1365 		rt6_do_update_pmtu(rt6, mtu);
1366 	} else {
1367 		const struct in6_addr *daddr, *saddr;
1368 		struct rt6_info *nrt6;
1369 
1370 		if (iph) {
1371 			daddr = &iph->daddr;
1372 			saddr = &iph->saddr;
1373 		} else if (sk) {
1374 			daddr = &sk->sk_v6_daddr;
1375 			saddr = &inet6_sk(sk)->saddr;
1376 		} else {
1377 			return;
1378 		}
1379 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1380 		if (nrt6) {
1381 			rt6_do_update_pmtu(nrt6, mtu);
1382 
1383 			/* ip6_ins_rt(nrt6) will bump the
1384 			 * rt6->rt6i_node->fn_sernum
1385 			 * which will fail the next rt6_check() and
1386 			 * invalidate the sk->sk_dst_cache.
1387 			 */
1388 			ip6_ins_rt(nrt6);
1389 		}
1390 	}
1391 }
1392 
1393 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1394 			       struct sk_buff *skb, u32 mtu)
1395 {
1396 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1397 }
1398 
1399 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1400 		     int oif, u32 mark)
1401 {
1402 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1403 	struct dst_entry *dst;
1404 	struct flowi6 fl6;
1405 
1406 	memset(&fl6, 0, sizeof(fl6));
1407 	fl6.flowi6_oif = oif;
1408 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1409 	fl6.daddr = iph->daddr;
1410 	fl6.saddr = iph->saddr;
1411 	fl6.flowlabel = ip6_flowinfo(iph);
1412 
1413 	dst = ip6_route_output(net, NULL, &fl6);
1414 	if (!dst->error)
1415 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1416 	dst_release(dst);
1417 }
1418 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1419 
1420 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1421 {
1422 	struct dst_entry *dst;
1423 
1424 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1425 			sk->sk_bound_dev_if, sk->sk_mark);
1426 
1427 	dst = __sk_dst_get(sk);
1428 	if (!dst || !dst->obsolete ||
1429 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1430 		return;
1431 
1432 	bh_lock_sock(sk);
1433 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1434 		ip6_datagram_dst_update(sk, false);
1435 	bh_unlock_sock(sk);
1436 }
1437 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1438 
/* Handle redirects */

/*
 * Flow key used for redirect lookups: an ordinary flowi6 extended with
 * the address of the router that sent the redirect.
 *
 * ip6_route_redirect() hands &rdfl.fl6 to the rule lookup and
 * __ip6_route_redirect() casts that flowi6 pointer back to this
 * structure, so fl6 must remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1444 
1445 static struct rt6_info *__ip6_route_redirect(struct net *net,
1446 					     struct fib6_table *table,
1447 					     struct flowi6 *fl6,
1448 					     int flags)
1449 {
1450 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1451 	struct rt6_info *rt;
1452 	struct fib6_node *fn;
1453 
1454 	/* Get the "current" route for this destination and
1455 	 * check if the redirect has come from approriate router.
1456 	 *
1457 	 * RFC 4861 specifies that redirects should only be
1458 	 * accepted if they come from the nexthop to the target.
1459 	 * Due to the way the routes are chosen, this notion
1460 	 * is a bit fuzzy and one might need to check all possible
1461 	 * routes.
1462 	 */
1463 
1464 	read_lock_bh(&table->tb6_lock);
1465 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1466 restart:
1467 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1468 		if (rt6_check_expired(rt))
1469 			continue;
1470 		if (rt->dst.error)
1471 			break;
1472 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1473 			continue;
1474 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1475 			continue;
1476 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1477 			continue;
1478 		break;
1479 	}
1480 
1481 	if (!rt)
1482 		rt = net->ipv6.ip6_null_entry;
1483 	else if (rt->dst.error) {
1484 		rt = net->ipv6.ip6_null_entry;
1485 		goto out;
1486 	}
1487 
1488 	if (rt == net->ipv6.ip6_null_entry) {
1489 		fn = fib6_backtrack(fn, &fl6->saddr);
1490 		if (fn)
1491 			goto restart;
1492 	}
1493 
1494 out:
1495 	dst_hold(&rt->dst);
1496 
1497 	read_unlock_bh(&table->tb6_lock);
1498 
1499 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1500 	return rt;
1501 };
1502 
1503 static struct dst_entry *ip6_route_redirect(struct net *net,
1504 					const struct flowi6 *fl6,
1505 					const struct in6_addr *gateway)
1506 {
1507 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1508 	struct ip6rd_flowi rdfl;
1509 
1510 	rdfl.fl6 = *fl6;
1511 	rdfl.gateway = *gateway;
1512 
1513 	return fib6_rule_lookup(net, &rdfl.fl6,
1514 				flags, __ip6_route_redirect);
1515 }
1516 
1517 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1518 {
1519 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1520 	struct dst_entry *dst;
1521 	struct flowi6 fl6;
1522 
1523 	memset(&fl6, 0, sizeof(fl6));
1524 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1525 	fl6.flowi6_oif = oif;
1526 	fl6.flowi6_mark = mark;
1527 	fl6.daddr = iph->daddr;
1528 	fl6.saddr = iph->saddr;
1529 	fl6.flowlabel = ip6_flowinfo(iph);
1530 
1531 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1532 	rt6_do_redirect(dst, NULL, skb);
1533 	dst_release(dst);
1534 }
1535 EXPORT_SYMBOL_GPL(ip6_redirect);
1536 
1537 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1538 			    u32 mark)
1539 {
1540 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1541 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1542 	struct dst_entry *dst;
1543 	struct flowi6 fl6;
1544 
1545 	memset(&fl6, 0, sizeof(fl6));
1546 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1547 	fl6.flowi6_oif = oif;
1548 	fl6.flowi6_mark = mark;
1549 	fl6.daddr = msg->dest;
1550 	fl6.saddr = iph->daddr;
1551 
1552 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1553 	rt6_do_redirect(dst, NULL, skb);
1554 	dst_release(dst);
1555 }
1556 
1557 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1558 {
1559 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1560 }
1561 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1562 
1563 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1564 {
1565 	struct net_device *dev = dst->dev;
1566 	unsigned int mtu = dst_mtu(dst);
1567 	struct net *net = dev_net(dev);
1568 
1569 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1570 
1571 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1572 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1573 
1574 	/*
1575 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1576 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1577 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1578 	 * rely only on pmtu discovery"
1579 	 */
1580 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1581 		mtu = IPV6_MAXPLEN;
1582 	return mtu;
1583 }
1584 
1585 static unsigned int ip6_mtu(const struct dst_entry *dst)
1586 {
1587 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1588 	unsigned int mtu = rt->rt6i_pmtu;
1589 	struct inet6_dev *idev;
1590 
1591 	if (mtu)
1592 		goto out;
1593 
1594 	mtu = dst_metric_raw(dst, RTAX_MTU);
1595 	if (mtu)
1596 		goto out;
1597 
1598 	mtu = IPV6_MIN_MTU;
1599 
1600 	rcu_read_lock();
1601 	idev = __in6_dev_get(dst->dev);
1602 	if (idev)
1603 		mtu = idev->cnf.mtu6;
1604 	rcu_read_unlock();
1605 
1606 out:
1607 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1608 }
1609 
/*
 * Head of the singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  Entries are unlinked and freed
 * by icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1612 
1613 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1614 				  struct flowi6 *fl6)
1615 {
1616 	struct dst_entry *dst;
1617 	struct rt6_info *rt;
1618 	struct inet6_dev *idev = in6_dev_get(dev);
1619 	struct net *net = dev_net(dev);
1620 
1621 	if (unlikely(!idev))
1622 		return ERR_PTR(-ENODEV);
1623 
1624 	rt = ip6_dst_alloc(net, dev, 0);
1625 	if (unlikely(!rt)) {
1626 		in6_dev_put(idev);
1627 		dst = ERR_PTR(-ENOMEM);
1628 		goto out;
1629 	}
1630 
1631 	rt->dst.flags |= DST_HOST;
1632 	rt->dst.output  = ip6_output;
1633 	atomic_set(&rt->dst.__refcnt, 1);
1634 	rt->rt6i_gateway  = fl6->daddr;
1635 	rt->rt6i_dst.addr = fl6->daddr;
1636 	rt->rt6i_dst.plen = 128;
1637 	rt->rt6i_idev     = idev;
1638 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1639 
1640 	spin_lock_bh(&icmp6_dst_lock);
1641 	rt->dst.next = icmp6_dst_gc_list;
1642 	icmp6_dst_gc_list = &rt->dst;
1643 	spin_unlock_bh(&icmp6_dst_lock);
1644 
1645 	fib6_force_start_gc(net);
1646 
1647 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1648 
1649 out:
1650 	return dst;
1651 }
1652 
1653 int icmp6_dst_gc(void)
1654 {
1655 	struct dst_entry *dst, **pprev;
1656 	int more = 0;
1657 
1658 	spin_lock_bh(&icmp6_dst_lock);
1659 	pprev = &icmp6_dst_gc_list;
1660 
1661 	while ((dst = *pprev) != NULL) {
1662 		if (!atomic_read(&dst->__refcnt)) {
1663 			*pprev = dst->next;
1664 			dst_free(dst);
1665 		} else {
1666 			pprev = &dst->next;
1667 			++more;
1668 		}
1669 	}
1670 
1671 	spin_unlock_bh(&icmp6_dst_lock);
1672 
1673 	return more;
1674 }
1675 
1676 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1677 			    void *arg)
1678 {
1679 	struct dst_entry *dst, **pprev;
1680 
1681 	spin_lock_bh(&icmp6_dst_lock);
1682 	pprev = &icmp6_dst_gc_list;
1683 	while ((dst = *pprev) != NULL) {
1684 		struct rt6_info *rt = (struct rt6_info *) dst;
1685 		if (func(rt, arg)) {
1686 			*pprev = dst->next;
1687 			dst_free(dst);
1688 		} else {
1689 			pprev = &dst->next;
1690 		}
1691 	}
1692 	spin_unlock_bh(&icmp6_dst_lock);
1693 }
1694 
1695 static int ip6_dst_gc(struct dst_ops *ops)
1696 {
1697 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1698 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1699 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1700 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1701 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1702 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1703 	int entries;
1704 
1705 	entries = dst_entries_get_fast(ops);
1706 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1707 	    entries <= rt_max_size)
1708 		goto out;
1709 
1710 	net->ipv6.ip6_rt_gc_expire++;
1711 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1712 	entries = dst_entries_get_slow(ops);
1713 	if (entries < ops->gc_thresh)
1714 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1715 out:
1716 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1717 	return entries > rt_max_size;
1718 }
1719 
1720 static int ip6_convert_metrics(struct mx6_config *mxc,
1721 			       const struct fib6_config *cfg)
1722 {
1723 	bool ecn_ca = false;
1724 	struct nlattr *nla;
1725 	int remaining;
1726 	u32 *mp;
1727 
1728 	if (!cfg->fc_mx)
1729 		return 0;
1730 
1731 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1732 	if (unlikely(!mp))
1733 		return -ENOMEM;
1734 
1735 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1736 		int type = nla_type(nla);
1737 		u32 val;
1738 
1739 		if (!type)
1740 			continue;
1741 		if (unlikely(type > RTAX_MAX))
1742 			goto err;
1743 
1744 		if (type == RTAX_CC_ALGO) {
1745 			char tmp[TCP_CA_NAME_MAX];
1746 
1747 			nla_strlcpy(tmp, nla, sizeof(tmp));
1748 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1749 			if (val == TCP_CA_UNSPEC)
1750 				goto err;
1751 		} else {
1752 			val = nla_get_u32(nla);
1753 		}
1754 		if (type == RTAX_HOPLIMIT && val > 255)
1755 			val = 255;
1756 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1757 			goto err;
1758 
1759 		mp[type - 1] = val;
1760 		__set_bit(type - 1, mxc->mx_valid);
1761 	}
1762 
1763 	if (ecn_ca) {
1764 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1765 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1766 	}
1767 
1768 	mxc->mx = mp;
1769 	return 0;
1770  err:
1771 	kfree(mp);
1772 	return -EINVAL;
1773 }
1774 
1775 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1776 					    struct fib6_config *cfg,
1777 					    const struct in6_addr *gw_addr)
1778 {
1779 	struct flowi6 fl6 = {
1780 		.flowi6_oif = cfg->fc_ifindex,
1781 		.daddr = *gw_addr,
1782 		.saddr = cfg->fc_prefsrc,
1783 	};
1784 	struct fib6_table *table;
1785 	struct rt6_info *rt;
1786 	int flags = RT6_LOOKUP_F_IFACE;
1787 
1788 	table = fib6_get_table(net, cfg->fc_table);
1789 	if (!table)
1790 		return NULL;
1791 
1792 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1793 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1794 
1795 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1796 
1797 	/* if table lookup failed, fall back to full lookup */
1798 	if (rt == net->ipv6.ip6_null_entry) {
1799 		ip6_rt_put(rt);
1800 		rt = NULL;
1801 	}
1802 
1803 	return rt;
1804 }
1805 
1806 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1807 {
1808 	struct net *net = cfg->fc_nlinfo.nl_net;
1809 	struct rt6_info *rt = NULL;
1810 	struct net_device *dev = NULL;
1811 	struct inet6_dev *idev = NULL;
1812 	struct fib6_table *table;
1813 	int addr_type;
1814 	int err = -EINVAL;
1815 
1816 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1817 		goto out;
1818 #ifndef CONFIG_IPV6_SUBTREES
1819 	if (cfg->fc_src_len)
1820 		goto out;
1821 #endif
1822 	if (cfg->fc_ifindex) {
1823 		err = -ENODEV;
1824 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1825 		if (!dev)
1826 			goto out;
1827 		idev = in6_dev_get(dev);
1828 		if (!idev)
1829 			goto out;
1830 	}
1831 
1832 	if (cfg->fc_metric == 0)
1833 		cfg->fc_metric = IP6_RT_PRIO_USER;
1834 
1835 	err = -ENOBUFS;
1836 	if (cfg->fc_nlinfo.nlh &&
1837 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1838 		table = fib6_get_table(net, cfg->fc_table);
1839 		if (!table) {
1840 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1841 			table = fib6_new_table(net, cfg->fc_table);
1842 		}
1843 	} else {
1844 		table = fib6_new_table(net, cfg->fc_table);
1845 	}
1846 
1847 	if (!table)
1848 		goto out;
1849 
1850 	rt = ip6_dst_alloc(net, NULL,
1851 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1852 
1853 	if (!rt) {
1854 		err = -ENOMEM;
1855 		goto out;
1856 	}
1857 
1858 	if (cfg->fc_flags & RTF_EXPIRES)
1859 		rt6_set_expires(rt, jiffies +
1860 				clock_t_to_jiffies(cfg->fc_expires));
1861 	else
1862 		rt6_clean_expires(rt);
1863 
1864 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1865 		cfg->fc_protocol = RTPROT_BOOT;
1866 	rt->rt6i_protocol = cfg->fc_protocol;
1867 
1868 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1869 
1870 	if (addr_type & IPV6_ADDR_MULTICAST)
1871 		rt->dst.input = ip6_mc_input;
1872 	else if (cfg->fc_flags & RTF_LOCAL)
1873 		rt->dst.input = ip6_input;
1874 	else
1875 		rt->dst.input = ip6_forward;
1876 
1877 	rt->dst.output = ip6_output;
1878 
1879 	if (cfg->fc_encap) {
1880 		struct lwtunnel_state *lwtstate;
1881 
1882 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1883 					   cfg->fc_encap, AF_INET6, cfg,
1884 					   &lwtstate);
1885 		if (err)
1886 			goto out;
1887 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1888 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1889 			rt->dst.lwtstate->orig_output = rt->dst.output;
1890 			rt->dst.output = lwtunnel_output;
1891 		}
1892 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1893 			rt->dst.lwtstate->orig_input = rt->dst.input;
1894 			rt->dst.input = lwtunnel_input;
1895 		}
1896 	}
1897 
1898 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1899 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1900 	if (rt->rt6i_dst.plen == 128)
1901 		rt->dst.flags |= DST_HOST;
1902 
1903 #ifdef CONFIG_IPV6_SUBTREES
1904 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1905 	rt->rt6i_src.plen = cfg->fc_src_len;
1906 #endif
1907 
1908 	rt->rt6i_metric = cfg->fc_metric;
1909 
1910 	/* We cannot add true routes via loopback here,
1911 	   they would result in kernel looping; promote them to reject routes
1912 	 */
1913 	if ((cfg->fc_flags & RTF_REJECT) ||
1914 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1915 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1916 	     !(cfg->fc_flags & RTF_LOCAL))) {
1917 		/* hold loopback dev/idev if we haven't done so. */
1918 		if (dev != net->loopback_dev) {
1919 			if (dev) {
1920 				dev_put(dev);
1921 				in6_dev_put(idev);
1922 			}
1923 			dev = net->loopback_dev;
1924 			dev_hold(dev);
1925 			idev = in6_dev_get(dev);
1926 			if (!idev) {
1927 				err = -ENODEV;
1928 				goto out;
1929 			}
1930 		}
1931 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1932 		switch (cfg->fc_type) {
1933 		case RTN_BLACKHOLE:
1934 			rt->dst.error = -EINVAL;
1935 			rt->dst.output = dst_discard_out;
1936 			rt->dst.input = dst_discard;
1937 			break;
1938 		case RTN_PROHIBIT:
1939 			rt->dst.error = -EACCES;
1940 			rt->dst.output = ip6_pkt_prohibit_out;
1941 			rt->dst.input = ip6_pkt_prohibit;
1942 			break;
1943 		case RTN_THROW:
1944 		case RTN_UNREACHABLE:
1945 		default:
1946 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1947 					: (cfg->fc_type == RTN_UNREACHABLE)
1948 					? -EHOSTUNREACH : -ENETUNREACH;
1949 			rt->dst.output = ip6_pkt_discard_out;
1950 			rt->dst.input = ip6_pkt_discard;
1951 			break;
1952 		}
1953 		goto install_route;
1954 	}
1955 
1956 	if (cfg->fc_flags & RTF_GATEWAY) {
1957 		const struct in6_addr *gw_addr;
1958 		int gwa_type;
1959 
1960 		gw_addr = &cfg->fc_gateway;
1961 		gwa_type = ipv6_addr_type(gw_addr);
1962 
1963 		/* if gw_addr is local we will fail to detect this in case
1964 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1965 		 * will return already-added prefix route via interface that
1966 		 * prefix route was assigned to, which might be non-loopback.
1967 		 */
1968 		err = -EINVAL;
1969 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1970 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1971 					    dev : NULL, 0, 0))
1972 			goto out;
1973 
1974 		rt->rt6i_gateway = *gw_addr;
1975 
1976 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1977 			struct rt6_info *grt = NULL;
1978 
1979 			/* IPv6 strictly inhibits using not link-local
1980 			   addresses as nexthop address.
1981 			   Otherwise, router will not able to send redirects.
1982 			   It is very good, but in some (rare!) circumstances
1983 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1984 			   some exceptions. --ANK
1985 			 */
1986 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1987 				goto out;
1988 
1989 			if (cfg->fc_table) {
1990 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1991 
1992 				if (grt) {
1993 					if (grt->rt6i_flags & RTF_GATEWAY ||
1994 					    (dev && dev != grt->dst.dev)) {
1995 						ip6_rt_put(grt);
1996 						grt = NULL;
1997 					}
1998 				}
1999 			}
2000 
2001 			if (!grt)
2002 				grt = rt6_lookup(net, gw_addr, NULL,
2003 						 cfg->fc_ifindex, 1);
2004 
2005 			err = -EHOSTUNREACH;
2006 			if (!grt)
2007 				goto out;
2008 			if (dev) {
2009 				if (dev != grt->dst.dev) {
2010 					ip6_rt_put(grt);
2011 					goto out;
2012 				}
2013 			} else {
2014 				dev = grt->dst.dev;
2015 				idev = grt->rt6i_idev;
2016 				dev_hold(dev);
2017 				in6_dev_hold(grt->rt6i_idev);
2018 			}
2019 			if (!(grt->rt6i_flags & RTF_GATEWAY))
2020 				err = 0;
2021 			ip6_rt_put(grt);
2022 
2023 			if (err)
2024 				goto out;
2025 		}
2026 		err = -EINVAL;
2027 		if (!dev || (dev->flags & IFF_LOOPBACK))
2028 			goto out;
2029 	}
2030 
2031 	err = -ENODEV;
2032 	if (!dev)
2033 		goto out;
2034 
2035 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2036 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2037 			err = -EINVAL;
2038 			goto out;
2039 		}
2040 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2041 		rt->rt6i_prefsrc.plen = 128;
2042 	} else
2043 		rt->rt6i_prefsrc.plen = 0;
2044 
2045 	rt->rt6i_flags = cfg->fc_flags;
2046 
2047 install_route:
2048 	rt->dst.dev = dev;
2049 	rt->rt6i_idev = idev;
2050 	rt->rt6i_table = table;
2051 
2052 	cfg->fc_nlinfo.nl_net = dev_net(dev);
2053 
2054 	return rt;
2055 out:
2056 	if (dev)
2057 		dev_put(dev);
2058 	if (idev)
2059 		in6_dev_put(idev);
2060 	if (rt)
2061 		dst_free(&rt->dst);
2062 
2063 	return ERR_PTR(err);
2064 }
2065 
2066 int ip6_route_add(struct fib6_config *cfg)
2067 {
2068 	struct mx6_config mxc = { .mx = NULL, };
2069 	struct rt6_info *rt;
2070 	int err;
2071 
2072 	rt = ip6_route_info_create(cfg);
2073 	if (IS_ERR(rt)) {
2074 		err = PTR_ERR(rt);
2075 		rt = NULL;
2076 		goto out;
2077 	}
2078 
2079 	err = ip6_convert_metrics(&mxc, cfg);
2080 	if (err)
2081 		goto out;
2082 
2083 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2084 
2085 	kfree(mxc.mx);
2086 
2087 	return err;
2088 out:
2089 	if (rt)
2090 		dst_free(&rt->dst);
2091 
2092 	return err;
2093 }
2094 
2095 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2096 {
2097 	int err;
2098 	struct fib6_table *table;
2099 	struct net *net = dev_net(rt->dst.dev);
2100 
2101 	if (rt == net->ipv6.ip6_null_entry ||
2102 	    rt->dst.flags & DST_NOCACHE) {
2103 		err = -ENOENT;
2104 		goto out;
2105 	}
2106 
2107 	table = rt->rt6i_table;
2108 	write_lock_bh(&table->tb6_lock);
2109 	err = fib6_del(rt, info);
2110 	write_unlock_bh(&table->tb6_lock);
2111 
2112 out:
2113 	ip6_rt_put(rt);
2114 	return err;
2115 }
2116 
2117 int ip6_del_rt(struct rt6_info *rt)
2118 {
2119 	struct nl_info info = {
2120 		.nl_net = dev_net(rt->dst.dev),
2121 	};
2122 	return __ip6_del_rt(rt, &info);
2123 }
2124 
2125 static int ip6_route_del(struct fib6_config *cfg)
2126 {
2127 	struct fib6_table *table;
2128 	struct fib6_node *fn;
2129 	struct rt6_info *rt;
2130 	int err = -ESRCH;
2131 
2132 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2133 	if (!table)
2134 		return err;
2135 
2136 	read_lock_bh(&table->tb6_lock);
2137 
2138 	fn = fib6_locate(&table->tb6_root,
2139 			 &cfg->fc_dst, cfg->fc_dst_len,
2140 			 &cfg->fc_src, cfg->fc_src_len);
2141 
2142 	if (fn) {
2143 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2144 			if ((rt->rt6i_flags & RTF_CACHE) &&
2145 			    !(cfg->fc_flags & RTF_CACHE))
2146 				continue;
2147 			if (cfg->fc_ifindex &&
2148 			    (!rt->dst.dev ||
2149 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2150 				continue;
2151 			if (cfg->fc_flags & RTF_GATEWAY &&
2152 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2153 				continue;
2154 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2155 				continue;
2156 			dst_hold(&rt->dst);
2157 			read_unlock_bh(&table->tb6_lock);
2158 
2159 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2160 		}
2161 	}
2162 	read_unlock_bh(&table->tb6_lock);
2163 
2164 	return err;
2165 }
2166 
/*
 * Act on a received redirect message.
 *
 * @dst: route the redirect was matched against
 * @sk:  not used by this function
 * @skb: packet carrying the redirect message
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or a link-local unicast address,
 * interface accepts redirects and is not forwarding, well-formed ND
 * options, and @dst not a reject route).  If it passes, the neighbour
 * entry for the target is updated and an RTF_CACHE clone of @dst towards
 * the destination via the target is inserted; a netevent is raised and,
 * if @dst was itself a cache entry, it is deleted.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes of ND options following the fixed redirect header. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* Target equal to destination means the destination is on-link;
	 * otherwise the target must be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Ignore redirects when forwarding or when they are disabled. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* Optional target link-layer address carried in the options. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* The router flags are only forced when the target is not the
	 * destination itself.
	 */
	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* The new next hop is the neighbour looked up for the target. */
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The old cache entry is superseded: take a reference for
	 * ip6_del_rt() and remove it.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2282 
2283 /*
2284  *	Misc support functions
2285  */
2286 
2287 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2288 {
2289 	BUG_ON(from->dst.from);
2290 
2291 	rt->rt6i_flags &= ~RTF_EXPIRES;
2292 	dst_hold(&from->dst);
2293 	rt->dst.from = &from->dst;
2294 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2295 }
2296 
2297 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2298 {
2299 	rt->dst.input = ort->dst.input;
2300 	rt->dst.output = ort->dst.output;
2301 	rt->rt6i_dst = ort->rt6i_dst;
2302 	rt->dst.error = ort->dst.error;
2303 	rt->rt6i_idev = ort->rt6i_idev;
2304 	if (rt->rt6i_idev)
2305 		in6_dev_hold(rt->rt6i_idev);
2306 	rt->dst.lastuse = jiffies;
2307 	rt->rt6i_gateway = ort->rt6i_gateway;
2308 	rt->rt6i_flags = ort->rt6i_flags;
2309 	rt6_set_from(rt, ort);
2310 	rt->rt6i_metric = ort->rt6i_metric;
2311 #ifdef CONFIG_IPV6_SUBTREES
2312 	rt->rt6i_src = ort->rt6i_src;
2313 #endif
2314 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2315 	rt->rt6i_table = ort->rt6i_table;
2316 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2317 }
2318 
2319 #ifdef CONFIG_IPV6_ROUTE_INFO
2320 static struct rt6_info *rt6_get_route_info(struct net *net,
2321 					   const struct in6_addr *prefix, int prefixlen,
2322 					   const struct in6_addr *gwaddr, int ifindex)
2323 {
2324 	struct fib6_node *fn;
2325 	struct rt6_info *rt = NULL;
2326 	struct fib6_table *table;
2327 
2328 	table = fib6_get_table(net, RT6_TABLE_INFO);
2329 	if (!table)
2330 		return NULL;
2331 
2332 	read_lock_bh(&table->tb6_lock);
2333 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2334 	if (!fn)
2335 		goto out;
2336 
2337 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2338 		if (rt->dst.dev->ifindex != ifindex)
2339 			continue;
2340 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2341 			continue;
2342 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2343 			continue;
2344 		dst_hold(&rt->dst);
2345 		break;
2346 	}
2347 out:
2348 	read_unlock_bh(&table->tb6_lock);
2349 	return rt;
2350 }
2351 
2352 static struct rt6_info *rt6_add_route_info(struct net *net,
2353 					   const struct in6_addr *prefix, int prefixlen,
2354 					   const struct in6_addr *gwaddr, int ifindex,
2355 					   unsigned int pref)
2356 {
2357 	struct fib6_config cfg = {
2358 		.fc_metric	= IP6_RT_PRIO_USER,
2359 		.fc_ifindex	= ifindex,
2360 		.fc_dst_len	= prefixlen,
2361 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2362 				  RTF_UP | RTF_PREF(pref),
2363 		.fc_nlinfo.portid = 0,
2364 		.fc_nlinfo.nlh = NULL,
2365 		.fc_nlinfo.nl_net = net,
2366 	};
2367 
2368 	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2369 	cfg.fc_dst = *prefix;
2370 	cfg.fc_gateway = *gwaddr;
2371 
2372 	/* We should treat it as a default route if prefix length is 0. */
2373 	if (!prefixlen)
2374 		cfg.fc_flags |= RTF_DEFAULT;
2375 
2376 	ip6_route_add(&cfg);
2377 
2378 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2379 }
2380 #endif
2381 
2382 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2383 {
2384 	struct rt6_info *rt;
2385 	struct fib6_table *table;
2386 
2387 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2388 	if (!table)
2389 		return NULL;
2390 
2391 	read_lock_bh(&table->tb6_lock);
2392 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393 		if (dev == rt->dst.dev &&
2394 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2395 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2396 			break;
2397 	}
2398 	if (rt)
2399 		dst_hold(&rt->dst);
2400 	read_unlock_bh(&table->tb6_lock);
2401 	return rt;
2402 }
2403 
2404 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2405 				     struct net_device *dev,
2406 				     unsigned int pref)
2407 {
2408 	struct fib6_config cfg = {
2409 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2410 		.fc_metric	= IP6_RT_PRIO_USER,
2411 		.fc_ifindex	= dev->ifindex,
2412 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2413 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2414 		.fc_nlinfo.portid = 0,
2415 		.fc_nlinfo.nlh = NULL,
2416 		.fc_nlinfo.nl_net = dev_net(dev),
2417 	};
2418 
2419 	cfg.fc_gateway = *gwaddr;
2420 
2421 	ip6_route_add(&cfg);
2422 
2423 	return rt6_get_dflt_router(gwaddr, dev);
2424 }
2425 
2426 void rt6_purge_dflt_routers(struct net *net)
2427 {
2428 	struct rt6_info *rt;
2429 	struct fib6_table *table;
2430 
2431 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2432 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2433 	if (!table)
2434 		return;
2435 
2436 restart:
2437 	read_lock_bh(&table->tb6_lock);
2438 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2439 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2440 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2441 			dst_hold(&rt->dst);
2442 			read_unlock_bh(&table->tb6_lock);
2443 			ip6_del_rt(rt);
2444 			goto restart;
2445 		}
2446 	}
2447 	read_unlock_bh(&table->tb6_lock);
2448 }
2449 
2450 static void rtmsg_to_fib6_config(struct net *net,
2451 				 struct in6_rtmsg *rtmsg,
2452 				 struct fib6_config *cfg)
2453 {
2454 	memset(cfg, 0, sizeof(*cfg));
2455 
2456 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2457 			 : RT6_TABLE_MAIN;
2458 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2459 	cfg->fc_metric = rtmsg->rtmsg_metric;
2460 	cfg->fc_expires = rtmsg->rtmsg_info;
2461 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2462 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2463 	cfg->fc_flags = rtmsg->rtmsg_flags;
2464 
2465 	cfg->fc_nlinfo.nl_net = net;
2466 
2467 	cfg->fc_dst = rtmsg->rtmsg_dst;
2468 	cfg->fc_src = rtmsg->rtmsg_src;
2469 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2470 }
2471 
2472 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2473 {
2474 	struct fib6_config cfg;
2475 	struct in6_rtmsg rtmsg;
2476 	int err;
2477 
2478 	switch (cmd) {
2479 	case SIOCADDRT:		/* Add a route */
2480 	case SIOCDELRT:		/* Delete a route */
2481 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2482 			return -EPERM;
2483 		err = copy_from_user(&rtmsg, arg,
2484 				     sizeof(struct in6_rtmsg));
2485 		if (err)
2486 			return -EFAULT;
2487 
2488 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2489 
2490 		rtnl_lock();
2491 		switch (cmd) {
2492 		case SIOCADDRT:
2493 			err = ip6_route_add(&cfg);
2494 			break;
2495 		case SIOCDELRT:
2496 			err = ip6_route_del(&cfg);
2497 			break;
2498 		default:
2499 			err = -EINVAL;
2500 		}
2501 		rtnl_unlock();
2502 
2503 		return err;
2504 	}
2505 
2506 	return -EINVAL;
2507 }
2508 
2509 /*
2510  *	Drop the packet on the floor
2511  */
2512 
2513 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2514 {
2515 	int type;
2516 	struct dst_entry *dst = skb_dst(skb);
2517 	switch (ipstats_mib_noroutes) {
2518 	case IPSTATS_MIB_INNOROUTES:
2519 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2520 		if (type == IPV6_ADDR_ANY) {
2521 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2522 				      IPSTATS_MIB_INADDRERRORS);
2523 			break;
2524 		}
2525 		/* FALLTHROUGH */
2526 	case IPSTATS_MIB_OUTNOROUTES:
2527 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2528 			      ipstats_mib_noroutes);
2529 		break;
2530 	}
2531 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2532 	kfree_skb(skb);
2533 	return 0;
2534 }
2535 
2536 static int ip6_pkt_discard(struct sk_buff *skb)
2537 {
2538 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2539 }
2540 
2541 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2542 {
2543 	skb->dev = skb_dst(skb)->dev;
2544 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2545 }
2546 
2547 static int ip6_pkt_prohibit(struct sk_buff *skb)
2548 {
2549 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2550 }
2551 
2552 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2553 {
2554 	skb->dev = skb_dst(skb)->dev;
2555 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2556 }
2557 
2558 /*
2559  *	Allocate a dst for local (unicast / anycast) address.
2560  */
2561 
2562 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2563 				    const struct in6_addr *addr,
2564 				    bool anycast)
2565 {
2566 	u32 tb_id;
2567 	struct net *net = dev_net(idev->dev);
2568 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2569 					    DST_NOCOUNT);
2570 	if (!rt)
2571 		return ERR_PTR(-ENOMEM);
2572 
2573 	in6_dev_hold(idev);
2574 
2575 	rt->dst.flags |= DST_HOST;
2576 	rt->dst.input = ip6_input;
2577 	rt->dst.output = ip6_output;
2578 	rt->rt6i_idev = idev;
2579 
2580 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2581 	if (anycast)
2582 		rt->rt6i_flags |= RTF_ANYCAST;
2583 	else
2584 		rt->rt6i_flags |= RTF_LOCAL;
2585 
2586 	rt->rt6i_gateway  = *addr;
2587 	rt->rt6i_dst.addr = *addr;
2588 	rt->rt6i_dst.plen = 128;
2589 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2590 	rt->rt6i_table = fib6_get_table(net, tb_id);
2591 	rt->dst.flags |= DST_NOCACHE;
2592 
2593 	atomic_set(&rt->dst.__refcnt, 1);
2594 
2595 	return rt;
2596 }
2597 
/* Argument bundle for fib6_remove_prefsrc(): identifies the address that
 * was deleted so it can be removed from routes' prefsrc entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace, used to skip its ip6_null_entry */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc.addr */
};
2604 
2605 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2606 {
2607 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2608 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2609 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2610 
2611 	if (((void *)rt->dst.dev == dev || !dev) &&
2612 	    rt != net->ipv6.ip6_null_entry &&
2613 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2614 		/* remove prefsrc entry */
2615 		rt->rt6i_prefsrc.plen = 0;
2616 	}
2617 	return 0;
2618 }
2619 
2620 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2621 {
2622 	struct net *net = dev_net(ifp->idev->dev);
2623 	struct arg_dev_net_ip adni = {
2624 		.dev = ifp->idev->dev,
2625 		.net = net,
2626 		.addr = &ifp->addr,
2627 	};
2628 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2629 }
2630 
/* Flag combinations tested by fib6_clean_tohost(): an autoconfigured
 * default route through a gateway, and a cached route through a gateway.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2633 
2634 /* Remove routers and update dst entries when gateway turn into host. */
2635 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2636 {
2637 	struct in6_addr *gateway = (struct in6_addr *)arg;
2638 
2639 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2640 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2641 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2642 		return -1;
2643 	}
2644 	return 0;
2645 }
2646 
2647 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2648 {
2649 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2650 }
2651 
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every route */
	struct net *net;	/* namespace, used to skip its ip6_null_entry */
};
2656 
2657 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2658 {
2659 	const struct arg_dev_net *adn = arg;
2660 	const struct net_device *dev = adn->dev;
2661 
2662 	if ((rt->dst.dev == dev || !dev) &&
2663 	    rt != adn->net->ipv6.ip6_null_entry)
2664 		return -1;
2665 
2666 	return 0;
2667 }
2668 
2669 void rt6_ifdown(struct net *net, struct net_device *dev)
2670 {
2671 	struct arg_dev_net adn = {
2672 		.dev = dev,
2673 		.net = net,
2674 	};
2675 
2676 	fib6_clean_all(net, fib6_ifdown, &adn);
2677 	icmp6_clean_all(fib6_ifdown, &adn);
2678 	if (dev)
2679 		rt6_uncached_list_flush_dev(net, dev);
2680 }
2681 
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2686 
2687 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2688 {
2689 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2690 	struct inet6_dev *idev;
2691 
2692 	/* In IPv6 pmtu discovery is not optional,
2693 	   so that RTAX_MTU lock cannot disable it.
2694 	   We still use this lock to block changes
2695 	   caused by addrconf/ndisc.
2696 	*/
2697 
2698 	idev = __in6_dev_get(arg->dev);
2699 	if (!idev)
2700 		return 0;
2701 
2702 	/* For administrative MTU increase, there is no way to discover
2703 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2704 	   Since RFC 1981 doesn't include administrative MTU increase
2705 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2706 	 */
2707 	/*
2708 	   If new MTU is less than route PMTU, this new MTU will be the
2709 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2710 	   decreases; if new MTU is greater than route PMTU, and the
2711 	   old MTU is the lowest MTU in the path, update the route PMTU
2712 	   to reflect the increase. In this case if the other nodes' MTU
2713 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2714 	   PMTU discouvery.
2715 	 */
2716 	if (rt->dst.dev == arg->dev &&
2717 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2718 		if (rt->rt6i_flags & RTF_CACHE) {
2719 			/* For RTF_CACHE with rt6i_pmtu == 0
2720 			 * (i.e. a redirected route),
2721 			 * the metrics of its rt->dst.from has already
2722 			 * been updated.
2723 			 */
2724 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2725 				rt->rt6i_pmtu = arg->mtu;
2726 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2727 			   (dst_mtu(&rt->dst) < arg->mtu &&
2728 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2729 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2730 		}
2731 	}
2732 	return 0;
2733 }
2734 
2735 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2736 {
2737 	struct rt6_mtu_change_arg arg = {
2738 		.dev = dev,
2739 		.mtu = mtu,
2740 	};
2741 
2742 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2743 }
2744 
2745 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2746 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2747 	[RTA_OIF]               = { .type = NLA_U32 },
2748 	[RTA_IIF]		= { .type = NLA_U32 },
2749 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2750 	[RTA_METRICS]           = { .type = NLA_NESTED },
2751 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2752 	[RTA_PREF]              = { .type = NLA_U8 },
2753 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2754 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2755 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2756 };
2757 
2758 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2759 			      struct fib6_config *cfg)
2760 {
2761 	struct rtmsg *rtm;
2762 	struct nlattr *tb[RTA_MAX+1];
2763 	unsigned int pref;
2764 	int err;
2765 
2766 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2767 	if (err < 0)
2768 		goto errout;
2769 
2770 	err = -EINVAL;
2771 	rtm = nlmsg_data(nlh);
2772 	memset(cfg, 0, sizeof(*cfg));
2773 
2774 	cfg->fc_table = rtm->rtm_table;
2775 	cfg->fc_dst_len = rtm->rtm_dst_len;
2776 	cfg->fc_src_len = rtm->rtm_src_len;
2777 	cfg->fc_flags = RTF_UP;
2778 	cfg->fc_protocol = rtm->rtm_protocol;
2779 	cfg->fc_type = rtm->rtm_type;
2780 
2781 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2782 	    rtm->rtm_type == RTN_BLACKHOLE ||
2783 	    rtm->rtm_type == RTN_PROHIBIT ||
2784 	    rtm->rtm_type == RTN_THROW)
2785 		cfg->fc_flags |= RTF_REJECT;
2786 
2787 	if (rtm->rtm_type == RTN_LOCAL)
2788 		cfg->fc_flags |= RTF_LOCAL;
2789 
2790 	if (rtm->rtm_flags & RTM_F_CLONED)
2791 		cfg->fc_flags |= RTF_CACHE;
2792 
2793 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2794 	cfg->fc_nlinfo.nlh = nlh;
2795 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2796 
2797 	if (tb[RTA_GATEWAY]) {
2798 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2799 		cfg->fc_flags |= RTF_GATEWAY;
2800 	}
2801 
2802 	if (tb[RTA_DST]) {
2803 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2804 
2805 		if (nla_len(tb[RTA_DST]) < plen)
2806 			goto errout;
2807 
2808 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2809 	}
2810 
2811 	if (tb[RTA_SRC]) {
2812 		int plen = (rtm->rtm_src_len + 7) >> 3;
2813 
2814 		if (nla_len(tb[RTA_SRC]) < plen)
2815 			goto errout;
2816 
2817 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2818 	}
2819 
2820 	if (tb[RTA_PREFSRC])
2821 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2822 
2823 	if (tb[RTA_OIF])
2824 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2825 
2826 	if (tb[RTA_PRIORITY])
2827 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2828 
2829 	if (tb[RTA_METRICS]) {
2830 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2831 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2832 	}
2833 
2834 	if (tb[RTA_TABLE])
2835 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2836 
2837 	if (tb[RTA_MULTIPATH]) {
2838 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2839 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2840 	}
2841 
2842 	if (tb[RTA_PREF]) {
2843 		pref = nla_get_u8(tb[RTA_PREF]);
2844 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2845 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2846 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2847 		cfg->fc_flags |= RTF_PREF(pref);
2848 	}
2849 
2850 	if (tb[RTA_ENCAP])
2851 		cfg->fc_encap = tb[RTA_ENCAP];
2852 
2853 	if (tb[RTA_ENCAP_TYPE])
2854 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2855 
2856 	if (tb[RTA_EXPIRES]) {
2857 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2858 
2859 		if (addrconf_finite_timeout(timeout)) {
2860 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2861 			cfg->fc_flags |= RTF_EXPIRES;
2862 		}
2863 	}
2864 
2865 	err = 0;
2866 errout:
2867 	return err;
2868 }
2869 
/* One nexthop of a multipath route while it is being added; entries are
 * chained on the local list built by ip6_route_multipath_add().
 */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route for this nexthop; NULL once handed to __ip6_ins_rt() */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request config */
	struct mx6_config mxc;		/* metrics produced by ip6_convert_metrics() */
	struct list_head next;		/* list linkage */
};
2876 
2877 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2878 {
2879 	struct rt6_nh *nh;
2880 
2881 	list_for_each_entry(nh, rt6_nh_list, next) {
2882 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2883 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2884 		        nh->r_cfg.fc_ifindex);
2885 	}
2886 }
2887 
2888 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2889 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2890 {
2891 	struct rt6_nh *nh;
2892 	struct rt6_info *rtnh;
2893 	int err = -EEXIST;
2894 
2895 	list_for_each_entry(nh, rt6_nh_list, next) {
2896 		/* check if rt6_info already exists */
2897 		rtnh = nh->rt6_info;
2898 
2899 		if (rtnh->dst.dev == rt->dst.dev &&
2900 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2901 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2902 				    &rt->rt6i_gateway))
2903 			return err;
2904 	}
2905 
2906 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2907 	if (!nh)
2908 		return -ENOMEM;
2909 	nh->rt6_info = rt;
2910 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2911 	if (err) {
2912 		kfree(nh);
2913 		return err;
2914 	}
2915 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2916 	list_add_tail(&nh->next, rt6_nh_list);
2917 
2918 	return 0;
2919 }
2920 
2921 static int ip6_route_multipath_add(struct fib6_config *cfg)
2922 {
2923 	struct fib6_config r_cfg;
2924 	struct rtnexthop *rtnh;
2925 	struct rt6_info *rt;
2926 	struct rt6_nh *err_nh;
2927 	struct rt6_nh *nh, *nh_safe;
2928 	int remaining;
2929 	int attrlen;
2930 	int err = 1;
2931 	int nhn = 0;
2932 	int replace = (cfg->fc_nlinfo.nlh &&
2933 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2934 	LIST_HEAD(rt6_nh_list);
2935 
2936 	remaining = cfg->fc_mp_len;
2937 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2938 
2939 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2940 	 * rt6_info structs per nexthop
2941 	 */
2942 	while (rtnh_ok(rtnh, remaining)) {
2943 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2944 		if (rtnh->rtnh_ifindex)
2945 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2946 
2947 		attrlen = rtnh_attrlen(rtnh);
2948 		if (attrlen > 0) {
2949 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2950 
2951 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2952 			if (nla) {
2953 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2954 				r_cfg.fc_flags |= RTF_GATEWAY;
2955 			}
2956 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2957 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2958 			if (nla)
2959 				r_cfg.fc_encap_type = nla_get_u16(nla);
2960 		}
2961 
2962 		rt = ip6_route_info_create(&r_cfg);
2963 		if (IS_ERR(rt)) {
2964 			err = PTR_ERR(rt);
2965 			rt = NULL;
2966 			goto cleanup;
2967 		}
2968 
2969 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2970 		if (err) {
2971 			dst_free(&rt->dst);
2972 			goto cleanup;
2973 		}
2974 
2975 		rtnh = rtnh_next(rtnh, &remaining);
2976 	}
2977 
2978 	err_nh = NULL;
2979 	list_for_each_entry(nh, &rt6_nh_list, next) {
2980 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2981 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2982 		nh->rt6_info = NULL;
2983 		if (err) {
2984 			if (replace && nhn)
2985 				ip6_print_replace_route_err(&rt6_nh_list);
2986 			err_nh = nh;
2987 			goto add_errout;
2988 		}
2989 
2990 		/* Because each route is added like a single route we remove
2991 		 * these flags after the first nexthop: if there is a collision,
2992 		 * we have already failed to add the first nexthop:
2993 		 * fib6_add_rt2node() has rejected it; when replacing, old
2994 		 * nexthops have been replaced by first new, the rest should
2995 		 * be added to it.
2996 		 */
2997 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2998 						     NLM_F_REPLACE);
2999 		nhn++;
3000 	}
3001 
3002 	goto cleanup;
3003 
3004 add_errout:
3005 	/* Delete routes that were already added */
3006 	list_for_each_entry(nh, &rt6_nh_list, next) {
3007 		if (err_nh == nh)
3008 			break;
3009 		ip6_route_del(&nh->r_cfg);
3010 	}
3011 
3012 cleanup:
3013 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3014 		if (nh->rt6_info)
3015 			dst_free(&nh->rt6_info->dst);
3016 		kfree(nh->mxc.mx);
3017 		list_del(&nh->next);
3018 		kfree(nh);
3019 	}
3020 
3021 	return err;
3022 }
3023 
3024 static int ip6_route_multipath_del(struct fib6_config *cfg)
3025 {
3026 	struct fib6_config r_cfg;
3027 	struct rtnexthop *rtnh;
3028 	int remaining;
3029 	int attrlen;
3030 	int err = 1, last_err = 0;
3031 
3032 	remaining = cfg->fc_mp_len;
3033 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3034 
3035 	/* Parse a Multipath Entry */
3036 	while (rtnh_ok(rtnh, remaining)) {
3037 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3038 		if (rtnh->rtnh_ifindex)
3039 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3040 
3041 		attrlen = rtnh_attrlen(rtnh);
3042 		if (attrlen > 0) {
3043 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3044 
3045 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3046 			if (nla) {
3047 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3048 				r_cfg.fc_flags |= RTF_GATEWAY;
3049 			}
3050 		}
3051 		err = ip6_route_del(&r_cfg);
3052 		if (err)
3053 			last_err = err;
3054 
3055 		rtnh = rtnh_next(rtnh, &remaining);
3056 	}
3057 
3058 	return last_err;
3059 }
3060 
3061 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3062 {
3063 	struct fib6_config cfg;
3064 	int err;
3065 
3066 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3067 	if (err < 0)
3068 		return err;
3069 
3070 	if (cfg.fc_mp)
3071 		return ip6_route_multipath_del(&cfg);
3072 	else
3073 		return ip6_route_del(&cfg);
3074 }
3075 
3076 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3077 {
3078 	struct fib6_config cfg;
3079 	int err;
3080 
3081 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3082 	if (err < 0)
3083 		return err;
3084 
3085 	if (cfg.fc_mp)
3086 		return ip6_route_multipath_add(&cfg);
3087 	else
3088 		return ip6_route_add(&cfg);
3089 }
3090 
3091 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3092 {
3093 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3094 	       + nla_total_size(16) /* RTA_SRC */
3095 	       + nla_total_size(16) /* RTA_DST */
3096 	       + nla_total_size(16) /* RTA_GATEWAY */
3097 	       + nla_total_size(16) /* RTA_PREFSRC */
3098 	       + nla_total_size(4) /* RTA_TABLE */
3099 	       + nla_total_size(4) /* RTA_IIF */
3100 	       + nla_total_size(4) /* RTA_OIF */
3101 	       + nla_total_size(4) /* RTA_PRIORITY */
3102 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3103 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3104 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3105 	       + nla_total_size(1) /* RTA_PREF */
3106 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3107 }
3108 
3109 static int rt6_fill_node(struct net *net,
3110 			 struct sk_buff *skb, struct rt6_info *rt,
3111 			 struct in6_addr *dst, struct in6_addr *src,
3112 			 int iif, int type, u32 portid, u32 seq,
3113 			 int prefix, int nowait, unsigned int flags)
3114 {
3115 	u32 metrics[RTAX_MAX];
3116 	struct rtmsg *rtm;
3117 	struct nlmsghdr *nlh;
3118 	long expires;
3119 	u32 table;
3120 
3121 	if (prefix) {	/* user wants prefix routes only */
3122 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3123 			/* success since this is not a prefix route */
3124 			return 1;
3125 		}
3126 	}
3127 
3128 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3129 	if (!nlh)
3130 		return -EMSGSIZE;
3131 
3132 	rtm = nlmsg_data(nlh);
3133 	rtm->rtm_family = AF_INET6;
3134 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3135 	rtm->rtm_src_len = rt->rt6i_src.plen;
3136 	rtm->rtm_tos = 0;
3137 	if (rt->rt6i_table)
3138 		table = rt->rt6i_table->tb6_id;
3139 	else
3140 		table = RT6_TABLE_UNSPEC;
3141 	rtm->rtm_table = table;
3142 	if (nla_put_u32(skb, RTA_TABLE, table))
3143 		goto nla_put_failure;
3144 	if (rt->rt6i_flags & RTF_REJECT) {
3145 		switch (rt->dst.error) {
3146 		case -EINVAL:
3147 			rtm->rtm_type = RTN_BLACKHOLE;
3148 			break;
3149 		case -EACCES:
3150 			rtm->rtm_type = RTN_PROHIBIT;
3151 			break;
3152 		case -EAGAIN:
3153 			rtm->rtm_type = RTN_THROW;
3154 			break;
3155 		default:
3156 			rtm->rtm_type = RTN_UNREACHABLE;
3157 			break;
3158 		}
3159 	}
3160 	else if (rt->rt6i_flags & RTF_LOCAL)
3161 		rtm->rtm_type = RTN_LOCAL;
3162 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3163 		rtm->rtm_type = RTN_LOCAL;
3164 	else
3165 		rtm->rtm_type = RTN_UNICAST;
3166 	rtm->rtm_flags = 0;
3167 	if (!netif_carrier_ok(rt->dst.dev)) {
3168 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3169 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3170 			rtm->rtm_flags |= RTNH_F_DEAD;
3171 	}
3172 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3173 	rtm->rtm_protocol = rt->rt6i_protocol;
3174 	if (rt->rt6i_flags & RTF_DYNAMIC)
3175 		rtm->rtm_protocol = RTPROT_REDIRECT;
3176 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3177 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3178 			rtm->rtm_protocol = RTPROT_RA;
3179 		else
3180 			rtm->rtm_protocol = RTPROT_KERNEL;
3181 	}
3182 
3183 	if (rt->rt6i_flags & RTF_CACHE)
3184 		rtm->rtm_flags |= RTM_F_CLONED;
3185 
3186 	if (dst) {
3187 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3188 			goto nla_put_failure;
3189 		rtm->rtm_dst_len = 128;
3190 	} else if (rtm->rtm_dst_len)
3191 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3192 			goto nla_put_failure;
3193 #ifdef CONFIG_IPV6_SUBTREES
3194 	if (src) {
3195 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3196 			goto nla_put_failure;
3197 		rtm->rtm_src_len = 128;
3198 	} else if (rtm->rtm_src_len &&
3199 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3200 		goto nla_put_failure;
3201 #endif
3202 	if (iif) {
3203 #ifdef CONFIG_IPV6_MROUTE
3204 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3205 			int err = ip6mr_get_route(net, skb, rtm, nowait,
3206 						  portid);
3207 
3208 			if (err <= 0) {
3209 				if (!nowait) {
3210 					if (err == 0)
3211 						return 0;
3212 					goto nla_put_failure;
3213 				} else {
3214 					if (err == -EMSGSIZE)
3215 						goto nla_put_failure;
3216 				}
3217 			}
3218 		} else
3219 #endif
3220 			if (nla_put_u32(skb, RTA_IIF, iif))
3221 				goto nla_put_failure;
3222 	} else if (dst) {
3223 		struct in6_addr saddr_buf;
3224 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3225 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3226 			goto nla_put_failure;
3227 	}
3228 
3229 	if (rt->rt6i_prefsrc.plen) {
3230 		struct in6_addr saddr_buf;
3231 		saddr_buf = rt->rt6i_prefsrc.addr;
3232 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3233 			goto nla_put_failure;
3234 	}
3235 
3236 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3237 	if (rt->rt6i_pmtu)
3238 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3239 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3240 		goto nla_put_failure;
3241 
3242 	if (rt->rt6i_flags & RTF_GATEWAY) {
3243 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3244 			goto nla_put_failure;
3245 	}
3246 
3247 	if (rt->dst.dev &&
3248 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3249 		goto nla_put_failure;
3250 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3251 		goto nla_put_failure;
3252 
3253 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3254 
3255 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3256 		goto nla_put_failure;
3257 
3258 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3259 		goto nla_put_failure;
3260 
3261 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3262 
3263 	nlmsg_end(skb, nlh);
3264 	return 0;
3265 
3266 nla_put_failure:
3267 	nlmsg_cancel(skb, nlh);
3268 	return -EMSGSIZE;
3269 }
3270 
3271 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3272 {
3273 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3274 	int prefix;
3275 
3276 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3277 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3278 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3279 	} else
3280 		prefix = 0;
3281 
3282 	return rt6_fill_node(arg->net,
3283 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3284 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3285 		     prefix, 0, NLM_F_MULTI);
3286 }
3287 
3288 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3289 {
3290 	struct net *net = sock_net(in_skb->sk);
3291 	struct nlattr *tb[RTA_MAX+1];
3292 	struct rt6_info *rt;
3293 	struct sk_buff *skb;
3294 	struct rtmsg *rtm;
3295 	struct flowi6 fl6;
3296 	int err, iif = 0, oif = 0;
3297 
3298 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3299 	if (err < 0)
3300 		goto errout;
3301 
3302 	err = -EINVAL;
3303 	memset(&fl6, 0, sizeof(fl6));
3304 	rtm = nlmsg_data(nlh);
3305 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3306 
3307 	if (tb[RTA_SRC]) {
3308 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3309 			goto errout;
3310 
3311 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3312 	}
3313 
3314 	if (tb[RTA_DST]) {
3315 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3316 			goto errout;
3317 
3318 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3319 	}
3320 
3321 	if (tb[RTA_IIF])
3322 		iif = nla_get_u32(tb[RTA_IIF]);
3323 
3324 	if (tb[RTA_OIF])
3325 		oif = nla_get_u32(tb[RTA_OIF]);
3326 
3327 	if (tb[RTA_MARK])
3328 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3329 
3330 	if (iif) {
3331 		struct net_device *dev;
3332 		int flags = 0;
3333 
3334 		dev = __dev_get_by_index(net, iif);
3335 		if (!dev) {
3336 			err = -ENODEV;
3337 			goto errout;
3338 		}
3339 
3340 		fl6.flowi6_iif = iif;
3341 
3342 		if (!ipv6_addr_any(&fl6.saddr))
3343 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3344 
3345 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3346 							       flags);
3347 	} else {
3348 		fl6.flowi6_oif = oif;
3349 
3350 		if (netif_index_is_l3_master(net, oif)) {
3351 			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3352 					   FLOWI_FLAG_SKIP_NH_OIF;
3353 		}
3354 
3355 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3356 	}
3357 
3358 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3359 	if (!skb) {
3360 		ip6_rt_put(rt);
3361 		err = -ENOBUFS;
3362 		goto errout;
3363 	}
3364 
3365 	/* Reserve room for dummy headers, this skb can pass
3366 	   through good chunk of routing engine.
3367 	 */
3368 	skb_reset_mac_header(skb);
3369 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3370 
3371 	skb_dst_set(skb, &rt->dst);
3372 
3373 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3374 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3375 			    nlh->nlmsg_seq, 0, 0, 0);
3376 	if (err < 0) {
3377 		kfree_skb(skb);
3378 		goto errout;
3379 	}
3380 
3381 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3382 errout:
3383 	return err;
3384 }
3385 
3386 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3387 		     unsigned int nlm_flags)
3388 {
3389 	struct sk_buff *skb;
3390 	struct net *net = info->nl_net;
3391 	u32 seq;
3392 	int err;
3393 
3394 	err = -ENOBUFS;
3395 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3396 
3397 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3398 	if (!skb)
3399 		goto errout;
3400 
3401 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3402 				event, info->portid, seq, 0, 0, nlm_flags);
3403 	if (err < 0) {
3404 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3405 		WARN_ON(err == -EMSGSIZE);
3406 		kfree_skb(skb);
3407 		goto errout;
3408 	}
3409 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3410 		    info->nlh, gfp_any());
3411 	return;
3412 errout:
3413 	if (err < 0)
3414 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3415 }
3416 
3417 static int ip6_route_dev_notify(struct notifier_block *this,
3418 				unsigned long event, void *ptr)
3419 {
3420 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3421 	struct net *net = dev_net(dev);
3422 
3423 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3424 		net->ipv6.ip6_null_entry->dst.dev = dev;
3425 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3426 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3427 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3428 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3429 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3430 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3431 #endif
3432 	}
3433 
3434 	return NOTIFY_OK;
3435 }
3436 
3437 /*
3438  *	/proc
3439  */
3440 
3441 #ifdef CONFIG_PROC_FS
3442 
3443 static const struct file_operations ipv6_route_proc_fops = {
3444 	.owner		= THIS_MODULE,
3445 	.open		= ipv6_route_open,
3446 	.read		= seq_read,
3447 	.llseek		= seq_lseek,
3448 	.release	= seq_release_net,
3449 };
3450 
3451 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3452 {
3453 	struct net *net = (struct net *)seq->private;
3454 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3455 		   net->ipv6.rt6_stats->fib_nodes,
3456 		   net->ipv6.rt6_stats->fib_route_nodes,
3457 		   net->ipv6.rt6_stats->fib_rt_alloc,
3458 		   net->ipv6.rt6_stats->fib_rt_entries,
3459 		   net->ipv6.rt6_stats->fib_rt_cache,
3460 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3461 		   net->ipv6.rt6_stats->fib_discarded_routes);
3462 
3463 	return 0;
3464 }
3465 
3466 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3467 {
3468 	return single_open_net(inode, file, rt6_stats_seq_show);
3469 }
3470 
3471 static const struct file_operations rt6_stats_seq_fops = {
3472 	.owner	 = THIS_MODULE,
3473 	.open	 = rt6_stats_seq_open,
3474 	.read	 = seq_read,
3475 	.llseek	 = seq_lseek,
3476 	.release = single_release_net,
3477 };
3478 #endif	/* CONFIG_PROC_FS */
3479 
3480 #ifdef CONFIG_SYSCTL
3481 
3482 static
3483 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3484 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3485 {
3486 	struct net *net;
3487 	int delay;
3488 	if (!write)
3489 		return -EINVAL;
3490 
3491 	net = (struct net *)ctl->extra1;
3492 	delay = net->ipv6.sysctl.flush_delay;
3493 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3494 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3495 	return 0;
3496 }
3497 
3498 struct ctl_table ipv6_route_table_template[] = {
3499 	{
3500 		.procname	=	"flush",
3501 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3502 		.maxlen		=	sizeof(int),
3503 		.mode		=	0200,
3504 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3505 	},
3506 	{
3507 		.procname	=	"gc_thresh",
3508 		.data		=	&ip6_dst_ops_template.gc_thresh,
3509 		.maxlen		=	sizeof(int),
3510 		.mode		=	0644,
3511 		.proc_handler	=	proc_dointvec,
3512 	},
3513 	{
3514 		.procname	=	"max_size",
3515 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3516 		.maxlen		=	sizeof(int),
3517 		.mode		=	0644,
3518 		.proc_handler	=	proc_dointvec,
3519 	},
3520 	{
3521 		.procname	=	"gc_min_interval",
3522 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3523 		.maxlen		=	sizeof(int),
3524 		.mode		=	0644,
3525 		.proc_handler	=	proc_dointvec_jiffies,
3526 	},
3527 	{
3528 		.procname	=	"gc_timeout",
3529 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3530 		.maxlen		=	sizeof(int),
3531 		.mode		=	0644,
3532 		.proc_handler	=	proc_dointvec_jiffies,
3533 	},
3534 	{
3535 		.procname	=	"gc_interval",
3536 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3537 		.maxlen		=	sizeof(int),
3538 		.mode		=	0644,
3539 		.proc_handler	=	proc_dointvec_jiffies,
3540 	},
3541 	{
3542 		.procname	=	"gc_elasticity",
3543 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3544 		.maxlen		=	sizeof(int),
3545 		.mode		=	0644,
3546 		.proc_handler	=	proc_dointvec,
3547 	},
3548 	{
3549 		.procname	=	"mtu_expires",
3550 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3551 		.maxlen		=	sizeof(int),
3552 		.mode		=	0644,
3553 		.proc_handler	=	proc_dointvec_jiffies,
3554 	},
3555 	{
3556 		.procname	=	"min_adv_mss",
3557 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3558 		.maxlen		=	sizeof(int),
3559 		.mode		=	0644,
3560 		.proc_handler	=	proc_dointvec,
3561 	},
3562 	{
3563 		.procname	=	"gc_min_interval_ms",
3564 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3565 		.maxlen		=	sizeof(int),
3566 		.mode		=	0644,
3567 		.proc_handler	=	proc_dointvec_ms_jiffies,
3568 	},
3569 	{ }
3570 };
3571 
3572 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3573 {
3574 	struct ctl_table *table;
3575 
3576 	table = kmemdup(ipv6_route_table_template,
3577 			sizeof(ipv6_route_table_template),
3578 			GFP_KERNEL);
3579 
3580 	if (table) {
3581 		table[0].data = &net->ipv6.sysctl.flush_delay;
3582 		table[0].extra1 = net;
3583 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3584 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3585 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3586 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3587 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3588 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3589 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3590 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3591 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3592 
3593 		/* Don't export sysctls to unprivileged users */
3594 		if (net->user_ns != &init_user_ns)
3595 			table[0].procname = NULL;
3596 	}
3597 
3598 	return table;
3599 }
3600 #endif
3601 
3602 static int __net_init ip6_route_net_init(struct net *net)
3603 {
3604 	int ret = -ENOMEM;
3605 
3606 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3607 	       sizeof(net->ipv6.ip6_dst_ops));
3608 
3609 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3610 		goto out_ip6_dst_ops;
3611 
3612 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3613 					   sizeof(*net->ipv6.ip6_null_entry),
3614 					   GFP_KERNEL);
3615 	if (!net->ipv6.ip6_null_entry)
3616 		goto out_ip6_dst_entries;
3617 	net->ipv6.ip6_null_entry->dst.path =
3618 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3619 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3620 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3621 			 ip6_template_metrics, true);
3622 
3623 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3624 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3625 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3626 					       GFP_KERNEL);
3627 	if (!net->ipv6.ip6_prohibit_entry)
3628 		goto out_ip6_null_entry;
3629 	net->ipv6.ip6_prohibit_entry->dst.path =
3630 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3631 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3632 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3633 			 ip6_template_metrics, true);
3634 
3635 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3636 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3637 					       GFP_KERNEL);
3638 	if (!net->ipv6.ip6_blk_hole_entry)
3639 		goto out_ip6_prohibit_entry;
3640 	net->ipv6.ip6_blk_hole_entry->dst.path =
3641 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3642 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3643 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3644 			 ip6_template_metrics, true);
3645 #endif
3646 
3647 	net->ipv6.sysctl.flush_delay = 0;
3648 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3649 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3650 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3651 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3652 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3653 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3654 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3655 
3656 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3657 
3658 	ret = 0;
3659 out:
3660 	return ret;
3661 
3662 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3663 out_ip6_prohibit_entry:
3664 	kfree(net->ipv6.ip6_prohibit_entry);
3665 out_ip6_null_entry:
3666 	kfree(net->ipv6.ip6_null_entry);
3667 #endif
3668 out_ip6_dst_entries:
3669 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3670 out_ip6_dst_ops:
3671 	goto out;
3672 }
3673 
3674 static void __net_exit ip6_route_net_exit(struct net *net)
3675 {
3676 	kfree(net->ipv6.ip6_null_entry);
3677 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3678 	kfree(net->ipv6.ip6_prohibit_entry);
3679 	kfree(net->ipv6.ip6_blk_hole_entry);
3680 #endif
3681 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3682 }
3683 
3684 static int __net_init ip6_route_net_init_late(struct net *net)
3685 {
3686 #ifdef CONFIG_PROC_FS
3687 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3688 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3689 #endif
3690 	return 0;
3691 }
3692 
3693 static void __net_exit ip6_route_net_exit_late(struct net *net)
3694 {
3695 #ifdef CONFIG_PROC_FS
3696 	remove_proc_entry("ipv6_route", net->proc_net);
3697 	remove_proc_entry("rt6_stats", net->proc_net);
3698 #endif
3699 }
3700 
3701 static struct pernet_operations ip6_route_net_ops = {
3702 	.init = ip6_route_net_init,
3703 	.exit = ip6_route_net_exit,
3704 };
3705 
3706 static int __net_init ipv6_inetpeer_init(struct net *net)
3707 {
3708 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3709 
3710 	if (!bp)
3711 		return -ENOMEM;
3712 	inet_peer_base_init(bp);
3713 	net->ipv6.peers = bp;
3714 	return 0;
3715 }
3716 
3717 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3718 {
3719 	struct inet_peer_base *bp = net->ipv6.peers;
3720 
3721 	net->ipv6.peers = NULL;
3722 	inetpeer_invalidate_tree(bp);
3723 	kfree(bp);
3724 }
3725 
3726 static struct pernet_operations ipv6_inetpeer_ops = {
3727 	.init	=	ipv6_inetpeer_init,
3728 	.exit	=	ipv6_inetpeer_exit,
3729 };
3730 
3731 static struct pernet_operations ip6_route_net_late_ops = {
3732 	.init = ip6_route_net_init_late,
3733 	.exit = ip6_route_net_exit_late,
3734 };
3735 
3736 static struct notifier_block ip6_route_dev_notifier = {
3737 	.notifier_call = ip6_route_dev_notify,
3738 	.priority = 0,
3739 };
3740 
3741 int __init ip6_route_init(void)
3742 {
3743 	int ret;
3744 	int cpu;
3745 
3746 	ret = -ENOMEM;
3747 	ip6_dst_ops_template.kmem_cachep =
3748 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3749 				  SLAB_HWCACHE_ALIGN, NULL);
3750 	if (!ip6_dst_ops_template.kmem_cachep)
3751 		goto out;
3752 
3753 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3754 	if (ret)
3755 		goto out_kmem_cache;
3756 
3757 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3758 	if (ret)
3759 		goto out_dst_entries;
3760 
3761 	ret = register_pernet_subsys(&ip6_route_net_ops);
3762 	if (ret)
3763 		goto out_register_inetpeer;
3764 
3765 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3766 
3767 	/* Registering of the loopback is done before this portion of code,
3768 	 * the loopback reference in rt6_info will not be taken, do it
3769 	 * manually for init_net */
3770 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3771 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3772   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3773 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3774 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3775 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3776 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3777   #endif
3778 	ret = fib6_init();
3779 	if (ret)
3780 		goto out_register_subsys;
3781 
3782 	ret = xfrm6_init();
3783 	if (ret)
3784 		goto out_fib6_init;
3785 
3786 	ret = fib6_rules_init();
3787 	if (ret)
3788 		goto xfrm6_init;
3789 
3790 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3791 	if (ret)
3792 		goto fib6_rules_init;
3793 
3794 	ret = -ENOBUFS;
3795 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3796 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3797 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3798 		goto out_register_late_subsys;
3799 
3800 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3801 	if (ret)
3802 		goto out_register_late_subsys;
3803 
3804 	for_each_possible_cpu(cpu) {
3805 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3806 
3807 		INIT_LIST_HEAD(&ul->head);
3808 		spin_lock_init(&ul->lock);
3809 	}
3810 
3811 out:
3812 	return ret;
3813 
3814 out_register_late_subsys:
3815 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3816 fib6_rules_init:
3817 	fib6_rules_cleanup();
3818 xfrm6_init:
3819 	xfrm6_fini();
3820 out_fib6_init:
3821 	fib6_gc_cleanup();
3822 out_register_subsys:
3823 	unregister_pernet_subsys(&ip6_route_net_ops);
3824 out_register_inetpeer:
3825 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3826 out_dst_entries:
3827 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3828 out_kmem_cache:
3829 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3830 	goto out;
3831 }
3832 
3833 void ip6_route_cleanup(void)
3834 {
3835 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3836 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3837 	fib6_rules_cleanup();
3838 	xfrm6_fini();
3839 	fib6_gc_cleanup();
3840 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3841 	unregister_pernet_subsys(&ip6_route_net_ops);
3842 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3843 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3844 }
3845