xref: /linux/net/ipv6/route.c (revision cd65cd95128781ca59d06611270fcbd9b4a7cf8d)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
74 enum rt6_nud_state {
75 	RT6_NUD_FAIL_HARD = -3,
76 	RT6_NUD_FAIL_PROBE = -2,
77 	RT6_NUD_FAIL_DO_RR = -1,
78 	RT6_NUD_SUCCEED = 1
79 };
80 
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100 static size_t rt6_nlmsg_size(struct fib6_info *rt);
101 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 			 struct fib6_info *rt, struct dst_entry *dst,
103 			 struct in6_addr *dest, struct in6_addr *src,
104 			 int iif, int type, u32 portid, u32 seq,
105 			 unsigned int flags);
106 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
107 					   struct in6_addr *daddr,
108 					   struct in6_addr *saddr);
109 
110 #ifdef CONFIG_IPV6_ROUTE_INFO
111 static struct fib6_info *rt6_add_route_info(struct net *net,
112 					   const struct in6_addr *prefix, int prefixlen,
113 					   const struct in6_addr *gwaddr,
114 					   struct net_device *dev,
115 					   unsigned int pref);
116 static struct fib6_info *rt6_get_route_info(struct net *net,
117 					   const struct in6_addr *prefix, int prefixlen,
118 					   const struct in6_addr *gwaddr,
119 					   struct net_device *dev);
120 #endif
121 
122 struct uncached_list {
123 	spinlock_t		lock;
124 	struct list_head	head;
125 };
126 
127 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128 
129 void rt6_uncached_list_add(struct rt6_info *rt)
130 {
131 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132 
133 	rt->rt6i_uncached_list = ul;
134 
135 	spin_lock_bh(&ul->lock);
136 	list_add_tail(&rt->rt6i_uncached, &ul->head);
137 	spin_unlock_bh(&ul->lock);
138 }
139 
140 void rt6_uncached_list_del(struct rt6_info *rt)
141 {
142 	if (!list_empty(&rt->rt6i_uncached)) {
143 		struct uncached_list *ul = rt->rt6i_uncached_list;
144 		struct net *net = dev_net(rt->dst.dev);
145 
146 		spin_lock_bh(&ul->lock);
147 		list_del(&rt->rt6i_uncached);
148 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
149 		spin_unlock_bh(&ul->lock);
150 	}
151 }
152 
153 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154 {
155 	struct net_device *loopback_dev = net->loopback_dev;
156 	int cpu;
157 
158 	if (dev == loopback_dev)
159 		return;
160 
161 	for_each_possible_cpu(cpu) {
162 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 		struct rt6_info *rt;
164 
165 		spin_lock_bh(&ul->lock);
166 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 			struct inet6_dev *rt_idev = rt->rt6i_idev;
168 			struct net_device *rt_dev = rt->dst.dev;
169 
170 			if (rt_idev->dev == dev) {
171 				rt->rt6i_idev = in6_dev_get(loopback_dev);
172 				in6_dev_put(rt_idev);
173 			}
174 
175 			if (rt_dev == dev) {
176 				rt->dst.dev = loopback_dev;
177 				dev_hold(rt->dst.dev);
178 				dev_put(rt_dev);
179 			}
180 		}
181 		spin_unlock_bh(&ul->lock);
182 	}
183 }
184 
185 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
186 					     struct sk_buff *skb,
187 					     const void *daddr)
188 {
189 	if (!ipv6_addr_any(p))
190 		return (const void *) p;
191 	else if (skb)
192 		return &ipv6_hdr(skb)->daddr;
193 	return daddr;
194 }
195 
196 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 				   struct net_device *dev,
198 				   struct sk_buff *skb,
199 				   const void *daddr)
200 {
201 	struct neighbour *n;
202 
203 	daddr = choose_neigh_daddr(gw, skb, daddr);
204 	n = __ipv6_neigh_lookup(dev, daddr);
205 	if (n)
206 		return n;
207 	return neigh_create(&nd_tbl, daddr, dev);
208 }
209 
210 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 					      struct sk_buff *skb,
212 					      const void *daddr)
213 {
214 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215 
216 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
217 }
218 
219 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220 {
221 	struct net_device *dev = dst->dev;
222 	struct rt6_info *rt = (struct rt6_info *)dst;
223 
224 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
225 	if (!daddr)
226 		return;
227 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 		return;
229 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 		return;
231 	__ipv6_confirm_neigh(dev, daddr);
232 }
233 
234 static struct dst_ops ip6_dst_ops_template = {
235 	.family			=	AF_INET6,
236 	.gc			=	ip6_dst_gc,
237 	.gc_thresh		=	1024,
238 	.check			=	ip6_dst_check,
239 	.default_advmss		=	ip6_default_advmss,
240 	.mtu			=	ip6_mtu,
241 	.cow_metrics		=	dst_cow_metrics_generic,
242 	.destroy		=	ip6_dst_destroy,
243 	.ifdown			=	ip6_dst_ifdown,
244 	.negative_advice	=	ip6_negative_advice,
245 	.link_failure		=	ip6_link_failure,
246 	.update_pmtu		=	ip6_rt_update_pmtu,
247 	.redirect		=	rt6_do_redirect,
248 	.local_out		=	__ip6_local_out,
249 	.neigh_lookup		=	ip6_dst_neigh_lookup,
250 	.confirm_neigh		=	ip6_confirm_neigh,
251 };
252 
253 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
254 {
255 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256 
257 	return mtu ? : dst->dev->mtu;
258 }
259 
260 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 					 struct sk_buff *skb, u32 mtu)
262 {
263 }
264 
265 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
266 				      struct sk_buff *skb)
267 {
268 }
269 
270 static struct dst_ops ip6_dst_blackhole_ops = {
271 	.family			=	AF_INET6,
272 	.destroy		=	ip6_dst_destroy,
273 	.check			=	ip6_dst_check,
274 	.mtu			=	ip6_blackhole_mtu,
275 	.default_advmss		=	ip6_default_advmss,
276 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
277 	.redirect		=	ip6_rt_blackhole_redirect,
278 	.cow_metrics		=	dst_cow_metrics_generic,
279 	.neigh_lookup		=	ip6_dst_neigh_lookup,
280 };
281 
282 static const u32 ip6_template_metrics[RTAX_MAX] = {
283 	[RTAX_HOPLIMIT - 1] = 0,
284 };
285 
286 static const struct fib6_info fib6_null_entry_template = {
287 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
288 	.fib6_protocol  = RTPROT_KERNEL,
289 	.fib6_metric	= ~(u32)0,
290 	.fib6_ref	= ATOMIC_INIT(1),
291 	.fib6_type	= RTN_UNREACHABLE,
292 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
293 };
294 
295 static const struct rt6_info ip6_null_entry_template = {
296 	.dst = {
297 		.__refcnt	= ATOMIC_INIT(1),
298 		.__use		= 1,
299 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
300 		.error		= -ENETUNREACH,
301 		.input		= ip6_pkt_discard,
302 		.output		= ip6_pkt_discard_out,
303 	},
304 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
305 };
306 
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308 
309 static const struct rt6_info ip6_prohibit_entry_template = {
310 	.dst = {
311 		.__refcnt	= ATOMIC_INIT(1),
312 		.__use		= 1,
313 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
314 		.error		= -EACCES,
315 		.input		= ip6_pkt_prohibit,
316 		.output		= ip6_pkt_prohibit_out,
317 	},
318 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
319 };
320 
321 static const struct rt6_info ip6_blk_hole_entry_template = {
322 	.dst = {
323 		.__refcnt	= ATOMIC_INIT(1),
324 		.__use		= 1,
325 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
326 		.error		= -EINVAL,
327 		.input		= dst_discard,
328 		.output		= dst_discard_out,
329 	},
330 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
331 };
332 
333 #endif
334 
335 static void rt6_info_init(struct rt6_info *rt)
336 {
337 	struct dst_entry *dst = &rt->dst;
338 
339 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
340 	INIT_LIST_HEAD(&rt->rt6i_uncached);
341 }
342 
343 /* allocate dst with ip6_dst_ops */
344 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 			       int flags)
346 {
347 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
348 					1, DST_OBSOLETE_FORCE_CHK, flags);
349 
350 	if (rt) {
351 		rt6_info_init(rt);
352 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 	}
354 
355 	return rt;
356 }
357 EXPORT_SYMBOL(ip6_dst_alloc);
358 
359 static void ip6_dst_destroy(struct dst_entry *dst)
360 {
361 	struct rt6_info *rt = (struct rt6_info *)dst;
362 	struct fib6_info *from;
363 	struct inet6_dev *idev;
364 
365 	dst_destroy_metrics_generic(dst);
366 	rt6_uncached_list_del(rt);
367 
368 	idev = rt->rt6i_idev;
369 	if (idev) {
370 		rt->rt6i_idev = NULL;
371 		in6_dev_put(idev);
372 	}
373 
374 	rcu_read_lock();
375 	from = rcu_dereference(rt->from);
376 	rcu_assign_pointer(rt->from, NULL);
377 	fib6_info_release(from);
378 	rcu_read_unlock();
379 }
380 
381 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
382 			   int how)
383 {
384 	struct rt6_info *rt = (struct rt6_info *)dst;
385 	struct inet6_dev *idev = rt->rt6i_idev;
386 	struct net_device *loopback_dev =
387 		dev_net(dev)->loopback_dev;
388 
389 	if (idev && idev->dev != loopback_dev) {
390 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
391 		if (loopback_idev) {
392 			rt->rt6i_idev = loopback_idev;
393 			in6_dev_put(idev);
394 		}
395 	}
396 }
397 
398 static bool __rt6_check_expired(const struct rt6_info *rt)
399 {
400 	if (rt->rt6i_flags & RTF_EXPIRES)
401 		return time_after(jiffies, rt->dst.expires);
402 	else
403 		return false;
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	struct fib6_info *from;
409 
410 	from = rcu_dereference(rt->from);
411 
412 	if (rt->rt6i_flags & RTF_EXPIRES) {
413 		if (time_after(jiffies, rt->dst.expires))
414 			return true;
415 	} else if (from) {
416 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
417 			fib6_check_expired(from);
418 	}
419 	return false;
420 }
421 
422 static struct fib6_info *rt6_multipath_select(const struct net *net,
423 					      struct fib6_info *match,
424 					     struct flowi6 *fl6, int oif,
425 					     const struct sk_buff *skb,
426 					     int strict)
427 {
428 	struct fib6_info *sibling, *next_sibling;
429 
430 	/* We might have already computed the hash for ICMPv6 errors. In such
431 	 * case it will always be non-zero. Otherwise now is the time to do it.
432 	 */
433 	if (!fl6->mp_hash)
434 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
435 
436 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
437 		return match;
438 
439 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
440 				 fib6_siblings) {
441 		int nh_upper_bound;
442 
443 		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
444 		if (fl6->mp_hash > nh_upper_bound)
445 			continue;
446 		if (rt6_score_route(sibling, oif, strict) < 0)
447 			break;
448 		match = sibling;
449 		break;
450 	}
451 
452 	return match;
453 }
454 
455 /*
456  *	Route lookup. rcu_read_lock() should be held.
457  */
458 
459 static inline struct fib6_info *rt6_device_match(struct net *net,
460 						 struct fib6_info *rt,
461 						    const struct in6_addr *saddr,
462 						    int oif,
463 						    int flags)
464 {
465 	struct fib6_info *sprt;
466 
467 	if (!oif && ipv6_addr_any(saddr) &&
468 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
469 		return rt;
470 
471 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
472 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
473 
474 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
475 			continue;
476 
477 		if (oif) {
478 			if (dev->ifindex == oif)
479 				return sprt;
480 		} else {
481 			if (ipv6_chk_addr(net, saddr, dev,
482 					  flags & RT6_LOOKUP_F_IFACE))
483 				return sprt;
484 		}
485 	}
486 
487 	if (oif && flags & RT6_LOOKUP_F_IFACE)
488 		return net->ipv6.fib6_null_entry;
489 
490 	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
491 }
492 
493 #ifdef CONFIG_IPV6_ROUTER_PREF
494 struct __rt6_probe_work {
495 	struct work_struct work;
496 	struct in6_addr target;
497 	struct net_device *dev;
498 };
499 
500 static void rt6_probe_deferred(struct work_struct *w)
501 {
502 	struct in6_addr mcaddr;
503 	struct __rt6_probe_work *work =
504 		container_of(w, struct __rt6_probe_work, work);
505 
506 	addrconf_addr_solict_mult(&work->target, &mcaddr);
507 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
508 	dev_put(work->dev);
509 	kfree(work);
510 }
511 
512 static void rt6_probe(struct fib6_info *rt)
513 {
514 	struct __rt6_probe_work *work;
515 	const struct in6_addr *nh_gw;
516 	struct neighbour *neigh;
517 	struct net_device *dev;
518 
519 	/*
520 	 * Okay, this does not seem to be appropriate
521 	 * for now, however, we need to check if it
522 	 * is really so; aka Router Reachability Probing.
523 	 *
524 	 * Router Reachability Probe MUST be rate-limited
525 	 * to no more than one per minute.
526 	 */
527 	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
528 		return;
529 
530 	nh_gw = &rt->fib6_nh.nh_gw;
531 	dev = rt->fib6_nh.nh_dev;
532 	rcu_read_lock_bh();
533 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
534 	if (neigh) {
535 		struct inet6_dev *idev;
536 
537 		if (neigh->nud_state & NUD_VALID)
538 			goto out;
539 
540 		idev = __in6_dev_get(dev);
541 		work = NULL;
542 		write_lock(&neigh->lock);
543 		if (!(neigh->nud_state & NUD_VALID) &&
544 		    time_after(jiffies,
545 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
546 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
547 			if (work)
548 				__neigh_set_probe_once(neigh);
549 		}
550 		write_unlock(&neigh->lock);
551 	} else {
552 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 	}
554 
555 	if (work) {
556 		INIT_WORK(&work->work, rt6_probe_deferred);
557 		work->target = *nh_gw;
558 		dev_hold(dev);
559 		work->dev = dev;
560 		schedule_work(&work->work);
561 	}
562 
563 out:
564 	rcu_read_unlock_bh();
565 }
566 #else
567 static inline void rt6_probe(struct fib6_info *rt)
568 {
569 }
570 #endif
571 
572 /*
573  * Default Router Selection (RFC 2461 6.3.6)
574  */
575 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
576 {
577 	const struct net_device *dev = rt->fib6_nh.nh_dev;
578 
579 	if (!oif || dev->ifindex == oif)
580 		return 2;
581 	return 0;
582 }
583 
584 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
585 {
586 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
587 	struct neighbour *neigh;
588 
589 	if (rt->fib6_flags & RTF_NONEXTHOP ||
590 	    !(rt->fib6_flags & RTF_GATEWAY))
591 		return RT6_NUD_SUCCEED;
592 
593 	rcu_read_lock_bh();
594 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
595 					  &rt->fib6_nh.nh_gw);
596 	if (neigh) {
597 		read_lock(&neigh->lock);
598 		if (neigh->nud_state & NUD_VALID)
599 			ret = RT6_NUD_SUCCEED;
600 #ifdef CONFIG_IPV6_ROUTER_PREF
601 		else if (!(neigh->nud_state & NUD_FAILED))
602 			ret = RT6_NUD_SUCCEED;
603 		else
604 			ret = RT6_NUD_FAIL_PROBE;
605 #endif
606 		read_unlock(&neigh->lock);
607 	} else {
608 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
609 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
610 	}
611 	rcu_read_unlock_bh();
612 
613 	return ret;
614 }
615 
616 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
617 {
618 	int m;
619 
620 	m = rt6_check_dev(rt, oif);
621 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
622 		return RT6_NUD_FAIL_HARD;
623 #ifdef CONFIG_IPV6_ROUTER_PREF
624 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
625 #endif
626 	if (strict & RT6_LOOKUP_F_REACHABLE) {
627 		int n = rt6_check_neigh(rt);
628 		if (n < 0)
629 			return n;
630 	}
631 	return m;
632 }
633 
634 /* called with rc_read_lock held */
635 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
636 {
637 	const struct net_device *dev = fib6_info_nh_dev(f6i);
638 	bool rc = false;
639 
640 	if (dev) {
641 		const struct inet6_dev *idev = __in6_dev_get(dev);
642 
643 		rc = !!idev->cnf.ignore_routes_with_linkdown;
644 	}
645 
646 	return rc;
647 }
648 
649 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
650 				   int *mpri, struct fib6_info *match,
651 				   bool *do_rr)
652 {
653 	int m;
654 	bool match_do_rr = false;
655 
656 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
657 		goto out;
658 
659 	if (fib6_ignore_linkdown(rt) &&
660 	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
661 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
662 		goto out;
663 
664 	if (fib6_check_expired(rt))
665 		goto out;
666 
667 	m = rt6_score_route(rt, oif, strict);
668 	if (m == RT6_NUD_FAIL_DO_RR) {
669 		match_do_rr = true;
670 		m = 0; /* lowest valid score */
671 	} else if (m == RT6_NUD_FAIL_HARD) {
672 		goto out;
673 	}
674 
675 	if (strict & RT6_LOOKUP_F_REACHABLE)
676 		rt6_probe(rt);
677 
678 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
679 	if (m > *mpri) {
680 		*do_rr = match_do_rr;
681 		*mpri = m;
682 		match = rt;
683 	}
684 out:
685 	return match;
686 }
687 
688 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
689 				     struct fib6_info *leaf,
690 				     struct fib6_info *rr_head,
691 				     u32 metric, int oif, int strict,
692 				     bool *do_rr)
693 {
694 	struct fib6_info *rt, *match, *cont;
695 	int mpri = -1;
696 
697 	match = NULL;
698 	cont = NULL;
699 	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
700 		if (rt->fib6_metric != metric) {
701 			cont = rt;
702 			break;
703 		}
704 
705 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
706 	}
707 
708 	for (rt = leaf; rt && rt != rr_head;
709 	     rt = rcu_dereference(rt->fib6_next)) {
710 		if (rt->fib6_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	if (match || !cont)
719 		return match;
720 
721 	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
722 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
723 
724 	return match;
725 }
726 
727 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
728 				   int oif, int strict)
729 {
730 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
731 	struct fib6_info *match, *rt0;
732 	bool do_rr = false;
733 	int key_plen;
734 
735 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
736 		return net->ipv6.fib6_null_entry;
737 
738 	rt0 = rcu_dereference(fn->rr_ptr);
739 	if (!rt0)
740 		rt0 = leaf;
741 
742 	/* Double check to make sure fn is not an intermediate node
743 	 * and fn->leaf does not points to its child's leaf
744 	 * (This might happen if all routes under fn are deleted from
745 	 * the tree and fib6_repair_tree() is called on the node.)
746 	 */
747 	key_plen = rt0->fib6_dst.plen;
748 #ifdef CONFIG_IPV6_SUBTREES
749 	if (rt0->fib6_src.plen)
750 		key_plen = rt0->fib6_src.plen;
751 #endif
752 	if (fn->fn_bit != key_plen)
753 		return net->ipv6.fib6_null_entry;
754 
755 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
756 			     &do_rr);
757 
758 	if (do_rr) {
759 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
760 
761 		/* no entries matched; do round-robin */
762 		if (!next || next->fib6_metric != rt0->fib6_metric)
763 			next = leaf;
764 
765 		if (next != rt0) {
766 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
767 			/* make sure next is not being deleted from the tree */
768 			if (next->fib6_node)
769 				rcu_assign_pointer(fn->rr_ptr, next);
770 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
771 		}
772 	}
773 
774 	return match ? match : net->ipv6.fib6_null_entry;
775 }
776 
777 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
778 {
779 	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
780 }
781 
782 #ifdef CONFIG_IPV6_ROUTE_INFO
783 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
784 		  const struct in6_addr *gwaddr)
785 {
786 	struct net *net = dev_net(dev);
787 	struct route_info *rinfo = (struct route_info *) opt;
788 	struct in6_addr prefix_buf, *prefix;
789 	unsigned int pref;
790 	unsigned long lifetime;
791 	struct fib6_info *rt;
792 
793 	if (len < sizeof(struct route_info)) {
794 		return -EINVAL;
795 	}
796 
797 	/* Sanity check for prefix_len and length */
798 	if (rinfo->length > 3) {
799 		return -EINVAL;
800 	} else if (rinfo->prefix_len > 128) {
801 		return -EINVAL;
802 	} else if (rinfo->prefix_len > 64) {
803 		if (rinfo->length < 2) {
804 			return -EINVAL;
805 		}
806 	} else if (rinfo->prefix_len > 0) {
807 		if (rinfo->length < 1) {
808 			return -EINVAL;
809 		}
810 	}
811 
812 	pref = rinfo->route_pref;
813 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
814 		return -EINVAL;
815 
816 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
817 
818 	if (rinfo->length == 3)
819 		prefix = (struct in6_addr *)rinfo->prefix;
820 	else {
821 		/* this function is safe */
822 		ipv6_addr_prefix(&prefix_buf,
823 				 (struct in6_addr *)rinfo->prefix,
824 				 rinfo->prefix_len);
825 		prefix = &prefix_buf;
826 	}
827 
828 	if (rinfo->prefix_len == 0)
829 		rt = rt6_get_dflt_router(net, gwaddr, dev);
830 	else
831 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
832 					gwaddr, dev);
833 
834 	if (rt && !lifetime) {
835 		ip6_del_rt(net, rt);
836 		rt = NULL;
837 	}
838 
839 	if (!rt && lifetime)
840 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
841 					dev, pref);
842 	else if (rt)
843 		rt->fib6_flags = RTF_ROUTEINFO |
844 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
845 
846 	if (rt) {
847 		if (!addrconf_finite_timeout(lifetime))
848 			fib6_clean_expires(rt);
849 		else
850 			fib6_set_expires(rt, jiffies + HZ * lifetime);
851 
852 		fib6_info_release(rt);
853 	}
854 	return 0;
855 }
856 #endif
857 
858 /*
859  *	Misc support functions
860  */
861 
862 /* called with rcu_lock held */
863 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
864 {
865 	struct net_device *dev = rt->fib6_nh.nh_dev;
866 
867 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
868 		/* for copies of local routes, dst->dev needs to be the
869 		 * device if it is a master device, the master device if
870 		 * device is enslaved, and the loopback as the default
871 		 */
872 		if (netif_is_l3_slave(dev) &&
873 		    !rt6_need_strict(&rt->fib6_dst.addr))
874 			dev = l3mdev_master_dev_rcu(dev);
875 		else if (!netif_is_l3_master(dev))
876 			dev = dev_net(dev)->loopback_dev;
877 		/* last case is netif_is_l3_master(dev) is true in which
878 		 * case we want dev returned to be dev
879 		 */
880 	}
881 
882 	return dev;
883 }
884 
885 static const int fib6_prop[RTN_MAX + 1] = {
886 	[RTN_UNSPEC]	= 0,
887 	[RTN_UNICAST]	= 0,
888 	[RTN_LOCAL]	= 0,
889 	[RTN_BROADCAST]	= 0,
890 	[RTN_ANYCAST]	= 0,
891 	[RTN_MULTICAST]	= 0,
892 	[RTN_BLACKHOLE]	= -EINVAL,
893 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
894 	[RTN_PROHIBIT]	= -EACCES,
895 	[RTN_THROW]	= -EAGAIN,
896 	[RTN_NAT]	= -EINVAL,
897 	[RTN_XRESOLVE]	= -EINVAL,
898 };
899 
900 static int ip6_rt_type_to_error(u8 fib6_type)
901 {
902 	return fib6_prop[fib6_type];
903 }
904 
905 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
906 {
907 	unsigned short flags = 0;
908 
909 	if (rt->dst_nocount)
910 		flags |= DST_NOCOUNT;
911 	if (rt->dst_nopolicy)
912 		flags |= DST_NOPOLICY;
913 	if (rt->dst_host)
914 		flags |= DST_HOST;
915 
916 	return flags;
917 }
918 
919 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
920 {
921 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
922 
923 	switch (ort->fib6_type) {
924 	case RTN_BLACKHOLE:
925 		rt->dst.output = dst_discard_out;
926 		rt->dst.input = dst_discard;
927 		break;
928 	case RTN_PROHIBIT:
929 		rt->dst.output = ip6_pkt_prohibit_out;
930 		rt->dst.input = ip6_pkt_prohibit;
931 		break;
932 	case RTN_THROW:
933 	case RTN_UNREACHABLE:
934 	default:
935 		rt->dst.output = ip6_pkt_discard_out;
936 		rt->dst.input = ip6_pkt_discard;
937 		break;
938 	}
939 }
940 
941 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
942 {
943 	rt->dst.flags |= fib6_info_dst_flags(ort);
944 
945 	if (ort->fib6_flags & RTF_REJECT) {
946 		ip6_rt_init_dst_reject(rt, ort);
947 		return;
948 	}
949 
950 	rt->dst.error = 0;
951 	rt->dst.output = ip6_output;
952 
953 	if (ort->fib6_type == RTN_LOCAL) {
954 		rt->dst.input = ip6_input;
955 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
956 		rt->dst.input = ip6_mc_input;
957 	} else {
958 		rt->dst.input = ip6_forward;
959 	}
960 
961 	if (ort->fib6_nh.nh_lwtstate) {
962 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
963 		lwtunnel_set_redirect(&rt->dst);
964 	}
965 
966 	rt->dst.lastuse = jiffies;
967 }
968 
969 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
970 {
971 	rt->rt6i_flags &= ~RTF_EXPIRES;
972 	fib6_info_hold(from);
973 	rcu_assign_pointer(rt->from, from);
974 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
975 	if (from->fib6_metrics != &dst_default_metrics) {
976 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
977 		refcount_inc(&from->fib6_metrics->refcnt);
978 	}
979 }
980 
981 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
982 {
983 	struct net_device *dev = fib6_info_nh_dev(ort);
984 
985 	ip6_rt_init_dst(rt, ort);
986 
987 	rt->rt6i_dst = ort->fib6_dst;
988 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
989 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
990 	rt->rt6i_flags = ort->fib6_flags;
991 	rt6_set_from(rt, ort);
992 #ifdef CONFIG_IPV6_SUBTREES
993 	rt->rt6i_src = ort->fib6_src;
994 #endif
995 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
996 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
997 }
998 
999 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1000 					struct in6_addr *saddr)
1001 {
1002 	struct fib6_node *pn, *sn;
1003 	while (1) {
1004 		if (fn->fn_flags & RTN_TL_ROOT)
1005 			return NULL;
1006 		pn = rcu_dereference(fn->parent);
1007 		sn = FIB6_SUBTREE(pn);
1008 		if (sn && sn != fn)
1009 			fn = fib6_lookup(sn, NULL, saddr);
1010 		else
1011 			fn = pn;
1012 		if (fn->fn_flags & RTN_RTINFO)
1013 			return fn;
1014 	}
1015 }
1016 
1017 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1018 			  bool null_fallback)
1019 {
1020 	struct rt6_info *rt = *prt;
1021 
1022 	if (dst_hold_safe(&rt->dst))
1023 		return true;
1024 	if (null_fallback) {
1025 		rt = net->ipv6.ip6_null_entry;
1026 		dst_hold(&rt->dst);
1027 	} else {
1028 		rt = NULL;
1029 	}
1030 	*prt = rt;
1031 	return false;
1032 }
1033 
1034 /* called with rcu_lock held */
1035 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1036 {
1037 	unsigned short flags = fib6_info_dst_flags(rt);
1038 	struct net_device *dev = rt->fib6_nh.nh_dev;
1039 	struct rt6_info *nrt;
1040 
1041 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1042 	if (nrt)
1043 		ip6_rt_copy_init(nrt, rt);
1044 
1045 	return nrt;
1046 }
1047 
1048 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1049 					     struct fib6_table *table,
1050 					     struct flowi6 *fl6,
1051 					     const struct sk_buff *skb,
1052 					     int flags)
1053 {
1054 	struct fib6_info *f6i;
1055 	struct fib6_node *fn;
1056 	struct rt6_info *rt;
1057 
1058 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059 		flags &= ~RT6_LOOKUP_F_IFACE;
1060 
1061 	rcu_read_lock();
1062 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063 restart:
1064 	f6i = rcu_dereference(fn->leaf);
1065 	if (!f6i) {
1066 		f6i = net->ipv6.fib6_null_entry;
1067 	} else {
1068 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
1069 				      fl6->flowi6_oif, flags);
1070 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1071 			f6i = rt6_multipath_select(net, f6i, fl6,
1072 						   fl6->flowi6_oif, skb, flags);
1073 	}
1074 	if (f6i == net->ipv6.fib6_null_entry) {
1075 		fn = fib6_backtrack(fn, &fl6->saddr);
1076 		if (fn)
1077 			goto restart;
1078 	}
1079 
1080 	/* Search through exception table */
1081 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1082 	if (rt) {
1083 		if (ip6_hold_safe(net, &rt, true))
1084 			dst_use_noref(&rt->dst, jiffies);
1085 	} else if (f6i == net->ipv6.fib6_null_entry) {
1086 		rt = net->ipv6.ip6_null_entry;
1087 		dst_hold(&rt->dst);
1088 	} else {
1089 		rt = ip6_create_rt_rcu(f6i);
1090 		if (!rt) {
1091 			rt = net->ipv6.ip6_null_entry;
1092 			dst_hold(&rt->dst);
1093 		}
1094 	}
1095 
1096 	rcu_read_unlock();
1097 
1098 	trace_fib6_table_lookup(net, rt, table, fl6);
1099 
1100 	return rt;
1101 }
1102 
1103 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1104 				   const struct sk_buff *skb, int flags)
1105 {
1106 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1107 }
1108 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1109 
1110 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1111 			    const struct in6_addr *saddr, int oif,
1112 			    const struct sk_buff *skb, int strict)
1113 {
1114 	struct flowi6 fl6 = {
1115 		.flowi6_oif = oif,
1116 		.daddr = *daddr,
1117 	};
1118 	struct dst_entry *dst;
1119 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1120 
1121 	if (saddr) {
1122 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1123 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1124 	}
1125 
1126 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1127 	if (dst->error == 0)
1128 		return (struct rt6_info *) dst;
1129 
1130 	dst_release(dst);
1131 
1132 	return NULL;
1133 }
1134 EXPORT_SYMBOL(rt6_lookup);
1135 
1136 /* ip6_ins_rt is called with FREE table->tb6_lock.
1137  * It takes new route entry, the addition fails by any reason the
1138  * route is released.
1139  * Caller must hold dst before calling it.
1140  */
1141 
1142 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1143 			struct netlink_ext_ack *extack)
1144 {
1145 	int err;
1146 	struct fib6_table *table;
1147 
1148 	table = rt->fib6_table;
1149 	spin_lock_bh(&table->tb6_lock);
1150 	err = fib6_add(&table->tb6_root, rt, info, extack);
1151 	spin_unlock_bh(&table->tb6_lock);
1152 
1153 	return err;
1154 }
1155 
1156 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1157 {
1158 	struct nl_info info = {	.nl_net = net, };
1159 
1160 	return __ip6_ins_rt(rt, &info, NULL);
1161 }
1162 
1163 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1164 					   const struct in6_addr *daddr,
1165 					   const struct in6_addr *saddr)
1166 {
1167 	struct net_device *dev;
1168 	struct rt6_info *rt;
1169 
1170 	/*
1171 	 *	Clone the route.
1172 	 */
1173 
1174 	dev = ip6_rt_get_dev_rcu(ort);
1175 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1176 	if (!rt)
1177 		return NULL;
1178 
1179 	ip6_rt_copy_init(rt, ort);
1180 	rt->rt6i_flags |= RTF_CACHE;
1181 	rt->dst.flags |= DST_HOST;
1182 	rt->rt6i_dst.addr = *daddr;
1183 	rt->rt6i_dst.plen = 128;
1184 
1185 	if (!rt6_is_gw_or_nonexthop(ort)) {
1186 		if (ort->fib6_dst.plen != 128 &&
1187 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1188 			rt->rt6i_flags |= RTF_ANYCAST;
1189 #ifdef CONFIG_IPV6_SUBTREES
1190 		if (rt->rt6i_src.plen && saddr) {
1191 			rt->rt6i_src.addr = *saddr;
1192 			rt->rt6i_src.plen = 128;
1193 		}
1194 #endif
1195 	}
1196 
1197 	return rt;
1198 }
1199 
1200 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1201 {
1202 	unsigned short flags = fib6_info_dst_flags(rt);
1203 	struct net_device *dev;
1204 	struct rt6_info *pcpu_rt;
1205 
1206 	rcu_read_lock();
1207 	dev = ip6_rt_get_dev_rcu(rt);
1208 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1209 	rcu_read_unlock();
1210 	if (!pcpu_rt)
1211 		return NULL;
1212 	ip6_rt_copy_init(pcpu_rt, rt);
1213 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1214 	return pcpu_rt;
1215 }
1216 
1217 /* It should be called with rcu_read_lock() acquired */
1218 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1219 {
1220 	struct rt6_info *pcpu_rt, **p;
1221 
1222 	p = this_cpu_ptr(rt->rt6i_pcpu);
1223 	pcpu_rt = *p;
1224 
1225 	if (pcpu_rt)
1226 		ip6_hold_safe(NULL, &pcpu_rt, false);
1227 
1228 	return pcpu_rt;
1229 }
1230 
1231 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1232 					    struct fib6_info *rt)
1233 {
1234 	struct rt6_info *pcpu_rt, *prev, **p;
1235 
1236 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1237 	if (!pcpu_rt) {
1238 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1239 		return net->ipv6.ip6_null_entry;
1240 	}
1241 
1242 	dst_hold(&pcpu_rt->dst);
1243 	p = this_cpu_ptr(rt->rt6i_pcpu);
1244 	prev = cmpxchg(p, NULL, pcpu_rt);
1245 	BUG_ON(prev);
1246 
1247 	return pcpu_rt;
1248 }
1249 
1250 /* exception hash table implementation
1251  */
1252 static DEFINE_SPINLOCK(rt6_exception_lock);
1253 
1254 /* Remove rt6_ex from hash table and free the memory
1255  * Caller must hold rt6_exception_lock
1256  */
1257 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1258 				 struct rt6_exception *rt6_ex)
1259 {
1260 	struct net *net;
1261 
1262 	if (!bucket || !rt6_ex)
1263 		return;
1264 
1265 	net = dev_net(rt6_ex->rt6i->dst.dev);
1266 	hlist_del_rcu(&rt6_ex->hlist);
1267 	dst_release(&rt6_ex->rt6i->dst);
1268 	kfree_rcu(rt6_ex, rcu);
1269 	WARN_ON_ONCE(!bucket->depth);
1270 	bucket->depth--;
1271 	net->ipv6.rt6_stats->fib_rt_cache--;
1272 }
1273 
1274 /* Remove oldest rt6_ex in bucket and free the memory
1275  * Caller must hold rt6_exception_lock
1276  */
1277 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1278 {
1279 	struct rt6_exception *rt6_ex, *oldest = NULL;
1280 
1281 	if (!bucket)
1282 		return;
1283 
1284 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1285 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1286 			oldest = rt6_ex;
1287 	}
1288 	rt6_remove_exception(bucket, oldest);
1289 }
1290 
1291 static u32 rt6_exception_hash(const struct in6_addr *dst,
1292 			      const struct in6_addr *src)
1293 {
1294 	static u32 seed __read_mostly;
1295 	u32 val;
1296 
1297 	net_get_random_once(&seed, sizeof(seed));
1298 	val = jhash(dst, sizeof(*dst), seed);
1299 
1300 #ifdef CONFIG_IPV6_SUBTREES
1301 	if (src)
1302 		val = jhash(src, sizeof(*src), val);
1303 #endif
1304 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1305 }
1306 
1307 /* Helper function to find the cached rt in the hash table
1308  * and update bucket pointer to point to the bucket for this
1309  * (daddr, saddr) pair
1310  * Caller must hold rt6_exception_lock
1311  */
1312 static struct rt6_exception *
1313 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1314 			      const struct in6_addr *daddr,
1315 			      const struct in6_addr *saddr)
1316 {
1317 	struct rt6_exception *rt6_ex;
1318 	u32 hval;
1319 
1320 	if (!(*bucket) || !daddr)
1321 		return NULL;
1322 
1323 	hval = rt6_exception_hash(daddr, saddr);
1324 	*bucket += hval;
1325 
1326 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1327 		struct rt6_info *rt6 = rt6_ex->rt6i;
1328 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1329 
1330 #ifdef CONFIG_IPV6_SUBTREES
1331 		if (matched && saddr)
1332 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1333 #endif
1334 		if (matched)
1335 			return rt6_ex;
1336 	}
1337 	return NULL;
1338 }
1339 
1340 /* Helper function to find the cached rt in the hash table
1341  * and update bucket pointer to point to the bucket for this
1342  * (daddr, saddr) pair
1343  * Caller must hold rcu_read_lock()
1344  */
1345 static struct rt6_exception *
1346 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1347 			 const struct in6_addr *daddr,
1348 			 const struct in6_addr *saddr)
1349 {
1350 	struct rt6_exception *rt6_ex;
1351 	u32 hval;
1352 
1353 	WARN_ON_ONCE(!rcu_read_lock_held());
1354 
1355 	if (!(*bucket) || !daddr)
1356 		return NULL;
1357 
1358 	hval = rt6_exception_hash(daddr, saddr);
1359 	*bucket += hval;
1360 
1361 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1362 		struct rt6_info *rt6 = rt6_ex->rt6i;
1363 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1364 
1365 #ifdef CONFIG_IPV6_SUBTREES
1366 		if (matched && saddr)
1367 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1368 #endif
1369 		if (matched)
1370 			return rt6_ex;
1371 	}
1372 	return NULL;
1373 }
1374 
1375 static unsigned int fib6_mtu(const struct fib6_info *rt)
1376 {
1377 	unsigned int mtu;
1378 
1379 	if (rt->fib6_pmtu) {
1380 		mtu = rt->fib6_pmtu;
1381 	} else {
1382 		struct net_device *dev = fib6_info_nh_dev(rt);
1383 		struct inet6_dev *idev;
1384 
1385 		rcu_read_lock();
1386 		idev = __in6_dev_get(dev);
1387 		mtu = idev->cnf.mtu6;
1388 		rcu_read_unlock();
1389 	}
1390 
1391 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1392 
1393 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1394 }
1395 
1396 static int rt6_insert_exception(struct rt6_info *nrt,
1397 				struct fib6_info *ort)
1398 {
1399 	struct net *net = dev_net(nrt->dst.dev);
1400 	struct rt6_exception_bucket *bucket;
1401 	struct in6_addr *src_key = NULL;
1402 	struct rt6_exception *rt6_ex;
1403 	int err = 0;
1404 
1405 	spin_lock_bh(&rt6_exception_lock);
1406 
1407 	if (ort->exception_bucket_flushed) {
1408 		err = -EINVAL;
1409 		goto out;
1410 	}
1411 
1412 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1413 					lockdep_is_held(&rt6_exception_lock));
1414 	if (!bucket) {
1415 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1416 				 GFP_ATOMIC);
1417 		if (!bucket) {
1418 			err = -ENOMEM;
1419 			goto out;
1420 		}
1421 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1422 	}
1423 
1424 #ifdef CONFIG_IPV6_SUBTREES
1425 	/* rt6i_src.plen != 0 indicates ort is in subtree
1426 	 * and exception table is indexed by a hash of
1427 	 * both rt6i_dst and rt6i_src.
1428 	 * Otherwise, the exception table is indexed by
1429 	 * a hash of only rt6i_dst.
1430 	 */
1431 	if (ort->fib6_src.plen)
1432 		src_key = &nrt->rt6i_src.addr;
1433 #endif
1434 
1435 	/* Update rt6i_prefsrc as it could be changed
1436 	 * in rt6_remove_prefsrc()
1437 	 */
1438 	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
1439 	/* rt6_mtu_change() might lower mtu on ort.
1440 	 * Only insert this exception route if its mtu
1441 	 * is less than ort's mtu value.
1442 	 */
1443 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1444 		err = -EINVAL;
1445 		goto out;
1446 	}
1447 
1448 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1449 					       src_key);
1450 	if (rt6_ex)
1451 		rt6_remove_exception(bucket, rt6_ex);
1452 
1453 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1454 	if (!rt6_ex) {
1455 		err = -ENOMEM;
1456 		goto out;
1457 	}
1458 	rt6_ex->rt6i = nrt;
1459 	rt6_ex->stamp = jiffies;
1460 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1461 	bucket->depth++;
1462 	net->ipv6.rt6_stats->fib_rt_cache++;
1463 
1464 	if (bucket->depth > FIB6_MAX_DEPTH)
1465 		rt6_exception_remove_oldest(bucket);
1466 
1467 out:
1468 	spin_unlock_bh(&rt6_exception_lock);
1469 
1470 	/* Update fn->fn_sernum to invalidate all cached dst */
1471 	if (!err) {
1472 		spin_lock_bh(&ort->fib6_table->tb6_lock);
1473 		fib6_update_sernum(net, ort);
1474 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1475 		fib6_force_start_gc(net);
1476 	}
1477 
1478 	return err;
1479 }
1480 
1481 void rt6_flush_exceptions(struct fib6_info *rt)
1482 {
1483 	struct rt6_exception_bucket *bucket;
1484 	struct rt6_exception *rt6_ex;
1485 	struct hlist_node *tmp;
1486 	int i;
1487 
1488 	spin_lock_bh(&rt6_exception_lock);
1489 	/* Prevent rt6_insert_exception() to recreate the bucket list */
1490 	rt->exception_bucket_flushed = 1;
1491 
1492 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 				    lockdep_is_held(&rt6_exception_lock));
1494 	if (!bucket)
1495 		goto out;
1496 
1497 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1498 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1499 			rt6_remove_exception(bucket, rt6_ex);
1500 		WARN_ON_ONCE(bucket->depth);
1501 		bucket++;
1502 	}
1503 
1504 out:
1505 	spin_unlock_bh(&rt6_exception_lock);
1506 }
1507 
1508 /* Find cached rt in the hash table inside passed in rt
1509  * Caller has to hold rcu_read_lock()
1510  */
1511 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1512 					   struct in6_addr *daddr,
1513 					   struct in6_addr *saddr)
1514 {
1515 	struct rt6_exception_bucket *bucket;
1516 	struct in6_addr *src_key = NULL;
1517 	struct rt6_exception *rt6_ex;
1518 	struct rt6_info *res = NULL;
1519 
1520 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1521 
1522 #ifdef CONFIG_IPV6_SUBTREES
1523 	/* rt6i_src.plen != 0 indicates rt is in subtree
1524 	 * and exception table is indexed by a hash of
1525 	 * both rt6i_dst and rt6i_src.
1526 	 * Otherwise, the exception table is indexed by
1527 	 * a hash of only rt6i_dst.
1528 	 */
1529 	if (rt->fib6_src.plen)
1530 		src_key = saddr;
1531 #endif
1532 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1533 
1534 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1535 		res = rt6_ex->rt6i;
1536 
1537 	return res;
1538 }
1539 
1540 /* Remove the passed in cached rt from the hash table that contains it */
1541 static int rt6_remove_exception_rt(struct rt6_info *rt)
1542 {
1543 	struct rt6_exception_bucket *bucket;
1544 	struct in6_addr *src_key = NULL;
1545 	struct rt6_exception *rt6_ex;
1546 	struct fib6_info *from;
1547 	int err;
1548 
1549 	from = rcu_dereference(rt->from);
1550 	if (!from ||
1551 	    !(rt->rt6i_flags & RTF_CACHE))
1552 		return -EINVAL;
1553 
1554 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1555 		return -ENOENT;
1556 
1557 	spin_lock_bh(&rt6_exception_lock);
1558 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1559 				    lockdep_is_held(&rt6_exception_lock));
1560 #ifdef CONFIG_IPV6_SUBTREES
1561 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1562 	 * and exception table is indexed by a hash of
1563 	 * both rt6i_dst and rt6i_src.
1564 	 * Otherwise, the exception table is indexed by
1565 	 * a hash of only rt6i_dst.
1566 	 */
1567 	if (from->fib6_src.plen)
1568 		src_key = &rt->rt6i_src.addr;
1569 #endif
1570 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1571 					       &rt->rt6i_dst.addr,
1572 					       src_key);
1573 	if (rt6_ex) {
1574 		rt6_remove_exception(bucket, rt6_ex);
1575 		err = 0;
1576 	} else {
1577 		err = -ENOENT;
1578 	}
1579 
1580 	spin_unlock_bh(&rt6_exception_lock);
1581 	return err;
1582 }
1583 
1584 /* Find rt6_ex which contains the passed in rt cache and
1585  * refresh its stamp
1586  */
1587 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1588 {
1589 	struct rt6_exception_bucket *bucket;
1590 	struct fib6_info *from = rt->from;
1591 	struct in6_addr *src_key = NULL;
1592 	struct rt6_exception *rt6_ex;
1593 
1594 	if (!from ||
1595 	    !(rt->rt6i_flags & RTF_CACHE))
1596 		return;
1597 
1598 	rcu_read_lock();
1599 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1600 
1601 #ifdef CONFIG_IPV6_SUBTREES
1602 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1603 	 * and exception table is indexed by a hash of
1604 	 * both rt6i_dst and rt6i_src.
1605 	 * Otherwise, the exception table is indexed by
1606 	 * a hash of only rt6i_dst.
1607 	 */
1608 	if (from->fib6_src.plen)
1609 		src_key = &rt->rt6i_src.addr;
1610 #endif
1611 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1612 					  &rt->rt6i_dst.addr,
1613 					  src_key);
1614 	if (rt6_ex)
1615 		rt6_ex->stamp = jiffies;
1616 
1617 	rcu_read_unlock();
1618 }
1619 
1620 static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
1621 {
1622 	struct rt6_exception_bucket *bucket;
1623 	struct rt6_exception *rt6_ex;
1624 	int i;
1625 
1626 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1627 					lockdep_is_held(&rt6_exception_lock));
1628 
1629 	if (bucket) {
1630 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1631 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1632 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1633 			}
1634 			bucket++;
1635 		}
1636 	}
1637 }
1638 
1639 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1640 					 struct rt6_info *rt, int mtu)
1641 {
1642 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1643 	 * lowest MTU in the path: always allow updating the route PMTU to
1644 	 * reflect PMTU decreases.
1645 	 *
1646 	 * If the new MTU is higher, and the route PMTU is equal to the local
1647 	 * MTU, this means the old MTU is the lowest in the path, so allow
1648 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1649 	 * handle this.
1650 	 */
1651 
1652 	if (dst_mtu(&rt->dst) >= mtu)
1653 		return true;
1654 
1655 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1656 		return true;
1657 
1658 	return false;
1659 }
1660 
1661 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1662 				       struct fib6_info *rt, int mtu)
1663 {
1664 	struct rt6_exception_bucket *bucket;
1665 	struct rt6_exception *rt6_ex;
1666 	int i;
1667 
1668 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1669 					lockdep_is_held(&rt6_exception_lock));
1670 
1671 	if (!bucket)
1672 		return;
1673 
1674 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1675 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1676 			struct rt6_info *entry = rt6_ex->rt6i;
1677 
1678 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1679 			 * route), the metrics of its rt->from have already
1680 			 * been updated.
1681 			 */
1682 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1683 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1684 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1685 		}
1686 		bucket++;
1687 	}
1688 }
1689 
1690 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1691 
1692 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1693 					struct in6_addr *gateway)
1694 {
1695 	struct rt6_exception_bucket *bucket;
1696 	struct rt6_exception *rt6_ex;
1697 	struct hlist_node *tmp;
1698 	int i;
1699 
1700 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1701 		return;
1702 
1703 	spin_lock_bh(&rt6_exception_lock);
1704 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1705 				     lockdep_is_held(&rt6_exception_lock));
1706 
1707 	if (bucket) {
1708 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1709 			hlist_for_each_entry_safe(rt6_ex, tmp,
1710 						  &bucket->chain, hlist) {
1711 				struct rt6_info *entry = rt6_ex->rt6i;
1712 
1713 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1714 				    RTF_CACHE_GATEWAY &&
1715 				    ipv6_addr_equal(gateway,
1716 						    &entry->rt6i_gateway)) {
1717 					rt6_remove_exception(bucket, rt6_ex);
1718 				}
1719 			}
1720 			bucket++;
1721 		}
1722 	}
1723 
1724 	spin_unlock_bh(&rt6_exception_lock);
1725 }
1726 
1727 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1728 				      struct rt6_exception *rt6_ex,
1729 				      struct fib6_gc_args *gc_args,
1730 				      unsigned long now)
1731 {
1732 	struct rt6_info *rt = rt6_ex->rt6i;
1733 
1734 	/* we are pruning and obsoleting aged-out and non gateway exceptions
1735 	 * even if others have still references to them, so that on next
1736 	 * dst_check() such references can be dropped.
1737 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1738 	 * expired, independently from their aging, as per RFC 8201 section 4
1739 	 */
1740 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1741 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1742 			RT6_TRACE("aging clone %p\n", rt);
1743 			rt6_remove_exception(bucket, rt6_ex);
1744 			return;
1745 		}
1746 	} else if (time_after(jiffies, rt->dst.expires)) {
1747 		RT6_TRACE("purging expired route %p\n", rt);
1748 		rt6_remove_exception(bucket, rt6_ex);
1749 		return;
1750 	}
1751 
1752 	if (rt->rt6i_flags & RTF_GATEWAY) {
1753 		struct neighbour *neigh;
1754 		__u8 neigh_flags = 0;
1755 
1756 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1757 		if (neigh)
1758 			neigh_flags = neigh->flags;
1759 
1760 		if (!(neigh_flags & NTF_ROUTER)) {
1761 			RT6_TRACE("purging route %p via non-router but gateway\n",
1762 				  rt);
1763 			rt6_remove_exception(bucket, rt6_ex);
1764 			return;
1765 		}
1766 	}
1767 
1768 	gc_args->more++;
1769 }
1770 
1771 void rt6_age_exceptions(struct fib6_info *rt,
1772 			struct fib6_gc_args *gc_args,
1773 			unsigned long now)
1774 {
1775 	struct rt6_exception_bucket *bucket;
1776 	struct rt6_exception *rt6_ex;
1777 	struct hlist_node *tmp;
1778 	int i;
1779 
1780 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1781 		return;
1782 
1783 	rcu_read_lock_bh();
1784 	spin_lock(&rt6_exception_lock);
1785 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1786 				    lockdep_is_held(&rt6_exception_lock));
1787 
1788 	if (bucket) {
1789 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1790 			hlist_for_each_entry_safe(rt6_ex, tmp,
1791 						  &bucket->chain, hlist) {
1792 				rt6_age_examine_exception(bucket, rt6_ex,
1793 							  gc_args, now);
1794 			}
1795 			bucket++;
1796 		}
1797 	}
1798 	spin_unlock(&rt6_exception_lock);
1799 	rcu_read_unlock_bh();
1800 }
1801 
1802 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1803 			       int oif, struct flowi6 *fl6,
1804 			       const struct sk_buff *skb, int flags)
1805 {
1806 	struct fib6_node *fn, *saved_fn;
1807 	struct fib6_info *f6i;
1808 	struct rt6_info *rt;
1809 	int strict = 0;
1810 
1811 	strict |= flags & RT6_LOOKUP_F_IFACE;
1812 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1813 	if (net->ipv6.devconf_all->forwarding == 0)
1814 		strict |= RT6_LOOKUP_F_REACHABLE;
1815 
1816 	rcu_read_lock();
1817 
1818 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1819 	saved_fn = fn;
1820 
1821 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1822 		oif = 0;
1823 
1824 redo_rt6_select:
1825 	f6i = rt6_select(net, fn, oif, strict);
1826 	if (f6i->fib6_nsiblings)
1827 		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1828 	if (f6i == net->ipv6.fib6_null_entry) {
1829 		fn = fib6_backtrack(fn, &fl6->saddr);
1830 		if (fn)
1831 			goto redo_rt6_select;
1832 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1833 			/* also consider unreachable route */
1834 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1835 			fn = saved_fn;
1836 			goto redo_rt6_select;
1837 		}
1838 	}
1839 
1840 	if (f6i == net->ipv6.fib6_null_entry) {
1841 		rt = net->ipv6.ip6_null_entry;
1842 		rcu_read_unlock();
1843 		dst_hold(&rt->dst);
1844 		trace_fib6_table_lookup(net, rt, table, fl6);
1845 		return rt;
1846 	}
1847 
1848 	/*Search through exception table */
1849 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1850 	if (rt) {
1851 		if (ip6_hold_safe(net, &rt, true))
1852 			dst_use_noref(&rt->dst, jiffies);
1853 
1854 		rcu_read_unlock();
1855 		trace_fib6_table_lookup(net, rt, table, fl6);
1856 		return rt;
1857 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1858 			    !(f6i->fib6_flags & RTF_GATEWAY))) {
1859 		/* Create a RTF_CACHE clone which will not be
1860 		 * owned by the fib6 tree.  It is for the special case where
1861 		 * the daddr in the skb during the neighbor look-up is different
1862 		 * from the fl6->daddr used to look-up route here.
1863 		 */
1864 		struct rt6_info *uncached_rt;
1865 
1866 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1867 
1868 		rcu_read_unlock();
1869 
1870 		if (uncached_rt) {
1871 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1872 			 * No need for another dst_hold()
1873 			 */
1874 			rt6_uncached_list_add(uncached_rt);
1875 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1876 		} else {
1877 			uncached_rt = net->ipv6.ip6_null_entry;
1878 			dst_hold(&uncached_rt->dst);
1879 		}
1880 
1881 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
1882 		return uncached_rt;
1883 
1884 	} else {
1885 		/* Get a percpu copy */
1886 
1887 		struct rt6_info *pcpu_rt;
1888 
1889 		local_bh_disable();
1890 		pcpu_rt = rt6_get_pcpu_route(f6i);
1891 
1892 		if (!pcpu_rt)
1893 			pcpu_rt = rt6_make_pcpu_route(net, f6i);
1894 
1895 		local_bh_enable();
1896 		rcu_read_unlock();
1897 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1898 		return pcpu_rt;
1899 	}
1900 }
1901 EXPORT_SYMBOL_GPL(ip6_pol_route);
1902 
1903 static struct rt6_info *ip6_pol_route_input(struct net *net,
1904 					    struct fib6_table *table,
1905 					    struct flowi6 *fl6,
1906 					    const struct sk_buff *skb,
1907 					    int flags)
1908 {
1909 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1910 }
1911 
1912 struct dst_entry *ip6_route_input_lookup(struct net *net,
1913 					 struct net_device *dev,
1914 					 struct flowi6 *fl6,
1915 					 const struct sk_buff *skb,
1916 					 int flags)
1917 {
1918 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1919 		flags |= RT6_LOOKUP_F_IFACE;
1920 
1921 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1922 }
1923 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1924 
1925 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1926 				  struct flow_keys *keys,
1927 				  struct flow_keys *flkeys)
1928 {
1929 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1930 	const struct ipv6hdr *key_iph = outer_iph;
1931 	struct flow_keys *_flkeys = flkeys;
1932 	const struct ipv6hdr *inner_iph;
1933 	const struct icmp6hdr *icmph;
1934 	struct ipv6hdr _inner_iph;
1935 	struct icmp6hdr _icmph;
1936 
1937 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1938 		goto out;
1939 
1940 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1941 				   sizeof(_icmph), &_icmph);
1942 	if (!icmph)
1943 		goto out;
1944 
1945 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1946 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1947 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1948 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1949 		goto out;
1950 
1951 	inner_iph = skb_header_pointer(skb,
1952 				       skb_transport_offset(skb) + sizeof(*icmph),
1953 				       sizeof(_inner_iph), &_inner_iph);
1954 	if (!inner_iph)
1955 		goto out;
1956 
1957 	key_iph = inner_iph;
1958 	_flkeys = NULL;
1959 out:
1960 	if (_flkeys) {
1961 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1962 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1963 		keys->tags.flow_label = _flkeys->tags.flow_label;
1964 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
1965 	} else {
1966 		keys->addrs.v6addrs.src = key_iph->saddr;
1967 		keys->addrs.v6addrs.dst = key_iph->daddr;
1968 		keys->tags.flow_label = ip6_flowinfo(key_iph);
1969 		keys->basic.ip_proto = key_iph->nexthdr;
1970 	}
1971 }
1972 
1973 /* if skb is set it will be used and fl6 can be NULL */
1974 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1975 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1976 {
1977 	struct flow_keys hash_keys;
1978 	u32 mhash;
1979 
1980 	switch (ip6_multipath_hash_policy(net)) {
1981 	case 0:
1982 		memset(&hash_keys, 0, sizeof(hash_keys));
1983 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1984 		if (skb) {
1985 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1986 		} else {
1987 			hash_keys.addrs.v6addrs.src = fl6->saddr;
1988 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
1989 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1990 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
1991 		}
1992 		break;
1993 	case 1:
1994 		if (skb) {
1995 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1996 			struct flow_keys keys;
1997 
1998 			/* short-circuit if we already have L4 hash present */
1999 			if (skb->l4_hash)
2000 				return skb_get_hash_raw(skb) >> 1;
2001 
2002 			memset(&hash_keys, 0, sizeof(hash_keys));
2003 
2004                         if (!flkeys) {
2005 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2006 				flkeys = &keys;
2007 			}
2008 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2009 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2010 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2011 			hash_keys.ports.src = flkeys->ports.src;
2012 			hash_keys.ports.dst = flkeys->ports.dst;
2013 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2014 		} else {
2015 			memset(&hash_keys, 0, sizeof(hash_keys));
2016 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2017 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2018 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2019 			hash_keys.ports.src = fl6->fl6_sport;
2020 			hash_keys.ports.dst = fl6->fl6_dport;
2021 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2022 		}
2023 		break;
2024 	}
2025 	mhash = flow_hash_from_keys(&hash_keys);
2026 
2027 	return mhash >> 1;
2028 }
2029 
2030 void ip6_route_input(struct sk_buff *skb)
2031 {
2032 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2033 	struct net *net = dev_net(skb->dev);
2034 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2035 	struct ip_tunnel_info *tun_info;
2036 	struct flowi6 fl6 = {
2037 		.flowi6_iif = skb->dev->ifindex,
2038 		.daddr = iph->daddr,
2039 		.saddr = iph->saddr,
2040 		.flowlabel = ip6_flowinfo(iph),
2041 		.flowi6_mark = skb->mark,
2042 		.flowi6_proto = iph->nexthdr,
2043 	};
2044 	struct flow_keys *flkeys = NULL, _flkeys;
2045 
2046 	tun_info = skb_tunnel_info(skb);
2047 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2048 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2049 
2050 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2051 		flkeys = &_flkeys;
2052 
2053 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2054 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2055 	skb_dst_drop(skb);
2056 	skb_dst_set(skb,
2057 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2058 }
2059 
2060 static struct rt6_info *ip6_pol_route_output(struct net *net,
2061 					     struct fib6_table *table,
2062 					     struct flowi6 *fl6,
2063 					     const struct sk_buff *skb,
2064 					     int flags)
2065 {
2066 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2067 }
2068 
2069 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2070 					 struct flowi6 *fl6, int flags)
2071 {
2072 	bool any_src;
2073 
2074 	if (rt6_need_strict(&fl6->daddr)) {
2075 		struct dst_entry *dst;
2076 
2077 		dst = l3mdev_link_scope_lookup(net, fl6);
2078 		if (dst)
2079 			return dst;
2080 	}
2081 
2082 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2083 
2084 	any_src = ipv6_addr_any(&fl6->saddr);
2085 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2086 	    (fl6->flowi6_oif && any_src))
2087 		flags |= RT6_LOOKUP_F_IFACE;
2088 
2089 	if (!any_src)
2090 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2091 	else if (sk)
2092 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2093 
2094 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2095 }
2096 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2097 
2098 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2099 {
2100 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2101 	struct net_device *loopback_dev = net->loopback_dev;
2102 	struct dst_entry *new = NULL;
2103 
2104 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2105 		       DST_OBSOLETE_DEAD, 0);
2106 	if (rt) {
2107 		rt6_info_init(rt);
2108 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2109 
2110 		new = &rt->dst;
2111 		new->__use = 1;
2112 		new->input = dst_discard;
2113 		new->output = dst_discard_out;
2114 
2115 		dst_copy_metrics(new, &ort->dst);
2116 
2117 		rt->rt6i_idev = in6_dev_get(loopback_dev);
2118 		rt->rt6i_gateway = ort->rt6i_gateway;
2119 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2120 
2121 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2122 #ifdef CONFIG_IPV6_SUBTREES
2123 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2124 #endif
2125 	}
2126 
2127 	dst_release(dst_orig);
2128 	return new ? new : ERR_PTR(-ENOMEM);
2129 }
2130 
2131 /*
2132  *	Destination cache support functions
2133  */
2134 
2135 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2136 {
2137 	u32 rt_cookie = 0;
2138 
2139 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2140 		return false;
2141 
2142 	if (fib6_check_expired(f6i))
2143 		return false;
2144 
2145 	return true;
2146 }
2147 
2148 static struct dst_entry *rt6_check(struct rt6_info *rt,
2149 				   struct fib6_info *from,
2150 				   u32 cookie)
2151 {
2152 	u32 rt_cookie = 0;
2153 
2154 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2155 	    rt_cookie != cookie)
2156 		return NULL;
2157 
2158 	if (rt6_check_expired(rt))
2159 		return NULL;
2160 
2161 	return &rt->dst;
2162 }
2163 
2164 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2165 					    struct fib6_info *from,
2166 					    u32 cookie)
2167 {
2168 	if (!__rt6_check_expired(rt) &&
2169 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2170 	    fib6_check(from, cookie))
2171 		return &rt->dst;
2172 	else
2173 		return NULL;
2174 }
2175 
2176 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2177 {
2178 	struct dst_entry *dst_ret;
2179 	struct fib6_info *from;
2180 	struct rt6_info *rt;
2181 
2182 	rt = container_of(dst, struct rt6_info, dst);
2183 
2184 	rcu_read_lock();
2185 
2186 	/* All IPV6 dsts are created with ->obsolete set to the value
2187 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2188 	 * into this function always.
2189 	 */
2190 
2191 	from = rcu_dereference(rt->from);
2192 
2193 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2194 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2195 		dst_ret = rt6_dst_from_check(rt, from, cookie);
2196 	else
2197 		dst_ret = rt6_check(rt, from, cookie);
2198 
2199 	rcu_read_unlock();
2200 
2201 	return dst_ret;
2202 }
2203 
2204 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2205 {
2206 	struct rt6_info *rt = (struct rt6_info *) dst;
2207 
2208 	if (rt) {
2209 		if (rt->rt6i_flags & RTF_CACHE) {
2210 			rcu_read_lock();
2211 			if (rt6_check_expired(rt)) {
2212 				rt6_remove_exception_rt(rt);
2213 				dst = NULL;
2214 			}
2215 			rcu_read_unlock();
2216 		} else {
2217 			dst_release(dst);
2218 			dst = NULL;
2219 		}
2220 	}
2221 	return dst;
2222 }
2223 
2224 static void ip6_link_failure(struct sk_buff *skb)
2225 {
2226 	struct rt6_info *rt;
2227 
2228 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2229 
2230 	rt = (struct rt6_info *) skb_dst(skb);
2231 	if (rt) {
2232 		rcu_read_lock();
2233 		if (rt->rt6i_flags & RTF_CACHE) {
2234 			if (dst_hold_safe(&rt->dst))
2235 				rt6_remove_exception_rt(rt);
2236 		} else {
2237 			struct fib6_info *from;
2238 			struct fib6_node *fn;
2239 
2240 			from = rcu_dereference(rt->from);
2241 			if (from) {
2242 				fn = rcu_dereference(from->fib6_node);
2243 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2244 					fn->fn_sernum = -1;
2245 			}
2246 		}
2247 		rcu_read_unlock();
2248 	}
2249 }
2250 
2251 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2252 {
2253 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2254 		struct fib6_info *from;
2255 
2256 		rcu_read_lock();
2257 		from = rcu_dereference(rt0->from);
2258 		if (from)
2259 			rt0->dst.expires = from->expires;
2260 		rcu_read_unlock();
2261 	}
2262 
2263 	dst_set_expires(&rt0->dst, timeout);
2264 	rt0->rt6i_flags |= RTF_EXPIRES;
2265 }
2266 
2267 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2268 {
2269 	struct net *net = dev_net(rt->dst.dev);
2270 
2271 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2272 	rt->rt6i_flags |= RTF_MODIFIED;
2273 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2274 }
2275 
2276 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2277 {
2278 	bool from_set;
2279 
2280 	rcu_read_lock();
2281 	from_set = !!rcu_dereference(rt->from);
2282 	rcu_read_unlock();
2283 
2284 	return !(rt->rt6i_flags & RTF_CACHE) &&
2285 		(rt->rt6i_flags & RTF_PCPU || from_set);
2286 }
2287 
2288 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2289 				 const struct ipv6hdr *iph, u32 mtu)
2290 {
2291 	const struct in6_addr *daddr, *saddr;
2292 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2293 
2294 	if (rt6->rt6i_flags & RTF_LOCAL)
2295 		return;
2296 
2297 	if (dst_metric_locked(dst, RTAX_MTU))
2298 		return;
2299 
2300 	if (iph) {
2301 		daddr = &iph->daddr;
2302 		saddr = &iph->saddr;
2303 	} else if (sk) {
2304 		daddr = &sk->sk_v6_daddr;
2305 		saddr = &inet6_sk(sk)->saddr;
2306 	} else {
2307 		daddr = NULL;
2308 		saddr = NULL;
2309 	}
2310 	dst_confirm_neigh(dst, daddr);
2311 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2312 	if (mtu >= dst_mtu(dst))
2313 		return;
2314 
2315 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2316 		rt6_do_update_pmtu(rt6, mtu);
2317 		/* update rt6_ex->stamp for cache */
2318 		if (rt6->rt6i_flags & RTF_CACHE)
2319 			rt6_update_exception_stamp_rt(rt6);
2320 	} else if (daddr) {
2321 		struct fib6_info *from;
2322 		struct rt6_info *nrt6;
2323 
2324 		rcu_read_lock();
2325 		from = rcu_dereference(rt6->from);
2326 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2327 		if (nrt6) {
2328 			rt6_do_update_pmtu(nrt6, mtu);
2329 			if (rt6_insert_exception(nrt6, from))
2330 				dst_release_immediate(&nrt6->dst);
2331 		}
2332 		rcu_read_unlock();
2333 	}
2334 }
2335 
2336 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2337 			       struct sk_buff *skb, u32 mtu)
2338 {
2339 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2340 }
2341 
2342 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2343 		     int oif, u32 mark, kuid_t uid)
2344 {
2345 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2346 	struct dst_entry *dst;
2347 	struct flowi6 fl6;
2348 
2349 	memset(&fl6, 0, sizeof(fl6));
2350 	fl6.flowi6_oif = oif;
2351 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2352 	fl6.daddr = iph->daddr;
2353 	fl6.saddr = iph->saddr;
2354 	fl6.flowlabel = ip6_flowinfo(iph);
2355 	fl6.flowi6_uid = uid;
2356 
2357 	dst = ip6_route_output(net, NULL, &fl6);
2358 	if (!dst->error)
2359 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2360 	dst_release(dst);
2361 }
2362 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2363 
2364 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2365 {
2366 	struct dst_entry *dst;
2367 
2368 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2369 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
2370 
2371 	dst = __sk_dst_get(sk);
2372 	if (!dst || !dst->obsolete ||
2373 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2374 		return;
2375 
2376 	bh_lock_sock(sk);
2377 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2378 		ip6_datagram_dst_update(sk, false);
2379 	bh_unlock_sock(sk);
2380 }
2381 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2382 
2383 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2384 			   const struct flowi6 *fl6)
2385 {
2386 #ifdef CONFIG_IPV6_SUBTREES
2387 	struct ipv6_pinfo *np = inet6_sk(sk);
2388 #endif
2389 
2390 	ip6_dst_store(sk, dst,
2391 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2392 		      &sk->sk_v6_daddr : NULL,
2393 #ifdef CONFIG_IPV6_SUBTREES
2394 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2395 		      &np->saddr :
2396 #endif
2397 		      NULL);
2398 }
2399 
2400 /* Handle redirects */
2401 struct ip6rd_flowi {
2402 	struct flowi6 fl6;
2403 	struct in6_addr gateway;
2404 };
2405 
2406 static struct rt6_info *__ip6_route_redirect(struct net *net,
2407 					     struct fib6_table *table,
2408 					     struct flowi6 *fl6,
2409 					     const struct sk_buff *skb,
2410 					     int flags)
2411 {
2412 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2413 	struct rt6_info *ret = NULL, *rt_cache;
2414 	struct fib6_info *rt;
2415 	struct fib6_node *fn;
2416 
2417 	/* Get the "current" route for this destination and
2418 	 * check if the redirect has come from appropriate router.
2419 	 *
2420 	 * RFC 4861 specifies that redirects should only be
2421 	 * accepted if they come from the nexthop to the target.
2422 	 * Due to the way the routes are chosen, this notion
2423 	 * is a bit fuzzy and one might need to check all possible
2424 	 * routes.
2425 	 */
2426 
2427 	rcu_read_lock();
2428 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2429 restart:
2430 	for_each_fib6_node_rt_rcu(fn) {
2431 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2432 			continue;
2433 		if (fib6_check_expired(rt))
2434 			continue;
2435 		if (rt->fib6_flags & RTF_REJECT)
2436 			break;
2437 		if (!(rt->fib6_flags & RTF_GATEWAY))
2438 			continue;
2439 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2440 			continue;
2441 		/* rt_cache's gateway might be different from its 'parent'
2442 		 * in the case of an ip redirect.
2443 		 * So we keep searching in the exception table if the gateway
2444 		 * is different.
2445 		 */
2446 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2447 			rt_cache = rt6_find_cached_rt(rt,
2448 						      &fl6->daddr,
2449 						      &fl6->saddr);
2450 			if (rt_cache &&
2451 			    ipv6_addr_equal(&rdfl->gateway,
2452 					    &rt_cache->rt6i_gateway)) {
2453 				ret = rt_cache;
2454 				break;
2455 			}
2456 			continue;
2457 		}
2458 		break;
2459 	}
2460 
2461 	if (!rt)
2462 		rt = net->ipv6.fib6_null_entry;
2463 	else if (rt->fib6_flags & RTF_REJECT) {
2464 		ret = net->ipv6.ip6_null_entry;
2465 		goto out;
2466 	}
2467 
2468 	if (rt == net->ipv6.fib6_null_entry) {
2469 		fn = fib6_backtrack(fn, &fl6->saddr);
2470 		if (fn)
2471 			goto restart;
2472 	}
2473 
2474 out:
2475 	if (ret)
2476 		dst_hold(&ret->dst);
2477 	else
2478 		ret = ip6_create_rt_rcu(rt);
2479 
2480 	rcu_read_unlock();
2481 
2482 	trace_fib6_table_lookup(net, ret, table, fl6);
2483 	return ret;
2484 };
2485 
2486 static struct dst_entry *ip6_route_redirect(struct net *net,
2487 					    const struct flowi6 *fl6,
2488 					    const struct sk_buff *skb,
2489 					    const struct in6_addr *gateway)
2490 {
2491 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2492 	struct ip6rd_flowi rdfl;
2493 
2494 	rdfl.fl6 = *fl6;
2495 	rdfl.gateway = *gateway;
2496 
2497 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2498 				flags, __ip6_route_redirect);
2499 }
2500 
2501 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2502 		  kuid_t uid)
2503 {
2504 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2505 	struct dst_entry *dst;
2506 	struct flowi6 fl6;
2507 
2508 	memset(&fl6, 0, sizeof(fl6));
2509 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2510 	fl6.flowi6_oif = oif;
2511 	fl6.flowi6_mark = mark;
2512 	fl6.daddr = iph->daddr;
2513 	fl6.saddr = iph->saddr;
2514 	fl6.flowlabel = ip6_flowinfo(iph);
2515 	fl6.flowi6_uid = uid;
2516 
2517 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2518 	rt6_do_redirect(dst, NULL, skb);
2519 	dst_release(dst);
2520 }
2521 EXPORT_SYMBOL_GPL(ip6_redirect);
2522 
2523 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2524 			    u32 mark)
2525 {
2526 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2527 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2528 	struct dst_entry *dst;
2529 	struct flowi6 fl6;
2530 
2531 	memset(&fl6, 0, sizeof(fl6));
2532 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2533 	fl6.flowi6_oif = oif;
2534 	fl6.flowi6_mark = mark;
2535 	fl6.daddr = msg->dest;
2536 	fl6.saddr = iph->daddr;
2537 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2538 
2539 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2540 	rt6_do_redirect(dst, NULL, skb);
2541 	dst_release(dst);
2542 }
2543 
2544 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2545 {
2546 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2547 		     sk->sk_uid);
2548 }
2549 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2550 
2551 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2552 {
2553 	struct net_device *dev = dst->dev;
2554 	unsigned int mtu = dst_mtu(dst);
2555 	struct net *net = dev_net(dev);
2556 
2557 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2558 
2559 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2560 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2561 
2562 	/*
2563 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2564 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2565 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2566 	 * rely only on pmtu discovery"
2567 	 */
2568 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2569 		mtu = IPV6_MAXPLEN;
2570 	return mtu;
2571 }
2572 
2573 static unsigned int ip6_mtu(const struct dst_entry *dst)
2574 {
2575 	struct inet6_dev *idev;
2576 	unsigned int mtu;
2577 
2578 	mtu = dst_metric_raw(dst, RTAX_MTU);
2579 	if (mtu)
2580 		goto out;
2581 
2582 	mtu = IPV6_MIN_MTU;
2583 
2584 	rcu_read_lock();
2585 	idev = __in6_dev_get(dst->dev);
2586 	if (idev)
2587 		mtu = idev->cnf.mtu6;
2588 	rcu_read_unlock();
2589 
2590 out:
2591 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2592 
2593 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2594 }
2595 
2596 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2597 				  struct flowi6 *fl6)
2598 {
2599 	struct dst_entry *dst;
2600 	struct rt6_info *rt;
2601 	struct inet6_dev *idev = in6_dev_get(dev);
2602 	struct net *net = dev_net(dev);
2603 
2604 	if (unlikely(!idev))
2605 		return ERR_PTR(-ENODEV);
2606 
2607 	rt = ip6_dst_alloc(net, dev, 0);
2608 	if (unlikely(!rt)) {
2609 		in6_dev_put(idev);
2610 		dst = ERR_PTR(-ENOMEM);
2611 		goto out;
2612 	}
2613 
2614 	rt->dst.flags |= DST_HOST;
2615 	rt->dst.input = ip6_input;
2616 	rt->dst.output  = ip6_output;
2617 	rt->rt6i_gateway  = fl6->daddr;
2618 	rt->rt6i_dst.addr = fl6->daddr;
2619 	rt->rt6i_dst.plen = 128;
2620 	rt->rt6i_idev     = idev;
2621 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2622 
2623 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2624 	 * do proper release of the net_device
2625 	 */
2626 	rt6_uncached_list_add(rt);
2627 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2628 
2629 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2630 
2631 out:
2632 	return dst;
2633 }
2634 
2635 static int ip6_dst_gc(struct dst_ops *ops)
2636 {
2637 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2638 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2639 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2640 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2641 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2642 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2643 	int entries;
2644 
2645 	entries = dst_entries_get_fast(ops);
2646 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2647 	    entries <= rt_max_size)
2648 		goto out;
2649 
2650 	net->ipv6.ip6_rt_gc_expire++;
2651 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2652 	entries = dst_entries_get_slow(ops);
2653 	if (entries < ops->gc_thresh)
2654 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2655 out:
2656 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2657 	return entries > rt_max_size;
2658 }
2659 
2660 static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2661 			       struct fib6_config *cfg)
2662 {
2663 	struct dst_metrics *p;
2664 
2665 	if (!cfg->fc_mx)
2666 		return 0;
2667 
2668 	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2669 	if (unlikely(!p))
2670 		return -ENOMEM;
2671 
2672 	refcount_set(&p->refcnt, 1);
2673 	rt->fib6_metrics = p;
2674 
2675 	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
2676 }
2677 
2678 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2679 					    struct fib6_config *cfg,
2680 					    const struct in6_addr *gw_addr,
2681 					    u32 tbid, int flags)
2682 {
2683 	struct flowi6 fl6 = {
2684 		.flowi6_oif = cfg->fc_ifindex,
2685 		.daddr = *gw_addr,
2686 		.saddr = cfg->fc_prefsrc,
2687 	};
2688 	struct fib6_table *table;
2689 	struct rt6_info *rt;
2690 
2691 	table = fib6_get_table(net, tbid);
2692 	if (!table)
2693 		return NULL;
2694 
2695 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2696 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2697 
2698 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2699 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2700 
2701 	/* if table lookup failed, fall back to full lookup */
2702 	if (rt == net->ipv6.ip6_null_entry) {
2703 		ip6_rt_put(rt);
2704 		rt = NULL;
2705 	}
2706 
2707 	return rt;
2708 }
2709 
2710 static int ip6_route_check_nh_onlink(struct net *net,
2711 				     struct fib6_config *cfg,
2712 				     const struct net_device *dev,
2713 				     struct netlink_ext_ack *extack)
2714 {
2715 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2716 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2717 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2718 	struct rt6_info *grt;
2719 	int err;
2720 
2721 	err = 0;
2722 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2723 	if (grt) {
2724 		if (!grt->dst.error &&
2725 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2726 			NL_SET_ERR_MSG(extack,
2727 				       "Nexthop has invalid gateway or device mismatch");
2728 			err = -EINVAL;
2729 		}
2730 
2731 		ip6_rt_put(grt);
2732 	}
2733 
2734 	return err;
2735 }
2736 
2737 static int ip6_route_check_nh(struct net *net,
2738 			      struct fib6_config *cfg,
2739 			      struct net_device **_dev,
2740 			      struct inet6_dev **idev)
2741 {
2742 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2743 	struct net_device *dev = _dev ? *_dev : NULL;
2744 	struct rt6_info *grt = NULL;
2745 	int err = -EHOSTUNREACH;
2746 
2747 	if (cfg->fc_table) {
2748 		int flags = RT6_LOOKUP_F_IFACE;
2749 
2750 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2751 					  cfg->fc_table, flags);
2752 		if (grt) {
2753 			if (grt->rt6i_flags & RTF_GATEWAY ||
2754 			    (dev && dev != grt->dst.dev)) {
2755 				ip6_rt_put(grt);
2756 				grt = NULL;
2757 			}
2758 		}
2759 	}
2760 
2761 	if (!grt)
2762 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2763 
2764 	if (!grt)
2765 		goto out;
2766 
2767 	if (dev) {
2768 		if (dev != grt->dst.dev) {
2769 			ip6_rt_put(grt);
2770 			goto out;
2771 		}
2772 	} else {
2773 		*_dev = dev = grt->dst.dev;
2774 		*idev = grt->rt6i_idev;
2775 		dev_hold(dev);
2776 		in6_dev_hold(grt->rt6i_idev);
2777 	}
2778 
2779 	if (!(grt->rt6i_flags & RTF_GATEWAY))
2780 		err = 0;
2781 
2782 	ip6_rt_put(grt);
2783 
2784 out:
2785 	return err;
2786 }
2787 
2788 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2789 			   struct net_device **_dev, struct inet6_dev **idev,
2790 			   struct netlink_ext_ack *extack)
2791 {
2792 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2793 	int gwa_type = ipv6_addr_type(gw_addr);
2794 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2795 	const struct net_device *dev = *_dev;
2796 	bool need_addr_check = !dev;
2797 	int err = -EINVAL;
2798 
2799 	/* if gw_addr is local we will fail to detect this in case
2800 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2801 	 * will return already-added prefix route via interface that
2802 	 * prefix route was assigned to, which might be non-loopback.
2803 	 */
2804 	if (dev &&
2805 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2806 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2807 		goto out;
2808 	}
2809 
2810 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2811 		/* IPv6 strictly inhibits using not link-local
2812 		 * addresses as nexthop address.
2813 		 * Otherwise, router will not able to send redirects.
2814 		 * It is very good, but in some (rare!) circumstances
2815 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2816 		 * some exceptions. --ANK
2817 		 * We allow IPv4-mapped nexthops to support RFC4798-type
2818 		 * addressing
2819 		 */
2820 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2821 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
2822 			goto out;
2823 		}
2824 
2825 		if (cfg->fc_flags & RTNH_F_ONLINK)
2826 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2827 		else
2828 			err = ip6_route_check_nh(net, cfg, _dev, idev);
2829 
2830 		if (err)
2831 			goto out;
2832 	}
2833 
2834 	/* reload in case device was changed */
2835 	dev = *_dev;
2836 
2837 	err = -EINVAL;
2838 	if (!dev) {
2839 		NL_SET_ERR_MSG(extack, "Egress device not specified");
2840 		goto out;
2841 	} else if (dev->flags & IFF_LOOPBACK) {
2842 		NL_SET_ERR_MSG(extack,
2843 			       "Egress device can not be loopback device for this route");
2844 		goto out;
2845 	}
2846 
2847 	/* if we did not check gw_addr above, do so now that the
2848 	 * egress device has been resolved.
2849 	 */
2850 	if (need_addr_check &&
2851 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2852 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2853 		goto out;
2854 	}
2855 
2856 	err = 0;
2857 out:
2858 	return err;
2859 }
2860 
2861 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2862 					      gfp_t gfp_flags,
2863 					      struct netlink_ext_ack *extack)
2864 {
2865 	struct net *net = cfg->fc_nlinfo.nl_net;
2866 	struct fib6_info *rt = NULL;
2867 	struct net_device *dev = NULL;
2868 	struct inet6_dev *idev = NULL;
2869 	struct fib6_table *table;
2870 	int addr_type;
2871 	int err = -EINVAL;
2872 
2873 	/* RTF_PCPU is an internal flag; can not be set by userspace */
2874 	if (cfg->fc_flags & RTF_PCPU) {
2875 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2876 		goto out;
2877 	}
2878 
2879 	/* RTF_CACHE is an internal flag; can not be set by userspace */
2880 	if (cfg->fc_flags & RTF_CACHE) {
2881 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2882 		goto out;
2883 	}
2884 
2885 	if (cfg->fc_type > RTN_MAX) {
2886 		NL_SET_ERR_MSG(extack, "Invalid route type");
2887 		goto out;
2888 	}
2889 
2890 	if (cfg->fc_dst_len > 128) {
2891 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
2892 		goto out;
2893 	}
2894 	if (cfg->fc_src_len > 128) {
2895 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2896 		goto out;
2897 	}
2898 #ifndef CONFIG_IPV6_SUBTREES
2899 	if (cfg->fc_src_len) {
2900 		NL_SET_ERR_MSG(extack,
2901 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
2902 		goto out;
2903 	}
2904 #endif
2905 	if (cfg->fc_ifindex) {
2906 		err = -ENODEV;
2907 		dev = dev_get_by_index(net, cfg->fc_ifindex);
2908 		if (!dev)
2909 			goto out;
2910 		idev = in6_dev_get(dev);
2911 		if (!idev)
2912 			goto out;
2913 	}
2914 
2915 	if (cfg->fc_metric == 0)
2916 		cfg->fc_metric = IP6_RT_PRIO_USER;
2917 
2918 	if (cfg->fc_flags & RTNH_F_ONLINK) {
2919 		if (!dev) {
2920 			NL_SET_ERR_MSG(extack,
2921 				       "Nexthop device required for onlink");
2922 			err = -ENODEV;
2923 			goto out;
2924 		}
2925 
2926 		if (!(dev->flags & IFF_UP)) {
2927 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2928 			err = -ENETDOWN;
2929 			goto out;
2930 		}
2931 	}
2932 
2933 	err = -ENOBUFS;
2934 	if (cfg->fc_nlinfo.nlh &&
2935 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2936 		table = fib6_get_table(net, cfg->fc_table);
2937 		if (!table) {
2938 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2939 			table = fib6_new_table(net, cfg->fc_table);
2940 		}
2941 	} else {
2942 		table = fib6_new_table(net, cfg->fc_table);
2943 	}
2944 
2945 	if (!table)
2946 		goto out;
2947 
2948 	err = -ENOMEM;
2949 	rt = fib6_info_alloc(gfp_flags);
2950 	if (!rt)
2951 		goto out;
2952 
2953 	if (cfg->fc_flags & RTF_ADDRCONF)
2954 		rt->dst_nocount = true;
2955 
2956 	err = ip6_convert_metrics(net, rt, cfg);
2957 	if (err < 0)
2958 		goto out;
2959 
2960 	if (cfg->fc_flags & RTF_EXPIRES)
2961 		fib6_set_expires(rt, jiffies +
2962 				clock_t_to_jiffies(cfg->fc_expires));
2963 	else
2964 		fib6_clean_expires(rt);
2965 
2966 	if (cfg->fc_protocol == RTPROT_UNSPEC)
2967 		cfg->fc_protocol = RTPROT_BOOT;
2968 	rt->fib6_protocol = cfg->fc_protocol;
2969 
2970 	addr_type = ipv6_addr_type(&cfg->fc_dst);
2971 
2972 	if (cfg->fc_encap) {
2973 		struct lwtunnel_state *lwtstate;
2974 
2975 		err = lwtunnel_build_state(cfg->fc_encap_type,
2976 					   cfg->fc_encap, AF_INET6, cfg,
2977 					   &lwtstate, extack);
2978 		if (err)
2979 			goto out;
2980 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
2981 	}
2982 
2983 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2984 	rt->fib6_dst.plen = cfg->fc_dst_len;
2985 	if (rt->fib6_dst.plen == 128)
2986 		rt->dst_host = true;
2987 
2988 #ifdef CONFIG_IPV6_SUBTREES
2989 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
2990 	rt->fib6_src.plen = cfg->fc_src_len;
2991 #endif
2992 
2993 	rt->fib6_metric = cfg->fc_metric;
2994 	rt->fib6_nh.nh_weight = 1;
2995 
2996 	rt->fib6_type = cfg->fc_type;
2997 
2998 	/* We cannot add true routes via loopback here,
2999 	   they would result in kernel looping; promote them to reject routes
3000 	 */
3001 	if ((cfg->fc_flags & RTF_REJECT) ||
3002 	    (dev && (dev->flags & IFF_LOOPBACK) &&
3003 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3004 	     !(cfg->fc_flags & RTF_LOCAL))) {
3005 		/* hold loopback dev/idev if we haven't done so. */
3006 		if (dev != net->loopback_dev) {
3007 			if (dev) {
3008 				dev_put(dev);
3009 				in6_dev_put(idev);
3010 			}
3011 			dev = net->loopback_dev;
3012 			dev_hold(dev);
3013 			idev = in6_dev_get(dev);
3014 			if (!idev) {
3015 				err = -ENODEV;
3016 				goto out;
3017 			}
3018 		}
3019 		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3020 		goto install_route;
3021 	}
3022 
3023 	if (cfg->fc_flags & RTF_GATEWAY) {
3024 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3025 		if (err)
3026 			goto out;
3027 
3028 		rt->fib6_nh.nh_gw = cfg->fc_gateway;
3029 	}
3030 
3031 	err = -ENODEV;
3032 	if (!dev)
3033 		goto out;
3034 
3035 	if (idev->cnf.disable_ipv6) {
3036 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3037 		err = -EACCES;
3038 		goto out;
3039 	}
3040 
3041 	if (!(dev->flags & IFF_UP)) {
3042 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3043 		err = -ENETDOWN;
3044 		goto out;
3045 	}
3046 
3047 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3048 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3049 			NL_SET_ERR_MSG(extack, "Invalid source address");
3050 			err = -EINVAL;
3051 			goto out;
3052 		}
3053 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3054 		rt->fib6_prefsrc.plen = 128;
3055 	} else
3056 		rt->fib6_prefsrc.plen = 0;
3057 
3058 	rt->fib6_flags = cfg->fc_flags;
3059 
3060 install_route:
3061 	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3062 	    !netif_carrier_ok(dev))
3063 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3064 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3065 	rt->fib6_nh.nh_dev = dev;
3066 	rt->fib6_table = table;
3067 
3068 	cfg->fc_nlinfo.nl_net = dev_net(dev);
3069 
3070 	if (idev)
3071 		in6_dev_put(idev);
3072 
3073 	return rt;
3074 out:
3075 	if (dev)
3076 		dev_put(dev);
3077 	if (idev)
3078 		in6_dev_put(idev);
3079 
3080 	fib6_info_release(rt);
3081 	return ERR_PTR(err);
3082 }
3083 
3084 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3085 		  struct netlink_ext_ack *extack)
3086 {
3087 	struct fib6_info *rt;
3088 	int err;
3089 
3090 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3091 	if (IS_ERR(rt))
3092 		return PTR_ERR(rt);
3093 
3094 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3095 	fib6_info_release(rt);
3096 
3097 	return err;
3098 }
3099 
3100 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3101 {
3102 	struct net *net = info->nl_net;
3103 	struct fib6_table *table;
3104 	int err;
3105 
3106 	if (rt == net->ipv6.fib6_null_entry) {
3107 		err = -ENOENT;
3108 		goto out;
3109 	}
3110 
3111 	table = rt->fib6_table;
3112 	spin_lock_bh(&table->tb6_lock);
3113 	err = fib6_del(rt, info);
3114 	spin_unlock_bh(&table->tb6_lock);
3115 
3116 out:
3117 	fib6_info_release(rt);
3118 	return err;
3119 }
3120 
3121 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3122 {
3123 	struct nl_info info = { .nl_net = net };
3124 
3125 	return __ip6_del_rt(rt, &info);
3126 }
3127 
3128 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3129 {
3130 	struct nl_info *info = &cfg->fc_nlinfo;
3131 	struct net *net = info->nl_net;
3132 	struct sk_buff *skb = NULL;
3133 	struct fib6_table *table;
3134 	int err = -ENOENT;
3135 
3136 	if (rt == net->ipv6.fib6_null_entry)
3137 		goto out_put;
3138 	table = rt->fib6_table;
3139 	spin_lock_bh(&table->tb6_lock);
3140 
3141 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3142 		struct fib6_info *sibling, *next_sibling;
3143 
3144 		/* prefer to send a single notification with all hops */
3145 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3146 		if (skb) {
3147 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3148 
3149 			if (rt6_fill_node(net, skb, rt, NULL,
3150 					  NULL, NULL, 0, RTM_DELROUTE,
3151 					  info->portid, seq, 0) < 0) {
3152 				kfree_skb(skb);
3153 				skb = NULL;
3154 			} else
3155 				info->skip_notify = 1;
3156 		}
3157 
3158 		list_for_each_entry_safe(sibling, next_sibling,
3159 					 &rt->fib6_siblings,
3160 					 fib6_siblings) {
3161 			err = fib6_del(sibling, info);
3162 			if (err)
3163 				goto out_unlock;
3164 		}
3165 	}
3166 
3167 	err = fib6_del(rt, info);
3168 out_unlock:
3169 	spin_unlock_bh(&table->tb6_lock);
3170 out_put:
3171 	fib6_info_release(rt);
3172 
3173 	if (skb) {
3174 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3175 			    info->nlh, gfp_any());
3176 	}
3177 	return err;
3178 }
3179 
3180 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3181 {
3182 	int rc = -ESRCH;
3183 
3184 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3185 		goto out;
3186 
3187 	if (cfg->fc_flags & RTF_GATEWAY &&
3188 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3189 		goto out;
3190 	if (dst_hold_safe(&rt->dst))
3191 		rc = rt6_remove_exception_rt(rt);
3192 out:
3193 	return rc;
3194 }
3195 
3196 static int ip6_route_del(struct fib6_config *cfg,
3197 			 struct netlink_ext_ack *extack)
3198 {
3199 	struct rt6_info *rt_cache;
3200 	struct fib6_table *table;
3201 	struct fib6_info *rt;
3202 	struct fib6_node *fn;
3203 	int err = -ESRCH;
3204 
3205 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3206 	if (!table) {
3207 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3208 		return err;
3209 	}
3210 
3211 	rcu_read_lock();
3212 
3213 	fn = fib6_locate(&table->tb6_root,
3214 			 &cfg->fc_dst, cfg->fc_dst_len,
3215 			 &cfg->fc_src, cfg->fc_src_len,
3216 			 !(cfg->fc_flags & RTF_CACHE));
3217 
3218 	if (fn) {
3219 		for_each_fib6_node_rt_rcu(fn) {
3220 			if (cfg->fc_flags & RTF_CACHE) {
3221 				int rc;
3222 
3223 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3224 							      &cfg->fc_src);
3225 				if (rt_cache) {
3226 					rc = ip6_del_cached_rt(rt_cache, cfg);
3227 					if (rc != -ESRCH)
3228 						return rc;
3229 				}
3230 				continue;
3231 			}
3232 			if (cfg->fc_ifindex &&
3233 			    (!rt->fib6_nh.nh_dev ||
3234 			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3235 				continue;
3236 			if (cfg->fc_flags & RTF_GATEWAY &&
3237 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3238 				continue;
3239 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3240 				continue;
3241 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3242 				continue;
3243 			fib6_info_hold(rt);
3244 			rcu_read_unlock();
3245 
3246 			/* if gateway was specified only delete the one hop */
3247 			if (cfg->fc_flags & RTF_GATEWAY)
3248 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3249 
3250 			return __ip6_del_rt_siblings(rt, cfg);
3251 		}
3252 	}
3253 	rcu_read_unlock();
3254 
3255 	return err;
3256 }
3257 
3258 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3259 {
3260 	struct netevent_redirect netevent;
3261 	struct rt6_info *rt, *nrt = NULL;
3262 	struct ndisc_options ndopts;
3263 	struct inet6_dev *in6_dev;
3264 	struct neighbour *neigh;
3265 	struct fib6_info *from;
3266 	struct rd_msg *msg;
3267 	int optlen, on_link;
3268 	u8 *lladdr;
3269 
3270 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3271 	optlen -= sizeof(*msg);
3272 
3273 	if (optlen < 0) {
3274 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3275 		return;
3276 	}
3277 
3278 	msg = (struct rd_msg *)icmp6_hdr(skb);
3279 
3280 	if (ipv6_addr_is_multicast(&msg->dest)) {
3281 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3282 		return;
3283 	}
3284 
3285 	on_link = 0;
3286 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3287 		on_link = 1;
3288 	} else if (ipv6_addr_type(&msg->target) !=
3289 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3290 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3291 		return;
3292 	}
3293 
3294 	in6_dev = __in6_dev_get(skb->dev);
3295 	if (!in6_dev)
3296 		return;
3297 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3298 		return;
3299 
3300 	/* RFC2461 8.1:
3301 	 *	The IP source address of the Redirect MUST be the same as the current
3302 	 *	first-hop router for the specified ICMP Destination Address.
3303 	 */
3304 
3305 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3306 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3307 		return;
3308 	}
3309 
3310 	lladdr = NULL;
3311 	if (ndopts.nd_opts_tgt_lladdr) {
3312 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3313 					     skb->dev);
3314 		if (!lladdr) {
3315 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3316 			return;
3317 		}
3318 	}
3319 
3320 	rt = (struct rt6_info *) dst;
3321 	if (rt->rt6i_flags & RTF_REJECT) {
3322 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3323 		return;
3324 	}
3325 
3326 	/* Redirect received -> path was valid.
3327 	 * Look, redirects are sent only in response to data packets,
3328 	 * so that this nexthop apparently is reachable. --ANK
3329 	 */
3330 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3331 
3332 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3333 	if (!neigh)
3334 		return;
3335 
3336 	/*
3337 	 *	We have finally decided to accept it.
3338 	 */
3339 
3340 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3341 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3342 		     NEIGH_UPDATE_F_OVERRIDE|
3343 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3344 				     NEIGH_UPDATE_F_ISROUTER)),
3345 		     NDISC_REDIRECT, &ndopts);
3346 
3347 	rcu_read_lock();
3348 	from = rcu_dereference(rt->from);
3349 	fib6_info_hold(from);
3350 	rcu_read_unlock();
3351 
3352 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3353 	if (!nrt)
3354 		goto out;
3355 
3356 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3357 	if (on_link)
3358 		nrt->rt6i_flags &= ~RTF_GATEWAY;
3359 
3360 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3361 
3362 	/* No need to remove rt from the exception table if rt is
3363 	 * a cached route because rt6_insert_exception() will
3364 	 * takes care of it
3365 	 */
3366 	if (rt6_insert_exception(nrt, from)) {
3367 		dst_release_immediate(&nrt->dst);
3368 		goto out;
3369 	}
3370 
3371 	netevent.old = &rt->dst;
3372 	netevent.new = &nrt->dst;
3373 	netevent.daddr = &msg->dest;
3374 	netevent.neigh = neigh;
3375 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3376 
3377 out:
3378 	fib6_info_release(from);
3379 	neigh_release(neigh);
3380 }
3381 
3382 #ifdef CONFIG_IPV6_ROUTE_INFO
3383 static struct fib6_info *rt6_get_route_info(struct net *net,
3384 					   const struct in6_addr *prefix, int prefixlen,
3385 					   const struct in6_addr *gwaddr,
3386 					   struct net_device *dev)
3387 {
3388 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3389 	int ifindex = dev->ifindex;
3390 	struct fib6_node *fn;
3391 	struct fib6_info *rt = NULL;
3392 	struct fib6_table *table;
3393 
3394 	table = fib6_get_table(net, tb_id);
3395 	if (!table)
3396 		return NULL;
3397 
3398 	rcu_read_lock();
3399 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3400 	if (!fn)
3401 		goto out;
3402 
3403 	for_each_fib6_node_rt_rcu(fn) {
3404 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3405 			continue;
3406 		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3407 			continue;
3408 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3409 			continue;
3410 		fib6_info_hold(rt);
3411 		break;
3412 	}
3413 out:
3414 	rcu_read_unlock();
3415 	return rt;
3416 }
3417 
3418 static struct fib6_info *rt6_add_route_info(struct net *net,
3419 					   const struct in6_addr *prefix, int prefixlen,
3420 					   const struct in6_addr *gwaddr,
3421 					   struct net_device *dev,
3422 					   unsigned int pref)
3423 {
3424 	struct fib6_config cfg = {
3425 		.fc_metric	= IP6_RT_PRIO_USER,
3426 		.fc_ifindex	= dev->ifindex,
3427 		.fc_dst_len	= prefixlen,
3428 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3429 				  RTF_UP | RTF_PREF(pref),
3430 		.fc_protocol = RTPROT_RA,
3431 		.fc_type = RTN_UNICAST,
3432 		.fc_nlinfo.portid = 0,
3433 		.fc_nlinfo.nlh = NULL,
3434 		.fc_nlinfo.nl_net = net,
3435 	};
3436 
3437 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3438 	cfg.fc_dst = *prefix;
3439 	cfg.fc_gateway = *gwaddr;
3440 
3441 	/* We should treat it as a default route if prefix length is 0. */
3442 	if (!prefixlen)
3443 		cfg.fc_flags |= RTF_DEFAULT;
3444 
3445 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3446 
3447 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3448 }
3449 #endif
3450 
3451 struct fib6_info *rt6_get_dflt_router(struct net *net,
3452 				     const struct in6_addr *addr,
3453 				     struct net_device *dev)
3454 {
3455 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3456 	struct fib6_info *rt;
3457 	struct fib6_table *table;
3458 
3459 	table = fib6_get_table(net, tb_id);
3460 	if (!table)
3461 		return NULL;
3462 
3463 	rcu_read_lock();
3464 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3465 		if (dev == rt->fib6_nh.nh_dev &&
3466 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3467 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3468 			break;
3469 	}
3470 	if (rt)
3471 		fib6_info_hold(rt);
3472 	rcu_read_unlock();
3473 	return rt;
3474 }
3475 
3476 struct fib6_info *rt6_add_dflt_router(struct net *net,
3477 				     const struct in6_addr *gwaddr,
3478 				     struct net_device *dev,
3479 				     unsigned int pref)
3480 {
3481 	struct fib6_config cfg = {
3482 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3483 		.fc_metric	= IP6_RT_PRIO_USER,
3484 		.fc_ifindex	= dev->ifindex,
3485 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3486 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3487 		.fc_protocol = RTPROT_RA,
3488 		.fc_type = RTN_UNICAST,
3489 		.fc_nlinfo.portid = 0,
3490 		.fc_nlinfo.nlh = NULL,
3491 		.fc_nlinfo.nl_net = net,
3492 	};
3493 
3494 	cfg.fc_gateway = *gwaddr;
3495 
3496 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3497 		struct fib6_table *table;
3498 
3499 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3500 		if (table)
3501 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3502 	}
3503 
3504 	return rt6_get_dflt_router(net, gwaddr, dev);
3505 }
3506 
3507 static void __rt6_purge_dflt_routers(struct net *net,
3508 				     struct fib6_table *table)
3509 {
3510 	struct fib6_info *rt;
3511 
3512 restart:
3513 	rcu_read_lock();
3514 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3515 		struct net_device *dev = fib6_info_nh_dev(rt);
3516 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3517 
3518 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3519 		    (!idev || idev->cnf.accept_ra != 2)) {
3520 			fib6_info_hold(rt);
3521 			rcu_read_unlock();
3522 			ip6_del_rt(net, rt);
3523 			goto restart;
3524 		}
3525 	}
3526 	rcu_read_unlock();
3527 
3528 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3529 }
3530 
3531 void rt6_purge_dflt_routers(struct net *net)
3532 {
3533 	struct fib6_table *table;
3534 	struct hlist_head *head;
3535 	unsigned int h;
3536 
3537 	rcu_read_lock();
3538 
3539 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3540 		head = &net->ipv6.fib_table_hash[h];
3541 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3542 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3543 				__rt6_purge_dflt_routers(net, table);
3544 		}
3545 	}
3546 
3547 	rcu_read_unlock();
3548 }
3549 
3550 static void rtmsg_to_fib6_config(struct net *net,
3551 				 struct in6_rtmsg *rtmsg,
3552 				 struct fib6_config *cfg)
3553 {
3554 	memset(cfg, 0, sizeof(*cfg));
3555 
3556 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3557 			 : RT6_TABLE_MAIN;
3558 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3559 	cfg->fc_metric = rtmsg->rtmsg_metric;
3560 	cfg->fc_expires = rtmsg->rtmsg_info;
3561 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3562 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3563 	cfg->fc_flags = rtmsg->rtmsg_flags;
3564 	cfg->fc_type = rtmsg->rtmsg_type;
3565 
3566 	cfg->fc_nlinfo.nl_net = net;
3567 
3568 	cfg->fc_dst = rtmsg->rtmsg_dst;
3569 	cfg->fc_src = rtmsg->rtmsg_src;
3570 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3571 }
3572 
3573 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3574 {
3575 	struct fib6_config cfg;
3576 	struct in6_rtmsg rtmsg;
3577 	int err;
3578 
3579 	switch (cmd) {
3580 	case SIOCADDRT:		/* Add a route */
3581 	case SIOCDELRT:		/* Delete a route */
3582 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3583 			return -EPERM;
3584 		err = copy_from_user(&rtmsg, arg,
3585 				     sizeof(struct in6_rtmsg));
3586 		if (err)
3587 			return -EFAULT;
3588 
3589 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3590 
3591 		rtnl_lock();
3592 		switch (cmd) {
3593 		case SIOCADDRT:
3594 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3595 			break;
3596 		case SIOCDELRT:
3597 			err = ip6_route_del(&cfg, NULL);
3598 			break;
3599 		default:
3600 			err = -EINVAL;
3601 		}
3602 		rtnl_unlock();
3603 
3604 		return err;
3605 	}
3606 
3607 	return -EINVAL;
3608 }
3609 
3610 /*
3611  *	Drop the packet on the floor
3612  */
3613 
3614 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3615 {
3616 	int type;
3617 	struct dst_entry *dst = skb_dst(skb);
3618 	switch (ipstats_mib_noroutes) {
3619 	case IPSTATS_MIB_INNOROUTES:
3620 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3621 		if (type == IPV6_ADDR_ANY) {
3622 			IP6_INC_STATS(dev_net(dst->dev),
3623 				      __in6_dev_get_safely(skb->dev),
3624 				      IPSTATS_MIB_INADDRERRORS);
3625 			break;
3626 		}
3627 		/* FALLTHROUGH */
3628 	case IPSTATS_MIB_OUTNOROUTES:
3629 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3630 			      ipstats_mib_noroutes);
3631 		break;
3632 	}
3633 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3634 	kfree_skb(skb);
3635 	return 0;
3636 }
3637 
3638 static int ip6_pkt_discard(struct sk_buff *skb)
3639 {
3640 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3641 }
3642 
3643 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3644 {
3645 	skb->dev = skb_dst(skb)->dev;
3646 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3647 }
3648 
3649 static int ip6_pkt_prohibit(struct sk_buff *skb)
3650 {
3651 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3652 }
3653 
3654 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3655 {
3656 	skb->dev = skb_dst(skb)->dev;
3657 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3658 }
3659 
3660 /*
3661  *	Allocate a dst for local (unicast / anycast) address.
3662  */
3663 
3664 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3665 				     struct inet6_dev *idev,
3666 				     const struct in6_addr *addr,
3667 				     bool anycast, gfp_t gfp_flags)
3668 {
3669 	u32 tb_id;
3670 	struct net_device *dev = idev->dev;
3671 	struct fib6_info *f6i;
3672 
3673 	f6i = fib6_info_alloc(gfp_flags);
3674 	if (!f6i)
3675 		return ERR_PTR(-ENOMEM);
3676 
3677 	f6i->dst_nocount = true;
3678 	f6i->dst_host = true;
3679 	f6i->fib6_protocol = RTPROT_KERNEL;
3680 	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3681 	if (anycast) {
3682 		f6i->fib6_type = RTN_ANYCAST;
3683 		f6i->fib6_flags |= RTF_ANYCAST;
3684 	} else {
3685 		f6i->fib6_type = RTN_LOCAL;
3686 		f6i->fib6_flags |= RTF_LOCAL;
3687 	}
3688 
3689 	f6i->fib6_nh.nh_gw = *addr;
3690 	dev_hold(dev);
3691 	f6i->fib6_nh.nh_dev = dev;
3692 	f6i->fib6_dst.addr = *addr;
3693 	f6i->fib6_dst.plen = 128;
3694 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3695 	f6i->fib6_table = fib6_get_table(net, tb_id);
3696 
3697 	return f6i;
3698 }
3699 
3700 /* remove deleted ip from prefsrc entries */
3701 struct arg_dev_net_ip {
3702 	struct net_device *dev;
3703 	struct net *net;
3704 	struct in6_addr *addr;
3705 };
3706 
3707 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3708 {
3709 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3710 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3711 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3712 
3713 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3714 	    rt != net->ipv6.fib6_null_entry &&
3715 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3716 		spin_lock_bh(&rt6_exception_lock);
3717 		/* remove prefsrc entry */
3718 		rt->fib6_prefsrc.plen = 0;
3719 		/* need to update cache as well */
3720 		rt6_exceptions_remove_prefsrc(rt);
3721 		spin_unlock_bh(&rt6_exception_lock);
3722 	}
3723 	return 0;
3724 }
3725 
3726 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3727 {
3728 	struct net *net = dev_net(ifp->idev->dev);
3729 	struct arg_dev_net_ip adni = {
3730 		.dev = ifp->idev->dev,
3731 		.net = net,
3732 		.addr = &ifp->addr,
3733 	};
3734 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3735 }
3736 
3737 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3738 
3739 /* Remove routers and update dst entries when gateway turn into host. */
3740 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3741 {
3742 	struct in6_addr *gateway = (struct in6_addr *)arg;
3743 
3744 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3745 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3746 		return -1;
3747 	}
3748 
3749 	/* Further clean up cached routes in exception table.
3750 	 * This is needed because cached route may have a different
3751 	 * gateway than its 'parent' in the case of an ip redirect.
3752 	 */
3753 	rt6_exceptions_clean_tohost(rt, gateway);
3754 
3755 	return 0;
3756 }
3757 
3758 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3759 {
3760 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3761 }
3762 
3763 struct arg_netdev_event {
3764 	const struct net_device *dev;
3765 	union {
3766 		unsigned int nh_flags;
3767 		unsigned long event;
3768 	};
3769 };
3770 
3771 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3772 {
3773 	struct fib6_info *iter;
3774 	struct fib6_node *fn;
3775 
3776 	fn = rcu_dereference_protected(rt->fib6_node,
3777 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3778 	iter = rcu_dereference_protected(fn->leaf,
3779 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3780 	while (iter) {
3781 		if (iter->fib6_metric == rt->fib6_metric &&
3782 		    rt6_qualify_for_ecmp(iter))
3783 			return iter;
3784 		iter = rcu_dereference_protected(iter->fib6_next,
3785 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3786 	}
3787 
3788 	return NULL;
3789 }
3790 
3791 static bool rt6_is_dead(const struct fib6_info *rt)
3792 {
3793 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3794 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3795 	     fib6_ignore_linkdown(rt)))
3796 		return true;
3797 
3798 	return false;
3799 }
3800 
3801 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3802 {
3803 	struct fib6_info *iter;
3804 	int total = 0;
3805 
3806 	if (!rt6_is_dead(rt))
3807 		total += rt->fib6_nh.nh_weight;
3808 
3809 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3810 		if (!rt6_is_dead(iter))
3811 			total += iter->fib6_nh.nh_weight;
3812 	}
3813 
3814 	return total;
3815 }
3816 
3817 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3818 {
3819 	int upper_bound = -1;
3820 
3821 	if (!rt6_is_dead(rt)) {
3822 		*weight += rt->fib6_nh.nh_weight;
3823 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3824 						    total) - 1;
3825 	}
3826 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3827 }
3828 
3829 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3830 {
3831 	struct fib6_info *iter;
3832 	int weight = 0;
3833 
3834 	rt6_upper_bound_set(rt, &weight, total);
3835 
3836 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3837 		rt6_upper_bound_set(iter, &weight, total);
3838 }
3839 
3840 void rt6_multipath_rebalance(struct fib6_info *rt)
3841 {
3842 	struct fib6_info *first;
3843 	int total;
3844 
3845 	/* In case the entire multipath route was marked for flushing,
3846 	 * then there is no need to rebalance upon the removal of every
3847 	 * sibling route.
3848 	 */
3849 	if (!rt->fib6_nsiblings || rt->should_flush)
3850 		return;
3851 
3852 	/* During lookup routes are evaluated in order, so we need to
3853 	 * make sure upper bounds are assigned from the first sibling
3854 	 * onwards.
3855 	 */
3856 	first = rt6_multipath_first_sibling(rt);
3857 	if (WARN_ON_ONCE(!first))
3858 		return;
3859 
3860 	total = rt6_multipath_total_weight(first);
3861 	rt6_multipath_upper_bound_set(first, total);
3862 }
3863 
3864 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3865 {
3866 	const struct arg_netdev_event *arg = p_arg;
3867 	struct net *net = dev_net(arg->dev);
3868 
3869 	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3870 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3871 		fib6_update_sernum_upto_root(net, rt);
3872 		rt6_multipath_rebalance(rt);
3873 	}
3874 
3875 	return 0;
3876 }
3877 
3878 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3879 {
3880 	struct arg_netdev_event arg = {
3881 		.dev = dev,
3882 		{
3883 			.nh_flags = nh_flags,
3884 		},
3885 	};
3886 
3887 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3888 		arg.nh_flags |= RTNH_F_LINKDOWN;
3889 
3890 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3891 }
3892 
3893 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3894 				   const struct net_device *dev)
3895 {
3896 	struct fib6_info *iter;
3897 
3898 	if (rt->fib6_nh.nh_dev == dev)
3899 		return true;
3900 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3901 		if (iter->fib6_nh.nh_dev == dev)
3902 			return true;
3903 
3904 	return false;
3905 }
3906 
3907 static void rt6_multipath_flush(struct fib6_info *rt)
3908 {
3909 	struct fib6_info *iter;
3910 
3911 	rt->should_flush = 1;
3912 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3913 		iter->should_flush = 1;
3914 }
3915 
3916 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3917 					     const struct net_device *down_dev)
3918 {
3919 	struct fib6_info *iter;
3920 	unsigned int dead = 0;
3921 
3922 	if (rt->fib6_nh.nh_dev == down_dev ||
3923 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3924 		dead++;
3925 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3926 		if (iter->fib6_nh.nh_dev == down_dev ||
3927 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3928 			dead++;
3929 
3930 	return dead;
3931 }
3932 
3933 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3934 				       const struct net_device *dev,
3935 				       unsigned int nh_flags)
3936 {
3937 	struct fib6_info *iter;
3938 
3939 	if (rt->fib6_nh.nh_dev == dev)
3940 		rt->fib6_nh.nh_flags |= nh_flags;
3941 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3942 		if (iter->fib6_nh.nh_dev == dev)
3943 			iter->fib6_nh.nh_flags |= nh_flags;
3944 }
3945 
3946 /* called with write lock held for table with rt */
3947 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3948 {
3949 	const struct arg_netdev_event *arg = p_arg;
3950 	const struct net_device *dev = arg->dev;
3951 	struct net *net = dev_net(dev);
3952 
3953 	if (rt == net->ipv6.fib6_null_entry)
3954 		return 0;
3955 
3956 	switch (arg->event) {
3957 	case NETDEV_UNREGISTER:
3958 		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3959 	case NETDEV_DOWN:
3960 		if (rt->should_flush)
3961 			return -1;
3962 		if (!rt->fib6_nsiblings)
3963 			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3964 		if (rt6_multipath_uses_dev(rt, dev)) {
3965 			unsigned int count;
3966 
3967 			count = rt6_multipath_dead_count(rt, dev);
3968 			if (rt->fib6_nsiblings + 1 == count) {
3969 				rt6_multipath_flush(rt);
3970 				return -1;
3971 			}
3972 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3973 						   RTNH_F_LINKDOWN);
3974 			fib6_update_sernum(net, rt);
3975 			rt6_multipath_rebalance(rt);
3976 		}
3977 		return -2;
3978 	case NETDEV_CHANGE:
3979 		if (rt->fib6_nh.nh_dev != dev ||
3980 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
3981 			break;
3982 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3983 		rt6_multipath_rebalance(rt);
3984 		break;
3985 	}
3986 
3987 	return 0;
3988 }
3989 
3990 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
3991 {
3992 	struct arg_netdev_event arg = {
3993 		.dev = dev,
3994 		{
3995 			.event = event,
3996 		},
3997 	};
3998 
3999 	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4000 }
4001 
4002 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4003 {
4004 	rt6_sync_down_dev(dev, event);
4005 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
4006 	neigh_ifdown(&nd_tbl, dev);
4007 }
4008 
4009 struct rt6_mtu_change_arg {
4010 	struct net_device *dev;
4011 	unsigned int mtu;
4012 };
4013 
4014 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4015 {
4016 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4017 	struct inet6_dev *idev;
4018 
4019 	/* In IPv6 pmtu discovery is not optional,
4020 	   so that RTAX_MTU lock cannot disable it.
4021 	   We still use this lock to block changes
4022 	   caused by addrconf/ndisc.
4023 	*/
4024 
4025 	idev = __in6_dev_get(arg->dev);
4026 	if (!idev)
4027 		return 0;
4028 
4029 	/* For administrative MTU increase, there is no way to discover
4030 	   IPv6 PMTU increase, so PMTU increase should be updated here.
4031 	   Since RFC 1981 doesn't include administrative MTU increase
4032 	   update PMTU increase is a MUST. (i.e. jumbo frame)
4033 	 */
4034 	if (rt->fib6_nh.nh_dev == arg->dev &&
4035 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4036 		u32 mtu = rt->fib6_pmtu;
4037 
4038 		if (mtu >= arg->mtu ||
4039 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4040 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4041 
4042 		spin_lock_bh(&rt6_exception_lock);
4043 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4044 		spin_unlock_bh(&rt6_exception_lock);
4045 	}
4046 	return 0;
4047 }
4048 
4049 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4050 {
4051 	struct rt6_mtu_change_arg arg = {
4052 		.dev = dev,
4053 		.mtu = mtu,
4054 	};
4055 
4056 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4057 }
4058 
4059 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4060 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4061 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4062 	[RTA_OIF]               = { .type = NLA_U32 },
4063 	[RTA_IIF]		= { .type = NLA_U32 },
4064 	[RTA_PRIORITY]          = { .type = NLA_U32 },
4065 	[RTA_METRICS]           = { .type = NLA_NESTED },
4066 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4067 	[RTA_PREF]              = { .type = NLA_U8 },
4068 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4069 	[RTA_ENCAP]		= { .type = NLA_NESTED },
4070 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4071 	[RTA_UID]		= { .type = NLA_U32 },
4072 	[RTA_MARK]		= { .type = NLA_U32 },
4073 	[RTA_TABLE]		= { .type = NLA_U32 },
4074 };
4075 
4076 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4077 			      struct fib6_config *cfg,
4078 			      struct netlink_ext_ack *extack)
4079 {
4080 	struct rtmsg *rtm;
4081 	struct nlattr *tb[RTA_MAX+1];
4082 	unsigned int pref;
4083 	int err;
4084 
4085 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4086 			  NULL);
4087 	if (err < 0)
4088 		goto errout;
4089 
4090 	err = -EINVAL;
4091 	rtm = nlmsg_data(nlh);
4092 	memset(cfg, 0, sizeof(*cfg));
4093 
4094 	cfg->fc_table = rtm->rtm_table;
4095 	cfg->fc_dst_len = rtm->rtm_dst_len;
4096 	cfg->fc_src_len = rtm->rtm_src_len;
4097 	cfg->fc_flags = RTF_UP;
4098 	cfg->fc_protocol = rtm->rtm_protocol;
4099 	cfg->fc_type = rtm->rtm_type;
4100 
4101 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4102 	    rtm->rtm_type == RTN_BLACKHOLE ||
4103 	    rtm->rtm_type == RTN_PROHIBIT ||
4104 	    rtm->rtm_type == RTN_THROW)
4105 		cfg->fc_flags |= RTF_REJECT;
4106 
4107 	if (rtm->rtm_type == RTN_LOCAL)
4108 		cfg->fc_flags |= RTF_LOCAL;
4109 
4110 	if (rtm->rtm_flags & RTM_F_CLONED)
4111 		cfg->fc_flags |= RTF_CACHE;
4112 
4113 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4114 
4115 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
4116 	cfg->fc_nlinfo.nlh = nlh;
4117 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
4118 
4119 	if (tb[RTA_GATEWAY]) {
4120 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4121 		cfg->fc_flags |= RTF_GATEWAY;
4122 	}
4123 
4124 	if (tb[RTA_DST]) {
4125 		int plen = (rtm->rtm_dst_len + 7) >> 3;
4126 
4127 		if (nla_len(tb[RTA_DST]) < plen)
4128 			goto errout;
4129 
4130 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4131 	}
4132 
4133 	if (tb[RTA_SRC]) {
4134 		int plen = (rtm->rtm_src_len + 7) >> 3;
4135 
4136 		if (nla_len(tb[RTA_SRC]) < plen)
4137 			goto errout;
4138 
4139 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4140 	}
4141 
4142 	if (tb[RTA_PREFSRC])
4143 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4144 
4145 	if (tb[RTA_OIF])
4146 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4147 
4148 	if (tb[RTA_PRIORITY])
4149 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4150 
4151 	if (tb[RTA_METRICS]) {
4152 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4153 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4154 	}
4155 
4156 	if (tb[RTA_TABLE])
4157 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4158 
4159 	if (tb[RTA_MULTIPATH]) {
4160 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4161 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4162 
4163 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4164 						     cfg->fc_mp_len, extack);
4165 		if (err < 0)
4166 			goto errout;
4167 	}
4168 
4169 	if (tb[RTA_PREF]) {
4170 		pref = nla_get_u8(tb[RTA_PREF]);
4171 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4172 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4173 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4174 		cfg->fc_flags |= RTF_PREF(pref);
4175 	}
4176 
4177 	if (tb[RTA_ENCAP])
4178 		cfg->fc_encap = tb[RTA_ENCAP];
4179 
4180 	if (tb[RTA_ENCAP_TYPE]) {
4181 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4182 
4183 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4184 		if (err < 0)
4185 			goto errout;
4186 	}
4187 
4188 	if (tb[RTA_EXPIRES]) {
4189 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4190 
4191 		if (addrconf_finite_timeout(timeout)) {
4192 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4193 			cfg->fc_flags |= RTF_EXPIRES;
4194 		}
4195 	}
4196 
4197 	err = 0;
4198 errout:
4199 	return err;
4200 }
4201 
4202 struct rt6_nh {
4203 	struct fib6_info *fib6_info;
4204 	struct fib6_config r_cfg;
4205 	struct list_head next;
4206 };
4207 
4208 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4209 {
4210 	struct rt6_nh *nh;
4211 
4212 	list_for_each_entry(nh, rt6_nh_list, next) {
4213 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4214 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4215 		        nh->r_cfg.fc_ifindex);
4216 	}
4217 }
4218 
4219 static int ip6_route_info_append(struct net *net,
4220 				 struct list_head *rt6_nh_list,
4221 				 struct fib6_info *rt,
4222 				 struct fib6_config *r_cfg)
4223 {
4224 	struct rt6_nh *nh;
4225 	int err = -EEXIST;
4226 
4227 	list_for_each_entry(nh, rt6_nh_list, next) {
4228 		/* check if fib6_info already exists */
4229 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4230 			return err;
4231 	}
4232 
4233 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4234 	if (!nh)
4235 		return -ENOMEM;
4236 	nh->fib6_info = rt;
4237 	err = ip6_convert_metrics(net, rt, r_cfg);
4238 	if (err) {
4239 		kfree(nh);
4240 		return err;
4241 	}
4242 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4243 	list_add_tail(&nh->next, rt6_nh_list);
4244 
4245 	return 0;
4246 }
4247 
4248 static void ip6_route_mpath_notify(struct fib6_info *rt,
4249 				   struct fib6_info *rt_last,
4250 				   struct nl_info *info,
4251 				   __u16 nlflags)
4252 {
4253 	/* if this is an APPEND route, then rt points to the first route
4254 	 * inserted and rt_last points to last route inserted. Userspace
4255 	 * wants a consistent dump of the route which starts at the first
4256 	 * nexthop. Since sibling routes are always added at the end of
4257 	 * the list, find the first sibling of the last route appended
4258 	 */
4259 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4260 		rt = list_first_entry(&rt_last->fib6_siblings,
4261 				      struct fib6_info,
4262 				      fib6_siblings);
4263 	}
4264 
4265 	if (rt)
4266 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4267 }
4268 
4269 static int ip6_route_multipath_add(struct fib6_config *cfg,
4270 				   struct netlink_ext_ack *extack)
4271 {
4272 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4273 	struct nl_info *info = &cfg->fc_nlinfo;
4274 	struct fib6_config r_cfg;
4275 	struct rtnexthop *rtnh;
4276 	struct fib6_info *rt;
4277 	struct rt6_nh *err_nh;
4278 	struct rt6_nh *nh, *nh_safe;
4279 	__u16 nlflags;
4280 	int remaining;
4281 	int attrlen;
4282 	int err = 1;
4283 	int nhn = 0;
4284 	int replace = (cfg->fc_nlinfo.nlh &&
4285 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4286 	LIST_HEAD(rt6_nh_list);
4287 
4288 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4289 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4290 		nlflags |= NLM_F_APPEND;
4291 
4292 	remaining = cfg->fc_mp_len;
4293 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4294 
4295 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
4296 	 * fib6_info structs per nexthop
4297 	 */
4298 	while (rtnh_ok(rtnh, remaining)) {
4299 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4300 		if (rtnh->rtnh_ifindex)
4301 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4302 
4303 		attrlen = rtnh_attrlen(rtnh);
4304 		if (attrlen > 0) {
4305 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4306 
4307 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4308 			if (nla) {
4309 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
4310 				r_cfg.fc_flags |= RTF_GATEWAY;
4311 			}
4312 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4313 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4314 			if (nla)
4315 				r_cfg.fc_encap_type = nla_get_u16(nla);
4316 		}
4317 
4318 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4319 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4320 		if (IS_ERR(rt)) {
4321 			err = PTR_ERR(rt);
4322 			rt = NULL;
4323 			goto cleanup;
4324 		}
4325 
4326 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4327 
4328 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4329 					    rt, &r_cfg);
4330 		if (err) {
4331 			fib6_info_release(rt);
4332 			goto cleanup;
4333 		}
4334 
4335 		rtnh = rtnh_next(rtnh, &remaining);
4336 	}
4337 
4338 	/* for add and replace send one notification with all nexthops.
4339 	 * Skip the notification in fib6_add_rt2node and send one with
4340 	 * the full route when done
4341 	 */
4342 	info->skip_notify = 1;
4343 
4344 	err_nh = NULL;
4345 	list_for_each_entry(nh, &rt6_nh_list, next) {
4346 		rt_last = nh->fib6_info;
4347 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
4348 		fib6_info_release(nh->fib6_info);
4349 
4350 		/* save reference to first route for notification */
4351 		if (!rt_notif && !err)
4352 			rt_notif = nh->fib6_info;
4353 
4354 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
4355 		nh->fib6_info = NULL;
4356 		if (err) {
4357 			if (replace && nhn)
4358 				ip6_print_replace_route_err(&rt6_nh_list);
4359 			err_nh = nh;
4360 			goto add_errout;
4361 		}
4362 
4363 		/* Because each route is added like a single route we remove
4364 		 * these flags after the first nexthop: if there is a collision,
4365 		 * we have already failed to add the first nexthop:
4366 		 * fib6_add_rt2node() has rejected it; when replacing, old
4367 		 * nexthops have been replaced by first new, the rest should
4368 		 * be added to it.
4369 		 */
4370 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4371 						     NLM_F_REPLACE);
4372 		nhn++;
4373 	}
4374 
4375 	/* success ... tell user about new route */
4376 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4377 	goto cleanup;
4378 
4379 add_errout:
4380 	/* send notification for routes that were added so that
4381 	 * the delete notifications sent by ip6_route_del are
4382 	 * coherent
4383 	 */
4384 	if (rt_notif)
4385 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4386 
4387 	/* Delete routes that were already added */
4388 	list_for_each_entry(nh, &rt6_nh_list, next) {
4389 		if (err_nh == nh)
4390 			break;
4391 		ip6_route_del(&nh->r_cfg, extack);
4392 	}
4393 
4394 cleanup:
4395 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4396 		if (nh->fib6_info)
4397 			fib6_info_release(nh->fib6_info);
4398 		list_del(&nh->next);
4399 		kfree(nh);
4400 	}
4401 
4402 	return err;
4403 }
4404 
4405 static int ip6_route_multipath_del(struct fib6_config *cfg,
4406 				   struct netlink_ext_ack *extack)
4407 {
4408 	struct fib6_config r_cfg;
4409 	struct rtnexthop *rtnh;
4410 	int remaining;
4411 	int attrlen;
4412 	int err = 1, last_err = 0;
4413 
4414 	remaining = cfg->fc_mp_len;
4415 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4416 
4417 	/* Parse a Multipath Entry */
4418 	while (rtnh_ok(rtnh, remaining)) {
4419 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4420 		if (rtnh->rtnh_ifindex)
4421 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4422 
4423 		attrlen = rtnh_attrlen(rtnh);
4424 		if (attrlen > 0) {
4425 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4426 
4427 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4428 			if (nla) {
4429 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4430 				r_cfg.fc_flags |= RTF_GATEWAY;
4431 			}
4432 		}
4433 		err = ip6_route_del(&r_cfg, extack);
4434 		if (err)
4435 			last_err = err;
4436 
4437 		rtnh = rtnh_next(rtnh, &remaining);
4438 	}
4439 
4440 	return last_err;
4441 }
4442 
4443 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4444 			      struct netlink_ext_ack *extack)
4445 {
4446 	struct fib6_config cfg;
4447 	int err;
4448 
4449 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4450 	if (err < 0)
4451 		return err;
4452 
4453 	if (cfg.fc_mp)
4454 		return ip6_route_multipath_del(&cfg, extack);
4455 	else {
4456 		cfg.fc_delete_all_nh = 1;
4457 		return ip6_route_del(&cfg, extack);
4458 	}
4459 }
4460 
4461 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4462 			      struct netlink_ext_ack *extack)
4463 {
4464 	struct fib6_config cfg;
4465 	int err;
4466 
4467 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4468 	if (err < 0)
4469 		return err;
4470 
4471 	if (cfg.fc_mp)
4472 		return ip6_route_multipath_add(&cfg, extack);
4473 	else
4474 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
4475 }
4476 
4477 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4478 {
4479 	int nexthop_len = 0;
4480 
4481 	if (rt->fib6_nsiblings) {
4482 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4483 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4484 			    + nla_total_size(16) /* RTA_GATEWAY */
4485 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4486 
4487 		nexthop_len *= rt->fib6_nsiblings;
4488 	}
4489 
4490 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4491 	       + nla_total_size(16) /* RTA_SRC */
4492 	       + nla_total_size(16) /* RTA_DST */
4493 	       + nla_total_size(16) /* RTA_GATEWAY */
4494 	       + nla_total_size(16) /* RTA_PREFSRC */
4495 	       + nla_total_size(4) /* RTA_TABLE */
4496 	       + nla_total_size(4) /* RTA_IIF */
4497 	       + nla_total_size(4) /* RTA_OIF */
4498 	       + nla_total_size(4) /* RTA_PRIORITY */
4499 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4500 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4501 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4502 	       + nla_total_size(1) /* RTA_PREF */
4503 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4504 	       + nexthop_len;
4505 }
4506 
4507 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4508 			    unsigned int *flags, bool skip_oif)
4509 {
4510 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4511 		*flags |= RTNH_F_DEAD;
4512 
4513 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4514 		*flags |= RTNH_F_LINKDOWN;
4515 
4516 		rcu_read_lock();
4517 		if (fib6_ignore_linkdown(rt))
4518 			*flags |= RTNH_F_DEAD;
4519 		rcu_read_unlock();
4520 	}
4521 
4522 	if (rt->fib6_flags & RTF_GATEWAY) {
4523 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4524 			goto nla_put_failure;
4525 	}
4526 
4527 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4528 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4529 		*flags |= RTNH_F_OFFLOAD;
4530 
4531 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
4532 	if (!skip_oif && rt->fib6_nh.nh_dev &&
4533 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4534 		goto nla_put_failure;
4535 
4536 	if (rt->fib6_nh.nh_lwtstate &&
4537 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4538 		goto nla_put_failure;
4539 
4540 	return 0;
4541 
4542 nla_put_failure:
4543 	return -EMSGSIZE;
4544 }
4545 
4546 /* add multipath next hop */
4547 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4548 {
4549 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4550 	struct rtnexthop *rtnh;
4551 	unsigned int flags = 0;
4552 
4553 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4554 	if (!rtnh)
4555 		goto nla_put_failure;
4556 
4557 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4558 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4559 
4560 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4561 		goto nla_put_failure;
4562 
4563 	rtnh->rtnh_flags = flags;
4564 
4565 	/* length of rtnetlink header + attributes */
4566 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4567 
4568 	return 0;
4569 
4570 nla_put_failure:
4571 	return -EMSGSIZE;
4572 }
4573 
4574 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4575 			 struct fib6_info *rt, struct dst_entry *dst,
4576 			 struct in6_addr *dest, struct in6_addr *src,
4577 			 int iif, int type, u32 portid, u32 seq,
4578 			 unsigned int flags)
4579 {
4580 	struct rtmsg *rtm;
4581 	struct nlmsghdr *nlh;
4582 	long expires = 0;
4583 	u32 *pmetrics;
4584 	u32 table;
4585 
4586 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4587 	if (!nlh)
4588 		return -EMSGSIZE;
4589 
4590 	rtm = nlmsg_data(nlh);
4591 	rtm->rtm_family = AF_INET6;
4592 	rtm->rtm_dst_len = rt->fib6_dst.plen;
4593 	rtm->rtm_src_len = rt->fib6_src.plen;
4594 	rtm->rtm_tos = 0;
4595 	if (rt->fib6_table)
4596 		table = rt->fib6_table->tb6_id;
4597 	else
4598 		table = RT6_TABLE_UNSPEC;
4599 	rtm->rtm_table = table;
4600 	if (nla_put_u32(skb, RTA_TABLE, table))
4601 		goto nla_put_failure;
4602 
4603 	rtm->rtm_type = rt->fib6_type;
4604 	rtm->rtm_flags = 0;
4605 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4606 	rtm->rtm_protocol = rt->fib6_protocol;
4607 
4608 	if (rt->fib6_flags & RTF_CACHE)
4609 		rtm->rtm_flags |= RTM_F_CLONED;
4610 
4611 	if (dest) {
4612 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4613 			goto nla_put_failure;
4614 		rtm->rtm_dst_len = 128;
4615 	} else if (rtm->rtm_dst_len)
4616 		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
4617 			goto nla_put_failure;
4618 #ifdef CONFIG_IPV6_SUBTREES
4619 	if (src) {
4620 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4621 			goto nla_put_failure;
4622 		rtm->rtm_src_len = 128;
4623 	} else if (rtm->rtm_src_len &&
4624 		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
4625 		goto nla_put_failure;
4626 #endif
4627 	if (iif) {
4628 #ifdef CONFIG_IPV6_MROUTE
4629 		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
4630 			int err = ip6mr_get_route(net, skb, rtm, portid);
4631 
4632 			if (err == 0)
4633 				return 0;
4634 			if (err < 0)
4635 				goto nla_put_failure;
4636 		} else
4637 #endif
4638 			if (nla_put_u32(skb, RTA_IIF, iif))
4639 				goto nla_put_failure;
4640 	} else if (dest) {
4641 		struct in6_addr saddr_buf;
4642 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4643 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4644 			goto nla_put_failure;
4645 	}
4646 
4647 	if (rt->fib6_prefsrc.plen) {
4648 		struct in6_addr saddr_buf;
4649 		saddr_buf = rt->fib6_prefsrc.addr;
4650 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4651 			goto nla_put_failure;
4652 	}
4653 
4654 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4655 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4656 		goto nla_put_failure;
4657 
4658 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4659 		goto nla_put_failure;
4660 
4661 	/* For multipath routes, walk the siblings list and add
4662 	 * each as a nexthop within RTA_MULTIPATH.
4663 	 */
4664 	if (rt->fib6_nsiblings) {
4665 		struct fib6_info *sibling, *next_sibling;
4666 		struct nlattr *mp;
4667 
4668 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4669 		if (!mp)
4670 			goto nla_put_failure;
4671 
4672 		if (rt6_add_nexthop(skb, rt) < 0)
4673 			goto nla_put_failure;
4674 
4675 		list_for_each_entry_safe(sibling, next_sibling,
4676 					 &rt->fib6_siblings, fib6_siblings) {
4677 			if (rt6_add_nexthop(skb, sibling) < 0)
4678 				goto nla_put_failure;
4679 		}
4680 
4681 		nla_nest_end(skb, mp);
4682 	} else {
4683 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4684 			goto nla_put_failure;
4685 	}
4686 
4687 	if (rt->fib6_flags & RTF_EXPIRES) {
4688 		expires = dst ? dst->expires : rt->expires;
4689 		expires -= jiffies;
4690 	}
4691 
4692 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4693 		goto nla_put_failure;
4694 
4695 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
4696 		goto nla_put_failure;
4697 
4698 
4699 	nlmsg_end(skb, nlh);
4700 	return 0;
4701 
4702 nla_put_failure:
4703 	nlmsg_cancel(skb, nlh);
4704 	return -EMSGSIZE;
4705 }
4706 
4707 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4708 {
4709 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4710 	struct net *net = arg->net;
4711 
4712 	if (rt == net->ipv6.fib6_null_entry)
4713 		return 0;
4714 
4715 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4716 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4717 
4718 		/* user wants prefix routes only */
4719 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4720 		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4721 			/* success since this is not a prefix route */
4722 			return 1;
4723 		}
4724 	}
4725 
4726 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4727 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4728 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
4729 }
4730 
4731 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4732 			      struct netlink_ext_ack *extack)
4733 {
4734 	struct net *net = sock_net(in_skb->sk);
4735 	struct nlattr *tb[RTA_MAX+1];
4736 	int err, iif = 0, oif = 0;
4737 	struct fib6_info *from;
4738 	struct dst_entry *dst;
4739 	struct rt6_info *rt;
4740 	struct sk_buff *skb;
4741 	struct rtmsg *rtm;
4742 	struct flowi6 fl6;
4743 	bool fibmatch;
4744 
4745 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4746 			  extack);
4747 	if (err < 0)
4748 		goto errout;
4749 
4750 	err = -EINVAL;
4751 	memset(&fl6, 0, sizeof(fl6));
4752 	rtm = nlmsg_data(nlh);
4753 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4754 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4755 
4756 	if (tb[RTA_SRC]) {
4757 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4758 			goto errout;
4759 
4760 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4761 	}
4762 
4763 	if (tb[RTA_DST]) {
4764 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4765 			goto errout;
4766 
4767 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4768 	}
4769 
4770 	if (tb[RTA_IIF])
4771 		iif = nla_get_u32(tb[RTA_IIF]);
4772 
4773 	if (tb[RTA_OIF])
4774 		oif = nla_get_u32(tb[RTA_OIF]);
4775 
4776 	if (tb[RTA_MARK])
4777 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4778 
4779 	if (tb[RTA_UID])
4780 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4781 					   nla_get_u32(tb[RTA_UID]));
4782 	else
4783 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4784 
4785 	if (iif) {
4786 		struct net_device *dev;
4787 		int flags = 0;
4788 
4789 		rcu_read_lock();
4790 
4791 		dev = dev_get_by_index_rcu(net, iif);
4792 		if (!dev) {
4793 			rcu_read_unlock();
4794 			err = -ENODEV;
4795 			goto errout;
4796 		}
4797 
4798 		fl6.flowi6_iif = iif;
4799 
4800 		if (!ipv6_addr_any(&fl6.saddr))
4801 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4802 
4803 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4804 
4805 		rcu_read_unlock();
4806 	} else {
4807 		fl6.flowi6_oif = oif;
4808 
4809 		dst = ip6_route_output(net, NULL, &fl6);
4810 	}
4811 
4812 
4813 	rt = container_of(dst, struct rt6_info, dst);
4814 	if (rt->dst.error) {
4815 		err = rt->dst.error;
4816 		ip6_rt_put(rt);
4817 		goto errout;
4818 	}
4819 
4820 	if (rt == net->ipv6.ip6_null_entry) {
4821 		err = rt->dst.error;
4822 		ip6_rt_put(rt);
4823 		goto errout;
4824 	}
4825 
4826 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4827 	if (!skb) {
4828 		ip6_rt_put(rt);
4829 		err = -ENOBUFS;
4830 		goto errout;
4831 	}
4832 
4833 	skb_dst_set(skb, &rt->dst);
4834 
4835 	rcu_read_lock();
4836 	from = rcu_dereference(rt->from);
4837 
4838 	if (fibmatch)
4839 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
4840 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4841 				    nlh->nlmsg_seq, 0);
4842 	else
4843 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4844 				    &fl6.saddr, iif, RTM_NEWROUTE,
4845 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4846 				    0);
4847 	rcu_read_unlock();
4848 
4849 	if (err < 0) {
4850 		kfree_skb(skb);
4851 		goto errout;
4852 	}
4853 
4854 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4855 errout:
4856 	return err;
4857 }
4858 
4859 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
4860 		     unsigned int nlm_flags)
4861 {
4862 	struct sk_buff *skb;
4863 	struct net *net = info->nl_net;
4864 	u32 seq;
4865 	int err;
4866 
4867 	err = -ENOBUFS;
4868 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4869 
4870 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4871 	if (!skb)
4872 		goto errout;
4873 
4874 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4875 			    event, info->portid, seq, nlm_flags);
4876 	if (err < 0) {
4877 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4878 		WARN_ON(err == -EMSGSIZE);
4879 		kfree_skb(skb);
4880 		goto errout;
4881 	}
4882 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4883 		    info->nlh, gfp_any());
4884 	return;
4885 errout:
4886 	if (err < 0)
4887 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4888 }
4889 
4890 static int ip6_route_dev_notify(struct notifier_block *this,
4891 				unsigned long event, void *ptr)
4892 {
4893 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4894 	struct net *net = dev_net(dev);
4895 
4896 	if (!(dev->flags & IFF_LOOPBACK))
4897 		return NOTIFY_OK;
4898 
4899 	if (event == NETDEV_REGISTER) {
4900 		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4901 		net->ipv6.ip6_null_entry->dst.dev = dev;
4902 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4904 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
4905 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4906 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
4907 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4908 #endif
4909 	 } else if (event == NETDEV_UNREGISTER &&
4910 		    dev->reg_state != NETREG_UNREGISTERED) {
4911 		/* NETDEV_UNREGISTER could be fired for multiple times by
4912 		 * netdev_wait_allrefs(). Make sure we only call this once.
4913 		 */
4914 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4915 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4916 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4917 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4918 #endif
4919 	}
4920 
4921 	return NOTIFY_OK;
4922 }
4923 
4924 /*
4925  *	/proc
4926  */
4927 
4928 #ifdef CONFIG_PROC_FS
4929 
4930 static const struct file_operations ipv6_route_proc_fops = {
4931 	.open		= ipv6_route_open,
4932 	.read		= seq_read,
4933 	.llseek		= seq_lseek,
4934 	.release	= seq_release_net,
4935 };
4936 
4937 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4938 {
4939 	struct net *net = (struct net *)seq->private;
4940 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4941 		   net->ipv6.rt6_stats->fib_nodes,
4942 		   net->ipv6.rt6_stats->fib_route_nodes,
4943 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4944 		   net->ipv6.rt6_stats->fib_rt_entries,
4945 		   net->ipv6.rt6_stats->fib_rt_cache,
4946 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4947 		   net->ipv6.rt6_stats->fib_discarded_routes);
4948 
4949 	return 0;
4950 }
4951 
4952 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4953 {
4954 	return single_open_net(inode, file, rt6_stats_seq_show);
4955 }
4956 
4957 static const struct file_operations rt6_stats_seq_fops = {
4958 	.open	 = rt6_stats_seq_open,
4959 	.read	 = seq_read,
4960 	.llseek	 = seq_lseek,
4961 	.release = single_release_net,
4962 };
4963 #endif	/* CONFIG_PROC_FS */
4964 
4965 #ifdef CONFIG_SYSCTL
4966 
4967 static
4968 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4969 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4970 {
4971 	struct net *net;
4972 	int delay;
4973 	if (!write)
4974 		return -EINVAL;
4975 
4976 	net = (struct net *)ctl->extra1;
4977 	delay = net->ipv6.sysctl.flush_delay;
4978 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4979 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4980 	return 0;
4981 }
4982 
4983 struct ctl_table ipv6_route_table_template[] = {
4984 	{
4985 		.procname	=	"flush",
4986 		.data		=	&init_net.ipv6.sysctl.flush_delay,
4987 		.maxlen		=	sizeof(int),
4988 		.mode		=	0200,
4989 		.proc_handler	=	ipv6_sysctl_rtcache_flush
4990 	},
4991 	{
4992 		.procname	=	"gc_thresh",
4993 		.data		=	&ip6_dst_ops_template.gc_thresh,
4994 		.maxlen		=	sizeof(int),
4995 		.mode		=	0644,
4996 		.proc_handler	=	proc_dointvec,
4997 	},
4998 	{
4999 		.procname	=	"max_size",
5000 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
5001 		.maxlen		=	sizeof(int),
5002 		.mode		=	0644,
5003 		.proc_handler	=	proc_dointvec,
5004 	},
5005 	{
5006 		.procname	=	"gc_min_interval",
5007 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5008 		.maxlen		=	sizeof(int),
5009 		.mode		=	0644,
5010 		.proc_handler	=	proc_dointvec_jiffies,
5011 	},
5012 	{
5013 		.procname	=	"gc_timeout",
5014 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5015 		.maxlen		=	sizeof(int),
5016 		.mode		=	0644,
5017 		.proc_handler	=	proc_dointvec_jiffies,
5018 	},
5019 	{
5020 		.procname	=	"gc_interval",
5021 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
5022 		.maxlen		=	sizeof(int),
5023 		.mode		=	0644,
5024 		.proc_handler	=	proc_dointvec_jiffies,
5025 	},
5026 	{
5027 		.procname	=	"gc_elasticity",
5028 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5029 		.maxlen		=	sizeof(int),
5030 		.mode		=	0644,
5031 		.proc_handler	=	proc_dointvec,
5032 	},
5033 	{
5034 		.procname	=	"mtu_expires",
5035 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5036 		.maxlen		=	sizeof(int),
5037 		.mode		=	0644,
5038 		.proc_handler	=	proc_dointvec_jiffies,
5039 	},
5040 	{
5041 		.procname	=	"min_adv_mss",
5042 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
5043 		.maxlen		=	sizeof(int),
5044 		.mode		=	0644,
5045 		.proc_handler	=	proc_dointvec,
5046 	},
5047 	{
5048 		.procname	=	"gc_min_interval_ms",
5049 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5050 		.maxlen		=	sizeof(int),
5051 		.mode		=	0644,
5052 		.proc_handler	=	proc_dointvec_ms_jiffies,
5053 	},
5054 	{ }
5055 };
5056 
5057 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5058 {
5059 	struct ctl_table *table;
5060 
5061 	table = kmemdup(ipv6_route_table_template,
5062 			sizeof(ipv6_route_table_template),
5063 			GFP_KERNEL);
5064 
5065 	if (table) {
5066 		table[0].data = &net->ipv6.sysctl.flush_delay;
5067 		table[0].extra1 = net;
5068 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5069 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5070 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5071 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5072 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5073 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5074 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5075 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5076 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5077 
5078 		/* Don't export sysctls to unprivileged users */
5079 		if (net->user_ns != &init_user_ns)
5080 			table[0].procname = NULL;
5081 	}
5082 
5083 	return table;
5084 }
5085 #endif
5086 
5087 static int __net_init ip6_route_net_init(struct net *net)
5088 {
5089 	int ret = -ENOMEM;
5090 
5091 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5092 	       sizeof(net->ipv6.ip6_dst_ops));
5093 
5094 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5095 		goto out_ip6_dst_ops;
5096 
5097 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5098 					    sizeof(*net->ipv6.fib6_null_entry),
5099 					    GFP_KERNEL);
5100 	if (!net->ipv6.fib6_null_entry)
5101 		goto out_ip6_dst_entries;
5102 
5103 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5104 					   sizeof(*net->ipv6.ip6_null_entry),
5105 					   GFP_KERNEL);
5106 	if (!net->ipv6.ip6_null_entry)
5107 		goto out_fib6_null_entry;
5108 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5109 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5110 			 ip6_template_metrics, true);
5111 
5112 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5113 	net->ipv6.fib6_has_custom_rules = false;
5114 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5115 					       sizeof(*net->ipv6.ip6_prohibit_entry),
5116 					       GFP_KERNEL);
5117 	if (!net->ipv6.ip6_prohibit_entry)
5118 		goto out_ip6_null_entry;
5119 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5120 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5121 			 ip6_template_metrics, true);
5122 
5123 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5124 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
5125 					       GFP_KERNEL);
5126 	if (!net->ipv6.ip6_blk_hole_entry)
5127 		goto out_ip6_prohibit_entry;
5128 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5129 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5130 			 ip6_template_metrics, true);
5131 #endif
5132 
5133 	net->ipv6.sysctl.flush_delay = 0;
5134 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5135 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5136 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5137 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5138 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5139 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5140 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5141 
5142 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
5143 
5144 	ret = 0;
5145 out:
5146 	return ret;
5147 
5148 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5149 out_ip6_prohibit_entry:
5150 	kfree(net->ipv6.ip6_prohibit_entry);
5151 out_ip6_null_entry:
5152 	kfree(net->ipv6.ip6_null_entry);
5153 #endif
5154 out_fib6_null_entry:
5155 	kfree(net->ipv6.fib6_null_entry);
5156 out_ip6_dst_entries:
5157 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5158 out_ip6_dst_ops:
5159 	goto out;
5160 }
5161 
5162 static void __net_exit ip6_route_net_exit(struct net *net)
5163 {
5164 	kfree(net->ipv6.fib6_null_entry);
5165 	kfree(net->ipv6.ip6_null_entry);
5166 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5167 	kfree(net->ipv6.ip6_prohibit_entry);
5168 	kfree(net->ipv6.ip6_blk_hole_entry);
5169 #endif
5170 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5171 }
5172 
5173 static int __net_init ip6_route_net_init_late(struct net *net)
5174 {
5175 #ifdef CONFIG_PROC_FS
5176 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5177 	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
5178 #endif
5179 	return 0;
5180 }
5181 
5182 static void __net_exit ip6_route_net_exit_late(struct net *net)
5183 {
5184 #ifdef CONFIG_PROC_FS
5185 	remove_proc_entry("ipv6_route", net->proc_net);
5186 	remove_proc_entry("rt6_stats", net->proc_net);
5187 #endif
5188 }
5189 
5190 static struct pernet_operations ip6_route_net_ops = {
5191 	.init = ip6_route_net_init,
5192 	.exit = ip6_route_net_exit,
5193 };
5194 
5195 static int __net_init ipv6_inetpeer_init(struct net *net)
5196 {
5197 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5198 
5199 	if (!bp)
5200 		return -ENOMEM;
5201 	inet_peer_base_init(bp);
5202 	net->ipv6.peers = bp;
5203 	return 0;
5204 }
5205 
5206 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5207 {
5208 	struct inet_peer_base *bp = net->ipv6.peers;
5209 
5210 	net->ipv6.peers = NULL;
5211 	inetpeer_invalidate_tree(bp);
5212 	kfree(bp);
5213 }
5214 
5215 static struct pernet_operations ipv6_inetpeer_ops = {
5216 	.init	=	ipv6_inetpeer_init,
5217 	.exit	=	ipv6_inetpeer_exit,
5218 };
5219 
5220 static struct pernet_operations ip6_route_net_late_ops = {
5221 	.init = ip6_route_net_init_late,
5222 	.exit = ip6_route_net_exit_late,
5223 };
5224 
5225 static struct notifier_block ip6_route_dev_notifier = {
5226 	.notifier_call = ip6_route_dev_notify,
5227 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5228 };
5229 
5230 void __init ip6_route_init_special_entries(void)
5231 {
5232 	/* Registering of the loopback is done before this portion of code,
5233 	 * the loopback reference in rt6_info will not be taken, do it
5234 	 * manually for init_net */
5235 	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5236 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5237 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5238   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5239 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5240 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5241 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5242 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5243   #endif
5244 }
5245 
5246 int __init ip6_route_init(void)
5247 {
5248 	int ret;
5249 	int cpu;
5250 
5251 	ret = -ENOMEM;
5252 	ip6_dst_ops_template.kmem_cachep =
5253 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5254 				  SLAB_HWCACHE_ALIGN, NULL);
5255 	if (!ip6_dst_ops_template.kmem_cachep)
5256 		goto out;
5257 
5258 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
5259 	if (ret)
5260 		goto out_kmem_cache;
5261 
5262 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5263 	if (ret)
5264 		goto out_dst_entries;
5265 
5266 	ret = register_pernet_subsys(&ip6_route_net_ops);
5267 	if (ret)
5268 		goto out_register_inetpeer;
5269 
5270 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5271 
5272 	ret = fib6_init();
5273 	if (ret)
5274 		goto out_register_subsys;
5275 
5276 	ret = xfrm6_init();
5277 	if (ret)
5278 		goto out_fib6_init;
5279 
5280 	ret = fib6_rules_init();
5281 	if (ret)
5282 		goto xfrm6_init;
5283 
5284 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5285 	if (ret)
5286 		goto fib6_rules_init;
5287 
5288 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5289 				   inet6_rtm_newroute, NULL, 0);
5290 	if (ret < 0)
5291 		goto out_register_late_subsys;
5292 
5293 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5294 				   inet6_rtm_delroute, NULL, 0);
5295 	if (ret < 0)
5296 		goto out_register_late_subsys;
5297 
5298 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5299 				   inet6_rtm_getroute, NULL,
5300 				   RTNL_FLAG_DOIT_UNLOCKED);
5301 	if (ret < 0)
5302 		goto out_register_late_subsys;
5303 
5304 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5305 	if (ret)
5306 		goto out_register_late_subsys;
5307 
5308 	for_each_possible_cpu(cpu) {
5309 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5310 
5311 		INIT_LIST_HEAD(&ul->head);
5312 		spin_lock_init(&ul->lock);
5313 	}
5314 
5315 out:
5316 	return ret;
5317 
5318 out_register_late_subsys:
5319 	rtnl_unregister_all(PF_INET6);
5320 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5321 fib6_rules_init:
5322 	fib6_rules_cleanup();
5323 xfrm6_init:
5324 	xfrm6_fini();
5325 out_fib6_init:
5326 	fib6_gc_cleanup();
5327 out_register_subsys:
5328 	unregister_pernet_subsys(&ip6_route_net_ops);
5329 out_register_inetpeer:
5330 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5331 out_dst_entries:
5332 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5333 out_kmem_cache:
5334 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5335 	goto out;
5336 }
5337 
5338 void ip6_route_cleanup(void)
5339 {
5340 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5341 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5342 	fib6_rules_cleanup();
5343 	xfrm6_fini();
5344 	fib6_gc_cleanup();
5345 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5346 	unregister_pernet_subsys(&ip6_route_net_ops);
5347 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5348 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5349 }
5350