xref: /linux/net/ipv6/route.c (revision 9d3df886d17b5ef73d4018841ef0a349fcd109ea)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <net/ip.h>
67 #include <trace/events/fib6.h>
68 
69 #include <linux/uaccess.h>
70 
71 #ifdef CONFIG_SYSCTL
72 #include <linux/sysctl.h>
73 #endif
74 
/* Neighbour-reachability verdicts used when scoring candidate routes;
 * negative values are failures (see rt6_check_neigh()/rt6_score_route()).
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route not usable at all */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour failed; needs probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; round-robin */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
81 
82 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
84 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(struct dst_ops *ops);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static int		ip6_pkt_prohibit(struct sk_buff *skb);
94 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
95 static void		ip6_link_failure(struct sk_buff *skb);
96 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 					   struct sk_buff *skb, u32 mtu);
98 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 					struct sk_buff *skb);
100 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
101 static size_t rt6_nlmsg_size(struct fib6_info *rt);
102 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
103 			 struct fib6_info *rt, struct dst_entry *dst,
104 			 struct in6_addr *dest, struct in6_addr *src,
105 			 int iif, int type, u32 portid, u32 seq,
106 			 unsigned int flags);
107 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
108 					   struct in6_addr *daddr,
109 					   struct in6_addr *saddr);
110 
111 #ifdef CONFIG_IPV6_ROUTE_INFO
112 static struct fib6_info *rt6_add_route_info(struct net *net,
113 					   const struct in6_addr *prefix, int prefixlen,
114 					   const struct in6_addr *gwaddr,
115 					   struct net_device *dev,
116 					   unsigned int pref);
117 static struct fib6_info *rt6_get_route_info(struct net *net,
118 					   const struct in6_addr *prefix, int prefixlen,
119 					   const struct in6_addr *gwaddr,
120 					   struct net_device *dev);
121 #endif
122 
/* Per-CPU list of rt6_info dsts that live outside the FIB tree; entries
 * are found again on device teardown (rt6_uncached_list_flush_dev()).
 * @lock protects @head.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
129 
/* Link @rt on this CPU's uncached list so it can be retargeted when its
 * device goes away.
 */
void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	/* remember the owning list: deletion may run on another CPU */
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
140 
/* Unlink @rt from the uncached list it was added to, if any, and drop
 * the per-netns uncached-route counter.  Safe to call on routes that
 * were never added (list head stays self-linked, see rt6_info_init()).
 */
void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}
153 
/* @dev is going away: walk every CPU's uncached list and retarget routes
 * still referencing it to the netns loopback device, transferring both
 * the inet6_dev and net_device references.  Nothing to do when @dev is
 * the loopback itself.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* swap the idev reference to loopback's idev */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* hold the new device before releasing the old one */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
185 
186 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
187 					     struct sk_buff *skb,
188 					     const void *daddr)
189 {
190 	if (!ipv6_addr_any(p))
191 		return (const void *) p;
192 	else if (skb)
193 		return &ipv6_hdr(skb)->daddr;
194 	return daddr;
195 }
196 
197 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
198 				   struct net_device *dev,
199 				   struct sk_buff *skb,
200 				   const void *daddr)
201 {
202 	struct neighbour *n;
203 
204 	daddr = choose_neigh_daddr(gw, skb, daddr);
205 	n = __ipv6_neigh_lookup(dev, daddr);
206 	if (n)
207 		return n;
208 	return neigh_create(&nd_tbl, daddr, dev);
209 }
210 
/* dst_ops->neigh_lookup: thin wrapper feeding the route's gateway into
 * ip6_neigh_lookup().
 */
static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
					      struct sk_buff *skb,
					      const void *daddr)
{
	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);

	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
}
219 
220 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
221 {
222 	struct net_device *dev = dst->dev;
223 	struct rt6_info *rt = (struct rt6_info *)dst;
224 
225 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
226 	if (!daddr)
227 		return;
228 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
229 		return;
230 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
231 		return;
232 	__ipv6_confirm_neigh(dev, daddr);
233 }
234 
/* dst_ops used for regular IPv6 routes; presumably copied per netns
 * (see ip6_dst_alloc(), which allocates from net->ipv6.ip6_dst_ops).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
253 
254 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
255 {
256 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
257 
258 	return mtu ? : dst->dev->mtu;
259 }
260 
/* Blackholed dsts ignore PMTU updates; deliberately empty. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
265 
/* Blackholed dsts ignore redirects; deliberately empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
270 
/* dst_ops for blackholed routes: no gc/ifdown hooks, and PMTU/redirect
 * events are swallowed by the no-op handlers above.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};
282 
/* Metrics template for the special routes below; only RTAX_HOPLIMIT is
 * mentioned (explicitly zero, i.e. "use the default").
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
286 
/* Template for the per-netns fib6_null_entry: a rejecting, unreachable
 * entry with the worst possible metric, used as a lookup fallback.
 */
static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol  = RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
295 
/* Template for the per-netns ip6_null_entry dst: discards traffic and
 * reports -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
307 
308 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
309 
/* Template dst for "prohibit" policy routing rules: drops traffic and
 * reports -EACCES (sends an administratively-prohibited ICMP, per the
 * ip6_pkt_prohibit handlers).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
321 
/* Template dst for "blackhole" policy routing rules: silently discards
 * traffic, reporting -EINVAL internally.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
333 
334 #endif
335 
/* Zero the rt6_info payload following the embedded dst_entry and make
 * the uncached-list linkage self-terminating so that a later
 * rt6_uncached_list_del() is a safe no-op.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* dst_alloc() initialized the dst part; clear everything after it */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
343 
/* Allocate and initialize a rt6_info dst from the netns ip6_dst_ops
 * pool.  Returns NULL on allocation failure; on success the per-netns
 * fib_rt_alloc statistic is bumped.
 */
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
359 
/* dst_ops->destroy: release everything a cached rt6_info holds —
 * metrics, uncached-list membership, the inet6_dev reference and the
 * fib6_info it was cloned from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct fib6_info *from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* detach from the parent fib6_info under RCU, then drop its ref */
	rcu_read_lock();
	from = rcu_dereference(rt->from);
	rcu_assign_pointer(rt->from, NULL);
	fib6_info_release(from);
	rcu_read_unlock();
}
381 
/* dst_ops->ifdown: @dev is going down, so repoint the route's inet6_dev
 * reference at the netns loopback device (which outlives everything in
 * the netns).  Nothing to do if it already points there.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
398 
399 static bool __rt6_check_expired(const struct rt6_info *rt)
400 {
401 	if (rt->rt6i_flags & RTF_EXPIRES)
402 		return time_after(jiffies, rt->dst.expires);
403 	else
404 		return false;
405 }
406 
/* Full expiry check: the rt6_info's own RTF_EXPIRES timestamp wins; a
 * route without one is considered expired when its parent fib6_info has
 * expired or the dst was invalidated (obsolete changed).
 * Caller must be in an RCU read-side section (rcu_dereference of ->from).
 */
static bool rt6_check_expired(const struct rt6_info *rt)
{
	struct fib6_info *from;

	from = rcu_dereference(rt->from);

	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			fib6_check_expired(from);
	}
	return false;
}
422 
/* Pick one nexthop of a multipath route by flow hash.  Each sibling owns
 * a slice of the hash space bounded by nh_upper_bound; the first sibling
 * whose bound covers fl6->mp_hash and that scores as usable wins.
 * Falls back to @match if no sibling qualifies.
 */
struct fib6_info *fib6_multipath_select(const struct net *net,
					struct fib6_info *match,
					struct flowi6 *fl6, int oif,
					const struct sk_buff *skb,
					int strict)
{
	struct fib6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
				 fib6_siblings) {
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
		if (fl6->mp_hash > nh_upper_bound)
			continue;
		/* hash slice matches; give up if the route is unusable */
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}
455 
456 /*
457  *	Route lookup. rcu_read_lock() should be held.
458  */
459 
/* Among the routes chained at a fib6 node starting at @rt, find one whose
 * nexthop device matches @oif (or, lacking an oif, one whose device owns
 * @saddr).  Dead nexthops are skipped.  Returns fib6_null_entry when a
 * strict interface match (RT6_LOOKUP_F_IFACE) cannot be satisfied.
 */
static inline struct fib6_info *rt6_device_match(struct net *net,
						 struct fib6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct fib6_info *sprt;

	/* nothing to constrain on: take the head entry if it is alive */
	if (!oif && ipv6_addr_any(saddr) &&
	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
		const struct net_device *dev = sprt->fib6_nh.nh_dev;

		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif && flags & RT6_LOOKUP_F_IFACE)
		return net->ipv6.fib6_null_entry;

	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
}
493 
494 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request queued by rt6_probe(); holds a
 * reference on @dev until the work runs.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/* Workqueue handler: send a neighbour solicitation to the probe target's
 * solicited-node multicast address, then drop the device ref and free
 * the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}
512 
/* Schedule a router-reachability probe (deferred NS) for the gateway of
 * @rt when its neighbour entry is absent or stale beyond the configured
 * rtr_probe_interval.  Non-gateway routes are ignored.
 */
static void rt6_probe(struct fib6_info *rt)
{
	struct __rt6_probe_work *work;
	const struct in6_addr *nh_gw;
	struct neighbour *neigh;
	struct net_device *dev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
		return;

	nh_gw = &rt->fib6_nh.nh_gw;
	dev = rt->fib6_nh.nh_dev;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		struct inet6_dev *idev;

		if (neigh->nud_state & NUD_VALID)
			goto out;

		/* NOTE(review): assumes __in6_dev_get() is non-NULL here —
		 * confirm callers guarantee a live inet6_dev on this path.
		 */
		idev = __in6_dev_get(dev);
		work = NULL;
		write_lock(&neigh->lock);
		/* re-check under the lock and rate-limit via neigh->updated */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated + idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry at all: probe unconditionally */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		dev_hold(dev);	/* released by rt6_probe_deferred() */
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
567 #else
/* Router-reachability probing compiled out (CONFIG_IPV6_ROUTER_PREF=n). */
static inline void rt6_probe(struct fib6_info *rt)
{
}
571 #endif
572 
573 /*
574  * Default Router Selection (RFC 2461 6.3.6)
575  */
576 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
577 {
578 	const struct net_device *dev = rt->fib6_nh.nh_dev;
579 
580 	if (!oif || dev->ifindex == oif)
581 		return 2;
582 	return 0;
583 }
584 
/* Neighbour component of a route's score.  Routes without a gateway (or
 * flagged RTF_NONEXTHOP) always succeed.  With router preferences
 * enabled, anything short of NUD_FAILED counts as reachable and a failed
 * neighbour requests probing; without them, a missing neighbour entry
 * triggers round-robin instead.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	if (rt->fib6_flags & RTF_NONEXTHOP ||
	    !(rt->fib6_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
					  &rt->fib6_nh.nh_gw);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
616 
/* Combined route score: device match in the low bits, decoded router
 * preference above them.  Returns a negative rt6_nud_state value when
 * the route must not be used under the given @strict flags.
 */
static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
634 
/* Whether routes through @f6i's device should be skipped while the link
 * is down (per-device ignore_routes_with_linkdown sysctl).
 * Called with rcu_read_lock held.
 * NOTE(review): assumes __in6_dev_get() returns non-NULL when dev is
 * set — confirm callers guarantee a live inet6_dev here.
 */
static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
{
	const struct net_device *dev = fib6_info_nh_dev(f6i);
	bool rc = false;

	if (dev) {
		const struct inet6_dev *idev = __in6_dev_get(dev);

		rc = !!idev->cnf.ignore_routes_with_linkdown;
	}

	return rc;
}
649 
/* Score @rt and return the better of it and the current @match, keeping
 * the best score in *@mpri.  Sets *@do_rr when the winning route asked
 * for round-robin (missing neighbour without router preferences).
 * Dead, linkdown-ignored and expired routes never match.
 */
static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
				   int *mpri, struct fib6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;

	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
		goto out;

	if (fib6_ignore_linkdown(rt) &&
	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (fib6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
688 
/* Scan the node's route list for the best entry with @metric, starting at
 * the round-robin head @rr_head and wrapping around from @leaf back up to
 * it.  @cont remembers the first route of a different metric; that tail
 * is only scanned if no same-metric route matched.
 */
static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
				     struct fib6_info *leaf,
				     struct fib6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct fib6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* wrap-around: entries before rr_head in list order */
	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing with the preferred metric matched: try the rest */
	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
727 
/* Select the route to use from fib6 node @fn, implementing default
 * router round-robin: when find_rr_leaf() asked for it, advance
 * fn->rr_ptr to the next same-metric entry under the table lock.
 * Returns fib6_null_entry when the node has nothing usable.
 * Caller holds rcu_read_lock.
 */
static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
				   int oif, int strict)
{
	struct fib6_info *leaf = rcu_dereference(fn->leaf);
	struct fib6_info *match, *rt0;
	bool do_rr = false;
	int key_plen;

	if (!leaf || leaf == net->ipv6.fib6_null_entry)
		return net->ipv6.fib6_null_entry;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not points to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->fib6_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->fib6_src.plen)
		key_plen = rt0->fib6_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		return net->ipv6.fib6_null_entry;

	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct fib6_info *next = rcu_dereference(rt0->fib6_next);

		/* no entries matched; do round-robin */
		if (!next || next->fib6_metric != rt0->fib6_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->fib6_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->fib6_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
		}
	}

	return match ? match : net->ipv6.fib6_null_entry;
}
777 
778 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
779 {
780 	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
781 }
782 
783 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information Option (RFC 4191) received in a Router
 * Advertisement from @gwaddr on @dev: add, refresh or (on zero lifetime)
 * delete the corresponding route.  Returns 0 on success or -EINVAL for
 * malformed options.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* length == 3 means the option carries a full 128-bit prefix */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix is the default route */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			fib6_clean_expires(rt);
		else
			fib6_set_expires(rt, jiffies + HZ * lifetime);

		fib6_info_release(rt);
	}
	return 0;
}
857 #endif
858 
859 /*
860  *	Misc support functions
861  */
862 
/* Pick the dst device for a clone of @rt.  For local/anycast routes the
 * device must be the L3 master when the nexthop device is enslaved, or
 * the loopback otherwise.  Called with rcu_read_lock held.
 */
static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh.nh_dev;

	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->fib6_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}
885 
/* RTN_* route type -> dst error code delivered to the stack; forwarding
 * types map to 0 (no error).
 */
static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};
900 
/* Map an RTN_* route type to its dst error code (see fib6_prop[]). */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}
905 
906 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
907 {
908 	unsigned short flags = 0;
909 
910 	if (rt->dst_nocount)
911 		flags |= DST_NOCOUNT;
912 	if (rt->dst_nopolicy)
913 		flags |= DST_NOPOLICY;
914 	if (rt->dst_host)
915 		flags |= DST_HOST;
916 
917 	return flags;
918 }
919 
/* Wire up input/output handlers and dst error for a rejecting route,
 * according to its RTN_* type.
 */
static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
{
	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);

	switch (ort->fib6_type) {
	case RTN_BLACKHOLE:
		rt->dst.output = dst_discard_out;
		rt->dst.input = dst_discard;
		break;
	case RTN_PROHIBIT:
		rt->dst.output = ip6_pkt_prohibit_out;
		rt->dst.input = ip6_pkt_prohibit;
		break;
	case RTN_THROW:
	case RTN_UNREACHABLE:
	default:
		/* THROW/UNREACHABLE and anything else discard the packet */
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		break;
	}
}
941 
/* Initialize the dst part of a rt6_info clone from fib6_info @ort:
 * flags, input/output handlers (reject, local delivery, multicast or
 * forwarding) and any lightweight-tunnel state.
 */
static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
{
	rt->dst.flags |= fib6_info_dst_flags(ort);

	if (ort->fib6_flags & RTF_REJECT) {
		ip6_rt_init_dst_reject(rt, ort);
		return;
	}

	rt->dst.error = 0;
	rt->dst.output = ip6_output;

	if (ort->fib6_type == RTN_LOCAL) {
		rt->dst.input = ip6_input;
	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
		rt->dst.input = ip6_mc_input;
	} else {
		rt->dst.input = ip6_forward;
	}

	if (ort->fib6_nh.nh_lwtstate) {
		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
		lwtunnel_set_redirect(&rt->dst);
	}

	rt->dst.lastuse = jiffies;
}
969 
/* Attach clone @rt to its parent fib6_info @from: take a reference,
 * publish the pointer and share the parent's metrics (refcounted unless
 * they are the global defaults).
 */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	fib6_info_hold(from);
	rcu_assign_pointer(rt->from, from);
	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
	if (from->fib6_metrics != &dst_default_metrics) {
		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
		refcount_inc(&from->fib6_metrics->refcnt);
	}
}
981 
/* Populate a freshly allocated rt6_info clone from fib6_info @ort:
 * dst handlers, addresses, flags, idev reference, parent link and
 * lwtunnel state.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
{
	struct net_device *dev = fib6_info_nh_dev(ort);

	ip6_rt_init_dst(rt, ort);

	rt->rt6i_dst = ort->fib6_dst;
	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
	rt->rt6i_flags = ort->fib6_flags;
	rt6_set_from(rt, ort);
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->fib6_src;
#endif
	rt->rt6i_prefsrc = ort->fib6_prefsrc;
	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
}
999 
/* Walk back up the fib6 tree from @fn looking for the next node carrying
 * route info, descending into a parent's source-address subtree when one
 * exists.  Returns NULL at the tree root.  Caller holds rcu_read_lock.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_node_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
1017 
1018 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1019 			  bool null_fallback)
1020 {
1021 	struct rt6_info *rt = *prt;
1022 
1023 	if (dst_hold_safe(&rt->dst))
1024 		return true;
1025 	if (null_fallback) {
1026 		rt = net->ipv6.ip6_null_entry;
1027 		dst_hold(&rt->dst);
1028 	} else {
1029 		rt = NULL;
1030 	}
1031 	*prt = rt;
1032 	return false;
1033 }
1034 
/* Allocate a rt6_info clone of @rt on its nexthop device.  Returns NULL
 * on allocation failure.  Called with rcu_read_lock held.
 */
static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
{
	unsigned short flags = fib6_info_dst_flags(rt);
	struct net_device *dev = rt->fib6_nh.nh_dev;
	struct rt6_info *nrt;

	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
	if (nrt)
		ip6_rt_copy_init(nrt, rt);

	return nrt;
}
1048 
/* Core policy-routing lookup in one fib6 table: find the best fib6_info
 * for the flow (backtracking up the tree on a miss, honoring multipath
 * selection), then return a held rt6_info — a cached exception entry if
 * one exists, otherwise a fresh clone, falling back to ip6_null_entry.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct fib6_info *f6i;
	struct fib6_node *fn;
	struct rt6_info *rt;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	f6i = rcu_dereference(fn->leaf);
	if (!f6i) {
		f6i = net->ipv6.fib6_null_entry;
	} else {
		f6i = rt6_device_match(net, f6i, &fl6->saddr,
				      fl6->flowi6_oif, flags);
		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
			f6i = fib6_multipath_select(net, f6i, fl6,
						    fl6->flowi6_oif, skb,
						    flags);
	}
	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);

	/* Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt, true))
			dst_use_noref(&rt->dst, jiffies);
	} else if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = ip6_create_rt_rcu(f6i);
		if (!rt) {
			/* clone allocation failed: fall back to null entry */
			rt = net->ipv6.ip6_null_entry;
			dst_hold(&rt->dst);
		}
	}

	rcu_read_unlock();

	return rt;
}
1104 
/* Public lookup entry point: run the policy rules with
 * ip6_pol_route_lookup() as the per-table lookup function.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb, int flags)
{
	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
1111 
/* Convenience lookup by address pair: build a flowi6 from @daddr/@saddr
 * and @oif (with strict interface matching when @strict is set) and
 * return the held rt6_info, or NULL when the result carries an error.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif,
			    const struct sk_buff *skb, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
1137 
/* ip6_ins_rt is called with the table->tb6_lock NOT held (it acquires
 * the lock itself).  It takes a new route entry; if the addition fails
 * for any reason, the route is released.
 * Caller must hold a reference on the dst before calling it.
 */
1143 
1144 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1145 			struct netlink_ext_ack *extack)
1146 {
1147 	int err;
1148 	struct fib6_table *table;
1149 
1150 	table = rt->fib6_table;
1151 	spin_lock_bh(&table->tb6_lock);
1152 	err = fib6_add(&table->tb6_root, rt, info, extack);
1153 	spin_unlock_bh(&table->tb6_lock);
1154 
1155 	return err;
1156 }
1157 
/* Public insert helper: wraps __ip6_ins_rt() with a minimal nl_info
 * (only the namespace set) and no extended ack.
 */
int ip6_ins_rt(struct net *net, struct fib6_info *rt)
{
	struct nl_info info = {	.nl_net = net, };

	return __ip6_ins_rt(rt, &info, NULL);
}
1164 
/* Clone fib6 entry @ort into a new RTF_CACHE rt6_info keyed on the
 * host route @daddr/128 (and @saddr/128 on CONFIG_IPV6_SUBTREES builds
 * when the original carries a source prefix).
 * NOTE(review): calls ip6_rt_get_dev_rcu() — the caller appears to be
 * responsible for holding rcu_read_lock(); confirm at call sites.
 * Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	dev = ip6_rt_get_dev_rcu(ort);
	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->dst.flags |= DST_HOST;
	/* Narrow the clone to a /128 host route for this destination */
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		/* A non-/128 source entry whose prefix address equals
		 * the destination is a subnet-anycast address.
		 */
		if (ort->fib6_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
1201 
/* Allocate a per-cpu rt6_info clone of @rt, inheriting the dst flags
 * derived from the fib6 entry and marked RTF_PCPU.  The nexthop device
 * is sampled under rcu_read_lock().  Returns NULL on allocation
 * failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
{
	unsigned short flags = fib6_info_dst_flags(rt);
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
	rcu_read_unlock();
	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
1218 
1219 /* It should be called with rcu_read_lock() acquired */
1220 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1221 {
1222 	struct rt6_info *pcpu_rt, **p;
1223 
1224 	p = this_cpu_ptr(rt->rt6i_pcpu);
1225 	pcpu_rt = *p;
1226 
1227 	if (pcpu_rt)
1228 		ip6_hold_safe(NULL, &pcpu_rt, false);
1229 
1230 	return pcpu_rt;
1231 }
1232 
/* Create and publish the per-cpu clone of @rt for this CPU.  On
 * allocation failure, falls back to a held reference on the null
 * entry.  The cmpxchg() must observe a NULL slot: callers only reach
 * here after rt6_get_pcpu_route() returned NULL, hence the BUG_ON.
 */
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
					    struct fib6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	/* One reference for the pcpu slot, returned held to the caller */
	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	BUG_ON(prev);

	return pcpu_rt;
}
1251 
/* Exception ("cached route") hash table implementation.
 * A single global spinlock serializes all writers across every
 * fib6_info's exception buckets; readers use RCU.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1255 
/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	net = dev_net(rt6_ex->rt6i->dst.dev);
	/* Unlink under RCU; the entry itself is freed after a grace
	 * period so concurrent RCU readers stay safe.
	 */
	hlist_del_rcu(&rt6_ex->hlist);
	dst_release(&rt6_ex->rt6i->dst);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
	net->ipv6.rt6_stats->fib_rt_cache--;
}
1275 
1276 /* Remove oldest rt6_ex in bucket and free the memory
1277  * Caller must hold rt6_exception_lock
1278  */
1279 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1280 {
1281 	struct rt6_exception *rt6_ex, *oldest = NULL;
1282 
1283 	if (!bucket)
1284 		return;
1285 
1286 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1287 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1288 			oldest = rt6_ex;
1289 	}
1290 	rt6_remove_exception(bucket, oldest);
1291 }
1292 
1293 static u32 rt6_exception_hash(const struct in6_addr *dst,
1294 			      const struct in6_addr *src)
1295 {
1296 	static u32 seed __read_mostly;
1297 	u32 val;
1298 
1299 	net_get_random_once(&seed, sizeof(seed));
1300 	val = jhash(dst, sizeof(*dst), seed);
1301 
1302 #ifdef CONFIG_IPV6_SUBTREES
1303 	if (src)
1304 		val = jhash(src, sizeof(*src), val);
1305 #endif
1306 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1307 }
1308 
/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair.  Note *bucket is advanced in place so the
 * caller ends up with the exact bucket even on a miss.
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	/* Advance caller's pointer to the selected bucket */
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
1341 
/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair.  RCU-reader counterpart of
 * __rt6_find_exception_spinlock(); same bucket-advance contract.
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	/* Advance caller's pointer to the selected bucket */
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
1376 
1377 static unsigned int fib6_mtu(const struct fib6_info *rt)
1378 {
1379 	unsigned int mtu;
1380 
1381 	if (rt->fib6_pmtu) {
1382 		mtu = rt->fib6_pmtu;
1383 	} else {
1384 		struct net_device *dev = fib6_info_nh_dev(rt);
1385 		struct inet6_dev *idev;
1386 
1387 		rcu_read_lock();
1388 		idev = __in6_dev_get(dev);
1389 		mtu = idev->cnf.mtu6;
1390 		rcu_read_unlock();
1391 	}
1392 
1393 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1394 
1395 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1396 }
1397 
/* Insert RTF_CACHE clone @nrt into the exception table of its parent
 * fib6 entry @ort, creating the bucket array on first use.  Any
 * pre-existing exception for the same (dst, src) key is replaced, and
 * the bucket is trimmed to FIB6_MAX_DEPTH.  On success the parent's
 * sernum is bumped so stale cached dsts fail their next dst_check().
 * Returns 0 or a negative errno.
 */
static int rt6_insert_exception(struct rt6_info *nrt,
				struct fib6_info *ort)
{
	struct net *net = dev_net(nrt->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	spin_lock_bh(&rt6_exception_lock);

	/* Refuse to resurrect a table that rt6_flush_exceptions() is
	 * tearing down.
	 */
	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->fib6_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
		err = -EINVAL;
		goto out;
	}

	/* Replace semantics: drop any old entry for the same key */
	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&ort->fib6_table->tb6_lock);
		fib6_update_sernum(net, ort);
		spin_unlock_bh(&ort->fib6_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}
1482 
/* Drop every cached exception route attached to @rt and permanently
 * mark the entry so rt6_insert_exception() cannot recreate the bucket
 * array afterwards (used when the fib6 entry is going away).
 */
void rt6_flush_exceptions(struct fib6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() to recreate the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}
1509 
/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock().
 * Returns the cached clone for (daddr, saddr), or NULL when no entry
 * exists or the entry has expired.  No reference is taken; the result
 * is only valid inside the caller's RCU read-side section.
 */
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct rt6_info *res = NULL;

	bucket = rcu_dereference(rt->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates rt is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (rt->fib6_src.plen)
		src_key = saddr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		res = rt6_ex->rt6i;

	return res;
}
1541 
/* Remove the passed in cached rt from the hash table that contains it.
 * NOTE(review): rcu_dereference(rt->from) implies the caller holds
 * rcu_read_lock() — confirm at call sites (e.g. ip6_negative_advice()).
 * Returns 0 on success, -EINVAL for a non-cache/orphaned rt, -ENOENT
 * when no matching exception exists.
 */
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct fib6_info *from;
	int err;

	from = rcu_dereference(rt->from);
	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	if (!rcu_access_pointer(from->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->fib6_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}
1585 
1586 /* Find rt6_ex which contains the passed in rt cache and
1587  * refresh its stamp
1588  */
1589 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1590 {
1591 	struct rt6_exception_bucket *bucket;
1592 	struct fib6_info *from = rt->from;
1593 	struct in6_addr *src_key = NULL;
1594 	struct rt6_exception *rt6_ex;
1595 
1596 	if (!from ||
1597 	    !(rt->rt6i_flags & RTF_CACHE))
1598 		return;
1599 
1600 	rcu_read_lock();
1601 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1602 
1603 #ifdef CONFIG_IPV6_SUBTREES
1604 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1605 	 * and exception table is indexed by a hash of
1606 	 * both rt6i_dst and rt6i_src.
1607 	 * Otherwise, the exception table is indexed by
1608 	 * a hash of only rt6i_dst.
1609 	 */
1610 	if (from->fib6_src.plen)
1611 		src_key = &rt->rt6i_src.addr;
1612 #endif
1613 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1614 					  &rt->rt6i_dst.addr,
1615 					  src_key);
1616 	if (rt6_ex)
1617 		rt6_ex->stamp = jiffies;
1618 
1619 	rcu_read_unlock();
1620 }
1621 
1622 static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
1623 {
1624 	struct rt6_exception_bucket *bucket;
1625 	struct rt6_exception *rt6_ex;
1626 	int i;
1627 
1628 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1629 					lockdep_is_held(&rt6_exception_lock));
1630 
1631 	if (bucket) {
1632 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1633 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1634 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1635 			}
1636 			bucket++;
1637 		}
1638 	}
1639 }
1640 
1641 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1642 					 struct rt6_info *rt, int mtu)
1643 {
1644 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1645 	 * lowest MTU in the path: always allow updating the route PMTU to
1646 	 * reflect PMTU decreases.
1647 	 *
1648 	 * If the new MTU is higher, and the route PMTU is equal to the local
1649 	 * MTU, this means the old MTU is the lowest in the path, so allow
1650 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1651 	 * handle this.
1652 	 */
1653 
1654 	if (dst_mtu(&rt->dst) >= mtu)
1655 		return true;
1656 
1657 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1658 		return true;
1659 
1660 	return false;
1661 }
1662 
/* Propagate a device MTU change to the cached exception routes of @rt,
 * updating only entries that carry an explicit RTAX_MTU metric and
 * pass rt6_mtu_change_route_allowed().  Caller must hold
 * rt6_exception_lock (enforced by rcu_dereference_protected()).
 */
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
				       struct fib6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (!bucket)
		return;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			struct rt6_info *entry = rt6_ex->rt6i;

			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
			 * route), the metrics of its rt->from have already
			 * been updated.
			 */
			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
			    rt6_mtu_change_route_allowed(idev, entry, mtu))
				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
		}
		bucket++;
	}
}
1691 
/* Mask selecting cached routes that also have a gateway nexthop */
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove every cached gateway exception of @rt whose gateway equals
 * @gateway (used when a router becomes unreachable / is torn down).
 */
static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Cheap lockless check before taking the write lock */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}
1728 
/* GC policy for a single exception entry: remove it when aged out
 * (non-EXPIRES entries past gc_args->timeout), when expired (EXPIRES
 * entries, per RFC 8201 section 4), or when its gateway's neighbour
 * lost the NTF_ROUTER flag.  Surviving entries bump gc_args->more so
 * the GC timer is rearmed.  Caller holds rt6_exception_lock and an
 * RCU-bh read-side section (for the neighbour lookup).
 */
static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* we are pruning and obsoleting aged-out and non gateway exceptions
	 * even if others have still references to them, so that on next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
	 * expired, independently from their aging, as per RFC 8201 section 4
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
			RT6_TRACE("aging clone %p\n", rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (time_after(jiffies, rt->dst.expires)) {
		RT6_TRACE("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
		if (neigh)
			neigh_flags = neigh->flags;

		/* Gateway no longer advertises itself as a router */
		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}

	gc_args->more++;
}
1772 
/* Walk all exception buckets of @rt and apply the GC policy in
 * rt6_age_examine_exception() to each entry.  Takes rcu_read_lock_bh()
 * (needed for the lockless neighbour lookup) around the exception
 * spinlock.
 */
void rt6_age_exceptions(struct fib6_info *rt,
			struct fib6_gc_args *gc_args,
			unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Cheap lockless check before taking any locks */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	rcu_read_lock_bh();
	spin_lock(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock(&rt6_exception_lock);
	rcu_read_unlock_bh();
}
1803 
/* must be called with rcu lock held.
 * Select the best fib6_info for @fl6 in @table: walk down to the
 * longest-prefix node, pick a route with rt6_select(), and backtrack
 * toward the root on a miss.  If nothing reachable is found and
 * RT6_LOOKUP_F_REACHABLE was set, retry once from the saved node with
 * the reachability requirement dropped.  Never returns NULL — misses
 * yield net->ipv6.fib6_null_entry.
 */
struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
				    int oif, struct flowi6 *fl6, int strict)
{
	struct fib6_node *fn, *saved_fn;
	struct fib6_info *f6i;

	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	f6i = rt6_select(net, fn, oif, strict);
	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);

	return f6i;
}
1835 
/* Full policy route lookup: resolve @fl6 in @table and return a held
 * rt6_info.  Result comes from one of four places, in order:
 *   1. the null entry on a total miss,
 *   2. the exception (cached route) table,
 *   3. a freshly-allocated uncached RTF_CACHE clone when the caller
 *      set FLOWI_FLAG_KNOWN_NH on a non-gateway route,
 *   4. the per-cpu clone (created on first use).
 * A reference is held on the returned dst in every branch.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6,
			       const struct sk_buff *skb, int flags)
{
	struct fib6_info *f6i;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* When not forwarding, prefer (probably) reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	rcu_read_lock();

	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
	if (f6i->fib6_nsiblings)
		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);

	if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		rcu_read_unlock();
		dst_hold(&rt->dst);
		return rt;
	}

	/*Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt, true))
			dst_use_noref(&rt->dst, jiffies);

		rcu_read_unlock();
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(f6i->fib6_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */
		struct rt6_info *uncached_rt;

		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);

		rcu_read_unlock();

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		return uncached_rt;
	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		/* BH disabled so the pcpu slot cannot change under us */
		local_bh_disable();
		pcpu_rt = rt6_get_pcpu_route(f6i);

		if (!pcpu_rt)
			pcpu_rt = rt6_make_pcpu_route(net, f6i);

		local_bh_enable();
		rcu_read_unlock();

		return pcpu_rt;
	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1913 
/* fib6_rule_lookup() callback for the input path: routes using the
 * incoming interface index as the oif hint.
 */
static struct rt6_info *ip6_pol_route_input(struct net *net,
					    struct fib6_table *table,
					    struct flowi6 *fl6,
					    const struct sk_buff *skb,
					    int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
1922 
/* Input-path route lookup for a packet arriving on @dev.  Link-local
 * and multicast destinations force a strict interface match, except on
 * PIM register devices which legitimately receive for other interfaces.
 */
struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6,
					 const struct sk_buff *skb,
					 int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1935 
/* Extract L3 multipath hash keys from @skb into @keys.  For ICMPv6
 * error messages the keys are taken from the embedded (offending)
 * inner header instead, so errors hash onto the same path as the flow
 * that triggered them; pre-dissected @flkeys are then ignored because
 * they describe the outer packet.
 */
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys,
				  struct flow_keys *flkeys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	struct flow_keys *_flkeys = flkeys;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;
	struct icmp6hdr _icmph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_icmph), &_icmph);
	if (!icmph)
		goto out;

	/* Only ICMPv6 errors embed the offending packet */
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
	_flkeys = NULL;
out:
	if (_flkeys) {
		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
		keys->tags.flow_label = _flkeys->tags.flow_label;
		keys->basic.ip_proto = _flkeys->basic.ip_proto;
	} else {
		keys->addrs.v6addrs.src = key_iph->saddr;
		keys->addrs.v6addrs.dst = key_iph->daddr;
		keys->tags.flow_label = ip6_flowinfo(key_iph);
		keys->basic.ip_proto = key_iph->nexthdr;
	}
}
1983 
1984 /* if skb is set it will be used and fl6 can be NULL */
1985 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1986 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1987 {
1988 	struct flow_keys hash_keys;
1989 	u32 mhash;
1990 
1991 	switch (ip6_multipath_hash_policy(net)) {
1992 	case 0:
1993 		memset(&hash_keys, 0, sizeof(hash_keys));
1994 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1995 		if (skb) {
1996 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1997 		} else {
1998 			hash_keys.addrs.v6addrs.src = fl6->saddr;
1999 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2000 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2001 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2002 		}
2003 		break;
2004 	case 1:
2005 		if (skb) {
2006 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2007 			struct flow_keys keys;
2008 
2009 			/* short-circuit if we already have L4 hash present */
2010 			if (skb->l4_hash)
2011 				return skb_get_hash_raw(skb) >> 1;
2012 
2013 			memset(&hash_keys, 0, sizeof(hash_keys));
2014 
2015                         if (!flkeys) {
2016 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2017 				flkeys = &keys;
2018 			}
2019 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2020 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2021 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2022 			hash_keys.ports.src = flkeys->ports.src;
2023 			hash_keys.ports.dst = flkeys->ports.dst;
2024 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2025 		} else {
2026 			memset(&hash_keys, 0, sizeof(hash_keys));
2027 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2028 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2029 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2030 			hash_keys.ports.src = fl6->fl6_sport;
2031 			hash_keys.ports.dst = fl6->fl6_dport;
2032 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2033 		}
2034 		break;
2035 	}
2036 	mhash = flow_hash_from_keys(&hash_keys);
2037 
2038 	return mhash >> 1;
2039 }
2040 
/* Route an incoming skb: build the flowi6 from the IPv6 header (plus
 * tunnel id and early-dissected flow keys when available), compute the
 * multipath hash for ICMPv6 so errors follow their flow, and attach
 * the resulting dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};
	struct flow_keys *flkeys = NULL, _flkeys;

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;

	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
		flkeys = &_flkeys;

	/* ICMPv6 errors must hash like the flow that triggered them */
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
	skb_dst_drop(skb);
	skb_dst_set(skb,
		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
2070 
/* fib6_rule_lookup() callback for the output path: routes using the
 * flow's outgoing interface index.
 */
static struct rt6_info *ip6_pol_route_output(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
2079 
/* Output-path route lookup for locally-originated traffic.  Gives an
 * L3 master device first shot at link-local/multicast destinations,
 * then derives the strictness and source-address flags from the socket
 * and flow before running the policy lookup.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* Let the socket's address preferences steer saddr choice */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2108 
/* Build a blackhole copy of @dst_orig: same addressing and metrics,
 * but input/output discard every packet.  Used e.g. by xfrm while a
 * route is temporarily unusable.  Consumes the reference on
 * @dst_orig; returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_DEAD, 0);
	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		/* RTF_PCPU must not leak into the copy */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
2141 
2142 /*
2143  *	Destination cache support functions
2144  */
2145 
2146 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2147 {
2148 	u32 rt_cookie = 0;
2149 
2150 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2151 		return false;
2152 
2153 	if (fib6_check_expired(f6i))
2154 		return false;
2155 
2156 	return true;
2157 }
2158 
2159 static struct dst_entry *rt6_check(struct rt6_info *rt,
2160 				   struct fib6_info *from,
2161 				   u32 cookie)
2162 {
2163 	u32 rt_cookie = 0;
2164 
2165 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2166 	    rt_cookie != cookie)
2167 		return NULL;
2168 
2169 	if (rt6_check_expired(rt))
2170 		return NULL;
2171 
2172 	return &rt->dst;
2173 }
2174 
2175 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2176 					    struct fib6_info *from,
2177 					    u32 cookie)
2178 {
2179 	if (!__rt6_check_expired(rt) &&
2180 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2181 	    fib6_check(from, cookie))
2182 		return &rt->dst;
2183 	else
2184 		return NULL;
2185 }
2186 
2187 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2188 {
2189 	struct dst_entry *dst_ret;
2190 	struct fib6_info *from;
2191 	struct rt6_info *rt;
2192 
2193 	rt = container_of(dst, struct rt6_info, dst);
2194 
2195 	rcu_read_lock();
2196 
2197 	/* All IPV6 dsts are created with ->obsolete set to the value
2198 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2199 	 * into this function always.
2200 	 */
2201 
2202 	from = rcu_dereference(rt->from);
2203 
2204 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2205 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2206 		dst_ret = rt6_dst_from_check(rt, from, cookie);
2207 	else
2208 		dst_ret = rt6_check(rt, from, cookie);
2209 
2210 	rcu_read_unlock();
2211 
2212 	return dst_ret;
2213 }
2214 
2215 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2216 {
2217 	struct rt6_info *rt = (struct rt6_info *) dst;
2218 
2219 	if (rt) {
2220 		if (rt->rt6i_flags & RTF_CACHE) {
2221 			rcu_read_lock();
2222 			if (rt6_check_expired(rt)) {
2223 				rt6_remove_exception_rt(rt);
2224 				dst = NULL;
2225 			}
2226 			rcu_read_unlock();
2227 		} else {
2228 			dst_release(dst);
2229 			dst = NULL;
2230 		}
2231 	}
2232 	return dst;
2233 }
2234 
/* dst_ops->link_failure callback: the neighbour for skb's dst could not be
 * resolved.  Reports unreachability to the sender and invalidates the
 * offending route state.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	/* tell the sender its packet could not be delivered */
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		rcu_read_lock();
		if (rt->rt6i_flags & RTF_CACHE) {
			/* cached exception route: remove it from the
			 * exception table (hold first so it cannot be
			 * freed under us)
			 */
			if (dst_hold_safe(&rt->dst))
				rt6_remove_exception_rt(rt);
		} else {
			struct fib6_info *from;
			struct fib6_node *fn;

			from = rcu_dereference(rt->from);
			if (from) {
				/* poison the node's serial number so a
				 * default route is re-looked-up on next use
				 */
				fn = rcu_dereference(from->fib6_node);
				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
					fn->fn_sernum = -1;
			}
		}
		rcu_read_unlock();
	}
}
2261 
2262 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2263 {
2264 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2265 		struct fib6_info *from;
2266 
2267 		rcu_read_lock();
2268 		from = rcu_dereference(rt0->from);
2269 		if (from)
2270 			rt0->dst.expires = from->expires;
2271 		rcu_read_unlock();
2272 	}
2273 
2274 	dst_set_expires(&rt0->dst, timeout);
2275 	rt0->rt6i_flags |= RTF_EXPIRES;
2276 }
2277 
2278 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2279 {
2280 	struct net *net = dev_net(rt->dst.dev);
2281 
2282 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2283 	rt->rt6i_flags |= RTF_MODIFIED;
2284 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2285 }
2286 
2287 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2288 {
2289 	bool from_set;
2290 
2291 	rcu_read_lock();
2292 	from_set = !!rcu_dereference(rt->from);
2293 	rcu_read_unlock();
2294 
2295 	return !(rt->rt6i_flags & RTF_CACHE) &&
2296 		(rt->rt6i_flags & RTF_PCPU || from_set);
2297 }
2298 
2299 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2300 				 const struct ipv6hdr *iph, u32 mtu)
2301 {
2302 	const struct in6_addr *daddr, *saddr;
2303 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2304 
2305 	if (rt6->rt6i_flags & RTF_LOCAL)
2306 		return;
2307 
2308 	if (dst_metric_locked(dst, RTAX_MTU))
2309 		return;
2310 
2311 	if (iph) {
2312 		daddr = &iph->daddr;
2313 		saddr = &iph->saddr;
2314 	} else if (sk) {
2315 		daddr = &sk->sk_v6_daddr;
2316 		saddr = &inet6_sk(sk)->saddr;
2317 	} else {
2318 		daddr = NULL;
2319 		saddr = NULL;
2320 	}
2321 	dst_confirm_neigh(dst, daddr);
2322 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2323 	if (mtu >= dst_mtu(dst))
2324 		return;
2325 
2326 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2327 		rt6_do_update_pmtu(rt6, mtu);
2328 		/* update rt6_ex->stamp for cache */
2329 		if (rt6->rt6i_flags & RTF_CACHE)
2330 			rt6_update_exception_stamp_rt(rt6);
2331 	} else if (daddr) {
2332 		struct fib6_info *from;
2333 		struct rt6_info *nrt6;
2334 
2335 		rcu_read_lock();
2336 		from = rcu_dereference(rt6->from);
2337 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2338 		if (nrt6) {
2339 			rt6_do_update_pmtu(nrt6, mtu);
2340 			if (rt6_insert_exception(nrt6, from))
2341 				dst_release_immediate(&nrt6->dst);
2342 		}
2343 		rcu_read_unlock();
2344 	}
2345 }
2346 
2347 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2348 			       struct sk_buff *skb, u32 mtu)
2349 {
2350 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2351 }
2352 
2353 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2354 		     int oif, u32 mark, kuid_t uid)
2355 {
2356 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2357 	struct dst_entry *dst;
2358 	struct flowi6 fl6;
2359 
2360 	memset(&fl6, 0, sizeof(fl6));
2361 	fl6.flowi6_oif = oif;
2362 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2363 	fl6.daddr = iph->daddr;
2364 	fl6.saddr = iph->saddr;
2365 	fl6.flowlabel = ip6_flowinfo(iph);
2366 	fl6.flowi6_uid = uid;
2367 
2368 	dst = ip6_route_output(net, NULL, &fl6);
2369 	if (!dst->error)
2370 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2371 	dst_release(dst);
2372 }
2373 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2374 
/* Update the PMTU for a connected socket's flow and, if that invalidated
 * the socket's cached dst, re-resolve the route.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* nothing more to do if the cached dst is still valid */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	/* skip if the socket is owned by user context (it will re-route
	 * itself) or the destination is IPv4-mapped
	 */
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2393 
/* Cache @dst on @sk, recording the destination (and, with subtrees, the
 * source) address only when the flow's addresses match the socket's own,
 * so the cache is keyed correctly for later validation.
 */
void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
			   const struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_SUBTREES
	struct ipv6_pinfo *np = inet6_sk(sk);
#endif

	/* NULL address arguments tell ip6_dst_store() not to pin that key */
	ip6_dst_store(sk, dst,
		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
		      &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
		      &np->saddr :
#endif
		      NULL);
}
2410 
/* Handle redirects */
struct ip6rd_flowi {
	/* fl6 must remain the first member: __ip6_route_redirect() casts the
	 * flowi6 pointer it receives back to an ip6rd_flowi pointer.
	 */
	struct flowi6 fl6;
	struct in6_addr gateway;	/* gateway from the redirect message */
};
2416 
2417 static struct rt6_info *__ip6_route_redirect(struct net *net,
2418 					     struct fib6_table *table,
2419 					     struct flowi6 *fl6,
2420 					     const struct sk_buff *skb,
2421 					     int flags)
2422 {
2423 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2424 	struct rt6_info *ret = NULL, *rt_cache;
2425 	struct fib6_info *rt;
2426 	struct fib6_node *fn;
2427 
2428 	/* Get the "current" route for this destination and
2429 	 * check if the redirect has come from appropriate router.
2430 	 *
2431 	 * RFC 4861 specifies that redirects should only be
2432 	 * accepted if they come from the nexthop to the target.
2433 	 * Due to the way the routes are chosen, this notion
2434 	 * is a bit fuzzy and one might need to check all possible
2435 	 * routes.
2436 	 */
2437 
2438 	rcu_read_lock();
2439 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2440 restart:
2441 	for_each_fib6_node_rt_rcu(fn) {
2442 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2443 			continue;
2444 		if (fib6_check_expired(rt))
2445 			continue;
2446 		if (rt->fib6_flags & RTF_REJECT)
2447 			break;
2448 		if (!(rt->fib6_flags & RTF_GATEWAY))
2449 			continue;
2450 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2451 			continue;
2452 		/* rt_cache's gateway might be different from its 'parent'
2453 		 * in the case of an ip redirect.
2454 		 * So we keep searching in the exception table if the gateway
2455 		 * is different.
2456 		 */
2457 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2458 			rt_cache = rt6_find_cached_rt(rt,
2459 						      &fl6->daddr,
2460 						      &fl6->saddr);
2461 			if (rt_cache &&
2462 			    ipv6_addr_equal(&rdfl->gateway,
2463 					    &rt_cache->rt6i_gateway)) {
2464 				ret = rt_cache;
2465 				break;
2466 			}
2467 			continue;
2468 		}
2469 		break;
2470 	}
2471 
2472 	if (!rt)
2473 		rt = net->ipv6.fib6_null_entry;
2474 	else if (rt->fib6_flags & RTF_REJECT) {
2475 		ret = net->ipv6.ip6_null_entry;
2476 		goto out;
2477 	}
2478 
2479 	if (rt == net->ipv6.fib6_null_entry) {
2480 		fn = fib6_backtrack(fn, &fl6->saddr);
2481 		if (fn)
2482 			goto restart;
2483 	}
2484 
2485 out:
2486 	if (ret)
2487 		dst_hold(&ret->dst);
2488 	else
2489 		ret = ip6_create_rt_rcu(rt);
2490 
2491 	rcu_read_unlock();
2492 
2493 	trace_fib6_table_lookup(net, rt, table, fl6);
2494 	return ret;
2495 };
2496 
2497 static struct dst_entry *ip6_route_redirect(struct net *net,
2498 					    const struct flowi6 *fl6,
2499 					    const struct sk_buff *skb,
2500 					    const struct in6_addr *gateway)
2501 {
2502 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2503 	struct ip6rd_flowi rdfl;
2504 
2505 	rdfl.fl6 = *fl6;
2506 	rdfl.gateway = *gateway;
2507 
2508 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2509 				flags, __ip6_route_redirect);
2510 }
2511 
2512 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2513 		  kuid_t uid)
2514 {
2515 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2516 	struct dst_entry *dst;
2517 	struct flowi6 fl6;
2518 
2519 	memset(&fl6, 0, sizeof(fl6));
2520 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2521 	fl6.flowi6_oif = oif;
2522 	fl6.flowi6_mark = mark;
2523 	fl6.daddr = iph->daddr;
2524 	fl6.saddr = iph->saddr;
2525 	fl6.flowlabel = ip6_flowinfo(iph);
2526 	fl6.flowi6_uid = uid;
2527 
2528 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2529 	rt6_do_redirect(dst, NULL, skb);
2530 	dst_release(dst);
2531 }
2532 EXPORT_SYMBOL_GPL(ip6_redirect);
2533 
2534 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2535 			    u32 mark)
2536 {
2537 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2538 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2539 	struct dst_entry *dst;
2540 	struct flowi6 fl6;
2541 
2542 	memset(&fl6, 0, sizeof(fl6));
2543 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2544 	fl6.flowi6_oif = oif;
2545 	fl6.flowi6_mark = mark;
2546 	fl6.daddr = msg->dest;
2547 	fl6.saddr = iph->daddr;
2548 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2549 
2550 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2551 	rt6_do_redirect(dst, NULL, skb);
2552 	dst_release(dst);
2553 }
2554 
/* Process a redirect on behalf of a socket, using its bound device, mark
 * and uid for the route lookup.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
		     sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2561 
2562 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2563 {
2564 	struct net_device *dev = dst->dev;
2565 	unsigned int mtu = dst_mtu(dst);
2566 	struct net *net = dev_net(dev);
2567 
2568 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2569 
2570 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2571 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2572 
2573 	/*
2574 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2575 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2576 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2577 	 * rely only on pmtu discovery"
2578 	 */
2579 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2580 		mtu = IPV6_MAXPLEN;
2581 	return mtu;
2582 }
2583 
2584 static unsigned int ip6_mtu(const struct dst_entry *dst)
2585 {
2586 	struct inet6_dev *idev;
2587 	unsigned int mtu;
2588 
2589 	mtu = dst_metric_raw(dst, RTAX_MTU);
2590 	if (mtu)
2591 		goto out;
2592 
2593 	mtu = IPV6_MIN_MTU;
2594 
2595 	rcu_read_lock();
2596 	idev = __in6_dev_get(dst->dev);
2597 	if (idev)
2598 		mtu = idev->cnf.mtu6;
2599 	rcu_read_unlock();
2600 
2601 out:
2602 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2603 
2604 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2605 }
2606 
/* Allocate an uncached host-route dst for sending ICMPv6/ND packets on
 * @dev toward fl6->daddr.  Returns the (possibly xfrm-wrapped) dst or an
 * ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output  = ip6_output;
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;	/* idev reference handed to the dst */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_disable_ip() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);
	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
2645 
/* dst_ops->gc callback: run the fib6 garbage collector when the dst count
 * is over budget or the minimum interval has elapsed.  Returns nonzero
 * when the cache is still over ip6_rt_max_size (allocation should fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* skip GC when we ran recently and are still under the size cap */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* each back-to-back invocation raises ip6_rt_gc_expire, making the
	 * collector progressively more aggressive
	 */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* decay the aggressiveness exponentially per rt_elasticity */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
2670 
2671 static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2672 			       struct fib6_config *cfg)
2673 {
2674 	struct dst_metrics *p;
2675 
2676 	if (!cfg->fc_mx)
2677 		return 0;
2678 
2679 	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2680 	if (unlikely(!p))
2681 		return -ENOMEM;
2682 
2683 	refcount_set(&p->refcnt, 1);
2684 	rt->fib6_metrics = p;
2685 
2686 	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
2687 }
2688 
2689 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2690 					    struct fib6_config *cfg,
2691 					    const struct in6_addr *gw_addr,
2692 					    u32 tbid, int flags)
2693 {
2694 	struct flowi6 fl6 = {
2695 		.flowi6_oif = cfg->fc_ifindex,
2696 		.daddr = *gw_addr,
2697 		.saddr = cfg->fc_prefsrc,
2698 	};
2699 	struct fib6_table *table;
2700 	struct rt6_info *rt;
2701 
2702 	table = fib6_get_table(net, tbid);
2703 	if (!table)
2704 		return NULL;
2705 
2706 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2707 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2708 
2709 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2710 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2711 
2712 	/* if table lookup failed, fall back to full lookup */
2713 	if (rt == net->ipv6.ip6_null_entry) {
2714 		ip6_rt_put(rt);
2715 		rt = NULL;
2716 	}
2717 
2718 	return rt;
2719 }
2720 
2721 static int ip6_route_check_nh_onlink(struct net *net,
2722 				     struct fib6_config *cfg,
2723 				     const struct net_device *dev,
2724 				     struct netlink_ext_ack *extack)
2725 {
2726 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2727 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2728 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2729 	struct rt6_info *grt;
2730 	int err;
2731 
2732 	err = 0;
2733 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2734 	if (grt) {
2735 		if (!grt->dst.error &&
2736 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2737 			NL_SET_ERR_MSG(extack,
2738 				       "Nexthop has invalid gateway or device mismatch");
2739 			err = -EINVAL;
2740 		}
2741 
2742 		ip6_rt_put(grt);
2743 	}
2744 
2745 	return err;
2746 }
2747 
/* Resolve the egress device for a gateway nexthop by looking up the
 * gateway address itself.  When *_dev is NULL on entry it (and *idev) are
 * filled from the matched route, with references taken.  Returns 0 when
 * the gateway resolves via a non-gateway (directly connected) route.
 */
static int ip6_route_check_nh(struct net *net,
			      struct fib6_config *cfg,
			      struct net_device **_dev,
			      struct inet6_dev **idev)
{
	const struct in6_addr *gw_addr = &cfg->fc_gateway;
	struct net_device *dev = _dev ? *_dev : NULL;
	struct rt6_info *grt = NULL;
	int err = -EHOSTUNREACH;

	/* prefer a lookup in the table the route is being added to */
	if (cfg->fc_table) {
		int flags = RT6_LOOKUP_F_IFACE;

		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
					  cfg->fc_table, flags);
		if (grt) {
			/* discard matches that go via another gateway or a
			 * different device than requested
			 */
			if (grt->rt6i_flags & RTF_GATEWAY ||
			    (dev && dev != grt->dst.dev)) {
				ip6_rt_put(grt);
				grt = NULL;
			}
		}
	}

	/* fall back to a lookup across all tables */
	if (!grt)
		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);

	if (!grt)
		goto out;

	if (dev) {
		if (dev != grt->dst.dev) {
			ip6_rt_put(grt);
			goto out;
		}
	} else {
		/* adopt the matched route's device and idev, taking
		 * references the caller will own
		 */
		*_dev = dev = grt->dst.dev;
		*idev = grt->rt6i_idev;
		dev_hold(dev);
		in6_dev_hold(grt->rt6i_idev);
	}

	/* the gateway must be directly reachable, not behind another one */
	if (!(grt->rt6i_flags & RTF_GATEWAY))
		err = 0;

	ip6_rt_put(grt);

out:
	return err;
}
2798 
/* Validate the gateway of a route being added and resolve its egress
 * device.  May fill *_dev/*idev (with references) via ip6_route_check_nh().
 * Returns 0 on success or a negative errno with extack set.
 */
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
			   struct net_device **_dev, struct inet6_dev **idev,
			   struct netlink_ext_ack *extack)
{
	const struct in6_addr *gw_addr = &cfg->fc_gateway;
	int gwa_type = ipv6_addr_type(gw_addr);
	/* link-local gateways may legitimately match a local address on
	 * another device, so only then is the device check skipped
	 */
	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
	const struct net_device *dev = *_dev;
	bool need_addr_check = !dev;
	int err = -EINVAL;

	/* if gw_addr is local we will fail to detect this in case
	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
	 * will return already-added prefix route via interface that
	 * prefix route was assigned to, which might be non-loopback.
	 */
	if (dev &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		goto out;
	}

	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
		/* IPv6 strictly inhibits using not link-local
		 * addresses as nexthop address.
		 * Otherwise, router will not able to send redirects.
		 * It is very good, but in some (rare!) circumstances
		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
		 * some exceptions. --ANK
		 * We allow IPv4-mapped nexthops to support RFC4798-type
		 * addressing
		 */
		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}

		if (cfg->fc_flags & RTNH_F_ONLINK)
			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
		else
			err = ip6_route_check_nh(net, cfg, _dev, idev);

		if (err)
			goto out;
	}

	/* reload in case device was changed */
	dev = *_dev;

	err = -EINVAL;
	if (!dev) {
		NL_SET_ERR_MSG(extack, "Egress device not specified");
		goto out;
	} else if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack,
			       "Egress device can not be loopback device for this route");
		goto out;
	}

	/* if we did not check gw_addr above, do so now that the
	 * egress device has been resolved.
	 */
	if (need_addr_check &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		goto out;
	}

	err = 0;
out:
	return err;
}
2871 
/* Build a fib6_info from a netlink/ioctl route configuration.
 *
 * Validates the config, resolves the egress device and gateway, and
 * returns a new fib6_info (not yet inserted into the FIB) or an ERR_PTR.
 * The device reference is kept in fib6_nh; the idev reference taken here
 * is dropped again before returning.
 */
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
					      gfp_t gfp_flags,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct fib6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	/* RTF_CACHE is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_CACHE) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
		goto out;
	}

	if (cfg->fc_type > RTN_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid route type");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* onlink nexthops require an explicit, up device */
	if (cfg->fc_flags & RTNH_F_ONLINK) {
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device required for onlink");
			err = -ENODEV;
			goto out;
		}

		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		}
	}

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	err = -ENOMEM;
	rt = fib6_info_alloc(gfp_flags);
	if (!rt)
		goto out;

	if (cfg->fc_flags & RTF_ADDRCONF)
		rt->dst_nocount = true;

	err = ip6_convert_metrics(net, rt, cfg);
	if (err < 0)
		goto out;

	if (cfg->fc_flags & RTF_EXPIRES)
		fib6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		fib6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->fib6_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* attach lightweight-tunnel encap state if one was requested */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
	}

	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->fib6_dst.plen = cfg->fc_dst_len;
	if (rt->fib6_dst.plen == 128)
		rt->dst_host = true;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->fib6_src.plen = cfg->fc_src_len;
#endif

	rt->fib6_metric = cfg->fc_metric;
	rt->fib6_nh.nh_weight = 1;

	rt->fib6_type = cfg->fc_type;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		/* may resolve dev/idev from the gateway address */
		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
		if (err)
			goto out;

		rt->fib6_nh.nh_gw = cfg->fc_gateway;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (idev->cnf.disable_ipv6) {
		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
		err = -EACCES;
		goto out;
	}

	if (!(dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	}

	/* a preferred source address must be configured on the device */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
		rt->fib6_prefsrc.plen = 128;
	} else
		rt->fib6_prefsrc.plen = 0;

	rt->fib6_flags = cfg->fc_flags;

install_route:
	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
	    !netif_carrier_ok(dev))
		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
	rt->fib6_nh.nh_dev = dev;
	rt->fib6_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	if (idev)
		in6_dev_put(idev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);

	fib6_info_release(rt);
	return ERR_PTR(err);
}
3094 
3095 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3096 		  struct netlink_ext_ack *extack)
3097 {
3098 	struct fib6_info *rt;
3099 	int err;
3100 
3101 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3102 	if (IS_ERR(rt))
3103 		return PTR_ERR(rt);
3104 
3105 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3106 	fib6_info_release(rt);
3107 
3108 	return err;
3109 }
3110 
3111 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3112 {
3113 	struct net *net = info->nl_net;
3114 	struct fib6_table *table;
3115 	int err;
3116 
3117 	if (rt == net->ipv6.fib6_null_entry) {
3118 		err = -ENOENT;
3119 		goto out;
3120 	}
3121 
3122 	table = rt->fib6_table;
3123 	spin_lock_bh(&table->tb6_lock);
3124 	err = fib6_del(rt, info);
3125 	spin_unlock_bh(&table->tb6_lock);
3126 
3127 out:
3128 	fib6_info_release(rt);
3129 	return err;
3130 }
3131 
3132 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3133 {
3134 	struct nl_info info = { .nl_net = net };
3135 
3136 	return __ip6_del_rt(rt, &info);
3137 }
3138 
/* Delete @rt and, for a multipath route with fc_delete_all_nh set, all of
 * its sibling nexthops, sending one combined RTM_DELROUTE notification.
 * Consumes the caller's reference on @rt.
 */
static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.fib6_null_entry)
		goto out_put;
	table = rt->fib6_table;
	spin_lock_bh(&table->tb6_lock);

	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
		struct fib6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			/* fill before deletion, while all hops still exist;
			 * on failure fall back to per-hop notifications
			 */
			if (rt6_fill_node(net, skb, rt, NULL,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings,
					 fib6_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	spin_unlock_bh(&table->tb6_lock);
out_put:
	fib6_info_release(rt);

	/* send the combined notification outside the table lock */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
3190 
3191 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3192 {
3193 	int rc = -ESRCH;
3194 
3195 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3196 		goto out;
3197 
3198 	if (cfg->fc_flags & RTF_GATEWAY &&
3199 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3200 		goto out;
3201 	if (dst_hold_safe(&rt->dst))
3202 		rc = rt6_remove_exception_rt(rt);
3203 out:
3204 	return rc;
3205 }
3206 
/* Delete the route described by @cfg: either a cached exception route
 * (RTF_CACHE) or a FIB entry matching device/gateway/metric/protocol.
 * Returns 0 on success, -ESRCH when nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_cache;
	struct fib6_table *table;
	struct fib6_info *rt;
	struct fib6_node *fn;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	rcu_read_lock();

	/* for RTF_CACHE, match any covering node so the exception table
	 * of a less-specific route can be searched too
	 */
	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len,
			 !(cfg->fc_flags & RTF_CACHE));

	if (fn) {
		for_each_fib6_node_rt_rcu(fn) {
			if (cfg->fc_flags & RTF_CACHE) {
				int rc;

				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
							      &cfg->fc_src);
				if (rt_cache) {
					rc = ip6_del_cached_rt(rt_cache, cfg);
					if (rc != -ESRCH) {
						rcu_read_unlock();
						return rc;
					}
				}
				continue;
			}
			if (cfg->fc_ifindex &&
			    (!rt->fib6_nh.nh_dev ||
			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
				continue;
			/* hold the entry across rcu_read_unlock(); the
			 * deletion helpers take the table lock themselves
			 * and consume this reference
			 */
			fib6_info_hold(rt);
			rcu_read_unlock();

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	rcu_read_unlock();

	return err;
}
3270 
3271 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3272 {
3273 	struct netevent_redirect netevent;
3274 	struct rt6_info *rt, *nrt = NULL;
3275 	struct ndisc_options ndopts;
3276 	struct inet6_dev *in6_dev;
3277 	struct neighbour *neigh;
3278 	struct fib6_info *from;
3279 	struct rd_msg *msg;
3280 	int optlen, on_link;
3281 	u8 *lladdr;
3282 
3283 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3284 	optlen -= sizeof(*msg);
3285 
3286 	if (optlen < 0) {
3287 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3288 		return;
3289 	}
3290 
3291 	msg = (struct rd_msg *)icmp6_hdr(skb);
3292 
3293 	if (ipv6_addr_is_multicast(&msg->dest)) {
3294 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3295 		return;
3296 	}
3297 
3298 	on_link = 0;
3299 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3300 		on_link = 1;
3301 	} else if (ipv6_addr_type(&msg->target) !=
3302 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3303 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3304 		return;
3305 	}
3306 
3307 	in6_dev = __in6_dev_get(skb->dev);
3308 	if (!in6_dev)
3309 		return;
3310 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3311 		return;
3312 
3313 	/* RFC2461 8.1:
3314 	 *	The IP source address of the Redirect MUST be the same as the current
3315 	 *	first-hop router for the specified ICMP Destination Address.
3316 	 */
3317 
3318 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3319 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3320 		return;
3321 	}
3322 
3323 	lladdr = NULL;
3324 	if (ndopts.nd_opts_tgt_lladdr) {
3325 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3326 					     skb->dev);
3327 		if (!lladdr) {
3328 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3329 			return;
3330 		}
3331 	}
3332 
3333 	rt = (struct rt6_info *) dst;
3334 	if (rt->rt6i_flags & RTF_REJECT) {
3335 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3336 		return;
3337 	}
3338 
3339 	/* Redirect received -> path was valid.
3340 	 * Look, redirects are sent only in response to data packets,
3341 	 * so that this nexthop apparently is reachable. --ANK
3342 	 */
3343 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3344 
3345 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3346 	if (!neigh)
3347 		return;
3348 
3349 	/*
3350 	 *	We have finally decided to accept it.
3351 	 */
3352 
3353 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3354 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3355 		     NEIGH_UPDATE_F_OVERRIDE|
3356 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3357 				     NEIGH_UPDATE_F_ISROUTER)),
3358 		     NDISC_REDIRECT, &ndopts);
3359 
3360 	rcu_read_lock();
3361 	from = rcu_dereference(rt->from);
3362 	fib6_info_hold(from);
3363 	rcu_read_unlock();
3364 
3365 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3366 	if (!nrt)
3367 		goto out;
3368 
3369 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3370 	if (on_link)
3371 		nrt->rt6i_flags &= ~RTF_GATEWAY;
3372 
3373 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3374 
3375 	/* No need to remove rt from the exception table if rt is
3376 	 * a cached route because rt6_insert_exception() will
3377 	 * takes care of it
3378 	 */
3379 	if (rt6_insert_exception(nrt, from)) {
3380 		dst_release_immediate(&nrt->dst);
3381 		goto out;
3382 	}
3383 
3384 	netevent.old = &rt->dst;
3385 	netevent.new = &nrt->dst;
3386 	netevent.daddr = &msg->dest;
3387 	netevent.neigh = neigh;
3388 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3389 
3390 out:
3391 	fib6_info_release(from);
3392 	neigh_release(neigh);
3393 }
3394 
3395 #ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	/* Look up the RA route-information entry for prefix/prefixlen that
	 * goes through gwaddr on dev.  Returns the entry with a reference
	 * held, or NULL if no matching route exists.
	 */
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct fib6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	rcu_read_lock();
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
	if (!fn)
		goto out;

	/* the macro walks the node's route list with 'rt' as its cursor;
	 * 'rt' is NULL when the walk falls through without a match
	 */
	for_each_fib6_node_rt_rcu(fn) {
		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
			continue;
		/* only entries created from an RA Route Information option */
		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
			continue;
		/* take a reference before leaving the RCU section */
		fib6_info_hold(rt);
		break;
	}
out:
	rcu_read_unlock();
	return rt;
}
3430 
3431 static struct fib6_info *rt6_add_route_info(struct net *net,
3432 					   const struct in6_addr *prefix, int prefixlen,
3433 					   const struct in6_addr *gwaddr,
3434 					   struct net_device *dev,
3435 					   unsigned int pref)
3436 {
3437 	struct fib6_config cfg = {
3438 		.fc_metric	= IP6_RT_PRIO_USER,
3439 		.fc_ifindex	= dev->ifindex,
3440 		.fc_dst_len	= prefixlen,
3441 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3442 				  RTF_UP | RTF_PREF(pref),
3443 		.fc_protocol = RTPROT_RA,
3444 		.fc_type = RTN_UNICAST,
3445 		.fc_nlinfo.portid = 0,
3446 		.fc_nlinfo.nlh = NULL,
3447 		.fc_nlinfo.nl_net = net,
3448 	};
3449 
3450 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3451 	cfg.fc_dst = *prefix;
3452 	cfg.fc_gateway = *gwaddr;
3453 
3454 	/* We should treat it as a default route if prefix length is 0. */
3455 	if (!prefixlen)
3456 		cfg.fc_flags |= RTF_DEFAULT;
3457 
3458 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3459 
3460 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3461 }
3462 #endif
3463 
struct fib6_info *rt6_get_dflt_router(struct net *net,
				     const struct in6_addr *addr,
				     struct net_device *dev)
{
	/* Find the RA-learned default router entry via gateway 'addr' on
	 * 'dev'.  Returns the entry with a reference held, or NULL.
	 */
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct fib6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	rcu_read_lock();
	/* 'rt' is the macro's cursor; it is NULL if the walk finds nothing */
	for_each_fib6_node_rt_rcu(&table->tb6_root) {
		if (dev == rt->fib6_nh.nh_dev &&
		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
			break;
	}
	if (rt)
		fib6_info_hold(rt);
	rcu_read_unlock();
	return rt;
}
3488 
3489 struct fib6_info *rt6_add_dflt_router(struct net *net,
3490 				     const struct in6_addr *gwaddr,
3491 				     struct net_device *dev,
3492 				     unsigned int pref)
3493 {
3494 	struct fib6_config cfg = {
3495 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3496 		.fc_metric	= IP6_RT_PRIO_USER,
3497 		.fc_ifindex	= dev->ifindex,
3498 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3499 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3500 		.fc_protocol = RTPROT_RA,
3501 		.fc_type = RTN_UNICAST,
3502 		.fc_nlinfo.portid = 0,
3503 		.fc_nlinfo.nlh = NULL,
3504 		.fc_nlinfo.nl_net = net,
3505 	};
3506 
3507 	cfg.fc_gateway = *gwaddr;
3508 
3509 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3510 		struct fib6_table *table;
3511 
3512 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3513 		if (table)
3514 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3515 	}
3516 
3517 	return rt6_get_dflt_router(net, gwaddr, dev);
3518 }
3519 
static void __rt6_purge_dflt_routers(struct net *net,
				     struct fib6_table *table)
{
	/* Delete every RA-learned default router from 'table', except on
	 * interfaces configured with accept_ra == 2 (accept RA even when
	 * forwarding).
	 */
	struct fib6_info *rt;

restart:
	rcu_read_lock();
	for_each_fib6_node_rt_rcu(&table->tb6_root) {
		struct net_device *dev = fib6_info_nh_dev(rt);
		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;

		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!idev || idev->cnf.accept_ra != 2)) {
			/* hold a reference so the entry survives dropping RCU,
			 * delete it outside the read-side section, then restart
			 * the walk since the tree may have changed under us
			 */
			fib6_info_hold(rt);
			rcu_read_unlock();
			ip6_del_rt(net, rt);
			goto restart;
		}
	}
	rcu_read_unlock();

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
3543 
3544 void rt6_purge_dflt_routers(struct net *net)
3545 {
3546 	struct fib6_table *table;
3547 	struct hlist_head *head;
3548 	unsigned int h;
3549 
3550 	rcu_read_lock();
3551 
3552 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3553 		head = &net->ipv6.fib_table_hash[h];
3554 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3555 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3556 				__rt6_purge_dflt_routers(net, table);
3557 		}
3558 	}
3559 
3560 	rcu_read_unlock();
3561 }
3562 
3563 static void rtmsg_to_fib6_config(struct net *net,
3564 				 struct in6_rtmsg *rtmsg,
3565 				 struct fib6_config *cfg)
3566 {
3567 	memset(cfg, 0, sizeof(*cfg));
3568 
3569 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3570 			 : RT6_TABLE_MAIN;
3571 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3572 	cfg->fc_metric = rtmsg->rtmsg_metric;
3573 	cfg->fc_expires = rtmsg->rtmsg_info;
3574 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3575 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3576 	cfg->fc_flags = rtmsg->rtmsg_flags;
3577 	cfg->fc_type = rtmsg->rtmsg_type;
3578 
3579 	cfg->fc_nlinfo.nl_net = net;
3580 
3581 	cfg->fc_dst = rtmsg->rtmsg_dst;
3582 	cfg->fc_src = rtmsg->rtmsg_src;
3583 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3584 }
3585 
3586 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3587 {
3588 	struct fib6_config cfg;
3589 	struct in6_rtmsg rtmsg;
3590 	int err;
3591 
3592 	switch (cmd) {
3593 	case SIOCADDRT:		/* Add a route */
3594 	case SIOCDELRT:		/* Delete a route */
3595 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3596 			return -EPERM;
3597 		err = copy_from_user(&rtmsg, arg,
3598 				     sizeof(struct in6_rtmsg));
3599 		if (err)
3600 			return -EFAULT;
3601 
3602 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3603 
3604 		rtnl_lock();
3605 		switch (cmd) {
3606 		case SIOCADDRT:
3607 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3608 			break;
3609 		case SIOCDELRT:
3610 			err = ip6_route_del(&cfg, NULL);
3611 			break;
3612 		default:
3613 			err = -EINVAL;
3614 		}
3615 		rtnl_unlock();
3616 
3617 		return err;
3618 	}
3619 
3620 	return -EINVAL;
3621 }
3622 
3623 /*
3624  *	Drop the packet on the floor
3625  */
3626 
/* Account the undeliverable packet, send an ICMPv6 Destination Unreachable
 * with the given code, and free the skb.  'ipstats_mib_noroutes' selects
 * the no-route counter (input vs. output) to increment.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* unspecified destination: count as address error,
			 * not as a routing failure
			 */
			IP6_INC_STATS(dev_net(dst->dev),
				      __in6_dev_get_safely(skb->dev),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
3650 
/* Input handler: drop the packet with 'no route' accounting and code. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
3655 
3656 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3657 {
3658 	skb->dev = skb_dst(skb)->dev;
3659 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3660 }
3661 
/* Input handler: drop the packet with 'administratively prohibited' code. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
3666 
3667 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3668 {
3669 	skb->dev = skb_dst(skb)->dev;
3670 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3671 }
3672 
3673 /*
3674  *	Allocate a dst for local (unicast / anycast) address.
3675  */
3676 
3677 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3678 				     struct inet6_dev *idev,
3679 				     const struct in6_addr *addr,
3680 				     bool anycast, gfp_t gfp_flags)
3681 {
3682 	u32 tb_id;
3683 	struct net_device *dev = idev->dev;
3684 	struct fib6_info *f6i;
3685 
3686 	f6i = fib6_info_alloc(gfp_flags);
3687 	if (!f6i)
3688 		return ERR_PTR(-ENOMEM);
3689 
3690 	f6i->dst_nocount = true;
3691 	f6i->dst_host = true;
3692 	f6i->fib6_protocol = RTPROT_KERNEL;
3693 	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3694 	if (anycast) {
3695 		f6i->fib6_type = RTN_ANYCAST;
3696 		f6i->fib6_flags |= RTF_ANYCAST;
3697 	} else {
3698 		f6i->fib6_type = RTN_LOCAL;
3699 		f6i->fib6_flags |= RTF_LOCAL;
3700 	}
3701 
3702 	f6i->fib6_nh.nh_gw = *addr;
3703 	dev_hold(dev);
3704 	f6i->fib6_nh.nh_dev = dev;
3705 	f6i->fib6_dst.addr = *addr;
3706 	f6i->fib6_dst.plen = 128;
3707 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3708 	f6i->fib6_table = fib6_get_table(net, tb_id);
3709 
3710 	return f6i;
3711 }
3712 
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches all */
	struct net *net;	/* namespace being cleaned */
	struct in6_addr *addr;	/* preferred source address being removed */
};
3719 
3720 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3721 {
3722 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3723 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3724 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3725 
3726 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3727 	    rt != net->ipv6.fib6_null_entry &&
3728 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3729 		spin_lock_bh(&rt6_exception_lock);
3730 		/* remove prefsrc entry */
3731 		rt->fib6_prefsrc.plen = 0;
3732 		/* need to update cache as well */
3733 		rt6_exceptions_remove_prefsrc(rt);
3734 		spin_unlock_bh(&rt6_exception_lock);
3735 	}
3736 	return 0;
3737 }
3738 
3739 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3740 {
3741 	struct net *net = dev_net(ifp->idev->dev);
3742 	struct arg_dev_net_ip adni = {
3743 		.dev = ifp->idev->dev,
3744 		.net = net,
3745 		.addr = &ifp->addr,
3746 	};
3747 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3748 }
3749 
/* flag combination identifying a default router learned via an RA */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3751 
3752 /* Remove routers and update dst entries when gateway turn into host. */
3753 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3754 {
3755 	struct in6_addr *gateway = (struct in6_addr *)arg;
3756 
3757 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3758 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3759 		return -1;
3760 	}
3761 
3762 	/* Further clean up cached routes in exception table.
3763 	 * This is needed because cached route may have a different
3764 	 * gateway than its 'parent' in the case of an ip redirect.
3765 	 */
3766 	rt6_exceptions_clean_tohost(rt, gateway);
3767 
3768 	return 0;
3769 }
3770 
/* Drop router entries through 'gateway' and clean matching cached routes
 * across all FIB tables in the namespace.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3775 
/* argument for the device-event fib6_clean_all() callbacks below */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* RTNH_F_* flags (rt6_sync_up) */
		unsigned long event;	/* NETDEV_* event (rt6_sync_down_dev) */
	};
};
3783 
3784 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3785 {
3786 	struct fib6_info *iter;
3787 	struct fib6_node *fn;
3788 
3789 	fn = rcu_dereference_protected(rt->fib6_node,
3790 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3791 	iter = rcu_dereference_protected(fn->leaf,
3792 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3793 	while (iter) {
3794 		if (iter->fib6_metric == rt->fib6_metric &&
3795 		    iter->fib6_nsiblings)
3796 			return iter;
3797 		iter = rcu_dereference_protected(iter->fib6_next,
3798 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3799 	}
3800 
3801 	return NULL;
3802 }
3803 
3804 static bool rt6_is_dead(const struct fib6_info *rt)
3805 {
3806 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3807 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3808 	     fib6_ignore_linkdown(rt)))
3809 		return true;
3810 
3811 	return false;
3812 }
3813 
3814 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3815 {
3816 	struct fib6_info *iter;
3817 	int total = 0;
3818 
3819 	if (!rt6_is_dead(rt))
3820 		total += rt->fib6_nh.nh_weight;
3821 
3822 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3823 		if (!rt6_is_dead(iter))
3824 			total += iter->fib6_nh.nh_weight;
3825 	}
3826 
3827 	return total;
3828 }
3829 
3830 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3831 {
3832 	int upper_bound = -1;
3833 
3834 	if (!rt6_is_dead(rt)) {
3835 		*weight += rt->fib6_nh.nh_weight;
3836 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3837 						    total) - 1;
3838 	}
3839 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3840 }
3841 
3842 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3843 {
3844 	struct fib6_info *iter;
3845 	int weight = 0;
3846 
3847 	rt6_upper_bound_set(rt, &weight, total);
3848 
3849 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3850 		rt6_upper_bound_set(iter, &weight, total);
3851 }
3852 
/* Recompute the hash upper bounds of the multipath group 'rt' belongs to,
 * redistributing traffic over the remaining live nexthops.
 */
void rt6_multipath_rebalance(struct fib6_info *rt)
{
	struct fib6_info *first;
	int total;

	/* In case the entire multipath route was marked for flushing,
	 * then there is no need to rebalance upon the removal of every
	 * sibling route.
	 */
	if (!rt->fib6_nsiblings || rt->should_flush)
		return;

	/* During lookup routes are evaluated in order, so we need to
	 * make sure upper bounds are assigned from the first sibling
	 * onwards.
	 */
	first = rt6_multipath_first_sibling(rt);
	if (WARN_ON_ONCE(!first))
		return;

	total = rt6_multipath_total_weight(first);
	rt6_multipath_upper_bound_set(first, total);
}
3876 
3877 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3878 {
3879 	const struct arg_netdev_event *arg = p_arg;
3880 	struct net *net = dev_net(arg->dev);
3881 
3882 	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3883 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3884 		fib6_update_sernum_upto_root(net, rt);
3885 		rt6_multipath_rebalance(rt);
3886 	}
3887 
3888 	return 0;
3889 }
3890 
/* Clear 'nh_flags' on every route whose nexthop uses 'dev'. */
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			.nh_flags = nh_flags,
		},
	};

	/* a device revived with carrier present is no longer link-down either */
	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
		arg.nh_flags |= RTNH_F_LINKDOWN;

	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
3905 
3906 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3907 				   const struct net_device *dev)
3908 {
3909 	struct fib6_info *iter;
3910 
3911 	if (rt->fib6_nh.nh_dev == dev)
3912 		return true;
3913 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3914 		if (iter->fib6_nh.nh_dev == dev)
3915 			return true;
3916 
3917 	return false;
3918 }
3919 
3920 static void rt6_multipath_flush(struct fib6_info *rt)
3921 {
3922 	struct fib6_info *iter;
3923 
3924 	rt->should_flush = 1;
3925 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3926 		iter->should_flush = 1;
3927 }
3928 
3929 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3930 					     const struct net_device *down_dev)
3931 {
3932 	struct fib6_info *iter;
3933 	unsigned int dead = 0;
3934 
3935 	if (rt->fib6_nh.nh_dev == down_dev ||
3936 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3937 		dead++;
3938 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3939 		if (iter->fib6_nh.nh_dev == down_dev ||
3940 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3941 			dead++;
3942 
3943 	return dead;
3944 }
3945 
3946 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3947 				       const struct net_device *dev,
3948 				       unsigned int nh_flags)
3949 {
3950 	struct fib6_info *iter;
3951 
3952 	if (rt->fib6_nh.nh_dev == dev)
3953 		rt->fib6_nh.nh_flags |= nh_flags;
3954 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3955 		if (iter->fib6_nh.nh_dev == dev)
3956 			iter->fib6_nh.nh_flags |= nh_flags;
3957 }
3958 
/* called with write lock held for table with rt
 *
 * fib6_clean_all() callback reacting to a device-down event.  Return
 * value protocol: 0 keeps the route, -1 asks the walker to delete it,
 * -2 is a multipath-specific code consumed by fib6_clean_node().
 */
static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
{
	const struct arg_netdev_event *arg = p_arg;
	const struct net_device *dev = arg->dev;
	struct net *net = dev_net(dev);

	/* the null entry is never removed */
	if (rt == net->ipv6.fib6_null_entry)
		return 0;

	switch (arg->event) {
	case NETDEV_UNREGISTER:
		/* device is going away entirely: delete its routes */
		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
	case NETDEV_DOWN:
		if (rt->should_flush)
			return -1;
		/* single-path routes are simply deleted */
		if (!rt->fib6_nsiblings)
			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
		if (rt6_multipath_uses_dev(rt, dev)) {
			unsigned int count;

			count = rt6_multipath_dead_count(rt, dev);
			if (rt->fib6_nsiblings + 1 == count) {
				/* no live nexthop left: flush the whole group */
				rt6_multipath_flush(rt);
				return -1;
			}
			/* otherwise only mark the affected nexthops */
			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
						   RTNH_F_LINKDOWN);
			fib6_update_sernum(net, rt);
			rt6_multipath_rebalance(rt);
		}
		/* NOTE(review): -2 is interpreted specially for multipath
		 * entries by the walker - confirm the exact handling in
		 * net/ipv6/ip6_fib.c:fib6_clean_node()
		 */
		return -2;
	case NETDEV_CHANGE:
		/* carrier change: mark the nexthop link-down, keep the route */
		if (rt->fib6_nh.nh_dev != dev ||
		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
			break;
		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
		rt6_multipath_rebalance(rt);
		break;
	}

	return 0;
}
4002 
/* Mark or delete routes using 'dev' in reaction to a NETDEV_* event. */
void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			.event = event,
		},
	};

	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
}
4014 
/* Remove all IPv6 routing state for a device that is going down:
 * FIB entries, uncached dst entries, and neighbour cache entries.
 */
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}
4021 
/* argument for rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU value */
};
4026 
/* fib6_clean_all() callback: propagate a device MTU change to routes
 * using that device and to their cached exception entries.
 */
static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	if (rt->fib6_nh.nh_dev == arg->dev &&
	    !fib6_metric_locked(rt, RTAX_MTU)) {
		u32 mtu = rt->fib6_pmtu;

		/* shrink unconditionally; grow only when the route tracked
		 * the previous device MTU (mtu == idev->cnf.mtu6)
		 */
		if (mtu >= arg->mtu ||
		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
			fib6_metric_set(rt, RTAX_MTU, arg->mtu);

		/* cached routes keep their own pmtu; update them too */
		spin_lock_bh(&rt6_exception_lock);
		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}
4061 
/* Propagate a device MTU change to all routes in the device's namespace. */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
4071 
/* netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
	[RTA_IP_PROTO]		= { .type = NLA_U8 },
	[RTA_SPORT]		= { .type = NLA_U16 },
	[RTA_DPORT]		= { .type = NLA_U16 },
};
4091 
/* Translate an RTM_{NEW,DEL}ROUTE netlink message into a fib6_config.
 * Returns 0 on success or a negative errno.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* the reject-style route types all carry RTF_REJECT; the specific
	 * type is preserved in fc_type
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* the attribute carries only prefix-length bytes */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* an explicit RTA_TABLE attribute overrides rtm_table above */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		/* unknown router-preference values fall back to medium */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		/* infinite timeouts simply leave RTF_EXPIRES unset */
		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
4217 
/* one pending nexthop collected while building a multipath route */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route created for this nexthop */
	struct fib6_config r_cfg;	/* per-nexthop config (kept for error reporting) */
	struct list_head next;		/* link in the local rt6_nh_list */
};
4223 
4224 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4225 {
4226 	struct rt6_nh *nh;
4227 
4228 	list_for_each_entry(nh, rt6_nh_list, next) {
4229 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4230 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4231 		        nh->r_cfg.fc_ifindex);
4232 	}
4233 }
4234 
4235 static int ip6_route_info_append(struct net *net,
4236 				 struct list_head *rt6_nh_list,
4237 				 struct fib6_info *rt,
4238 				 struct fib6_config *r_cfg)
4239 {
4240 	struct rt6_nh *nh;
4241 	int err = -EEXIST;
4242 
4243 	list_for_each_entry(nh, rt6_nh_list, next) {
4244 		/* check if fib6_info already exists */
4245 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4246 			return err;
4247 	}
4248 
4249 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4250 	if (!nh)
4251 		return -ENOMEM;
4252 	nh->fib6_info = rt;
4253 	err = ip6_convert_metrics(net, rt, r_cfg);
4254 	if (err) {
4255 		kfree(nh);
4256 		return err;
4257 	}
4258 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4259 	list_add_tail(&nh->next, rt6_nh_list);
4260 
4261 	return 0;
4262 }
4263 
/* Send a single RTM_NEWROUTE notification covering a multipath change. */
static void ip6_route_mpath_notify(struct fib6_info *rt,
				   struct fib6_info *rt_last,
				   struct nl_info *info,
				   __u16 nlflags)
{
	/* if this is an APPEND route, then rt points to the first route
	 * inserted and rt_last points to last route inserted. Userspace
	 * wants a consistent dump of the route which starts at the first
	 * nexthop. Since sibling routes are always added at the end of
	 * the list, find the first sibling of the last route appended
	 */
	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
		rt = list_first_entry(&rt_last->fib6_siblings,
				      struct fib6_info,
				      fib6_siblings);
	}

	/* rt may be NULL when nothing was successfully inserted */
	if (rt)
		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
4284 
/* Insert a route carrying an RTA_MULTIPATH attribute.  Each rtnexthop
 * entry is turned into its own fib6_info and the routes are inserted one
 * by one (siblings get linked up during insertion).  On failure the
 * nexthops already inserted are deleted again, and on success a single
 * notification covering all nexthops is sent.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct fib6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * fib6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* the per-nexthop config starts as a copy of the route
		 * config; the rtnexthop's ifindex/gateway/encap override it
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		/* rtnh_hops carries weight - 1 on the wire */
		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;

		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
					    rt, &r_cfg);
		if (err) {
			fib6_info_release(rt);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->fib6_info;
		err = __ip6_ins_rt(nh->fib6_info, info, extack);
		/* drop the creation reference; on successful insert the
		 * FIB tree holds its own references so the pointer below
		 * stays valid
		 */
		fib6_info_release(nh->fib6_info);

		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->fib6_info;

		/* nh->fib6_info is used or freed at this point, reset to NULL*/
		nh->fib6_info = NULL;
		if (err) {
			/* NOTE(review): on this path rt_last points at the
			 * fib6_info released just above; if that was the last
			 * reference, ip6_route_mpath_notify() at add_errout
			 * may touch freed memory -- verify against upstream.
			 */
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->fib6_info)
			fib6_info_release(nh->fib6_info);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
4421 
4422 static int ip6_route_multipath_del(struct fib6_config *cfg,
4423 				   struct netlink_ext_ack *extack)
4424 {
4425 	struct fib6_config r_cfg;
4426 	struct rtnexthop *rtnh;
4427 	int remaining;
4428 	int attrlen;
4429 	int err = 1, last_err = 0;
4430 
4431 	remaining = cfg->fc_mp_len;
4432 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4433 
4434 	/* Parse a Multipath Entry */
4435 	while (rtnh_ok(rtnh, remaining)) {
4436 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4437 		if (rtnh->rtnh_ifindex)
4438 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4439 
4440 		attrlen = rtnh_attrlen(rtnh);
4441 		if (attrlen > 0) {
4442 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4443 
4444 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4445 			if (nla) {
4446 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4447 				r_cfg.fc_flags |= RTF_GATEWAY;
4448 			}
4449 		}
4450 		err = ip6_route_del(&r_cfg, extack);
4451 		if (err)
4452 			last_err = err;
4453 
4454 		rtnh = rtnh_next(rtnh, &remaining);
4455 	}
4456 
4457 	return last_err;
4458 }
4459 
4460 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4461 			      struct netlink_ext_ack *extack)
4462 {
4463 	struct fib6_config cfg;
4464 	int err;
4465 
4466 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4467 	if (err < 0)
4468 		return err;
4469 
4470 	if (cfg.fc_mp)
4471 		return ip6_route_multipath_del(&cfg, extack);
4472 	else {
4473 		cfg.fc_delete_all_nh = 1;
4474 		return ip6_route_del(&cfg, extack);
4475 	}
4476 }
4477 
4478 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4479 			      struct netlink_ext_ack *extack)
4480 {
4481 	struct fib6_config cfg;
4482 	int err;
4483 
4484 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4485 	if (err < 0)
4486 		return err;
4487 
4488 	if (cfg.fc_mp)
4489 		return ip6_route_multipath_add(&cfg, extack);
4490 	else
4491 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
4492 }
4493 
4494 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4495 {
4496 	int nexthop_len = 0;
4497 
4498 	if (rt->fib6_nsiblings) {
4499 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4500 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4501 			    + nla_total_size(16) /* RTA_GATEWAY */
4502 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4503 
4504 		nexthop_len *= rt->fib6_nsiblings;
4505 	}
4506 
4507 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4508 	       + nla_total_size(16) /* RTA_SRC */
4509 	       + nla_total_size(16) /* RTA_DST */
4510 	       + nla_total_size(16) /* RTA_GATEWAY */
4511 	       + nla_total_size(16) /* RTA_PREFSRC */
4512 	       + nla_total_size(4) /* RTA_TABLE */
4513 	       + nla_total_size(4) /* RTA_IIF */
4514 	       + nla_total_size(4) /* RTA_OIF */
4515 	       + nla_total_size(4) /* RTA_PRIORITY */
4516 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4517 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4518 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4519 	       + nla_total_size(1) /* RTA_PREF */
4520 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4521 	       + nexthop_len;
4522 }
4523 
4524 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4525 			    unsigned int *flags, bool skip_oif)
4526 {
4527 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4528 		*flags |= RTNH_F_DEAD;
4529 
4530 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4531 		*flags |= RTNH_F_LINKDOWN;
4532 
4533 		rcu_read_lock();
4534 		if (fib6_ignore_linkdown(rt))
4535 			*flags |= RTNH_F_DEAD;
4536 		rcu_read_unlock();
4537 	}
4538 
4539 	if (rt->fib6_flags & RTF_GATEWAY) {
4540 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4541 			goto nla_put_failure;
4542 	}
4543 
4544 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4545 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4546 		*flags |= RTNH_F_OFFLOAD;
4547 
4548 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
4549 	if (!skip_oif && rt->fib6_nh.nh_dev &&
4550 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4551 		goto nla_put_failure;
4552 
4553 	if (rt->fib6_nh.nh_lwtstate &&
4554 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4555 		goto nla_put_failure;
4556 
4557 	return 0;
4558 
4559 nla_put_failure:
4560 	return -EMSGSIZE;
4561 }
4562 
4563 /* add multipath next hop */
4564 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4565 {
4566 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4567 	struct rtnexthop *rtnh;
4568 	unsigned int flags = 0;
4569 
4570 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4571 	if (!rtnh)
4572 		goto nla_put_failure;
4573 
4574 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4575 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4576 
4577 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4578 		goto nla_put_failure;
4579 
4580 	rtnh->rtnh_flags = flags;
4581 
4582 	/* length of rtnetlink header + attributes */
4583 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4584 
4585 	return 0;
4586 
4587 nla_put_failure:
4588 	return -EMSGSIZE;
4589 }
4590 
/* Build one RTM_* route message for @rt into @skb.
 *
 * @dst, @dest and @src are only non-NULL when reporting a specific route
 * lookup result (RTM_GETROUTE): @dest/@src then replace the prefix
 * address/length with the full 128-bit lookup keys, and @dst supplies
 * cached metrics/expiry/error.  @iif, when non-zero, is reported as the
 * input interface.  Returns 0 on success, -EMSGSIZE if @skb ran out of
 * room (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires = 0;
	u32 *pmetrics;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->fib6_dst.plen;
	rtm->rtm_src_len = rt->fib6_src.plen;
	rtm->rtm_tos = 0;
	if (rt->fib6_table)
		table = rt->fib6_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->fib6_protocol;

	if (rt->fib6_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* a lookup result reports the exact destination, not the prefix */
	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* multicast destinations are reported via the mroute code;
		 * 0 means it completed the message
		 */
		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		/* output lookup: report the source address that would be
		 * selected for this destination
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->fib6_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->fib6_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* prefer the dst's cached metrics when reporting a lookup result */
	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	/* report remaining lifetime for expiring routes */
	if (rt->fib6_flags & RTF_EXPIRES) {
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* out of skb space: cancel the partially built message */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4723 
4724 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4725 {
4726 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4727 	struct net *net = arg->net;
4728 
4729 	if (rt == net->ipv6.fib6_null_entry)
4730 		return 0;
4731 
4732 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4733 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4734 
4735 		/* user wants prefix routes only */
4736 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4737 		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4738 			/* success since this is not a prefix route */
4739 			return 1;
4740 		}
4741 	}
4742 
4743 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4744 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4745 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
4746 }
4747 
4748 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4749 			      struct netlink_ext_ack *extack)
4750 {
4751 	struct net *net = sock_net(in_skb->sk);
4752 	struct nlattr *tb[RTA_MAX+1];
4753 	int err, iif = 0, oif = 0;
4754 	struct fib6_info *from;
4755 	struct dst_entry *dst;
4756 	struct rt6_info *rt;
4757 	struct sk_buff *skb;
4758 	struct rtmsg *rtm;
4759 	struct flowi6 fl6;
4760 	bool fibmatch;
4761 
4762 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4763 			  extack);
4764 	if (err < 0)
4765 		goto errout;
4766 
4767 	err = -EINVAL;
4768 	memset(&fl6, 0, sizeof(fl6));
4769 	rtm = nlmsg_data(nlh);
4770 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4771 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4772 
4773 	if (tb[RTA_SRC]) {
4774 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4775 			goto errout;
4776 
4777 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4778 	}
4779 
4780 	if (tb[RTA_DST]) {
4781 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4782 			goto errout;
4783 
4784 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4785 	}
4786 
4787 	if (tb[RTA_IIF])
4788 		iif = nla_get_u32(tb[RTA_IIF]);
4789 
4790 	if (tb[RTA_OIF])
4791 		oif = nla_get_u32(tb[RTA_OIF]);
4792 
4793 	if (tb[RTA_MARK])
4794 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4795 
4796 	if (tb[RTA_UID])
4797 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4798 					   nla_get_u32(tb[RTA_UID]));
4799 	else
4800 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4801 
4802 	if (tb[RTA_SPORT])
4803 		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4804 
4805 	if (tb[RTA_DPORT])
4806 		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4807 
4808 	if (tb[RTA_IP_PROTO]) {
4809 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4810 						  &fl6.flowi6_proto, extack);
4811 		if (err)
4812 			goto errout;
4813 	}
4814 
4815 	if (iif) {
4816 		struct net_device *dev;
4817 		int flags = 0;
4818 
4819 		rcu_read_lock();
4820 
4821 		dev = dev_get_by_index_rcu(net, iif);
4822 		if (!dev) {
4823 			rcu_read_unlock();
4824 			err = -ENODEV;
4825 			goto errout;
4826 		}
4827 
4828 		fl6.flowi6_iif = iif;
4829 
4830 		if (!ipv6_addr_any(&fl6.saddr))
4831 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4832 
4833 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4834 
4835 		rcu_read_unlock();
4836 	} else {
4837 		fl6.flowi6_oif = oif;
4838 
4839 		dst = ip6_route_output(net, NULL, &fl6);
4840 	}
4841 
4842 
4843 	rt = container_of(dst, struct rt6_info, dst);
4844 	if (rt->dst.error) {
4845 		err = rt->dst.error;
4846 		ip6_rt_put(rt);
4847 		goto errout;
4848 	}
4849 
4850 	if (rt == net->ipv6.ip6_null_entry) {
4851 		err = rt->dst.error;
4852 		ip6_rt_put(rt);
4853 		goto errout;
4854 	}
4855 
4856 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4857 	if (!skb) {
4858 		ip6_rt_put(rt);
4859 		err = -ENOBUFS;
4860 		goto errout;
4861 	}
4862 
4863 	skb_dst_set(skb, &rt->dst);
4864 
4865 	rcu_read_lock();
4866 	from = rcu_dereference(rt->from);
4867 
4868 	if (fibmatch)
4869 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
4870 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4871 				    nlh->nlmsg_seq, 0);
4872 	else
4873 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4874 				    &fl6.saddr, iif, RTM_NEWROUTE,
4875 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4876 				    0);
4877 	rcu_read_unlock();
4878 
4879 	if (err < 0) {
4880 		kfree_skb(skb);
4881 		goto errout;
4882 	}
4883 
4884 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4885 errout:
4886 	return err;
4887 }
4888 
4889 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
4890 		     unsigned int nlm_flags)
4891 {
4892 	struct sk_buff *skb;
4893 	struct net *net = info->nl_net;
4894 	u32 seq;
4895 	int err;
4896 
4897 	err = -ENOBUFS;
4898 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4899 
4900 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4901 	if (!skb)
4902 		goto errout;
4903 
4904 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4905 			    event, info->portid, seq, nlm_flags);
4906 	if (err < 0) {
4907 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4908 		WARN_ON(err == -EMSGSIZE);
4909 		kfree_skb(skb);
4910 		goto errout;
4911 	}
4912 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4913 		    info->nlh, gfp_any());
4914 	return;
4915 errout:
4916 	if (err < 0)
4917 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4918 }
4919 
/* netdevice notifier: the per-netns special route entries (fib6 null
 * entry plus the null/prohibit/blackhole dsts) are bound to the loopback
 * device.  Attach them when loopback registers and drop the idev
 * references when it unregisters.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	/* only the loopback device is of interest here */
	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	 } else if (event == NETDEV_UNREGISTER &&
		    dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
4953 
4954 /*
4955  *	/proc
4956  */
4957 
4958 #ifdef CONFIG_PROC_FS
4959 
/* seq_file operations for the per-netns /proc/net/ipv6_route file */
static const struct file_operations ipv6_route_proc_fops = {
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
4966 
4967 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4968 {
4969 	struct net *net = (struct net *)seq->private;
4970 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4971 		   net->ipv6.rt6_stats->fib_nodes,
4972 		   net->ipv6.rt6_stats->fib_route_nodes,
4973 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4974 		   net->ipv6.rt6_stats->fib_rt_entries,
4975 		   net->ipv6.rt6_stats->fib_rt_cache,
4976 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4977 		   net->ipv6.rt6_stats->fib_discarded_routes);
4978 
4979 	return 0;
4980 }
4981 
/* open() for /proc/net/rt6_stats: netns-aware single-shot seq file */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
4986 
/* file operations for /proc/net/rt6_stats */
static const struct file_operations rt6_stats_seq_fops = {
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
4993 #endif	/* CONFIG_PROC_FS */
4994 
4995 #ifdef CONFIG_SYSCTL
4996 
4997 static
4998 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4999 			      void __user *buffer, size_t *lenp, loff_t *ppos)
5000 {
5001 	struct net *net;
5002 	int delay;
5003 	if (!write)
5004 		return -EINVAL;
5005 
5006 	net = (struct net *)ctl->extra1;
5007 	delay = net->ipv6.sysctl.flush_delay;
5008 	proc_dointvec(ctl, write, buffer, lenp, ppos);
5009 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5010 	return 0;
5011 }
5012 
/* Template for the per-netns net.ipv6.route.* sysctl table.  The ->data
 * pointers initially reference init_net; ipv6_route_sysctl_init() clones
 * this array and re-points each entry at the new netns by array index,
 * so the entry order here must stay in sync with that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	/* [0] */
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	/* [1] */
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [2] */
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [3] */
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [4] */
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [5] */
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [6] */
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [7] */
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [8] */
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [9] — same variable as [3], exposed in milliseconds */
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
5086 
/* Duplicate ipv6_route_table_template for a new netns and re-point each
 * entry's ->data at that netns' own storage.  The numeric indices below
 * must match the template's entry order.  Returns the new table, or NULL
 * on allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* the "flush" handler needs the netns */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
5115 #endif
5116 
/* Per-netns setup: clone the dst_ops template, allocate this netns'
 * copies of the special route entries (fib6 null entry plus the
 * null/prohibit/blackhole dsts), and set the sysctl defaults.  On
 * failure the goto labels unwind the allocations in reverse order.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
					    sizeof(*net->ipv6.fib6_null_entry),
					    GFP_KERNEL);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; tunable via net.ipv6.route.* afterwards */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* error unwind: reverse order of the allocations above */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
5191 
/* Per-netns teardown: free the special route entries allocated in
 * ip6_route_net_init() and release the dst entries counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
5202 
/* Late per-netns init: create the /proc/net entries once the rest of the
 * routing state is in place.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
5211 
/* Late per-netns teardown: remove the /proc/net entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
5219 
/* main per-netns init/exit for the IPv6 routing state */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
5224 
5225 static int __net_init ipv6_inetpeer_init(struct net *net)
5226 {
5227 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5228 
5229 	if (!bp)
5230 		return -ENOMEM;
5231 	inet_peer_base_init(bp);
5232 	net->ipv6.peers = bp;
5233 	return 0;
5234 }
5235 
5236 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5237 {
5238 	struct inet_peer_base *bp = net->ipv6.peers;
5239 
5240 	net->ipv6.peers = NULL;
5241 	inetpeer_invalidate_tree(bp);
5242 	kfree(bp);
5243 }
5244 
/* per-netns init/exit for the IPv6 inet_peer base */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
5249 
/* late per-netns init/exit (procfs entries) */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
5254 
/* loopback-tracking netdevice notifier; priority is set just below
 * ADDRCONF_NOTIFY_PRIORITY so addrconf handles the event first
 * (presumably — confirm the chain ordering if this is changed)
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
5259 
/* Bind init_net's special route entries to its loopback device; for
 * other netns this happens via ip6_route_dev_notify().
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
5275 
/* Module init: set up the dst cache, pernet state, fib6 core, xfrm6 and
 * policy rules, then register the rtnetlink handlers and the netdevice
 * notifier.  The error labels unwind strictly in reverse order of the
 * successful setup steps.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the regular rt6_info cache */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* rtnetlink handlers; a failed registration unregisters them all */
	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* per-cpu lists of uncached (DST_NOCACHE) routes */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* error unwind: reverse order of the setup steps above */
out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
5367 
/* Module teardown: undo ip6_route_init() in reverse order */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
5380