xref: /linux/net/ipv6/route.c (revision f412eed9dfdeeb6becd7de2ffe8b5d0a8b3f81ca)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
74 enum rt6_nud_state {
75 	RT6_NUD_FAIL_HARD = -3,
76 	RT6_NUD_FAIL_PROBE = -2,
77 	RT6_NUD_FAIL_DO_RR = -1,
78 	RT6_NUD_SUCCEED = 1
79 };
80 
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100 static size_t rt6_nlmsg_size(struct fib6_info *rt);
101 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 			 struct fib6_info *rt, struct dst_entry *dst,
103 			 struct in6_addr *dest, struct in6_addr *src,
104 			 int iif, int type, u32 portid, u32 seq,
105 			 unsigned int flags);
106 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
107 					   struct in6_addr *daddr,
108 					   struct in6_addr *saddr);
109 
110 #ifdef CONFIG_IPV6_ROUTE_INFO
111 static struct fib6_info *rt6_add_route_info(struct net *net,
112 					   const struct in6_addr *prefix, int prefixlen,
113 					   const struct in6_addr *gwaddr,
114 					   struct net_device *dev,
115 					   unsigned int pref);
116 static struct fib6_info *rt6_get_route_info(struct net *net,
117 					   const struct in6_addr *prefix, int prefixlen,
118 					   const struct in6_addr *gwaddr,
119 					   struct net_device *dev);
120 #endif
121 
122 struct uncached_list {
123 	spinlock_t		lock;
124 	struct list_head	head;
125 };
126 
127 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128 
129 void rt6_uncached_list_add(struct rt6_info *rt)
130 {
131 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132 
133 	rt->rt6i_uncached_list = ul;
134 
135 	spin_lock_bh(&ul->lock);
136 	list_add_tail(&rt->rt6i_uncached, &ul->head);
137 	spin_unlock_bh(&ul->lock);
138 }
139 
140 void rt6_uncached_list_del(struct rt6_info *rt)
141 {
142 	if (!list_empty(&rt->rt6i_uncached)) {
143 		struct uncached_list *ul = rt->rt6i_uncached_list;
144 		struct net *net = dev_net(rt->dst.dev);
145 
146 		spin_lock_bh(&ul->lock);
147 		list_del(&rt->rt6i_uncached);
148 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
149 		spin_unlock_bh(&ul->lock);
150 	}
151 }
152 
153 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154 {
155 	struct net_device *loopback_dev = net->loopback_dev;
156 	int cpu;
157 
158 	if (dev == loopback_dev)
159 		return;
160 
161 	for_each_possible_cpu(cpu) {
162 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 		struct rt6_info *rt;
164 
165 		spin_lock_bh(&ul->lock);
166 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 			struct inet6_dev *rt_idev = rt->rt6i_idev;
168 			struct net_device *rt_dev = rt->dst.dev;
169 
170 			if (rt_idev->dev == dev) {
171 				rt->rt6i_idev = in6_dev_get(loopback_dev);
172 				in6_dev_put(rt_idev);
173 			}
174 
175 			if (rt_dev == dev) {
176 				rt->dst.dev = loopback_dev;
177 				dev_hold(rt->dst.dev);
178 				dev_put(rt_dev);
179 			}
180 		}
181 		spin_unlock_bh(&ul->lock);
182 	}
183 }
184 
185 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
186 					     struct sk_buff *skb,
187 					     const void *daddr)
188 {
189 	if (!ipv6_addr_any(p))
190 		return (const void *) p;
191 	else if (skb)
192 		return &ipv6_hdr(skb)->daddr;
193 	return daddr;
194 }
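/* Editor's note (not in the original source): a hedged summary of the
 * precedence implemented above: a configured gateway wins, then the
 * packet's own destination, then the caller-supplied daddr:
 *
 *	gatewayed route: p = gateway -> resolve the gateway address
 *	p == ::         -> fall back to skb daddr, then to 'daddr'
 *
 * so next-hop resolution targets the gateway rather than the final
 * destination whenever one is present.
 */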
195 
196 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 				   struct net_device *dev,
198 				   struct sk_buff *skb,
199 				   const void *daddr)
200 {
201 	struct neighbour *n;
202 
203 	daddr = choose_neigh_daddr(gw, skb, daddr);
204 	n = __ipv6_neigh_lookup(dev, daddr);
205 	if (n)
206 		return n;
207 	return neigh_create(&nd_tbl, daddr, dev);
208 }
209 
210 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 					      struct sk_buff *skb,
212 					      const void *daddr)
213 {
214 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215 
216 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
217 }
218 
219 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220 {
221 	struct net_device *dev = dst->dev;
222 	struct rt6_info *rt = (struct rt6_info *)dst;
223 
224 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
225 	if (!daddr)
226 		return;
227 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 		return;
229 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 		return;
231 	__ipv6_confirm_neigh(dev, daddr);
232 }
233 
234 static struct dst_ops ip6_dst_ops_template = {
235 	.family			=	AF_INET6,
236 	.gc			=	ip6_dst_gc,
237 	.gc_thresh		=	1024,
238 	.check			=	ip6_dst_check,
239 	.default_advmss		=	ip6_default_advmss,
240 	.mtu			=	ip6_mtu,
241 	.cow_metrics		=	dst_cow_metrics_generic,
242 	.destroy		=	ip6_dst_destroy,
243 	.ifdown			=	ip6_dst_ifdown,
244 	.negative_advice	=	ip6_negative_advice,
245 	.link_failure		=	ip6_link_failure,
246 	.update_pmtu		=	ip6_rt_update_pmtu,
247 	.redirect		=	rt6_do_redirect,
248 	.local_out		=	__ip6_local_out,
249 	.neigh_lookup		=	ip6_dst_neigh_lookup,
250 	.confirm_neigh		=	ip6_confirm_neigh,
251 };
252 
253 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
254 {
255 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256 
257 	return mtu ? : dst->dev->mtu;
258 }
259 
260 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 					 struct sk_buff *skb, u32 mtu)
262 {
263 }
264 
265 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
266 				      struct sk_buff *skb)
267 {
268 }
269 
270 static struct dst_ops ip6_dst_blackhole_ops = {
271 	.family			=	AF_INET6,
272 	.destroy		=	ip6_dst_destroy,
273 	.check			=	ip6_dst_check,
274 	.mtu			=	ip6_blackhole_mtu,
275 	.default_advmss		=	ip6_default_advmss,
276 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
277 	.redirect		=	ip6_rt_blackhole_redirect,
278 	.cow_metrics		=	dst_cow_metrics_generic,
279 	.neigh_lookup		=	ip6_dst_neigh_lookup,
280 };
281 
282 static const u32 ip6_template_metrics[RTAX_MAX] = {
283 	[RTAX_HOPLIMIT - 1] = 0,
284 };
285 
286 static const struct fib6_info fib6_null_entry_template = {
287 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
288 	.fib6_protocol  = RTPROT_KERNEL,
289 	.fib6_metric	= ~(u32)0,
290 	.fib6_ref	= ATOMIC_INIT(1),
291 	.fib6_type	= RTN_UNREACHABLE,
292 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
293 };
294 
295 static const struct rt6_info ip6_null_entry_template = {
296 	.dst = {
297 		.__refcnt	= ATOMIC_INIT(1),
298 		.__use		= 1,
299 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
300 		.error		= -ENETUNREACH,
301 		.input		= ip6_pkt_discard,
302 		.output		= ip6_pkt_discard_out,
303 	},
304 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
305 };
306 
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308 
309 static const struct rt6_info ip6_prohibit_entry_template = {
310 	.dst = {
311 		.__refcnt	= ATOMIC_INIT(1),
312 		.__use		= 1,
313 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
314 		.error		= -EACCES,
315 		.input		= ip6_pkt_prohibit,
316 		.output		= ip6_pkt_prohibit_out,
317 	},
318 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
319 };
320 
321 static const struct rt6_info ip6_blk_hole_entry_template = {
322 	.dst = {
323 		.__refcnt	= ATOMIC_INIT(1),
324 		.__use		= 1,
325 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
326 		.error		= -EINVAL,
327 		.input		= dst_discard,
328 		.output		= dst_discard_out,
329 	},
330 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
331 };
332 
333 #endif
334 
335 static void rt6_info_init(struct rt6_info *rt)
336 {
337 	struct dst_entry *dst = &rt->dst;
338 
339 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
340 	INIT_LIST_HEAD(&rt->rt6i_uncached);
341 }
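/* Editor's note: the memset() above relies on 'dst' being the first
 * member of struct rt6_info: everything laid out after the embedded
 * dst_entry is zeroed in one call, since sizeof(*rt) - sizeof(*dst)
 * is exactly the size of the trailing members. An illustrative (not
 * authoritative) shape:
 *
 *	struct rt6_info {
 *		struct dst_entry dst;	// left intact
 *		...			// zeroed by memset(dst + 1, ...)
 *	};
 */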
342 
343 /* allocate dst with ip6_dst_ops */
344 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 			       int flags)
346 {
347 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
348 					1, DST_OBSOLETE_FORCE_CHK, flags);
349 
350 	if (rt) {
351 		rt6_info_init(rt);
352 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 	}
354 
355 	return rt;
356 }
357 EXPORT_SYMBOL(ip6_dst_alloc);
358 
359 static void ip6_dst_destroy(struct dst_entry *dst)
360 {
361 	struct rt6_info *rt = (struct rt6_info *)dst;
362 	struct fib6_info *from;
363 	struct inet6_dev *idev;
364 
365 	dst_destroy_metrics_generic(dst);
366 	rt6_uncached_list_del(rt);
367 
368 	idev = rt->rt6i_idev;
369 	if (idev) {
370 		rt->rt6i_idev = NULL;
371 		in6_dev_put(idev);
372 	}
373 
374 	rcu_read_lock();
375 	from = rcu_dereference(rt->from);
376 	rcu_assign_pointer(rt->from, NULL);
377 	fib6_info_release(from);
378 	rcu_read_unlock();
379 }
380 
381 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
382 			   int how)
383 {
384 	struct rt6_info *rt = (struct rt6_info *)dst;
385 	struct inet6_dev *idev = rt->rt6i_idev;
386 	struct net_device *loopback_dev =
387 		dev_net(dev)->loopback_dev;
388 
389 	if (idev && idev->dev != loopback_dev) {
390 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
391 		if (loopback_idev) {
392 			rt->rt6i_idev = loopback_idev;
393 			in6_dev_put(idev);
394 		}
395 	}
396 }
397 
398 static bool __rt6_check_expired(const struct rt6_info *rt)
399 {
400 	if (rt->rt6i_flags & RTF_EXPIRES)
401 		return time_after(jiffies, rt->dst.expires);
402 	else
403 		return false;
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	struct fib6_info *from;
409 
410 	from = rcu_dereference(rt->from);
411 
412 	if (rt->rt6i_flags & RTF_EXPIRES) {
413 		if (time_after(jiffies, rt->dst.expires))
414 			return true;
415 	} else if (from) {
416 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
417 			fib6_check_expired(from);
418 	}
419 	return false;
420 }
421 
422 static struct fib6_info *rt6_multipath_select(const struct net *net,
423 					      struct fib6_info *match,
424 					     struct flowi6 *fl6, int oif,
425 					     const struct sk_buff *skb,
426 					     int strict)
427 {
428 	struct fib6_info *sibling, *next_sibling;
429 
430 	/* We might have already computed the hash for ICMPv6 errors. In such
431 	 * a case it will always be non-zero. Otherwise, now is the time to do it.
432 	 */
433 	if (!fl6->mp_hash)
434 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
435 
436 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
437 		return match;
438 
439 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
440 				 fib6_siblings) {
441 		int nh_upper_bound;
442 
443 		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
444 		if (fl6->mp_hash > nh_upper_bound)
445 			continue;
446 		if (rt6_score_route(sibling, oif, strict) < 0)
447 			break;
448 		match = sibling;
449 		break;
450 	}
451 
452 	return match;
453 }
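/* Editor's note: the selection above is hash-threshold next-hop
 * selection (in the spirit of RFC 2992): each sibling owns a slice of
 * the 31-bit hash space bounded by its precomputed nh_upper_bound.
 * A hedged sketch with two siblings of weight 1 and 3 (illustrative
 * values):
 *
 *	nexthop A: nh_upper_bound = 0x1fffffff	(1/4 of the space)
 *	nexthop B: nh_upper_bound = 0x7fffffff	(the rest)
 *
 *	fl6->mp_hash <= 0x1fffffff ? A : B
 *
 * Since the bound comparison is driven by the flow hash, packets of
 * one flow keep selecting the same sibling.
 */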
454 
455 /*
456  *	Route lookup. rcu_read_lock() should be held.
457  */
458 
459 static inline struct fib6_info *rt6_device_match(struct net *net,
460 						 struct fib6_info *rt,
461 						 const struct in6_addr *saddr,
462 						 int oif,
463 						 int flags)
464 {
465 	struct fib6_info *sprt;
466 
467 	if (!oif && ipv6_addr_any(saddr) &&
468 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
469 		return rt;
470 
471 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
472 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
473 
474 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
475 			continue;
476 
477 		if (oif) {
478 			if (dev->ifindex == oif)
479 				return sprt;
480 		} else {
481 			if (ipv6_chk_addr(net, saddr, dev,
482 					  flags & RT6_LOOKUP_F_IFACE))
483 				return sprt;
484 		}
485 	}
486 
487 	if (oif && flags & RT6_LOOKUP_F_IFACE)
488 		return net->ipv6.fib6_null_entry;
489 
490 	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
491 }
492 
493 #ifdef CONFIG_IPV6_ROUTER_PREF
494 struct __rt6_probe_work {
495 	struct work_struct work;
496 	struct in6_addr target;
497 	struct net_device *dev;
498 };
499 
500 static void rt6_probe_deferred(struct work_struct *w)
501 {
502 	struct in6_addr mcaddr;
503 	struct __rt6_probe_work *work =
504 		container_of(w, struct __rt6_probe_work, work);
505 
506 	addrconf_addr_solict_mult(&work->target, &mcaddr);
507 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
508 	dev_put(work->dev);
509 	kfree(work);
510 }
511 
512 static void rt6_probe(struct fib6_info *rt)
513 {
514 	struct __rt6_probe_work *work;
515 	const struct in6_addr *nh_gw;
516 	struct neighbour *neigh;
517 	struct net_device *dev;
518 
519 	/*
520 	 * Okay, this does not seem to be appropriate
521 	 * for now; however, we need to check if it
522 	 * is really so, aka Router Reachability Probing.
523 	 *
524 	 * Router Reachability Probe MUST be rate-limited
525 	 * to no more than one per minute.
526 	 */
527 	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
528 		return;
529 
530 	nh_gw = &rt->fib6_nh.nh_gw;
531 	dev = rt->fib6_nh.nh_dev;
532 	rcu_read_lock_bh();
533 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
534 	if (neigh) {
535 		struct inet6_dev *idev;
536 
537 		if (neigh->nud_state & NUD_VALID)
538 			goto out;
539 
540 		idev = __in6_dev_get(dev);
541 		work = NULL;
542 		write_lock(&neigh->lock);
543 		if (!(neigh->nud_state & NUD_VALID) &&
544 		    time_after(jiffies,
545 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
546 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
547 			if (work)
548 				__neigh_set_probe_once(neigh);
549 		}
550 		write_unlock(&neigh->lock);
551 	} else {
552 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 	}
554 
555 	if (work) {
556 		INIT_WORK(&work->work, rt6_probe_deferred);
557 		work->target = *nh_gw;
558 		dev_hold(dev);
559 		work->dev = dev;
560 		schedule_work(&work->work);
561 	}
562 
563 out:
564 	rcu_read_unlock_bh();
565 }
566 #else
567 static inline void rt6_probe(struct fib6_info *rt)
568 {
569 }
570 #endif
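/* Editor's note (assumption, inferred from rt6_probe() above): for an
 * existing neighbour entry the one-probe-per-interval limit is
 * enforced by comparing jiffies against
 * neigh->updated + idev->cnf.rtr_probe_interval (60 seconds by
 * default, sysctl router_probe_interval), and the actual NS
 * transmission is deferred to a workqueue so it runs outside the
 * rcu_read_lock_bh() section.
 */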
571 
572 /*
573  * Default Router Selection (RFC 2461 6.3.6)
574  */
575 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
576 {
577 	const struct net_device *dev = rt->fib6_nh.nh_dev;
578 
579 	if (!oif || dev->ifindex == oif)
580 		return 2;
581 	return 0;
582 }
583 
584 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
585 {
586 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
587 	struct neighbour *neigh;
588 
589 	if (rt->fib6_flags & RTF_NONEXTHOP ||
590 	    !(rt->fib6_flags & RTF_GATEWAY))
591 		return RT6_NUD_SUCCEED;
592 
593 	rcu_read_lock_bh();
594 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
595 					  &rt->fib6_nh.nh_gw);
596 	if (neigh) {
597 		read_lock(&neigh->lock);
598 		if (neigh->nud_state & NUD_VALID)
599 			ret = RT6_NUD_SUCCEED;
600 #ifdef CONFIG_IPV6_ROUTER_PREF
601 		else if (!(neigh->nud_state & NUD_FAILED))
602 			ret = RT6_NUD_SUCCEED;
603 		else
604 			ret = RT6_NUD_FAIL_PROBE;
605 #endif
606 		read_unlock(&neigh->lock);
607 	} else {
608 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
609 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
610 	}
611 	rcu_read_unlock_bh();
612 
613 	return ret;
614 }
615 
616 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
617 {
618 	int m;
619 
620 	m = rt6_check_dev(rt, oif);
621 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
622 		return RT6_NUD_FAIL_HARD;
623 #ifdef CONFIG_IPV6_ROUTER_PREF
624 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
625 #endif
626 	if (strict & RT6_LOOKUP_F_REACHABLE) {
627 		int n = rt6_check_neigh(rt);
628 		if (n < 0)
629 			return n;
630 	}
631 	return m;
632 }
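/* Editor's note: a hedged worked example of the score above
 * (illustrative values). For a route whose device matches oif and
 * which advertises a router preference:
 *
 *	m  = 2;					// rt6_check_dev() match
 *	m |= IPV6_DECODE_PREF(...) << 2;	// preference in bits 2..3
 *
 * so router preference outranks device affinity, and a negative
 * rt6_check_neigh() result (under RT6_LOOKUP_F_REACHABLE) overrides
 * both by returning early.
 */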
633 
634 /* called with rcu_read_lock held */
635 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
636 {
637 	const struct net_device *dev = fib6_info_nh_dev(f6i);
638 	bool rc = false;
639 
640 	if (dev) {
641 		const struct inet6_dev *idev = __in6_dev_get(dev);
642 
643 		rc = !!idev->cnf.ignore_routes_with_linkdown;
644 	}
645 
646 	return rc;
647 }
648 
649 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
650 				   int *mpri, struct fib6_info *match,
651 				   bool *do_rr)
652 {
653 	int m;
654 	bool match_do_rr = false;
655 
656 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
657 		goto out;
658 
659 	if (fib6_ignore_linkdown(rt) &&
660 	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
661 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
662 		goto out;
663 
664 	if (fib6_check_expired(rt))
665 		goto out;
666 
667 	m = rt6_score_route(rt, oif, strict);
668 	if (m == RT6_NUD_FAIL_DO_RR) {
669 		match_do_rr = true;
670 		m = 0; /* lowest valid score */
671 	} else if (m == RT6_NUD_FAIL_HARD) {
672 		goto out;
673 	}
674 
675 	if (strict & RT6_LOOKUP_F_REACHABLE)
676 		rt6_probe(rt);
677 
678 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
679 	if (m > *mpri) {
680 		*do_rr = match_do_rr;
681 		*mpri = m;
682 		match = rt;
683 	}
684 out:
685 	return match;
686 }
687 
688 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
689 				     struct fib6_info *leaf,
690 				     struct fib6_info *rr_head,
691 				     u32 metric, int oif, int strict,
692 				     bool *do_rr)
693 {
694 	struct fib6_info *rt, *match, *cont;
695 	int mpri = -1;
696 
697 	match = NULL;
698 	cont = NULL;
699 	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
700 		if (rt->fib6_metric != metric) {
701 			cont = rt;
702 			break;
703 		}
704 
705 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
706 	}
707 
708 	for (rt = leaf; rt && rt != rr_head;
709 	     rt = rcu_dereference(rt->rt6_next)) {
710 		if (rt->fib6_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	if (match || !cont)
719 		return match;
720 
721 	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
722 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
723 
724 	return match;
725 }
726 
727 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
728 				   int oif, int strict)
729 {
730 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
731 	struct fib6_info *match, *rt0;
732 	bool do_rr = false;
733 	int key_plen;
734 
735 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
736 		return net->ipv6.fib6_null_entry;
737 
738 	rt0 = rcu_dereference(fn->rr_ptr);
739 	if (!rt0)
740 		rt0 = leaf;
741 
742 	/* Double check to make sure fn is not an intermediate node
743 	 * and fn->leaf does not point to its child's leaf
744 	 * (This might happen if all routes under fn are deleted from
745 	 * the tree and fib6_repair_tree() is called on the node.)
746 	 */
747 	key_plen = rt0->fib6_dst.plen;
748 #ifdef CONFIG_IPV6_SUBTREES
749 	if (rt0->fib6_src.plen)
750 		key_plen = rt0->fib6_src.plen;
751 #endif
752 	if (fn->fn_bit != key_plen)
753 		return net->ipv6.fib6_null_entry;
754 
755 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
756 			     &do_rr);
757 
758 	if (do_rr) {
759 		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
760 
761 		/* no entries matched; do round-robin */
762 		if (!next || next->fib6_metric != rt0->fib6_metric)
763 			next = leaf;
764 
765 		if (next != rt0) {
766 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
767 			/* make sure next is not being deleted from the tree */
768 			if (next->fib6_node)
769 				rcu_assign_pointer(fn->rr_ptr, next);
770 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
771 		}
772 	}
773 
774 	return match ? match : net->ipv6.fib6_null_entry;
775 }
776 
777 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
778 {
779 	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
780 }
781 
782 #ifdef CONFIG_IPV6_ROUTE_INFO
783 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
784 		  const struct in6_addr *gwaddr)
785 {
786 	struct net *net = dev_net(dev);
787 	struct route_info *rinfo = (struct route_info *) opt;
788 	struct in6_addr prefix_buf, *prefix;
789 	unsigned int pref;
790 	unsigned long lifetime;
791 	struct fib6_info *rt;
792 
793 	if (len < sizeof(struct route_info)) {
794 		return -EINVAL;
795 	}
796 
797 	/* Sanity check for prefix_len and length */
798 	if (rinfo->length > 3) {
799 		return -EINVAL;
800 	} else if (rinfo->prefix_len > 128) {
801 		return -EINVAL;
802 	} else if (rinfo->prefix_len > 64) {
803 		if (rinfo->length < 2) {
804 			return -EINVAL;
805 		}
806 	} else if (rinfo->prefix_len > 0) {
807 		if (rinfo->length < 1) {
808 			return -EINVAL;
809 		}
810 	}
811 
812 	pref = rinfo->route_pref;
813 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
814 		return -EINVAL;
815 
816 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
817 
818 	if (rinfo->length == 3)
819 		prefix = (struct in6_addr *)rinfo->prefix;
820 	else {
821 		/* this function is safe */
822 		ipv6_addr_prefix(&prefix_buf,
823 				 (struct in6_addr *)rinfo->prefix,
824 				 rinfo->prefix_len);
825 		prefix = &prefix_buf;
826 	}
827 
828 	if (rinfo->prefix_len == 0)
829 		rt = rt6_get_dflt_router(net, gwaddr, dev);
830 	else
831 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
832 					gwaddr, dev);
833 
834 	if (rt && !lifetime) {
835 		ip6_del_rt(net, rt);
836 		rt = NULL;
837 	}
838 
839 	if (!rt && lifetime)
840 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
841 					dev, pref);
842 	else if (rt)
843 		rt->fib6_flags = RTF_ROUTEINFO |
844 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
845 
846 	if (rt) {
847 		if (!addrconf_finite_timeout(lifetime))
848 			fib6_clean_expires(rt);
849 		else
850 			fib6_set_expires(rt, jiffies + HZ * lifetime);
851 
852 		fib6_info_release(rt);
853 	}
854 	return 0;
855 }
856 #endif
857 
858 /*
859  *	Misc support functions
860  */
861 
862 /* called with rcu_read_lock held */
863 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
864 {
865 	struct net_device *dev = rt->fib6_nh.nh_dev;
866 
867 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
868 		/* for copies of local routes, dst->dev needs to be the
869 		 * device itself if it is a master device, the master device
870 		 * if the device is enslaved, and the loopback device by default
871 		 */
872 		if (netif_is_l3_slave(dev) &&
873 		    !rt6_need_strict(&rt->fib6_dst.addr))
874 			dev = l3mdev_master_dev_rcu(dev);
875 		else if (!netif_is_l3_master(dev))
876 			dev = dev_net(dev)->loopback_dev;
877 		/* the last case is netif_is_l3_master(dev) being true, in
878 		 * which case we return dev unchanged
879 		 */
880 	}
881 
882 	return dev;
883 }
884 
885 static const int fib6_prop[RTN_MAX + 1] = {
886 	[RTN_UNSPEC]	= 0,
887 	[RTN_UNICAST]	= 0,
888 	[RTN_LOCAL]	= 0,
889 	[RTN_BROADCAST]	= 0,
890 	[RTN_ANYCAST]	= 0,
891 	[RTN_MULTICAST]	= 0,
892 	[RTN_BLACKHOLE]	= -EINVAL,
893 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
894 	[RTN_PROHIBIT]	= -EACCES,
895 	[RTN_THROW]	= -EAGAIN,
896 	[RTN_NAT]	= -EINVAL,
897 	[RTN_XRESOLVE]	= -EINVAL,
898 };
899 
900 static int ip6_rt_type_to_error(u8 fib6_type)
901 {
902 	return fib6_prop[fib6_type];
903 }
904 
905 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
906 {
907 	unsigned short flags = 0;
908 
909 	if (rt->dst_nocount)
910 		flags |= DST_NOCOUNT;
911 	if (rt->dst_nopolicy)
912 		flags |= DST_NOPOLICY;
913 	if (rt->dst_host)
914 		flags |= DST_HOST;
915 
916 	return flags;
917 }
918 
919 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
920 {
921 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
922 
923 	switch (ort->fib6_type) {
924 	case RTN_BLACKHOLE:
925 		rt->dst.output = dst_discard_out;
926 		rt->dst.input = dst_discard;
927 		break;
928 	case RTN_PROHIBIT:
929 		rt->dst.output = ip6_pkt_prohibit_out;
930 		rt->dst.input = ip6_pkt_prohibit;
931 		break;
932 	case RTN_THROW:
933 	case RTN_UNREACHABLE:
934 	default:
935 		rt->dst.output = ip6_pkt_discard_out;
936 		rt->dst.input = ip6_pkt_discard;
937 		break;
938 	}
939 }
940 
941 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
942 {
943 	rt->dst.flags |= fib6_info_dst_flags(ort);
944 
945 	if (ort->fib6_flags & RTF_REJECT) {
946 		ip6_rt_init_dst_reject(rt, ort);
947 		return;
948 	}
949 
950 	rt->dst.error = 0;
951 	rt->dst.output = ip6_output;
952 
953 	if (ort->fib6_type == RTN_LOCAL) {
954 		rt->dst.input = ip6_input;
955 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
956 		rt->dst.input = ip6_mc_input;
957 	} else {
958 		rt->dst.input = ip6_forward;
959 	}
960 
961 	if (ort->fib6_nh.nh_lwtstate) {
962 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
963 		lwtunnel_set_redirect(&rt->dst);
964 	}
965 
966 	rt->dst.lastuse = jiffies;
967 }
968 
969 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
970 {
971 	rt->rt6i_flags &= ~RTF_EXPIRES;
972 	fib6_info_hold(from);
973 	rcu_assign_pointer(rt->from, from);
974 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
975 	if (from->fib6_metrics != &dst_default_metrics) {
976 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
977 		refcount_inc(&from->fib6_metrics->refcnt);
978 	}
979 }
980 
981 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
982 {
983 	struct net_device *dev = fib6_info_nh_dev(ort);
984 
985 	ip6_rt_init_dst(rt, ort);
986 
987 	rt->rt6i_dst = ort->fib6_dst;
988 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
989 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
990 	rt->rt6i_flags = ort->fib6_flags;
991 	rt6_set_from(rt, ort);
992 #ifdef CONFIG_IPV6_SUBTREES
993 	rt->rt6i_src = ort->fib6_src;
994 #endif
995 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
996 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
997 }
998 
999 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1000 					struct in6_addr *saddr)
1001 {
1002 	struct fib6_node *pn, *sn;
1003 	while (1) {
1004 		if (fn->fn_flags & RTN_TL_ROOT)
1005 			return NULL;
1006 		pn = rcu_dereference(fn->parent);
1007 		sn = FIB6_SUBTREE(pn);
1008 		if (sn && sn != fn)
1009 			fn = fib6_lookup(sn, NULL, saddr);
1010 		else
1011 			fn = pn;
1012 		if (fn->fn_flags & RTN_RTINFO)
1013 			return fn;
1014 	}
1015 }
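/* Editor's note: fib6_backtrack() walks toward the tree root after a
 * failed match: at each parent it first retries the source-address
 * subtree (CONFIG_IPV6_SUBTREES), otherwise continues upward,
 * stopping at the first node that carries routes (RTN_RTINFO) or
 * giving up at the top-level root (RTN_TL_ROOT). Callers loop on it,
 * as in ip6_pol_route_lookup() below:
 *
 *	if (f6i == net->ipv6.fib6_null_entry) {
 *		fn = fib6_backtrack(fn, &fl6->saddr);
 *		if (fn)
 *			goto restart;
 *	}
 */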
1016 
1017 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1018 			  bool null_fallback)
1019 {
1020 	struct rt6_info *rt = *prt;
1021 
1022 	if (dst_hold_safe(&rt->dst))
1023 		return true;
1024 	if (null_fallback) {
1025 		rt = net->ipv6.ip6_null_entry;
1026 		dst_hold(&rt->dst);
1027 	} else {
1028 		rt = NULL;
1029 	}
1030 	*prt = rt;
1031 	return false;
1032 }
1033 
1034 /* called with rcu_read_lock held */
1035 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1036 {
1037 	unsigned short flags = fib6_info_dst_flags(rt);
1038 	struct net_device *dev = rt->fib6_nh.nh_dev;
1039 	struct rt6_info *nrt;
1040 
1041 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1042 	if (nrt)
1043 		ip6_rt_copy_init(nrt, rt);
1044 
1045 	return nrt;
1046 }
1047 
1048 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1049 					     struct fib6_table *table,
1050 					     struct flowi6 *fl6,
1051 					     const struct sk_buff *skb,
1052 					     int flags)
1053 {
1054 	struct fib6_info *f6i;
1055 	struct fib6_node *fn;
1056 	struct rt6_info *rt;
1057 
1058 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059 		flags &= ~RT6_LOOKUP_F_IFACE;
1060 
1061 	rcu_read_lock();
1062 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063 restart:
1064 	f6i = rcu_dereference(fn->leaf);
1065 	if (!f6i) {
1066 		f6i = net->ipv6.fib6_null_entry;
1067 	} else {
1068 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
1069 				      fl6->flowi6_oif, flags);
1070 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1071 			f6i = rt6_multipath_select(net, f6i, fl6,
1072 						   fl6->flowi6_oif, skb, flags);
1073 	}
1074 	if (f6i == net->ipv6.fib6_null_entry) {
1075 		fn = fib6_backtrack(fn, &fl6->saddr);
1076 		if (fn)
1077 			goto restart;
1078 	}
1079 
1080 	/* Search through exception table */
1081 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1082 	if (rt) {
1083 		if (ip6_hold_safe(net, &rt, true))
1084 			dst_use_noref(&rt->dst, jiffies);
1085 	} else if (f6i == net->ipv6.fib6_null_entry) {
1086 		rt = net->ipv6.ip6_null_entry;
1087 		dst_hold(&rt->dst);
1088 	} else {
1089 		rt = ip6_create_rt_rcu(f6i);
1090 		if (!rt) {
1091 			rt = net->ipv6.ip6_null_entry;
1092 			dst_hold(&rt->dst);
1093 		}
1094 	}
1095 
1096 	rcu_read_unlock();
1097 
1098 	trace_fib6_table_lookup(net, rt, table, fl6);
1099 
1100 	return rt;
1101 }
1102 
1103 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1104 				   const struct sk_buff *skb, int flags)
1105 {
1106 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1107 }
1108 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1109 
1110 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1111 			    const struct in6_addr *saddr, int oif,
1112 			    const struct sk_buff *skb, int strict)
1113 {
1114 	struct flowi6 fl6 = {
1115 		.flowi6_oif = oif,
1116 		.daddr = *daddr,
1117 	};
1118 	struct dst_entry *dst;
1119 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1120 
1121 	if (saddr) {
1122 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1123 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1124 	}
1125 
1126 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1127 	if (dst->error == 0)
1128 		return (struct rt6_info *) dst;
1129 
1130 	dst_release(dst);
1131 
1132 	return NULL;
1133 }
1134 EXPORT_SYMBOL(rt6_lookup);
1135 
1136 /* ip6_ins_rt is called with FREE table->tb6_lock.
1137  * It takes a new route entry; if the addition fails for any reason,
1138  * the route is released.
1139  * Caller must hold a dst reference before calling it.
1140  */
1141 
1142 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1143 			struct netlink_ext_ack *extack)
1144 {
1145 	int err;
1146 	struct fib6_table *table;
1147 
1148 	table = rt->fib6_table;
1149 	spin_lock_bh(&table->tb6_lock);
1150 	err = fib6_add(&table->tb6_root, rt, info, extack);
1151 	spin_unlock_bh(&table->tb6_lock);
1152 
1153 	return err;
1154 }
1155 
1156 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1157 {
1158 	struct nl_info info = {	.nl_net = net, };
1159 
1160 	return __ip6_ins_rt(rt, &info, NULL);
1161 }
1162 
1163 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1164 					   const struct in6_addr *daddr,
1165 					   const struct in6_addr *saddr)
1166 {
1167 	struct net_device *dev;
1168 	struct rt6_info *rt;
1169 
1170 	/*
1171 	 *	Clone the route.
1172 	 */
1173 
1174 	dev = ip6_rt_get_dev_rcu(ort);
1175 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1176 	if (!rt)
1177 		return NULL;
1178 
1179 	ip6_rt_copy_init(rt, ort);
1180 	rt->rt6i_flags |= RTF_CACHE;
1181 	rt->dst.flags |= DST_HOST;
1182 	rt->rt6i_dst.addr = *daddr;
1183 	rt->rt6i_dst.plen = 128;
1184 
1185 	if (!rt6_is_gw_or_nonexthop(ort)) {
1186 		if (ort->fib6_dst.plen != 128 &&
1187 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1188 			rt->rt6i_flags |= RTF_ANYCAST;
1189 #ifdef CONFIG_IPV6_SUBTREES
1190 		if (rt->rt6i_src.plen && saddr) {
1191 			rt->rt6i_src.addr = *saddr;
1192 			rt->rt6i_src.plen = 128;
1193 		}
1194 #endif
1195 	}
1196 
1197 	return rt;
1198 }
1199 
1200 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1201 {
1202 	unsigned short flags = fib6_info_dst_flags(rt);
1203 	struct net_device *dev;
1204 	struct rt6_info *pcpu_rt;
1205 
1206 	rcu_read_lock();
1207 	dev = ip6_rt_get_dev_rcu(rt);
1208 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1209 	rcu_read_unlock();
1210 	if (!pcpu_rt)
1211 		return NULL;
1212 	ip6_rt_copy_init(pcpu_rt, rt);
1213 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1214 	return pcpu_rt;
1215 }
1216 
1217 /* It should be called with rcu_read_lock() acquired */
1218 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1219 {
1220 	struct rt6_info *pcpu_rt, **p;
1221 
1222 	p = this_cpu_ptr(rt->rt6i_pcpu);
1223 	pcpu_rt = *p;
1224 
1225 	if (pcpu_rt)
1226 		ip6_hold_safe(NULL, &pcpu_rt, false);
1227 
1228 	return pcpu_rt;
1229 }
1230 
1231 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1232 					    struct fib6_info *rt)
1233 {
1234 	struct rt6_info *pcpu_rt, *prev, **p;
1235 
1236 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1237 	if (!pcpu_rt) {
1238 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1239 		return net->ipv6.ip6_null_entry;
1240 	}
1241 
1242 	dst_hold(&pcpu_rt->dst);
1243 	p = this_cpu_ptr(rt->rt6i_pcpu);
1244 	prev = cmpxchg(p, NULL, pcpu_rt);
1245 	BUG_ON(prev);
1246 
1247 	return pcpu_rt;
1248 }
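/* Editor's note: the cmpxchg() above is a lock-free publish of the
 * per-cpu clone. BUG_ON(prev) holds because callers run with bottom
 * halves disabled on this CPU, so nothing can fill the same slot
 * between rt6_get_pcpu_route() returning NULL and the cmpxchg().
 * The generic shape of the pattern, hedged and illustrative:
 *
 *	new = alloc();
 *	old = cmpxchg(&slot, NULL, new);
 *	if (old)	// lost the race; use 'old', free 'new'
 */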
1249 
1250 /* exception hash table implementation
1251  */
1252 static DEFINE_SPINLOCK(rt6_exception_lock);
1253 
1254 /* Remove rt6_ex from hash table and free the memory
1255  * Caller must hold rt6_exception_lock
1256  */
1257 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1258 				 struct rt6_exception *rt6_ex)
1259 {
1260 	struct net *net;
1261 
1262 	if (!bucket || !rt6_ex)
1263 		return;
1264 
1265 	net = dev_net(rt6_ex->rt6i->dst.dev);
1266 	hlist_del_rcu(&rt6_ex->hlist);
1267 	dst_release(&rt6_ex->rt6i->dst);
1268 	kfree_rcu(rt6_ex, rcu);
1269 	WARN_ON_ONCE(!bucket->depth);
1270 	bucket->depth--;
1271 	net->ipv6.rt6_stats->fib_rt_cache--;
1272 }
1273 
1274 /* Remove oldest rt6_ex in bucket and free the memory
1275  * Caller must hold rt6_exception_lock
1276  */
1277 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1278 {
1279 	struct rt6_exception *rt6_ex, *oldest = NULL;
1280 
1281 	if (!bucket)
1282 		return;
1283 
1284 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1285 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1286 			oldest = rt6_ex;
1287 	}
1288 	rt6_remove_exception(bucket, oldest);
1289 }
1290 
1291 static u32 rt6_exception_hash(const struct in6_addr *dst,
1292 			      const struct in6_addr *src)
1293 {
1294 	static u32 seed __read_mostly;
1295 	u32 val;
1296 
1297 	net_get_random_once(&seed, sizeof(seed));
1298 	val = jhash(dst, sizeof(*dst), seed);
1299 
1300 #ifdef CONFIG_IPV6_SUBTREES
1301 	if (src)
1302 		val = jhash(src, sizeof(*src), val);
1303 #endif
1304 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1305 }
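/* Editor's note: a hedged sketch of how the hash above is consumed.
 * There are FIB6_EXCEPTION_BUCKET_SIZE buckets
 * (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT), each an hlist chain of at
 * most FIB6_MAX_DEPTH exceptions:
 *
 *	u32 hval = rt6_exception_hash(daddr, src_key);
 *	bucket += hval;		// advance base pointer to hashed slot
 *
 * which is exactly what the "*bucket += hval" lines in the lookup
 * helpers below do before walking the chain.
 */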
1306 
1307 /* Helper function to find the cached rt in the hash table
1308  * and update bucket pointer to point to the bucket for this
1309  * (daddr, saddr) pair
1310  * Caller must hold rt6_exception_lock
1311  */
1312 static struct rt6_exception *
1313 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1314 			      const struct in6_addr *daddr,
1315 			      const struct in6_addr *saddr)
1316 {
1317 	struct rt6_exception *rt6_ex;
1318 	u32 hval;
1319 
1320 	if (!(*bucket) || !daddr)
1321 		return NULL;
1322 
1323 	hval = rt6_exception_hash(daddr, saddr);
1324 	*bucket += hval;
1325 
1326 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1327 		struct rt6_info *rt6 = rt6_ex->rt6i;
1328 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1329 
1330 #ifdef CONFIG_IPV6_SUBTREES
1331 		if (matched && saddr)
1332 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1333 #endif
1334 		if (matched)
1335 			return rt6_ex;
1336 	}
1337 	return NULL;
1338 }
1339 
1340 /* Helper function to find the cached rt in the hash table
1341  * and update bucket pointer to point to the bucket for this
1342  * (daddr, saddr) pair
1343  * Caller must hold rcu_read_lock()
1344  */
1345 static struct rt6_exception *
1346 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1347 			 const struct in6_addr *daddr,
1348 			 const struct in6_addr *saddr)
1349 {
1350 	struct rt6_exception *rt6_ex;
1351 	u32 hval;
1352 
1353 	WARN_ON_ONCE(!rcu_read_lock_held());
1354 
1355 	if (!(*bucket) || !daddr)
1356 		return NULL;
1357 
1358 	hval = rt6_exception_hash(daddr, saddr);
1359 	*bucket += hval;
1360 
1361 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1362 		struct rt6_info *rt6 = rt6_ex->rt6i;
1363 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1364 
1365 #ifdef CONFIG_IPV6_SUBTREES
1366 		if (matched && saddr)
1367 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1368 #endif
1369 		if (matched)
1370 			return rt6_ex;
1371 	}
1372 	return NULL;
1373 }
1374 
1375 static unsigned int fib6_mtu(const struct fib6_info *rt)
1376 {
1377 	unsigned int mtu;
1378 
1379 	if (rt->fib6_pmtu) {
1380 		mtu = rt->fib6_pmtu;
1381 	} else {
1382 		struct net_device *dev = fib6_info_nh_dev(rt);
1383 		struct inet6_dev *idev;
1384 
1385 		rcu_read_lock();
1386 		idev = __in6_dev_get(dev);
1387 		mtu = idev->cnf.mtu6;
1388 		rcu_read_unlock();
1389 	}
1390 
1391 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1392 
1393 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1394 }
1395 
1396 static int rt6_insert_exception(struct rt6_info *nrt,
1397 				struct fib6_info *ort)
1398 {
1399 	struct net *net = dev_net(nrt->dst.dev);
1400 	struct rt6_exception_bucket *bucket;
1401 	struct in6_addr *src_key = NULL;
1402 	struct rt6_exception *rt6_ex;
1403 	int err = 0;
1404 
1405 	spin_lock_bh(&rt6_exception_lock);
1406 
1407 	if (ort->exception_bucket_flushed) {
1408 		err = -EINVAL;
1409 		goto out;
1410 	}
1411 
1412 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1413 					lockdep_is_held(&rt6_exception_lock));
1414 	if (!bucket) {
1415 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1416 				 GFP_ATOMIC);
1417 		if (!bucket) {
1418 			err = -ENOMEM;
1419 			goto out;
1420 		}
1421 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1422 	}
1423 
1424 #ifdef CONFIG_IPV6_SUBTREES
1425 	/* rt6i_src.plen != 0 indicates ort is in a subtree
1426 	 * and exception table is indexed by a hash of
1427 	 * both rt6i_dst and rt6i_src.
1428 	 * Otherwise, the exception table is indexed by
1429 	 * a hash of only rt6i_dst.
1430 	 */
1431 	if (ort->fib6_src.plen)
1432 		src_key = &nrt->rt6i_src.addr;
1433 #endif
1434 
1435 	/* Update rt6i_prefsrc as it could be changed
1436 	 * in rt6_remove_prefsrc()
1437 	 */
1438 	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
1439 	/* rt6_mtu_change() might lower mtu on ort.
1440 	 * Only insert this exception route if its mtu
1441 	 * is less than ort's mtu value.
1442 	 */
1443 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1444 		err = -EINVAL;
1445 		goto out;
1446 	}
1447 
1448 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1449 					       src_key);
1450 	if (rt6_ex)
1451 		rt6_remove_exception(bucket, rt6_ex);
1452 
1453 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1454 	if (!rt6_ex) {
1455 		err = -ENOMEM;
1456 		goto out;
1457 	}
1458 	rt6_ex->rt6i = nrt;
1459 	rt6_ex->stamp = jiffies;
1460 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1461 	bucket->depth++;
1462 	net->ipv6.rt6_stats->fib_rt_cache++;
1463 
1464 	if (bucket->depth > FIB6_MAX_DEPTH)
1465 		rt6_exception_remove_oldest(bucket);
1466 
1467 out:
1468 	spin_unlock_bh(&rt6_exception_lock);
1469 
1470 	/* Update fn->fn_sernum to invalidate all cached dst */
1471 	if (!err) {
1472 		spin_lock_bh(&ort->fib6_table->tb6_lock);
1473 		fib6_update_sernum(net, ort);
1474 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1475 		fib6_force_start_gc(net);
1476 	}
1477 
1478 	return err;
1479 }
1480 
1481 void rt6_flush_exceptions(struct fib6_info *rt)
1482 {
1483 	struct rt6_exception_bucket *bucket;
1484 	struct rt6_exception *rt6_ex;
1485 	struct hlist_node *tmp;
1486 	int i;
1487 
1488 	spin_lock_bh(&rt6_exception_lock);
1489 	/* Prevent rt6_insert_exception() from recreating the bucket list */
1490 	rt->exception_bucket_flushed = 1;
1491 
1492 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 				    lockdep_is_held(&rt6_exception_lock));
1494 	if (!bucket)
1495 		goto out;
1496 
1497 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1498 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1499 			rt6_remove_exception(bucket, rt6_ex);
1500 		WARN_ON_ONCE(bucket->depth);
1501 		bucket++;
1502 	}
1503 
1504 out:
1505 	spin_unlock_bh(&rt6_exception_lock);
1506 }
1507 
1508 /* Find the cached rt in the hash table inside the passed-in rt
1509  * Caller has to hold rcu_read_lock()
1510  */
1511 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1512 					   struct in6_addr *daddr,
1513 					   struct in6_addr *saddr)
1514 {
1515 	struct rt6_exception_bucket *bucket;
1516 	struct in6_addr *src_key = NULL;
1517 	struct rt6_exception *rt6_ex;
1518 	struct rt6_info *res = NULL;
1519 
1520 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1521 
1522 #ifdef CONFIG_IPV6_SUBTREES
1523 	/* rt6i_src.plen != 0 indicates rt is in a subtree
1524 	 * and exception table is indexed by a hash of
1525 	 * both rt6i_dst and rt6i_src.
1526 	 * Otherwise, the exception table is indexed by
1527 	 * a hash of only rt6i_dst.
1528 	 */
1529 	if (rt->fib6_src.plen)
1530 		src_key = saddr;
1531 #endif
1532 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1533 
1534 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1535 		res = rt6_ex->rt6i;
1536 
1537 	return res;
1538 }
1539 
1540 /* Remove the passed-in cached rt from the hash table that contains it */
1541 static int rt6_remove_exception_rt(struct rt6_info *rt)
1542 {
1543 	struct rt6_exception_bucket *bucket;
1544 	struct in6_addr *src_key = NULL;
1545 	struct rt6_exception *rt6_ex;
1546 	struct fib6_info *from;
1547 	int err;
1548 
1549 	from = rcu_dereference(rt->from);
1550 	if (!from ||
1551 	    !(rt->rt6i_flags & RTF_CACHE))
1552 		return -EINVAL;
1553 
1554 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1555 		return -ENOENT;
1556 
1557 	spin_lock_bh(&rt6_exception_lock);
1558 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1559 				    lockdep_is_held(&rt6_exception_lock));
1560 #ifdef CONFIG_IPV6_SUBTREES
1561 	/* rt6i_src.plen != 0 indicates 'from' is in a subtree
1562 	 * and exception table is indexed by a hash of
1563 	 * both rt6i_dst and rt6i_src.
1564 	 * Otherwise, the exception table is indexed by
1565 	 * a hash of only rt6i_dst.
1566 	 */
1567 	if (from->fib6_src.plen)
1568 		src_key = &rt->rt6i_src.addr;
1569 #endif
1570 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1571 					       &rt->rt6i_dst.addr,
1572 					       src_key);
1573 	if (rt6_ex) {
1574 		rt6_remove_exception(bucket, rt6_ex);
1575 		err = 0;
1576 	} else {
1577 		err = -ENOENT;
1578 	}
1579 
1580 	spin_unlock_bh(&rt6_exception_lock);
1581 	return err;
1582 }
1583 
1584 /* Find rt6_ex which contains the passed in rt cache and
1585  * refresh its stamp
1586  */
1587 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1588 {
1589 	struct rt6_exception_bucket *bucket;
1590 	struct fib6_info *from = rt->from;
1591 	struct in6_addr *src_key = NULL;
1592 	struct rt6_exception *rt6_ex;
1593 
1594 	if (!from ||
1595 	    !(rt->rt6i_flags & RTF_CACHE))
1596 		return;
1597 
1598 	rcu_read_lock();
1599 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1600 
1601 #ifdef CONFIG_IPV6_SUBTREES
1602 	/* rt6i_src.plen != 0 indicates 'from' is in a subtree
1603 	 * and exception table is indexed by a hash of
1604 	 * both rt6i_dst and rt6i_src.
1605 	 * Otherwise, the exception table is indexed by
1606 	 * a hash of only rt6i_dst.
1607 	 */
1608 	if (from->fib6_src.plen)
1609 		src_key = &rt->rt6i_src.addr;
1610 #endif
1611 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1612 					  &rt->rt6i_dst.addr,
1613 					  src_key);
1614 	if (rt6_ex)
1615 		rt6_ex->stamp = jiffies;
1616 
1617 	rcu_read_unlock();
1618 }
1619 
1620 static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
1621 {
1622 	struct rt6_exception_bucket *bucket;
1623 	struct rt6_exception *rt6_ex;
1624 	int i;
1625 
1626 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1627 					lockdep_is_held(&rt6_exception_lock));
1628 
1629 	if (bucket) {
1630 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1631 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1632 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1633 			}
1634 			bucket++;
1635 		}
1636 	}
1637 }
1638 
1639 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1640 					 struct rt6_info *rt, int mtu)
1641 {
1642 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1643 	 * lowest MTU in the path: always allow updating the route PMTU to
1644 	 * reflect PMTU decreases.
1645 	 *
1646 	 * If the new MTU is higher, and the route PMTU is equal to the local
1647 	 * MTU, this means the old MTU is the lowest in the path, so allow
1648 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1649 	 * handle this.
1650 	 */
1651 
1652 	if (dst_mtu(&rt->dst) >= mtu)
1653 		return true;
1654 
1655 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1656 		return true;
1657 
1658 	return false;
1659 }
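/* Editor's note: a worked example of the rules above (illustrative
 * numbers). Link MTU (idev->cnf.mtu6) 1500, cached route PMTU 1280:
 *
 *	new MTU 1280 (lower):	1280 >= 1280	-> update allowed
 *	new MTU 9000 (higher):	1280 < 9000 and 1280 != 1500
 *				-> keep 1280; another hop is smaller
 *
 * Had the route PMTU been 1500 (equal to the old link MTU), this link
 * was the path bottleneck and the increase would be allowed.
 */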
1660 
1661 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1662 				       struct fib6_info *rt, int mtu)
1663 {
1664 	struct rt6_exception_bucket *bucket;
1665 	struct rt6_exception *rt6_ex;
1666 	int i;
1667 
1668 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1669 					lockdep_is_held(&rt6_exception_lock));
1670 
1671 	if (!bucket)
1672 		return;
1673 
1674 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1675 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1676 			struct rt6_info *entry = rt6_ex->rt6i;
1677 
1678 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1679 			 * route), the metrics of its rt->from have already
1680 			 * been updated.
1681 			 */
1682 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1683 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1684 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1685 		}
1686 		bucket++;
1687 	}
1688 }
1689 
1690 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1691 
1692 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1693 					struct in6_addr *gateway)
1694 {
1695 	struct rt6_exception_bucket *bucket;
1696 	struct rt6_exception *rt6_ex;
1697 	struct hlist_node *tmp;
1698 	int i;
1699 
1700 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1701 		return;
1702 
1703 	spin_lock_bh(&rt6_exception_lock);
1704 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1705 				     lockdep_is_held(&rt6_exception_lock));
1706 
1707 	if (bucket) {
1708 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1709 			hlist_for_each_entry_safe(rt6_ex, tmp,
1710 						  &bucket->chain, hlist) {
1711 				struct rt6_info *entry = rt6_ex->rt6i;
1712 
1713 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1714 				    RTF_CACHE_GATEWAY &&
1715 				    ipv6_addr_equal(gateway,
1716 						    &entry->rt6i_gateway)) {
1717 					rt6_remove_exception(bucket, rt6_ex);
1718 				}
1719 			}
1720 			bucket++;
1721 		}
1722 	}
1723 
1724 	spin_unlock_bh(&rt6_exception_lock);
1725 }
1726 
1727 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1728 				      struct rt6_exception *rt6_ex,
1729 				      struct fib6_gc_args *gc_args,
1730 				      unsigned long now)
1731 {
1732 	struct rt6_info *rt = rt6_ex->rt6i;
1733 
1734 	/* we are pruning and obsoleting aged-out and non-gateway exceptions
1735 	 * even if others still hold references to them, so that on the next
1736 	 * dst_check() such references can be dropped.
1737 	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
1738 	 * expired, independently of their aging, as per RFC 8201 section 4
1739 	 */
1740 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1741 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1742 			RT6_TRACE("aging clone %p\n", rt);
1743 			rt6_remove_exception(bucket, rt6_ex);
1744 			return;
1745 		}
1746 	} else if (time_after(jiffies, rt->dst.expires)) {
1747 		RT6_TRACE("purging expired route %p\n", rt);
1748 		rt6_remove_exception(bucket, rt6_ex);
1749 		return;
1750 	}
1751 
1752 	if (rt->rt6i_flags & RTF_GATEWAY) {
1753 		struct neighbour *neigh;
1754 		__u8 neigh_flags = 0;
1755 
1756 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1757 		if (neigh)
1758 			neigh_flags = neigh->flags;
1759 
1760 		if (!(neigh_flags & NTF_ROUTER)) {
1761 			RT6_TRACE("purging route %p via non-router but gateway\n",
1762 				  rt);
1763 			rt6_remove_exception(bucket, rt6_ex);
1764 			return;
1765 		}
1766 	}
1767 
1768 	gc_args->more++;
1769 }
1770 
1771 void rt6_age_exceptions(struct fib6_info *rt,
1772 			struct fib6_gc_args *gc_args,
1773 			unsigned long now)
1774 {
1775 	struct rt6_exception_bucket *bucket;
1776 	struct rt6_exception *rt6_ex;
1777 	struct hlist_node *tmp;
1778 	int i;
1779 
1780 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1781 		return;
1782 
1783 	rcu_read_lock_bh();
1784 	spin_lock(&rt6_exception_lock);
1785 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1786 				    lockdep_is_held(&rt6_exception_lock));
1787 
1788 	if (bucket) {
1789 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1790 			hlist_for_each_entry_safe(rt6_ex, tmp,
1791 						  &bucket->chain, hlist) {
1792 				rt6_age_examine_exception(bucket, rt6_ex,
1793 							  gc_args, now);
1794 			}
1795 			bucket++;
1796 		}
1797 	}
1798 	spin_unlock(&rt6_exception_lock);
1799 	rcu_read_unlock_bh();
1800 }
1801 
1802 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1803 			       int oif, struct flowi6 *fl6,
1804 			       const struct sk_buff *skb, int flags)
1805 {
1806 	struct fib6_node *fn, *saved_fn;
1807 	struct fib6_info *f6i;
1808 	struct rt6_info *rt;
1809 	int strict = 0;
1810 
1811 	strict |= flags & RT6_LOOKUP_F_IFACE;
1812 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1813 	if (net->ipv6.devconf_all->forwarding == 0)
1814 		strict |= RT6_LOOKUP_F_REACHABLE;
1815 
1816 	rcu_read_lock();
1817 
1818 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1819 	saved_fn = fn;
1820 
1821 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1822 		oif = 0;
1823 
1824 redo_rt6_select:
1825 	f6i = rt6_select(net, fn, oif, strict);
1826 	if (f6i->fib6_nsiblings)
1827 		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1828 	if (f6i == net->ipv6.fib6_null_entry) {
1829 		fn = fib6_backtrack(fn, &fl6->saddr);
1830 		if (fn)
1831 			goto redo_rt6_select;
1832 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1833 			/* also consider unreachable route */
1834 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1835 			fn = saved_fn;
1836 			goto redo_rt6_select;
1837 		}
1838 	}
1839 
1840 	if (f6i == net->ipv6.fib6_null_entry) {
1841 		rt = net->ipv6.ip6_null_entry;
1842 		rcu_read_unlock();
1843 		dst_hold(&rt->dst);
1844 		trace_fib6_table_lookup(net, rt, table, fl6);
1845 		return rt;
1846 	}
1847 
1848 	/* Search through exception table */
1849 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1850 	if (rt) {
1851 		if (ip6_hold_safe(net, &rt, true))
1852 			dst_use_noref(&rt->dst, jiffies);
1853 
1854 		rcu_read_unlock();
1855 		trace_fib6_table_lookup(net, rt, table, fl6);
1856 		return rt;
1857 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1858 			    !(f6i->fib6_flags & RTF_GATEWAY))) {
1859 		/* Create a RTF_CACHE clone which will not be
1860 		 * owned by the fib6 tree.  It is for the special case where
1861 		 * the daddr in the skb during the neighbor look-up is different
1862 		 * from the fl6->daddr used to look up the route here.
1863 		 */
1864 		struct rt6_info *uncached_rt;
1865 
1866 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1867 
1868 		rcu_read_unlock();
1869 
1870 		if (uncached_rt) {
1871 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1872 			 * No need for another dst_hold()
1873 			 */
1874 			rt6_uncached_list_add(uncached_rt);
1875 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1876 		} else {
1877 			uncached_rt = net->ipv6.ip6_null_entry;
1878 			dst_hold(&uncached_rt->dst);
1879 		}
1880 
1881 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
1882 		return uncached_rt;
1883 
1884 	} else {
1885 		/* Get a percpu copy */
1886 
1887 		struct rt6_info *pcpu_rt;
1888 
1889 		local_bh_disable();
1890 		pcpu_rt = rt6_get_pcpu_route(f6i);
1891 
1892 		if (!pcpu_rt)
1893 			pcpu_rt = rt6_make_pcpu_route(net, f6i);
1894 
1895 		local_bh_enable();
1896 		rcu_read_unlock();
1897 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1898 		return pcpu_rt;
1899 	}
1900 }
1901 EXPORT_SYMBOL_GPL(ip6_pol_route);
1902 
1903 static struct rt6_info *ip6_pol_route_input(struct net *net,
1904 					    struct fib6_table *table,
1905 					    struct flowi6 *fl6,
1906 					    const struct sk_buff *skb,
1907 					    int flags)
1908 {
1909 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1910 }
1911 
1912 struct dst_entry *ip6_route_input_lookup(struct net *net,
1913 					 struct net_device *dev,
1914 					 struct flowi6 *fl6,
1915 					 const struct sk_buff *skb,
1916 					 int flags)
1917 {
1918 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1919 		flags |= RT6_LOOKUP_F_IFACE;
1920 
1921 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1922 }
1923 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1924 
1925 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1926 				  struct flow_keys *keys,
1927 				  struct flow_keys *flkeys)
1928 {
1929 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1930 	const struct ipv6hdr *key_iph = outer_iph;
1931 	struct flow_keys *_flkeys = flkeys;
1932 	const struct ipv6hdr *inner_iph;
1933 	const struct icmp6hdr *icmph;
1934 	struct ipv6hdr _inner_iph;
1935 
1936 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1937 		goto out;
1938 
1939 	icmph = icmp6_hdr(skb);
1940 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1941 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1942 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1943 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1944 		goto out;
1945 
1946 	inner_iph = skb_header_pointer(skb,
1947 				       skb_transport_offset(skb) + sizeof(*icmph),
1948 				       sizeof(_inner_iph), &_inner_iph);
1949 	if (!inner_iph)
1950 		goto out;
1951 
1952 	key_iph = inner_iph;
1953 	_flkeys = NULL;
1954 out:
1955 	if (_flkeys) {
1956 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1957 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1958 		keys->tags.flow_label = _flkeys->tags.flow_label;
1959 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
1960 	} else {
1961 		keys->addrs.v6addrs.src = key_iph->saddr;
1962 		keys->addrs.v6addrs.dst = key_iph->daddr;
1963 		keys->tags.flow_label = ip6_flowinfo(key_iph);
1964 		keys->basic.ip_proto = key_iph->nexthdr;
1965 	}
1966 }
1967 
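/* The hash policy is selected by ip6_multipath_hash_policy(): policy 0
 * hashes on L3 keys (addresses, flow label, protocol), policy 1 on the
 * L4 five-tuple (addresses, ports, protocol).
 */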
1968 /* If skb is set it will be used and fl6 can be NULL. */
1969 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1970 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1971 {
1972 	struct flow_keys hash_keys;
1973 	u32 mhash;
1974 
1975 	switch (ip6_multipath_hash_policy(net)) {
1976 	case 0:
1977 		memset(&hash_keys, 0, sizeof(hash_keys));
1978 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1979 		if (skb) {
1980 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1981 		} else {
1982 			hash_keys.addrs.v6addrs.src = fl6->saddr;
1983 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
1984 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1985 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
1986 		}
1987 		break;
1988 	case 1:
1989 		if (skb) {
1990 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1991 			struct flow_keys keys;
1992 
1993 			/* short-circuit if we already have L4 hash present */
1994 			if (skb->l4_hash)
1995 				return skb_get_hash_raw(skb) >> 1;
1996 
1997 			memset(&hash_keys, 0, sizeof(hash_keys));
1998 
1999 			if (!flkeys) {
2000 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2001 				flkeys = &keys;
2002 			}
2003 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2004 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2005 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2006 			hash_keys.ports.src = flkeys->ports.src;
2007 			hash_keys.ports.dst = flkeys->ports.dst;
2008 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2009 		} else {
2010 			memset(&hash_keys, 0, sizeof(hash_keys));
2011 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2012 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2013 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2014 			hash_keys.ports.src = fl6->fl6_sport;
2015 			hash_keys.ports.dst = fl6->fl6_dport;
2016 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2017 		}
2018 		break;
2019 	}
2020 	mhash = flow_hash_from_keys(&hash_keys);
2021 
2022 	return mhash >> 1;
2023 }
2024 
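/* Route an incoming packet: build a flowi6 from the IPv6 header (plus
 * any collected tunnel metadata and early flow dissection), compute a
 * multipath hash for ICMPv6 so errors stick to their flow, and attach
 * the looked-up dst to the skb.
 */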
2025 void ip6_route_input(struct sk_buff *skb)
2026 {
2027 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2028 	struct net *net = dev_net(skb->dev);
2029 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2030 	struct ip_tunnel_info *tun_info;
2031 	struct flowi6 fl6 = {
2032 		.flowi6_iif = skb->dev->ifindex,
2033 		.daddr = iph->daddr,
2034 		.saddr = iph->saddr,
2035 		.flowlabel = ip6_flowinfo(iph),
2036 		.flowi6_mark = skb->mark,
2037 		.flowi6_proto = iph->nexthdr,
2038 	};
2039 	struct flow_keys *flkeys = NULL, _flkeys;
2040 
2041 	tun_info = skb_tunnel_info(skb);
2042 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2043 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2044 
2045 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2046 		flkeys = &_flkeys;
2047 
2048 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2049 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2050 	skb_dst_drop(skb);
2051 	skb_dst_set(skb,
2052 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2053 }
2054 
2055 static struct rt6_info *ip6_pol_route_output(struct net *net,
2056 					     struct fib6_table *table,
2057 					     struct flowi6 *fl6,
2058 					     const struct sk_buff *skb,
2059 					     int flags)
2060 {
2061 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2062 }
2063 
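/* Output route lookup: destinations needing strict scope (link-local,
 * multicast) may be handed to an L3 master device first.
 * RT6_LOOKUP_F_IFACE enforces a strict oif match when the socket is
 * bound to a device, the destination needs a strict lookup, or an oif
 * is given without a source address.
 */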
2064 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2065 					 struct flowi6 *fl6, int flags)
2066 {
2067 	bool any_src;
2068 
2069 	if (rt6_need_strict(&fl6->daddr)) {
2070 		struct dst_entry *dst;
2071 
2072 		dst = l3mdev_link_scope_lookup(net, fl6);
2073 		if (dst)
2074 			return dst;
2075 	}
2076 
2077 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2078 
2079 	any_src = ipv6_addr_any(&fl6->saddr);
2080 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2081 	    (fl6->flowi6_oif && any_src))
2082 		flags |= RT6_LOOKUP_F_IFACE;
2083 
2084 	if (!any_src)
2085 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2086 	else if (sk)
2087 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2088 
2089 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2090 }
2091 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2092 
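/* Clone dst_orig into a blackhole dst that discards everything sent
 * through it while preserving the original's metrics and addresses.
 * Typically used (e.g. by XFRM) when a flow must be held back until
 * its state is resolved; dst_orig is released in all cases.
 */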
2093 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2094 {
2095 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2096 	struct net_device *loopback_dev = net->loopback_dev;
2097 	struct dst_entry *new = NULL;
2098 
2099 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2100 		       DST_OBSOLETE_DEAD, 0);
2101 	if (rt) {
2102 		rt6_info_init(rt);
2103 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2104 
2105 		new = &rt->dst;
2106 		new->__use = 1;
2107 		new->input = dst_discard;
2108 		new->output = dst_discard_out;
2109 
2110 		dst_copy_metrics(new, &ort->dst);
2111 
2112 		rt->rt6i_idev = in6_dev_get(loopback_dev);
2113 		rt->rt6i_gateway = ort->rt6i_gateway;
2114 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2115 
2116 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2117 #ifdef CONFIG_IPV6_SUBTREES
2118 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2119 #endif
2120 	}
2121 
2122 	dst_release(dst_orig);
2123 	return new ? new : ERR_PTR(-ENOMEM);
2124 }
2125 
2126 /*
2127  *	Destination cache support functions
2128  */
2129 
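/* A dst is validated against the tree by comparing its cookie with the
 * serial number of the fib6 node it was derived from; any tree change
 * bumps the sernum, so a stale cookie forces the caller to relookup.
 */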
2130 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2131 {
2132 	u32 rt_cookie = 0;
2133 
2134 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2135 		return false;
2136 
2137 	if (fib6_check_expired(f6i))
2138 		return false;
2139 
2140 	return true;
2141 }
2142 
2143 static struct dst_entry *rt6_check(struct rt6_info *rt,
2144 				   struct fib6_info *from,
2145 				   u32 cookie)
2146 {
2147 	u32 rt_cookie = 0;
2148 
2149 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2150 	    rt_cookie != cookie)
2151 		return NULL;
2152 
2153 	if (rt6_check_expired(rt))
2154 		return NULL;
2155 
2156 	return &rt->dst;
2157 }
2158 
2159 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2160 					    struct fib6_info *from,
2161 					    u32 cookie)
2162 {
2163 	if (!__rt6_check_expired(rt) &&
2164 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2165 	    fib6_check(from, cookie))
2166 		return &rt->dst;
2167 	else
2168 		return NULL;
2169 }
2170 
2171 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2172 {
2173 	struct dst_entry *dst_ret;
2174 	struct fib6_info *from;
2175 	struct rt6_info *rt;
2176 
2177 	rt = container_of(dst, struct rt6_info, dst);
2178 
2179 	rcu_read_lock();
2180 
2181 	/* All IPv6 dsts are created with ->obsolete set to
2182 	 * DST_OBSOLETE_FORCE_CHK, which always forces validation
2183 	 * calls down into this function.
2184 	 */
2185 
2186 	from = rcu_dereference(rt->from);
2187 
2188 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2189 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2190 		dst_ret = rt6_dst_from_check(rt, from, cookie);
2191 	else
2192 		dst_ret = rt6_check(rt, from, cookie);
2193 
2194 	rcu_read_unlock();
2195 
2196 	return dst_ret;
2197 }
2198 
2199 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2200 {
2201 	struct rt6_info *rt = (struct rt6_info *) dst;
2202 
2203 	if (rt) {
2204 		if (rt->rt6i_flags & RTF_CACHE) {
2205 			rcu_read_lock();
2206 			if (rt6_check_expired(rt)) {
2207 				rt6_remove_exception_rt(rt);
2208 				dst = NULL;
2209 			}
2210 			rcu_read_unlock();
2211 		} else {
2212 			dst_release(dst);
2213 			dst = NULL;
2214 		}
2215 	}
2216 	return dst;
2217 }
2218 
2219 static void ip6_link_failure(struct sk_buff *skb)
2220 {
2221 	struct rt6_info *rt;
2222 
2223 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2224 
2225 	rt = (struct rt6_info *) skb_dst(skb);
2226 	if (rt) {
2227 		rcu_read_lock();
2228 		if (rt->rt6i_flags & RTF_CACHE) {
2229 			if (dst_hold_safe(&rt->dst))
2230 				rt6_remove_exception_rt(rt);
2231 		} else {
2232 			struct fib6_info *from;
2233 			struct fib6_node *fn;
2234 
2235 			from = rcu_dereference(rt->from);
2236 			if (from) {
2237 				fn = rcu_dereference(from->fib6_node);
2238 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2239 					fn->fn_sernum = -1;
2240 			}
2241 		}
2242 		rcu_read_unlock();
2243 	}
2244 }
2245 
2246 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2247 {
2248 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2249 		struct fib6_info *from;
2250 
2251 		rcu_read_lock();
2252 		from = rcu_dereference(rt0->from);
2253 		if (from)
2254 			rt0->dst.expires = from->expires;
2255 		rcu_read_unlock();
2256 	}
2257 
2258 	dst_set_expires(&rt0->dst, timeout);
2259 	rt0->rt6i_flags |= RTF_EXPIRES;
2260 }
2261 
2262 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2263 {
2264 	struct net *net = dev_net(rt->dst.dev);
2265 
2266 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2267 	rt->rt6i_flags |= RTF_MODIFIED;
2268 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2269 }
2270 
2271 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2272 {
2273 	bool from_set;
2274 
2275 	rcu_read_lock();
2276 	from_set = !!rcu_dereference(rt->from);
2277 	rcu_read_unlock();
2278 
2279 	return !(rt->rt6i_flags & RTF_CACHE) &&
2280 		(rt->rt6i_flags & RTF_PCPU || from_set);
2281 }
2282 
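/* Core PMTU update: confirm the neighbour, clamp the new MTU to
 * IPV6_MIN_MTU, and ignore increases.  Routes that are already private
 * copies (cached/uncached) are updated in place; otherwise a new
 * RTF_CACHE exception carrying the reduced MTU is inserted.
 */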
2283 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2284 				 const struct ipv6hdr *iph, u32 mtu)
2285 {
2286 	const struct in6_addr *daddr, *saddr;
2287 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2288 
2289 	if (rt6->rt6i_flags & RTF_LOCAL)
2290 		return;
2291 
2292 	if (dst_metric_locked(dst, RTAX_MTU))
2293 		return;
2294 
2295 	if (iph) {
2296 		daddr = &iph->daddr;
2297 		saddr = &iph->saddr;
2298 	} else if (sk) {
2299 		daddr = &sk->sk_v6_daddr;
2300 		saddr = &inet6_sk(sk)->saddr;
2301 	} else {
2302 		daddr = NULL;
2303 		saddr = NULL;
2304 	}
2305 	dst_confirm_neigh(dst, daddr);
2306 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2307 	if (mtu >= dst_mtu(dst))
2308 		return;
2309 
2310 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2311 		rt6_do_update_pmtu(rt6, mtu);
2312 		/* update rt6_ex->stamp for cache */
2313 		if (rt6->rt6i_flags & RTF_CACHE)
2314 			rt6_update_exception_stamp_rt(rt6);
2315 	} else if (daddr) {
2316 		struct fib6_info *from;
2317 		struct rt6_info *nrt6;
2318 
2319 		rcu_read_lock();
2320 		from = rcu_dereference(rt6->from);
2321 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2322 		if (nrt6) {
2323 			rt6_do_update_pmtu(nrt6, mtu);
2324 			if (rt6_insert_exception(nrt6, from))
2325 				dst_release_immediate(&nrt6->dst);
2326 		}
2327 		rcu_read_unlock();
2328 	}
2329 }
2330 
2331 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2332 			       struct sk_buff *skb, u32 mtu)
2333 {
2334 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2335 }
2336 
2337 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2338 		     int oif, u32 mark, kuid_t uid)
2339 {
2340 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2341 	struct dst_entry *dst;
2342 	struct flowi6 fl6;
2343 
2344 	memset(&fl6, 0, sizeof(fl6));
2345 	fl6.flowi6_oif = oif;
2346 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2347 	fl6.daddr = iph->daddr;
2348 	fl6.saddr = iph->saddr;
2349 	fl6.flowlabel = ip6_flowinfo(iph);
2350 	fl6.flowi6_uid = uid;
2351 
2352 	dst = ip6_route_output(net, NULL, &fl6);
2353 	if (!dst->error)
2354 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2355 	dst_release(dst);
2356 }
2357 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
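/* Sketch of a typical caller (e.g. a tunnel error handler), assuming
 * skb->data points at the inner IPv6 header and 'info' is the __be32
 * MTU taken from an ICMPV6_PKT_TOOBIG message:
 *
 *	ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 */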
2358 
2359 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2360 {
2361 	struct dst_entry *dst;
2362 
2363 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2364 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
2365 
2366 	dst = __sk_dst_get(sk);
2367 	if (!dst || !dst->obsolete ||
2368 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2369 		return;
2370 
2371 	bh_lock_sock(sk);
2372 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2373 		ip6_datagram_dst_update(sk, false);
2374 	bh_unlock_sock(sk);
2375 }
2376 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2377 
2378 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2379 			   const struct flowi6 *fl6)
2380 {
2381 #ifdef CONFIG_IPV6_SUBTREES
2382 	struct ipv6_pinfo *np = inet6_sk(sk);
2383 #endif
2384 
2385 	ip6_dst_store(sk, dst,
2386 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2387 		      &sk->sk_v6_daddr : NULL,
2388 #ifdef CONFIG_IPV6_SUBTREES
2389 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2390 		      &np->saddr :
2391 #endif
2392 		      NULL);
2393 }
2394 
2395 /* Handle redirects */
2396 struct ip6rd_flowi {
2397 	struct flowi6 fl6;
2398 	struct in6_addr gateway;
2399 };
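/* fib6_rule_lookup() only passes a flowi6 through, so the redirecting
 * router's address rides piggybacked after the flowi6 in ip6rd_flowi
 * and is recovered by the cast in __ip6_route_redirect().
 */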
2400 
2401 static struct rt6_info *__ip6_route_redirect(struct net *net,
2402 					     struct fib6_table *table,
2403 					     struct flowi6 *fl6,
2404 					     const struct sk_buff *skb,
2405 					     int flags)
2406 {
2407 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2408 	struct rt6_info *ret = NULL, *rt_cache;
2409 	struct fib6_info *rt;
2410 	struct fib6_node *fn;
2411 
2412 	/* Get the "current" route for this destination and
2413 	 * check if the redirect has come from the appropriate router.
2414 	 *
2415 	 * RFC 4861 specifies that redirects should only be
2416 	 * accepted if they come from the nexthop to the target.
2417 	 * Due to the way the routes are chosen, this notion
2418 	 * is a bit fuzzy and one might need to check all possible
2419 	 * routes.
2420 	 */
2421 
2422 	rcu_read_lock();
2423 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2424 restart:
2425 	for_each_fib6_node_rt_rcu(fn) {
2426 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2427 			continue;
2428 		if (fib6_check_expired(rt))
2429 			continue;
2430 		if (rt->fib6_flags & RTF_REJECT)
2431 			break;
2432 		if (!(rt->fib6_flags & RTF_GATEWAY))
2433 			continue;
2434 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2435 			continue;
2436 		/* rt_cache's gateway might be different from its 'parent'
2437 		 * in the case of an ip redirect.
2438 		 * So we keep searching in the exception table if the gateway
2439 		 * is different.
2440 		 */
2441 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2442 			rt_cache = rt6_find_cached_rt(rt,
2443 						      &fl6->daddr,
2444 						      &fl6->saddr);
2445 			if (rt_cache &&
2446 			    ipv6_addr_equal(&rdfl->gateway,
2447 					    &rt_cache->rt6i_gateway)) {
2448 				ret = rt_cache;
2449 				break;
2450 			}
2451 			continue;
2452 		}
2453 		break;
2454 	}
2455 
2456 	if (!rt)
2457 		rt = net->ipv6.fib6_null_entry;
2458 	else if (rt->fib6_flags & RTF_REJECT) {
2459 		ret = net->ipv6.ip6_null_entry;
2460 		goto out;
2461 	}
2462 
2463 	if (rt == net->ipv6.fib6_null_entry) {
2464 		fn = fib6_backtrack(fn, &fl6->saddr);
2465 		if (fn)
2466 			goto restart;
2467 	}
2468 
2469 out:
2470 	if (ret)
2471 		dst_hold(&ret->dst);
2472 	else
2473 		ret = ip6_create_rt_rcu(rt);
2474 
2475 	rcu_read_unlock();
2476 
2477 	trace_fib6_table_lookup(net, ret, table, fl6);
2478 	return ret;
2479 }
2480 
2481 static struct dst_entry *ip6_route_redirect(struct net *net,
2482 					    const struct flowi6 *fl6,
2483 					    const struct sk_buff *skb,
2484 					    const struct in6_addr *gateway)
2485 {
2486 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2487 	struct ip6rd_flowi rdfl;
2488 
2489 	rdfl.fl6 = *fl6;
2490 	rdfl.gateway = *gateway;
2491 
2492 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2493 				flags, __ip6_route_redirect);
2494 }
2495 
2496 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2497 		  kuid_t uid)
2498 {
2499 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2500 	struct dst_entry *dst;
2501 	struct flowi6 fl6;
2502 
2503 	memset(&fl6, 0, sizeof(fl6));
2504 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2505 	fl6.flowi6_oif = oif;
2506 	fl6.flowi6_mark = mark;
2507 	fl6.daddr = iph->daddr;
2508 	fl6.saddr = iph->saddr;
2509 	fl6.flowlabel = ip6_flowinfo(iph);
2510 	fl6.flowi6_uid = uid;
2511 
2512 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2513 	rt6_do_redirect(dst, NULL, skb);
2514 	dst_release(dst);
2515 }
2516 EXPORT_SYMBOL_GPL(ip6_redirect);
2517 
2518 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2519 			    u32 mark)
2520 {
2521 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2522 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2523 	struct dst_entry *dst;
2524 	struct flowi6 fl6;
2525 
2526 	memset(&fl6, 0, sizeof(fl6));
2527 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2528 	fl6.flowi6_oif = oif;
2529 	fl6.flowi6_mark = mark;
2530 	fl6.daddr = msg->dest;
2531 	fl6.saddr = iph->daddr;
2532 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2533 
2534 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2535 	rt6_do_redirect(dst, NULL, skb);
2536 	dst_release(dst);
2537 }
2538 
2539 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2540 {
2541 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2542 		     sk->sk_uid);
2543 }
2544 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2545 
2546 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2547 {
2548 	struct net_device *dev = dst->dev;
2549 	unsigned int mtu = dst_mtu(dst);
2550 	struct net *net = dev_net(dev);
2551 
2552 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2553 
2554 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2555 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2556 
2557 	/*
2558 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2559 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2560 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2561 	 * rely only on pmtu discovery"
2562 	 */
2563 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2564 		mtu = IPV6_MAXPLEN;
2565 	return mtu;
2566 }
2567 
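/* Effective MTU of a dst: an explicit RTAX_MTU metric wins, otherwise
 * the device's IPv6 MTU is used, capped at IP6_MAX_MTU and reduced by
 * any lightweight-tunnel encapsulation headroom.
 */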
2568 static unsigned int ip6_mtu(const struct dst_entry *dst)
2569 {
2570 	struct inet6_dev *idev;
2571 	unsigned int mtu;
2572 
2573 	mtu = dst_metric_raw(dst, RTAX_MTU);
2574 	if (mtu)
2575 		goto out;
2576 
2577 	mtu = IPV6_MIN_MTU;
2578 
2579 	rcu_read_lock();
2580 	idev = __in6_dev_get(dst->dev);
2581 	if (idev)
2582 		mtu = idev->cnf.mtu6;
2583 	rcu_read_unlock();
2584 
2585 out:
2586 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2587 
2588 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2589 }
2590 
2591 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2592 				  struct flowi6 *fl6)
2593 {
2594 	struct dst_entry *dst;
2595 	struct rt6_info *rt;
2596 	struct inet6_dev *idev = in6_dev_get(dev);
2597 	struct net *net = dev_net(dev);
2598 
2599 	if (unlikely(!idev))
2600 		return ERR_PTR(-ENODEV);
2601 
2602 	rt = ip6_dst_alloc(net, dev, 0);
2603 	if (unlikely(!rt)) {
2604 		in6_dev_put(idev);
2605 		dst = ERR_PTR(-ENOMEM);
2606 		goto out;
2607 	}
2608 
2609 	rt->dst.flags |= DST_HOST;
2610 	rt->dst.input = ip6_input;
2611 	rt->dst.output  = ip6_output;
2612 	rt->rt6i_gateway  = fl6->daddr;
2613 	rt->rt6i_dst.addr = fl6->daddr;
2614 	rt->rt6i_dst.plen = 128;
2615 	rt->rt6i_idev     = idev;
2616 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2617 
2618 	/* Add this dst into uncached_list, so that rt6_disable_ip() can
2619 	 * properly release the net_device.
2620 	 */
2621 	rt6_uncached_list_add(rt);
2622 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2623 
2624 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2625 
2626 out:
2627 	return dst;
2628 }
2629 
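/* dst garbage collection callback: skip the scan when the last run was
 * recent and the table is within ip6_rt_max_size.  A nonzero return
 * tells dst_alloc() that the entry limit is still exceeded, making the
 * allocation fail.
 */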
2630 static int ip6_dst_gc(struct dst_ops *ops)
2631 {
2632 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2633 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2634 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2635 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2636 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2637 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2638 	int entries;
2639 
2640 	entries = dst_entries_get_fast(ops);
2641 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2642 	    entries <= rt_max_size)
2643 		goto out;
2644 
2645 	net->ipv6.ip6_rt_gc_expire++;
2646 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2647 	entries = dst_entries_get_slow(ops);
2648 	if (entries < ops->gc_thresh)
2649 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2650 out:
2651 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2652 	return entries > rt_max_size;
2653 }
2654 
2655 static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2656 			       struct fib6_config *cfg)
2657 {
2658 	struct dst_metrics *p;
2659 
2660 	if (!cfg->fc_mx)
2661 		return 0;
2662 
2663 	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2664 	if (unlikely(!p))
2665 		return -ENOMEM;
2666 
2667 	refcount_set(&p->refcnt, 1);
2668 	rt->fib6_metrics = p;
2669 
2670 	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
2671 }
2672 
2673 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2674 					    struct fib6_config *cfg,
2675 					    const struct in6_addr *gw_addr,
2676 					    u32 tbid, int flags)
2677 {
2678 	struct flowi6 fl6 = {
2679 		.flowi6_oif = cfg->fc_ifindex,
2680 		.daddr = *gw_addr,
2681 		.saddr = cfg->fc_prefsrc,
2682 	};
2683 	struct fib6_table *table;
2684 	struct rt6_info *rt;
2685 
2686 	table = fib6_get_table(net, tbid);
2687 	if (!table)
2688 		return NULL;
2689 
2690 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2691 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2692 
2693 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2694 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2695 
2696 	/* if table lookup failed, fall back to full lookup */
2697 	if (rt == net->ipv6.ip6_null_entry) {
2698 		ip6_rt_put(rt);
2699 		rt = NULL;
2700 	}
2701 
2702 	return rt;
2703 }
2704 
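/* Validate an RTNH_F_ONLINK nexthop: the gateway must not resolve to a
 * local/anycast/reject route and must be reachable through the device
 * given for the route being added.
 */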
2705 static int ip6_route_check_nh_onlink(struct net *net,
2706 				     struct fib6_config *cfg,
2707 				     const struct net_device *dev,
2708 				     struct netlink_ext_ack *extack)
2709 {
2710 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2711 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2712 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2713 	struct rt6_info *grt;
2714 	int err;
2715 
2716 	err = 0;
2717 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2718 	if (grt) {
2719 		if (!grt->dst.error &&
2720 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2721 			NL_SET_ERR_MSG(extack,
2722 				       "Nexthop has invalid gateway or device mismatch");
2723 			err = -EINVAL;
2724 		}
2725 
2726 		ip6_rt_put(grt);
2727 	}
2728 
2729 	return err;
2730 }
2731 
2732 static int ip6_route_check_nh(struct net *net,
2733 			      struct fib6_config *cfg,
2734 			      struct net_device **_dev,
2735 			      struct inet6_dev **idev)
2736 {
2737 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2738 	struct net_device *dev = _dev ? *_dev : NULL;
2739 	struct rt6_info *grt = NULL;
2740 	int err = -EHOSTUNREACH;
2741 
2742 	if (cfg->fc_table) {
2743 		int flags = RT6_LOOKUP_F_IFACE;
2744 
2745 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2746 					  cfg->fc_table, flags);
2747 		if (grt) {
2748 			if (grt->rt6i_flags & RTF_GATEWAY ||
2749 			    (dev && dev != grt->dst.dev)) {
2750 				ip6_rt_put(grt);
2751 				grt = NULL;
2752 			}
2753 		}
2754 	}
2755 
2756 	if (!grt)
2757 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2758 
2759 	if (!grt)
2760 		goto out;
2761 
2762 	if (dev) {
2763 		if (dev != grt->dst.dev) {
2764 			ip6_rt_put(grt);
2765 			goto out;
2766 		}
2767 	} else {
2768 		*_dev = dev = grt->dst.dev;
2769 		*idev = grt->rt6i_idev;
2770 		dev_hold(dev);
2771 		in6_dev_hold(grt->rt6i_idev);
2772 	}
2773 
2774 	if (!(grt->rt6i_flags & RTF_GATEWAY))
2775 		err = 0;
2776 
2777 	ip6_rt_put(grt);
2778 
2779 out:
2780 	return err;
2781 }
2782 
2783 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2784 			   struct net_device **_dev, struct inet6_dev **idev,
2785 			   struct netlink_ext_ack *extack)
2786 {
2787 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2788 	int gwa_type = ipv6_addr_type(gw_addr);
2789 	bool skip_dev = !(gwa_type & IPV6_ADDR_LINKLOCAL);
2790 	const struct net_device *dev = *_dev;
2791 	bool need_addr_check = !dev;
2792 	int err = -EINVAL;
2793 
2794 	/* If gw_addr is local, we will fail to detect it here while the
2795 	 * address is still TENTATIVE (DAD in progress): rt6_lookup()
2796 	 * will return the already-added prefix route via the interface
2797 	 * the prefix route was assigned to, which might be non-loopback.
2798 	 */
2799 	if (dev &&
2800 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2801 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2802 		goto out;
2803 	}
2804 
2805 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2806 		/* IPv6 strictly inhibits using non-link-local
2807 		 * addresses as nexthop addresses.
2808 		 * Otherwise, the router will not be able to send redirects.
2809 		 * That is very good, but in some (rare!) circumstances
2810 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2811 		 * some exceptions. --ANK
2812 		 * We allow IPv4-mapped nexthops to support RFC4798-type
2813 		 * addressing
2814 		 */
2815 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2816 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
2817 			goto out;
2818 		}
2819 
2820 		if (cfg->fc_flags & RTNH_F_ONLINK)
2821 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2822 		else
2823 			err = ip6_route_check_nh(net, cfg, _dev, idev);
2824 
2825 		if (err)
2826 			goto out;
2827 	}
2828 
2829 	/* reload in case device was changed */
2830 	dev = *_dev;
2831 
2832 	err = -EINVAL;
2833 	if (!dev) {
2834 		NL_SET_ERR_MSG(extack, "Egress device not specified");
2835 		goto out;
2836 	} else if (dev->flags & IFF_LOOPBACK) {
2837 		NL_SET_ERR_MSG(extack,
2838 			       "Egress device can not be loopback device for this route");
2839 		goto out;
2840 	}
2841 
2842 	/* if we did not check gw_addr above, do so now that the
2843 	 * egress device has been resolved.
2844 	 */
2845 	if (need_addr_check &&
2846 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2847 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2848 		goto out;
2849 	}
2850 
2851 	err = 0;
2852 out:
2853 	return err;
2854 }
2855 
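/* Build a fib6_info from a fib6_config: validate flags and prefix
 * lengths, resolve the egress device and (optional) gateway, convert
 * netlink metrics, and promote real routes via loopback to reject
 * routes so they cannot loop packets inside the stack.
 */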
2856 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2857 					      gfp_t gfp_flags,
2858 					      struct netlink_ext_ack *extack)
2859 {
2860 	struct net *net = cfg->fc_nlinfo.nl_net;
2861 	struct fib6_info *rt = NULL;
2862 	struct net_device *dev = NULL;
2863 	struct inet6_dev *idev = NULL;
2864 	struct fib6_table *table;
2865 	int addr_type;
2866 	int err = -EINVAL;
2867 
2868 	/* RTF_PCPU is an internal flag; can not be set by userspace */
2869 	if (cfg->fc_flags & RTF_PCPU) {
2870 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2871 		goto out;
2872 	}
2873 
2874 	/* RTF_CACHE is an internal flag; can not be set by userspace */
2875 	if (cfg->fc_flags & RTF_CACHE) {
2876 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2877 		goto out;
2878 	}
2879 
2880 	if (cfg->fc_type > RTN_MAX) {
2881 		NL_SET_ERR_MSG(extack, "Invalid route type");
2882 		goto out;
2883 	}
2884 
2885 	if (cfg->fc_dst_len > 128) {
2886 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
2887 		goto out;
2888 	}
2889 	if (cfg->fc_src_len > 128) {
2890 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2891 		goto out;
2892 	}
2893 #ifndef CONFIG_IPV6_SUBTREES
2894 	if (cfg->fc_src_len) {
2895 		NL_SET_ERR_MSG(extack,
2896 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
2897 		goto out;
2898 	}
2899 #endif
2900 	if (cfg->fc_ifindex) {
2901 		err = -ENODEV;
2902 		dev = dev_get_by_index(net, cfg->fc_ifindex);
2903 		if (!dev)
2904 			goto out;
2905 		idev = in6_dev_get(dev);
2906 		if (!idev)
2907 			goto out;
2908 	}
2909 
2910 	if (cfg->fc_metric == 0)
2911 		cfg->fc_metric = IP6_RT_PRIO_USER;
2912 
2913 	if (cfg->fc_flags & RTNH_F_ONLINK) {
2914 		if (!dev) {
2915 			NL_SET_ERR_MSG(extack,
2916 				       "Nexthop device required for onlink");
2917 			err = -ENODEV;
2918 			goto out;
2919 		}
2920 
2921 		if (!(dev->flags & IFF_UP)) {
2922 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2923 			err = -ENETDOWN;
2924 			goto out;
2925 		}
2926 	}
2927 
2928 	err = -ENOBUFS;
2929 	if (cfg->fc_nlinfo.nlh &&
2930 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2931 		table = fib6_get_table(net, cfg->fc_table);
2932 		if (!table) {
2933 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2934 			table = fib6_new_table(net, cfg->fc_table);
2935 		}
2936 	} else {
2937 		table = fib6_new_table(net, cfg->fc_table);
2938 	}
2939 
2940 	if (!table)
2941 		goto out;
2942 
2943 	err = -ENOMEM;
2944 	rt = fib6_info_alloc(gfp_flags);
2945 	if (!rt)
2946 		goto out;
2947 
2948 	if (cfg->fc_flags & RTF_ADDRCONF)
2949 		rt->dst_nocount = true;
2950 
2951 	err = ip6_convert_metrics(net, rt, cfg);
2952 	if (err < 0)
2953 		goto out;
2954 
2955 	if (cfg->fc_flags & RTF_EXPIRES)
2956 		fib6_set_expires(rt, jiffies +
2957 				clock_t_to_jiffies(cfg->fc_expires));
2958 	else
2959 		fib6_clean_expires(rt);
2960 
2961 	if (cfg->fc_protocol == RTPROT_UNSPEC)
2962 		cfg->fc_protocol = RTPROT_BOOT;
2963 	rt->fib6_protocol = cfg->fc_protocol;
2964 
2965 	addr_type = ipv6_addr_type(&cfg->fc_dst);
2966 
2967 	if (cfg->fc_encap) {
2968 		struct lwtunnel_state *lwtstate;
2969 
2970 		err = lwtunnel_build_state(cfg->fc_encap_type,
2971 					   cfg->fc_encap, AF_INET6, cfg,
2972 					   &lwtstate, extack);
2973 		if (err)
2974 			goto out;
2975 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
2976 	}
2977 
2978 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2979 	rt->fib6_dst.plen = cfg->fc_dst_len;
2980 	if (rt->fib6_dst.plen == 128)
2981 		rt->dst_host = true;
2982 
2983 #ifdef CONFIG_IPV6_SUBTREES
2984 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
2985 	rt->fib6_src.plen = cfg->fc_src_len;
2986 #endif
2987 
2988 	rt->fib6_metric = cfg->fc_metric;
2989 	rt->fib6_nh.nh_weight = 1;
2990 
2991 	rt->fib6_type = cfg->fc_type;
2992 
2993 	/* We cannot add true routes via loopback here;
2994 	 * they would result in kernel looping.  Promote them to reject routes.
2995 	 */
2996 	if ((cfg->fc_flags & RTF_REJECT) ||
2997 	    (dev && (dev->flags & IFF_LOOPBACK) &&
2998 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
2999 	     !(cfg->fc_flags & RTF_LOCAL))) {
3000 		/* hold loopback dev/idev if we haven't done so. */
3001 		if (dev != net->loopback_dev) {
3002 			if (dev) {
3003 				dev_put(dev);
3004 				in6_dev_put(idev);
3005 			}
3006 			dev = net->loopback_dev;
3007 			dev_hold(dev);
3008 			idev = in6_dev_get(dev);
3009 			if (!idev) {
3010 				err = -ENODEV;
3011 				goto out;
3012 			}
3013 		}
3014 		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3015 		goto install_route;
3016 	}
3017 
3018 	if (cfg->fc_flags & RTF_GATEWAY) {
3019 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3020 		if (err)
3021 			goto out;
3022 
3023 		rt->fib6_nh.nh_gw = cfg->fc_gateway;
3024 	}
3025 
3026 	err = -ENODEV;
3027 	if (!dev)
3028 		goto out;
3029 
3030 	if (idev->cnf.disable_ipv6) {
3031 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3032 		err = -EACCES;
3033 		goto out;
3034 	}
3035 
3036 	if (!(dev->flags & IFF_UP)) {
3037 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3038 		err = -ENETDOWN;
3039 		goto out;
3040 	}
3041 
3042 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3043 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3044 			NL_SET_ERR_MSG(extack, "Invalid source address");
3045 			err = -EINVAL;
3046 			goto out;
3047 		}
3048 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3049 		rt->fib6_prefsrc.plen = 128;
3050 	} else
3051 		rt->fib6_prefsrc.plen = 0;
3052 
3053 	rt->fib6_flags = cfg->fc_flags;
3054 
3055 install_route:
3056 	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3057 	    !netif_carrier_ok(dev))
3058 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3059 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3060 	rt->fib6_nh.nh_dev = dev;
3061 	rt->fib6_table = table;
3062 
3063 	cfg->fc_nlinfo.nl_net = dev_net(dev);
3064 
3065 	if (idev)
3066 		in6_dev_put(idev);
3067 
3068 	return rt;
3069 out:
3070 	if (dev)
3071 		dev_put(dev);
3072 	if (idev)
3073 		in6_dev_put(idev);
3074 
3075 	fib6_info_release(rt);
3076 	return ERR_PTR(err);
3077 }
3078 
3079 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3080 		  struct netlink_ext_ack *extack)
3081 {
3082 	struct fib6_info *rt;
3083 	int err;
3084 
3085 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3086 	if (IS_ERR(rt))
3087 		return PTR_ERR(rt);
3088 
3089 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3090 	fib6_info_release(rt);
3091 
3092 	return err;
3093 }
3094 
3095 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3096 {
3097 	struct net *net = info->nl_net;
3098 	struct fib6_table *table;
3099 	int err;
3100 
3101 	if (rt == net->ipv6.fib6_null_entry) {
3102 		err = -ENOENT;
3103 		goto out;
3104 	}
3105 
3106 	table = rt->fib6_table;
3107 	spin_lock_bh(&table->tb6_lock);
3108 	err = fib6_del(rt, info);
3109 	spin_unlock_bh(&table->tb6_lock);
3110 
3111 out:
3112 	fib6_info_release(rt);
3113 	return err;
3114 }
3115 
3116 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3117 {
3118 	struct nl_info info = { .nl_net = net };
3119 
3120 	return __ip6_del_rt(rt, &info);
3121 }
3122 
3123 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3124 {
3125 	struct nl_info *info = &cfg->fc_nlinfo;
3126 	struct net *net = info->nl_net;
3127 	struct sk_buff *skb = NULL;
3128 	struct fib6_table *table;
3129 	int err = -ENOENT;
3130 
3131 	if (rt == net->ipv6.fib6_null_entry)
3132 		goto out_put;
3133 	table = rt->fib6_table;
3134 	spin_lock_bh(&table->tb6_lock);
3135 
3136 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3137 		struct fib6_info *sibling, *next_sibling;
3138 
3139 		/* prefer to send a single notification with all hops */
3140 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3141 		if (skb) {
3142 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3143 
3144 			if (rt6_fill_node(net, skb, rt, NULL,
3145 					  NULL, NULL, 0, RTM_DELROUTE,
3146 					  info->portid, seq, 0) < 0) {
3147 				kfree_skb(skb);
3148 				skb = NULL;
3149 			} else
3150 				info->skip_notify = 1;
3151 		}
3152 
3153 		list_for_each_entry_safe(sibling, next_sibling,
3154 					 &rt->fib6_siblings,
3155 					 fib6_siblings) {
3156 			err = fib6_del(sibling, info);
3157 			if (err)
3158 				goto out_unlock;
3159 		}
3160 	}
3161 
3162 	err = fib6_del(rt, info);
3163 out_unlock:
3164 	spin_unlock_bh(&table->tb6_lock);
3165 out_put:
3166 	fib6_info_release(rt);
3167 
3168 	if (skb) {
3169 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3170 			    info->nlh, gfp_any());
3171 	}
3172 	return err;
3173 }
3174 
3175 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3176 {
3177 	int rc = -ESRCH;
3178 
3179 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3180 		goto out;
3181 
3182 	if (cfg->fc_flags & RTF_GATEWAY &&
3183 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3184 		goto out;
3185 	if (dst_hold_safe(&rt->dst))
3186 		rc = rt6_remove_exception_rt(rt);
3187 out:
3188 	return rc;
3189 }
3190 
3191 static int ip6_route_del(struct fib6_config *cfg,
3192 			 struct netlink_ext_ack *extack)
3193 {
3194 	struct rt6_info *rt_cache;
3195 	struct fib6_table *table;
3196 	struct fib6_info *rt;
3197 	struct fib6_node *fn;
3198 	int err = -ESRCH;
3199 
3200 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3201 	if (!table) {
3202 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3203 		return err;
3204 	}
3205 
3206 	rcu_read_lock();
3207 
3208 	fn = fib6_locate(&table->tb6_root,
3209 			 &cfg->fc_dst, cfg->fc_dst_len,
3210 			 &cfg->fc_src, cfg->fc_src_len,
3211 			 !(cfg->fc_flags & RTF_CACHE));
3212 
3213 	if (fn) {
3214 		for_each_fib6_node_rt_rcu(fn) {
3215 			if (cfg->fc_flags & RTF_CACHE) {
3216 				int rc;
3217 
3218 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3219 							      &cfg->fc_src);
3220 				if (rt_cache) {
3221 					rc = ip6_del_cached_rt(rt_cache, cfg);
3222 					if (rc != -ESRCH)
3223 						return rc;
3224 				}
3225 				continue;
3226 			}
3227 			if (cfg->fc_ifindex &&
3228 			    (!rt->fib6_nh.nh_dev ||
3229 			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3230 				continue;
3231 			if (cfg->fc_flags & RTF_GATEWAY &&
3232 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3233 				continue;
3234 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3235 				continue;
3236 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3237 				continue;
3238 			fib6_info_hold(rt);
3239 			rcu_read_unlock();
3240 
3241 			/* if gateway was specified only delete the one hop */
3242 			if (cfg->fc_flags & RTF_GATEWAY)
3243 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3244 
3245 			return __ip6_del_rt_siblings(rt, cfg);
3246 		}
3247 	}
3248 	rcu_read_unlock();
3249 
3250 	return err;
3251 }
3252 
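/* Process an ND redirect: validate it per RFC 4861 (the target must be
 * link-local unicast unless it equals the destination, and the source
 * must be our current first hop), update the neighbour cache, then
 * install an RTF_CACHE exception pointing at the new gateway and fire
 * a NETEVENT_REDIRECT notification.
 */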
3253 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3254 {
3255 	struct netevent_redirect netevent;
3256 	struct rt6_info *rt, *nrt = NULL;
3257 	struct ndisc_options ndopts;
3258 	struct inet6_dev *in6_dev;
3259 	struct neighbour *neigh;
3260 	struct fib6_info *from;
3261 	struct rd_msg *msg;
3262 	int optlen, on_link;
3263 	u8 *lladdr;
3264 
3265 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3266 	optlen -= sizeof(*msg);
3267 
3268 	if (optlen < 0) {
3269 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3270 		return;
3271 	}
3272 
3273 	msg = (struct rd_msg *)icmp6_hdr(skb);
3274 
3275 	if (ipv6_addr_is_multicast(&msg->dest)) {
3276 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3277 		return;
3278 	}
3279 
3280 	on_link = 0;
3281 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3282 		on_link = 1;
3283 	} else if (ipv6_addr_type(&msg->target) !=
3284 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3285 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3286 		return;
3287 	}
3288 
3289 	in6_dev = __in6_dev_get(skb->dev);
3290 	if (!in6_dev)
3291 		return;
3292 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3293 		return;
3294 
3295 	/* RFC2461 8.1:
3296 	 *	The IP source address of the Redirect MUST be the same as the current
3297 	 *	first-hop router for the specified ICMP Destination Address.
3298 	 */
3299 
3300 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3301 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3302 		return;
3303 	}
3304 
3305 	lladdr = NULL;
3306 	if (ndopts.nd_opts_tgt_lladdr) {
3307 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3308 					     skb->dev);
3309 		if (!lladdr) {
3310 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3311 			return;
3312 		}
3313 	}
3314 
3315 	rt = (struct rt6_info *) dst;
3316 	if (rt->rt6i_flags & RTF_REJECT) {
3317 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3318 		return;
3319 	}
3320 
3321 	/* Redirect received -> path was valid.
3322 	 * Look, redirects are sent only in response to data packets,
3323 	 * so this nexthop is apparently reachable. --ANK
3324 	 */
3325 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3326 
3327 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3328 	if (!neigh)
3329 		return;
3330 
3331 	/*
3332 	 *	We have finally decided to accept it.
3333 	 */
3334 
3335 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3336 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3337 		     NEIGH_UPDATE_F_OVERRIDE|
3338 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3339 				     NEIGH_UPDATE_F_ISROUTER)),
3340 		     NDISC_REDIRECT, &ndopts);
3341 
3342 	rcu_read_lock();
3343 	from = rcu_dereference(rt->from);
3344 	fib6_info_hold(from);
3345 	rcu_read_unlock();
3346 
3347 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3348 	if (!nrt)
3349 		goto out;
3350 
3351 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3352 	if (on_link)
3353 		nrt->rt6i_flags &= ~RTF_GATEWAY;
3354 
3355 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3356 
3357 	/* No need to remove rt from the exception table if rt is
3358 	 * a cached route because rt6_insert_exception() will
3359 	 * take care of it.
3360 	 */
3361 	if (rt6_insert_exception(nrt, from)) {
3362 		dst_release_immediate(&nrt->dst);
3363 		goto out;
3364 	}
3365 
3366 	netevent.old = &rt->dst;
3367 	netevent.new = &nrt->dst;
3368 	netevent.daddr = &msg->dest;
3369 	netevent.neigh = neigh;
3370 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3371 
3372 out:
3373 	fib6_info_release(from);
3374 	neigh_release(neigh);
3375 }
3376 
3377 #ifdef CONFIG_IPV6_ROUTE_INFO
3378 static struct fib6_info *rt6_get_route_info(struct net *net,
3379 					   const struct in6_addr *prefix, int prefixlen,
3380 					   const struct in6_addr *gwaddr,
3381 					   struct net_device *dev)
3382 {
3383 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3384 	int ifindex = dev->ifindex;
3385 	struct fib6_node *fn;
3386 	struct fib6_info *rt = NULL;
3387 	struct fib6_table *table;
3388 
3389 	table = fib6_get_table(net, tb_id);
3390 	if (!table)
3391 		return NULL;
3392 
3393 	rcu_read_lock();
3394 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3395 	if (!fn)
3396 		goto out;
3397 
3398 	for_each_fib6_node_rt_rcu(fn) {
3399 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3400 			continue;
3401 		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3402 			continue;
3403 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3404 			continue;
3405 		fib6_info_hold(rt);
3406 		break;
3407 	}
3408 out:
3409 	rcu_read_unlock();
3410 	return rt;
3411 }
3412 
3413 static struct fib6_info *rt6_add_route_info(struct net *net,
3414 					   const struct in6_addr *prefix, int prefixlen,
3415 					   const struct in6_addr *gwaddr,
3416 					   struct net_device *dev,
3417 					   unsigned int pref)
3418 {
3419 	struct fib6_config cfg = {
3420 		.fc_metric	= IP6_RT_PRIO_USER,
3421 		.fc_ifindex	= dev->ifindex,
3422 		.fc_dst_len	= prefixlen,
3423 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3424 				  RTF_UP | RTF_PREF(pref),
3425 		.fc_protocol = RTPROT_RA,
3426 		.fc_type = RTN_UNICAST,
3427 		.fc_nlinfo.portid = 0,
3428 		.fc_nlinfo.nlh = NULL,
3429 		.fc_nlinfo.nl_net = net,
3430 	};
3431 
3432 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3433 	cfg.fc_dst = *prefix;
3434 	cfg.fc_gateway = *gwaddr;
3435 
3436 	/* We should treat it as a default route if prefix length is 0. */
3437 	if (!prefixlen)
3438 		cfg.fc_flags |= RTF_DEFAULT;
3439 
3440 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3441 
3442 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3443 }
3444 #endif
3445 
3446 struct fib6_info *rt6_get_dflt_router(struct net *net,
3447 				     const struct in6_addr *addr,
3448 				     struct net_device *dev)
3449 {
3450 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3451 	struct fib6_info *rt;
3452 	struct fib6_table *table;
3453 
3454 	table = fib6_get_table(net, tb_id);
3455 	if (!table)
3456 		return NULL;
3457 
3458 	rcu_read_lock();
3459 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3460 		if (dev == rt->fib6_nh.nh_dev &&
3461 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3462 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3463 			break;
3464 	}
3465 	if (rt)
3466 		fib6_info_hold(rt);
3467 	rcu_read_unlock();
3468 	return rt;
3469 }
3470 
3471 struct fib6_info *rt6_add_dflt_router(struct net *net,
3472 				     const struct in6_addr *gwaddr,
3473 				     struct net_device *dev,
3474 				     unsigned int pref)
3475 {
3476 	struct fib6_config cfg = {
3477 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3478 		.fc_metric	= IP6_RT_PRIO_USER,
3479 		.fc_ifindex	= dev->ifindex,
3480 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3481 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3482 		.fc_protocol = RTPROT_RA,
3483 		.fc_type = RTN_UNICAST,
3484 		.fc_nlinfo.portid = 0,
3485 		.fc_nlinfo.nlh = NULL,
3486 		.fc_nlinfo.nl_net = net,
3487 	};
3488 
3489 	cfg.fc_gateway = *gwaddr;
3490 
3491 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3492 		struct fib6_table *table;
3493 
3494 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3495 		if (table)
3496 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3497 	}
3498 
3499 	return rt6_get_dflt_router(net, gwaddr, dev);
3500 }
3501 
3502 static void __rt6_purge_dflt_routers(struct net *net,
3503 				     struct fib6_table *table)
3504 {
3505 	struct fib6_info *rt;
3506 
3507 restart:
3508 	rcu_read_lock();
3509 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3510 		struct net_device *dev = fib6_info_nh_dev(rt);
3511 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3512 
3513 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3514 		    (!idev || idev->cnf.accept_ra != 2)) {
3515 			fib6_info_hold(rt);
3516 			rcu_read_unlock();
3517 			ip6_del_rt(net, rt);
3518 			goto restart;
3519 		}
3520 	}
3521 	rcu_read_unlock();
3522 
3523 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3524 }
3525 
3526 void rt6_purge_dflt_routers(struct net *net)
3527 {
3528 	struct fib6_table *table;
3529 	struct hlist_head *head;
3530 	unsigned int h;
3531 
3532 	rcu_read_lock();
3533 
3534 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3535 		head = &net->ipv6.fib_table_hash[h];
3536 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3537 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3538 				__rt6_purge_dflt_routers(net, table);
3539 		}
3540 	}
3541 
3542 	rcu_read_unlock();
3543 }
3544 
3545 static void rtmsg_to_fib6_config(struct net *net,
3546 				 struct in6_rtmsg *rtmsg,
3547 				 struct fib6_config *cfg)
3548 {
3549 	memset(cfg, 0, sizeof(*cfg));
3550 
3551 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3552 			 : RT6_TABLE_MAIN;
3553 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3554 	cfg->fc_metric = rtmsg->rtmsg_metric;
3555 	cfg->fc_expires = rtmsg->rtmsg_info;
3556 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3557 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3558 	cfg->fc_flags = rtmsg->rtmsg_flags;
3559 	cfg->fc_type = rtmsg->rtmsg_type;
3560 
3561 	cfg->fc_nlinfo.nl_net = net;
3562 
3563 	cfg->fc_dst = rtmsg->rtmsg_dst;
3564 	cfg->fc_src = rtmsg->rtmsg_src;
3565 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3566 }
3567 
3568 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3569 {
3570 	struct fib6_config cfg;
3571 	struct in6_rtmsg rtmsg;
3572 	int err;
3573 
3574 	switch (cmd) {
3575 	case SIOCADDRT:		/* Add a route */
3576 	case SIOCDELRT:		/* Delete a route */
3577 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3578 			return -EPERM;
3579 		err = copy_from_user(&rtmsg, arg,
3580 				     sizeof(struct in6_rtmsg));
3581 		if (err)
3582 			return -EFAULT;
3583 
3584 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3585 
3586 		rtnl_lock();
3587 		switch (cmd) {
3588 		case SIOCADDRT:
3589 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3590 			break;
3591 		case SIOCDELRT:
3592 			err = ip6_route_del(&cfg, NULL);
3593 			break;
3594 		default:
3595 			err = -EINVAL;
3596 		}
3597 		rtnl_unlock();
3598 
3599 		return err;
3600 	}
3601 
3602 	return -EINVAL;
3603 }
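/* Userspace reaches the handler above via SIOCADDRT/SIOCDELRT on an
 * AF_INET6 socket (CAP_NET_ADMIN required).  A minimal sketch, where
 * fd is an AF_INET6 socket and dst a struct in6_addr prefix:
 *
 *	struct in6_rtmsg rtm = { 0 };
 *
 *	rtm.rtmsg_dst = dst;
 *	rtm.rtmsg_dst_len = 64;
 *	rtm.rtmsg_flags = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtm);
 */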
3604 
3605 /*
3606  *	Drop the packet on the floor
3607  */
3608 
3609 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3610 {
3611 	int type;
3612 	struct dst_entry *dst = skb_dst(skb);
3613 	switch (ipstats_mib_noroutes) {
3614 	case IPSTATS_MIB_INNOROUTES:
3615 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3616 		if (type == IPV6_ADDR_ANY) {
3617 			IP6_INC_STATS(dev_net(dst->dev),
3618 				      __in6_dev_get_safely(skb->dev),
3619 				      IPSTATS_MIB_INADDRERRORS);
3620 			break;
3621 		}
3622 		/* FALLTHROUGH */
3623 	case IPSTATS_MIB_OUTNOROUTES:
3624 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3625 			      ipstats_mib_noroutes);
3626 		break;
3627 	}
3628 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3629 	kfree_skb(skb);
3630 	return 0;
3631 }
3632 
3633 static int ip6_pkt_discard(struct sk_buff *skb)
3634 {
3635 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3636 }
3637 
3638 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3639 {
3640 	skb->dev = skb_dst(skb)->dev;
3641 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3642 }
3643 
3644 static int ip6_pkt_prohibit(struct sk_buff *skb)
3645 {
3646 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3647 }
3648 
3649 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3650 {
3651 	skb->dev = skb_dst(skb)->dev;
3652 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3653 }
3654 
3655 /*
3656  *	Allocate a dst for a local (unicast / anycast) address.
3657  */
3658 
3659 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3660 				     struct inet6_dev *idev,
3661 				     const struct in6_addr *addr,
3662 				     bool anycast, gfp_t gfp_flags)
3663 {
3664 	u32 tb_id;
3665 	struct net_device *dev = idev->dev;
3666 	struct fib6_info *f6i;
3667 
3668 	f6i = fib6_info_alloc(gfp_flags);
3669 	if (!f6i)
3670 		return ERR_PTR(-ENOMEM);
3671 
3672 	f6i->dst_nocount = true;
3673 	f6i->dst_host = true;
3674 	f6i->fib6_protocol = RTPROT_KERNEL;
3675 	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3676 	if (anycast) {
3677 		f6i->fib6_type = RTN_ANYCAST;
3678 		f6i->fib6_flags |= RTF_ANYCAST;
3679 	} else {
3680 		f6i->fib6_type = RTN_LOCAL;
3681 		f6i->fib6_flags |= RTF_LOCAL;
3682 	}
3683 
3684 	f6i->fib6_nh.nh_gw = *addr;
3685 	dev_hold(dev);
3686 	f6i->fib6_nh.nh_dev = dev;
3687 	f6i->fib6_dst.addr = *addr;
3688 	f6i->fib6_dst.plen = 128;
3689 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3690 	f6i->fib6_table = fib6_get_table(net, tb_id);
3691 
3692 	return f6i;
3693 }
3694 
3695 /* Remove a deleted IP from prefsrc entries */
3696 struct arg_dev_net_ip {
3697 	struct net_device *dev;
3698 	struct net *net;
3699 	struct in6_addr *addr;
3700 };
3701 
3702 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3703 {
3704 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3705 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3706 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3707 
3708 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3709 	    rt != net->ipv6.fib6_null_entry &&
3710 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3711 		spin_lock_bh(&rt6_exception_lock);
3712 		/* remove prefsrc entry */
3713 		rt->fib6_prefsrc.plen = 0;
3714 		/* need to update cache as well */
3715 		rt6_exceptions_remove_prefsrc(rt);
3716 		spin_unlock_bh(&rt6_exception_lock);
3717 	}
3718 	return 0;
3719 }
3720 
3721 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3722 {
3723 	struct net *net = dev_net(ifp->idev->dev);
3724 	struct arg_dev_net_ip adni = {
3725 		.dev = ifp->idev->dev,
3726 		.net = net,
3727 		.addr = &ifp->addr,
3728 	};
3729 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3730 }
3731 
3732 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3733 
3734 /* Remove routers and update dst entries when a gateway turns into a host. */
3735 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3736 {
3737 	struct in6_addr *gateway = (struct in6_addr *)arg;
3738 
3739 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3740 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3741 		return -1;
3742 	}
3743 
3744 	/* Further clean up cached routes in the exception table.
3745 	 * This is needed because a cached route may have a different
3746 	 * gateway than its 'parent' in the case of an IP redirect.
3747 	 */
3748 	rt6_exceptions_clean_tohost(rt, gateway);
3749 
3750 	return 0;
3751 }
3752 
3753 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3754 {
3755 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3756 }
3757 
3758 struct arg_netdev_event {
3759 	const struct net_device *dev;
3760 	union {
3761 		unsigned int nh_flags;
3762 		unsigned long event;
3763 	};
3764 };
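/* One clean-walk argument serves both directions: rt6_sync_up() passes
 * nh_flags to clear, while rt6_sync_down_dev() passes the netdev
 * notifier event, hence the union.
 */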
3765 
3766 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3767 {
3768 	struct fib6_info *iter;
3769 	struct fib6_node *fn;
3770 
3771 	fn = rcu_dereference_protected(rt->fib6_node,
3772 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3773 	iter = rcu_dereference_protected(fn->leaf,
3774 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3775 	while (iter) {
3776 		if (iter->fib6_metric == rt->fib6_metric &&
3777 		    rt6_qualify_for_ecmp(iter))
3778 			return iter;
3779 		iter = rcu_dereference_protected(iter->rt6_next,
3780 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3781 	}
3782 
3783 	return NULL;
3784 }
3785 
3786 static bool rt6_is_dead(const struct fib6_info *rt)
3787 {
3788 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3789 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3790 	     fib6_ignore_linkdown(rt)))
3791 		return true;
3792 
3793 	return false;
3794 }
3795 
3796 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3797 {
3798 	struct fib6_info *iter;
3799 	int total = 0;
3800 
3801 	if (!rt6_is_dead(rt))
3802 		total += rt->fib6_nh.nh_weight;
3803 
3804 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3805 		if (!rt6_is_dead(iter))
3806 			total += iter->fib6_nh.nh_weight;
3807 	}
3808 
3809 	return total;
3810 }
3811 
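/* Hash-threshold multipath: each live nexthop is assigned an upper
 * bound equal to its cumulative share of the total weight scaled to
 * 2^31; lookups then compare the flow hash against these bounds.
 * Dead nexthops get -1 so they can never match.
 */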
3812 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3813 {
3814 	int upper_bound = -1;
3815 
3816 	if (!rt6_is_dead(rt)) {
3817 		*weight += rt->fib6_nh.nh_weight;
3818 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3819 						    total) - 1;
3820 	}
3821 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3822 }
3823 
3824 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3825 {
3826 	struct fib6_info *iter;
3827 	int weight = 0;
3828 
3829 	rt6_upper_bound_set(rt, &weight, total);
3830 
3831 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3832 		rt6_upper_bound_set(iter, &weight, total);
3833 }
3834 
3835 void rt6_multipath_rebalance(struct fib6_info *rt)
3836 {
3837 	struct fib6_info *first;
3838 	int total;
3839 
3840 	/* If the entire multipath route was marked for flushing,
3841 	 * there is no need to rebalance upon the removal of every
3842 	 * sibling route.
3843 	 */
3844 	if (!rt->fib6_nsiblings || rt->should_flush)
3845 		return;
3846 
3847 	/* During lookup routes are evaluated in order, so we need to
3848 	 * make sure upper bounds are assigned from the first sibling
3849 	 * onwards.
3850 	 */
3851 	first = rt6_multipath_first_sibling(rt);
3852 	if (WARN_ON_ONCE(!first))
3853 		return;
3854 
3855 	total = rt6_multipath_total_weight(first);
3856 	rt6_multipath_upper_bound_set(first, total);
3857 }
3858 
3859 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3860 {
3861 	const struct arg_netdev_event *arg = p_arg;
3862 	struct net *net = dev_net(arg->dev);
3863 
3864 	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3865 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3866 		fib6_update_sernum_upto_root(net, rt);
3867 		rt6_multipath_rebalance(rt);
3868 	}
3869 
3870 	return 0;
3871 }
3872 
3873 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3874 {
3875 	struct arg_netdev_event arg = {
3876 		.dev = dev,
3877 		{
3878 			.nh_flags = nh_flags,
3879 		},
3880 	};
3881 
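	/* The flags in arg are the ones fib6_ifup() will clear; a device
	 * regaining carrier may drop LINKDOWN together with DEAD.
	 */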
3882 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3883 		arg.nh_flags |= RTNH_F_LINKDOWN;
3884 
3885 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3886 }
3887 
3888 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3889 				   const struct net_device *dev)
3890 {
3891 	struct fib6_info *iter;
3892 
3893 	if (rt->fib6_nh.nh_dev == dev)
3894 		return true;
3895 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3896 		if (iter->fib6_nh.nh_dev == dev)
3897 			return true;
3898 
3899 	return false;
3900 }
3901 
3902 static void rt6_multipath_flush(struct fib6_info *rt)
3903 {
3904 	struct fib6_info *iter;
3905 
3906 	rt->should_flush = 1;
3907 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3908 		iter->should_flush = 1;
3909 }
3910 
3911 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3912 					     const struct net_device *down_dev)
3913 {
3914 	struct fib6_info *iter;
3915 	unsigned int dead = 0;
3916 
3917 	if (rt->fib6_nh.nh_dev == down_dev ||
3918 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3919 		dead++;
3920 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3921 		if (iter->fib6_nh.nh_dev == down_dev ||
3922 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3923 			dead++;
3924 
3925 	return dead;
3926 }
3927 
3928 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3929 				       const struct net_device *dev,
3930 				       unsigned int nh_flags)
3931 {
3932 	struct fib6_info *iter;
3933 
3934 	if (rt->fib6_nh.nh_dev == dev)
3935 		rt->fib6_nh.nh_flags |= nh_flags;
3936 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3937 		if (iter->fib6_nh.nh_dev == dev)
3938 			iter->fib6_nh.nh_flags |= nh_flags;
3939 }
3940 
3941 /* called with write lock held for table with rt */
3942 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3943 {
3944 	const struct arg_netdev_event *arg = p_arg;
3945 	const struct net_device *dev = arg->dev;
3946 	struct net *net = dev_net(dev);
3947 
3948 	if (rt == net->ipv6.fib6_null_entry)
3949 		return 0;
3950 
3951 	switch (arg->event) {
3952 	case NETDEV_UNREGISTER:
3953 		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3954 	case NETDEV_DOWN:
3955 		if (rt->should_flush)
3956 			return -1;
3957 		if (!rt->fib6_nsiblings)
3958 			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3959 		if (rt6_multipath_uses_dev(rt, dev)) {
3960 			unsigned int count;
3961 
3962 			count = rt6_multipath_dead_count(rt, dev);
3963 			if (rt->fib6_nsiblings + 1 == count) {
3964 				rt6_multipath_flush(rt);
3965 				return -1;
3966 			}
3967 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3968 						   RTNH_F_LINKDOWN);
3969 			fib6_update_sernum(net, rt);
3970 			rt6_multipath_rebalance(rt);
3971 		}
3972 		return -2;
3973 	case NETDEV_CHANGE:
3974 		if (rt->fib6_nh.nh_dev != dev ||
3975 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
3976 			break;
3977 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3978 		rt6_multipath_rebalance(rt);
3979 		break;
3980 	}
3981 
3982 	return 0;
3983 }
3984 
3985 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
3986 {
3987 	struct arg_netdev_event arg = {
3988 		.dev = dev,
3989 		{
3990 			.event = event,
3991 		},
3992 	};
3993 
3994 	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3995 }
3996 
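/* Tear down IPv6 routing state for a device going away: sync the FIB
 * with the event, flush uncached routes referencing the device and
 * purge its entries from the neighbour discovery table.
 */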
3997 void rt6_disable_ip(struct net_device *dev, unsigned long event)
3998 {
3999 	rt6_sync_down_dev(dev, event);
4000 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
4001 	neigh_ifdown(&nd_tbl, dev);
4002 }
4003 
4004 struct rt6_mtu_change_arg {
4005 	struct net_device *dev;
4006 	unsigned int mtu;
4007 };
4008 
4009 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4010 {
4011 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4012 	struct inet6_dev *idev;
4013 
4014 	/* In IPv6, Path MTU discovery is mandatory, so a locked
4015 	 * RTAX_MTU metric cannot disable it.
4016 	 * We still honour the lock to block changes caused by
4017 	 * addrconf/ndisc.
4018 	 */
4019 
4020 	idev = __in6_dev_get(arg->dev);
4021 	if (!idev)
4022 		return 0;
4023 
4024 	/* When the MTU is raised administratively (e.g. for jumbo
4025 	 * frames), there is no in-band way to discover the PMTU
4026 	 * increase, so the cached PMTU must be updated here; RFC 1981
4027 	 * does not cover administrative MTU increases.
4028 	 */
4029 	if (rt->fib6_nh.nh_dev == arg->dev &&
4030 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4031 		u32 mtu = rt->fib6_pmtu;
4032 
4033 		if (mtu >= arg->mtu ||
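		/* Shrink the route MTU when it now exceeds the new device
		 * MTU, or raise it when it was merely tracking the previous
		 * device MTU (idev->cnf.mtu6, which at this point still
		 * reflects the pre-change value).
		 */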
4034 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4035 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4036 
4037 		spin_lock_bh(&rt6_exception_lock);
4038 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4039 		spin_unlock_bh(&rt6_exception_lock);
4040 	}
4041 	return 0;
4042 }
4043 
4044 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4045 {
4046 	struct rt6_mtu_change_arg arg = {
4047 		.dev = dev,
4048 		.mtu = mtu,
4049 	};
4050 
4051 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4052 }
4053 
4054 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4055 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4056 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4057 	[RTA_OIF]               = { .type = NLA_U32 },
4058 	[RTA_IIF]		= { .type = NLA_U32 },
4059 	[RTA_PRIORITY]          = { .type = NLA_U32 },
4060 	[RTA_METRICS]           = { .type = NLA_NESTED },
4061 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4062 	[RTA_PREF]              = { .type = NLA_U8 },
4063 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4064 	[RTA_ENCAP]		= { .type = NLA_NESTED },
4065 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4066 	[RTA_UID]		= { .type = NLA_U32 },
4067 	[RTA_MARK]		= { .type = NLA_U32 },
4068 	[RTA_TABLE]		= { .type = NLA_U32 },
4069 };
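/* For reference, a userspace request such as
 * "ip -6 route add 2001:db8::/64 via 2001:db8::1 dev eth0" arrives as
 * an RTM_NEWROUTE message carrying RTA_DST, RTA_GATEWAY and RTA_OIF,
 * parsed by rtm_to_fib6_config() below (RTA_DST has no policy entry;
 * its length is checked by hand since it depends on the prefix length).
 */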
4070 
4071 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4072 			      struct fib6_config *cfg,
4073 			      struct netlink_ext_ack *extack)
4074 {
4075 	struct rtmsg *rtm;
4076 	struct nlattr *tb[RTA_MAX+1];
4077 	unsigned int pref;
4078 	int err;
4079 
4080 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4081 			  NULL);
4082 	if (err < 0)
4083 		goto errout;
4084 
4085 	err = -EINVAL;
4086 	rtm = nlmsg_data(nlh);
4087 	memset(cfg, 0, sizeof(*cfg));
4088 
4089 	cfg->fc_table = rtm->rtm_table;
4090 	cfg->fc_dst_len = rtm->rtm_dst_len;
4091 	cfg->fc_src_len = rtm->rtm_src_len;
4092 	cfg->fc_flags = RTF_UP;
4093 	cfg->fc_protocol = rtm->rtm_protocol;
4094 	cfg->fc_type = rtm->rtm_type;
4095 
4096 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4097 	    rtm->rtm_type == RTN_BLACKHOLE ||
4098 	    rtm->rtm_type == RTN_PROHIBIT ||
4099 	    rtm->rtm_type == RTN_THROW)
4100 		cfg->fc_flags |= RTF_REJECT;
4101 
4102 	if (rtm->rtm_type == RTN_LOCAL)
4103 		cfg->fc_flags |= RTF_LOCAL;
4104 
4105 	if (rtm->rtm_flags & RTM_F_CLONED)
4106 		cfg->fc_flags |= RTF_CACHE;
4107 
4108 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4109 
4110 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
4111 	cfg->fc_nlinfo.nlh = nlh;
4112 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
4113 
4114 	if (tb[RTA_GATEWAY]) {
4115 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4116 		cfg->fc_flags |= RTF_GATEWAY;
4117 	}
4118 
4119 	if (tb[RTA_DST]) {
4120 		int plen = (rtm->rtm_dst_len + 7) >> 3;
4121 
4122 		if (nla_len(tb[RTA_DST]) < plen)
4123 			goto errout;
4124 
4125 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4126 	}
4127 
4128 	if (tb[RTA_SRC]) {
4129 		int plen = (rtm->rtm_src_len + 7) >> 3;
4130 
4131 		if (nla_len(tb[RTA_SRC]) < plen)
4132 			goto errout;
4133 
4134 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4135 	}
4136 
4137 	if (tb[RTA_PREFSRC])
4138 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4139 
4140 	if (tb[RTA_OIF])
4141 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4142 
4143 	if (tb[RTA_PRIORITY])
4144 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4145 
4146 	if (tb[RTA_METRICS]) {
4147 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4148 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4149 	}
4150 
4151 	if (tb[RTA_TABLE])
4152 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4153 
4154 	if (tb[RTA_MULTIPATH]) {
4155 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4156 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4157 
4158 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4159 						     cfg->fc_mp_len, extack);
4160 		if (err < 0)
4161 			goto errout;
4162 	}
4163 
4164 	if (tb[RTA_PREF]) {
4165 		pref = nla_get_u8(tb[RTA_PREF]);
4166 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4167 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4168 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4169 		cfg->fc_flags |= RTF_PREF(pref);
4170 	}
4171 
4172 	if (tb[RTA_ENCAP])
4173 		cfg->fc_encap = tb[RTA_ENCAP];
4174 
4175 	if (tb[RTA_ENCAP_TYPE]) {
4176 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4177 
4178 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4179 		if (err < 0)
4180 			goto errout;
4181 	}
4182 
4183 	if (tb[RTA_EXPIRES]) {
4184 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4185 
4186 		if (addrconf_finite_timeout(timeout)) {
4187 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4188 			cfg->fc_flags |= RTF_EXPIRES;
4189 		}
4190 	}
4191 
4192 	err = 0;
4193 errout:
4194 	return err;
4195 }
4196 
4197 struct rt6_nh {
4198 	struct fib6_info *fib6_info;
4199 	struct fib6_config r_cfg;
4200 	struct list_head next;
4201 };
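/* Per-nexthop staging entry for a multipath add: holds the fib6_info
 * built for the nexthop plus the config needed to delete it again on
 * rollback.
 */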
4202 
4203 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4204 {
4205 	struct rt6_nh *nh;
4206 
4207 	list_for_each_entry(nh, rt6_nh_list, next) {
4208 		pr_warn("multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4209 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4210 		        nh->r_cfg.fc_ifindex);
4211 	}
4212 }
4213 
4214 static int ip6_route_info_append(struct net *net,
4215 				 struct list_head *rt6_nh_list,
4216 				 struct fib6_info *rt,
4217 				 struct fib6_config *r_cfg)
4218 {
4219 	struct rt6_nh *nh;
4220 	int err = -EEXIST;
4221 
4222 	list_for_each_entry(nh, rt6_nh_list, next) {
4223 		/* check if fib6_info already exists */
4224 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4225 			return err;
4226 	}
4227 
4228 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4229 	if (!nh)
4230 		return -ENOMEM;
4231 	nh->fib6_info = rt;
4232 	err = ip6_convert_metrics(net, rt, r_cfg);
4233 	if (err) {
4234 		kfree(nh);
4235 		return err;
4236 	}
4237 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4238 	list_add_tail(&nh->next, rt6_nh_list);
4239 
4240 	return 0;
4241 }
4242 
4243 static void ip6_route_mpath_notify(struct fib6_info *rt,
4244 				   struct fib6_info *rt_last,
4245 				   struct nl_info *info,
4246 				   __u16 nlflags)
4247 {
4248 	/* If this is an APPEND route, rt points to the first route
4249 	 * inserted and rt_last points to the last one. Userspace wants
4250 	 * a consistent dump of the route which starts at the first
4251 	 * nexthop. Since sibling routes are always added at the end of
4252 	 * the list, find the first sibling of the last route appended.
4253 	 */
4254 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4255 		rt = list_first_entry(&rt_last->fib6_siblings,
4256 				      struct fib6_info,
4257 				      fib6_siblings);
4258 	}
4259 
4260 	if (rt)
4261 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4262 }
4263 
4264 static int ip6_route_multipath_add(struct fib6_config *cfg,
4265 				   struct netlink_ext_ack *extack)
4266 {
4267 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4268 	struct nl_info *info = &cfg->fc_nlinfo;
4269 	struct fib6_config r_cfg;
4270 	struct rtnexthop *rtnh;
4271 	struct fib6_info *rt;
4272 	struct rt6_nh *err_nh;
4273 	struct rt6_nh *nh, *nh_safe;
4274 	__u16 nlflags;
4275 	int remaining;
4276 	int attrlen;
4277 	int err = 1;
4278 	int nhn = 0;
4279 	int replace = (cfg->fc_nlinfo.nlh &&
4280 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4281 	LIST_HEAD(rt6_nh_list);
4282 
4283 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4284 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4285 		nlflags |= NLM_F_APPEND;
4286 
4287 	remaining = cfg->fc_mp_len;
4288 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4289 
4290 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
4291 	 * fib6_info structs per nexthop
4292 	 */
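	/* Each entry in the RTA_MULTIPATH payload is a struct rtnexthop
	 * header followed by that nexthop's own nested attributes (e.g.
	 * RTA_GATEWAY, RTA_ENCAP); rtnh_ok()/rtnh_next() walk the list.
	 */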
4293 	while (rtnh_ok(rtnh, remaining)) {
4294 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4295 		if (rtnh->rtnh_ifindex)
4296 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4297 
4298 		attrlen = rtnh_attrlen(rtnh);
4299 		if (attrlen > 0) {
4300 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4301 
4302 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4303 			if (nla) {
4304 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
4305 				r_cfg.fc_flags |= RTF_GATEWAY;
4306 			}
4307 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4308 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4309 			if (nla)
4310 				r_cfg.fc_encap_type = nla_get_u16(nla);
4311 		}
4312 
4313 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4314 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4315 		if (IS_ERR(rt)) {
4316 			err = PTR_ERR(rt);
4317 			rt = NULL;
4318 			goto cleanup;
4319 		}
4320 
4321 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4322 
4323 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4324 					    rt, &r_cfg);
4325 		if (err) {
4326 			fib6_info_release(rt);
4327 			goto cleanup;
4328 		}
4329 
4330 		rtnh = rtnh_next(rtnh, &remaining);
4331 	}
4332 
4333 	/* For add and replace, send one notification with all nexthops:
4334 	 * skip the notification in fib6_add_rt2node() and send one with
4335 	 * the full route when done.
4336 	 */
4337 	info->skip_notify = 1;
4338 
4339 	err_nh = NULL;
4340 	list_for_each_entry(nh, &rt6_nh_list, next) {
4341 		rt_last = nh->fib6_info;
4342 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
4343 		fib6_info_release(nh->fib6_info);
4344 
4345 		/* save reference to first route for notification */
4346 		if (!rt_notif && !err)
4347 			rt_notif = nh->fib6_info;
4348 
4349 		/* nh->fib6_info is used or freed at this point, reset to NULL */
4350 		nh->fib6_info = NULL;
4351 		if (err) {
4352 			if (replace && nhn)
4353 				ip6_print_replace_route_err(&rt6_nh_list);
4354 			err_nh = nh;
4355 			goto add_errout;
4356 		}
4357 
4358 		/* Because each route is added as if it were a single route,
4359 		 * remove these flags after the first nexthop: if there is a
4360 		 * collision, we have already failed to add the first nexthop
4361 		 * (fib6_add_rt2node() rejected it); when replacing, the old
4362 		 * nexthops have been replaced by the first new one, and the
4363 		 * rest should be appended to it.
4364 		 */
4365 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4366 						     NLM_F_REPLACE);
4367 		nhn++;
4368 	}
4369 
4370 	/* success ... tell user about new route */
4371 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4372 	goto cleanup;
4373 
4374 add_errout:
4375 	/* send notification for routes that were added so that
4376 	 * the delete notifications sent by ip6_route_del are
4377 	 * coherent
4378 	 */
4379 	if (rt_notif)
4380 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4381 
4382 	/* Delete routes that were already added */
4383 	list_for_each_entry(nh, &rt6_nh_list, next) {
4384 		if (err_nh == nh)
4385 			break;
4386 		ip6_route_del(&nh->r_cfg, extack);
4387 	}
4388 
4389 cleanup:
4390 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4391 		if (nh->fib6_info)
4392 			fib6_info_release(nh->fib6_info);
4393 		list_del(&nh->next);
4394 		kfree(nh);
4395 	}
4396 
4397 	return err;
4398 }
4399 
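/* Delete each nexthop listed in RTA_MULTIPATH as an individual route;
 * the walk continues past failures and the last error, if any, is
 * returned.
 */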
4400 static int ip6_route_multipath_del(struct fib6_config *cfg,
4401 				   struct netlink_ext_ack *extack)
4402 {
4403 	struct fib6_config r_cfg;
4404 	struct rtnexthop *rtnh;
4405 	int remaining;
4406 	int attrlen;
4407 	int err = 1, last_err = 0;
4408 
4409 	remaining = cfg->fc_mp_len;
4410 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4411 
4412 	/* Parse a Multipath Entry */
4413 	while (rtnh_ok(rtnh, remaining)) {
4414 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4415 		if (rtnh->rtnh_ifindex)
4416 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4417 
4418 		attrlen = rtnh_attrlen(rtnh);
4419 		if (attrlen > 0) {
4420 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4421 
4422 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4423 			if (nla) {
4424 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4425 				r_cfg.fc_flags |= RTF_GATEWAY;
4426 			}
4427 		}
4428 		err = ip6_route_del(&r_cfg, extack);
4429 		if (err)
4430 			last_err = err;
4431 
4432 		rtnh = rtnh_next(rtnh, &remaining);
4433 	}
4434 
4435 	return last_err;
4436 }
4437 
4438 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4439 			      struct netlink_ext_ack *extack)
4440 {
4441 	struct fib6_config cfg;
4442 	int err;
4443 
4444 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4445 	if (err < 0)
4446 		return err;
4447 
4448 	if (cfg.fc_mp)
4449 		return ip6_route_multipath_del(&cfg, extack);
4450 	else {
4451 		cfg.fc_delete_all_nh = 1;
4452 		return ip6_route_del(&cfg, extack);
4453 	}
4454 }
4455 
4456 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4457 			      struct netlink_ext_ack *extack)
4458 {
4459 	struct fib6_config cfg;
4460 	int err;
4461 
4462 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4463 	if (err < 0)
4464 		return err;
4465 
4466 	if (cfg.fc_mp)
4467 		return ip6_route_multipath_add(&cfg, extack);
4468 	else
4469 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
4470 }
4471 
4472 static size_t rt6_nlmsg_size(struct fib6_info *rt)
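/* Worst-case skb size for an RTM_NEWROUTE message describing rt.
 * Undercounting here makes rt6_fill_node() fail with -EMSGSIZE, which
 * trips the WARN_ON() in inet6_rt_notify().
 */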
4473 {
4474 	int nexthop_len = 0;
4475 
4476 	if (rt->fib6_nsiblings) {
4477 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4478 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4479 			    + nla_total_size(16) /* RTA_GATEWAY */
4480 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4481 
4482 		nexthop_len *= rt->fib6_nsiblings;
4483 	}
4484 
4485 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4486 	       + nla_total_size(16) /* RTA_SRC */
4487 	       + nla_total_size(16) /* RTA_DST */
4488 	       + nla_total_size(16) /* RTA_GATEWAY */
4489 	       + nla_total_size(16) /* RTA_PREFSRC */
4490 	       + nla_total_size(4) /* RTA_TABLE */
4491 	       + nla_total_size(4) /* RTA_IIF */
4492 	       + nla_total_size(4) /* RTA_OIF */
4493 	       + nla_total_size(4) /* RTA_PRIORITY */
4494 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4495 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4496 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4497 	       + nla_total_size(1) /* RTA_PREF */
4498 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4499 	       + nexthop_len;
4500 }
4501 
4502 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4503 			    unsigned int *flags, bool skip_oif)
4504 {
4505 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4506 		*flags |= RTNH_F_DEAD;
4507 
4508 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4509 		*flags |= RTNH_F_LINKDOWN;
4510 
4511 		rcu_read_lock();
4512 		if (fib6_ignore_linkdown(rt))
4513 			*flags |= RTNH_F_DEAD;
4514 		rcu_read_unlock();
4515 	}
4516 
4517 	if (rt->fib6_flags & RTF_GATEWAY) {
4518 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4519 			goto nla_put_failure;
4520 	}
4521 
4522 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4523 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4524 		*flags |= RTNH_F_OFFLOAD;
4525 
4526 	/* not needed for multipath encoding because it has a rtnexthop struct */
4527 	if (!skip_oif && rt->fib6_nh.nh_dev &&
4528 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4529 		goto nla_put_failure;
4530 
4531 	if (rt->fib6_nh.nh_lwtstate &&
4532 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4533 		goto nla_put_failure;
4534 
4535 	return 0;
4536 
4537 nla_put_failure:
4538 	return -EMSGSIZE;
4539 }
4540 
4541 /* add multipath next hop */
4542 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4543 {
4544 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4545 	struct rtnexthop *rtnh;
4546 	unsigned int flags = 0;
4547 
4548 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4549 	if (!rtnh)
4550 		goto nla_put_failure;
4551 
4552 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4553 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4554 
4555 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4556 		goto nla_put_failure;
4557 
4558 	rtnh->rtnh_flags = flags;
4559 
4560 	/* length of rtnetlink header + attributes */
4561 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4562 
4563 	return 0;
4564 
4565 nla_put_failure:
4566 	return -EMSGSIZE;
4567 }
4568 
4569 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4570 			 struct fib6_info *rt, struct dst_entry *dst,
4571 			 struct in6_addr *dest, struct in6_addr *src,
4572 			 int iif, int type, u32 portid, u32 seq,
4573 			 unsigned int flags)
4574 {
4575 	struct rtmsg *rtm;
4576 	struct nlmsghdr *nlh;
4577 	long expires = 0;
4578 	u32 *pmetrics;
4579 	u32 table;
4580 
4581 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4582 	if (!nlh)
4583 		return -EMSGSIZE;
4584 
4585 	rtm = nlmsg_data(nlh);
4586 	rtm->rtm_family = AF_INET6;
4587 	rtm->rtm_dst_len = rt->fib6_dst.plen;
4588 	rtm->rtm_src_len = rt->fib6_src.plen;
4589 	rtm->rtm_tos = 0;
4590 	if (rt->fib6_table)
4591 		table = rt->fib6_table->tb6_id;
4592 	else
4593 		table = RT6_TABLE_UNSPEC;
4594 	rtm->rtm_table = table;
4595 	if (nla_put_u32(skb, RTA_TABLE, table))
4596 		goto nla_put_failure;
4597 
4598 	rtm->rtm_type = rt->fib6_type;
4599 	rtm->rtm_flags = 0;
4600 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4601 	rtm->rtm_protocol = rt->fib6_protocol;
4602 
4603 	if (rt->fib6_flags & RTF_CACHE)
4604 		rtm->rtm_flags |= RTM_F_CLONED;
4605 
4606 	if (dest) {
4607 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4608 			goto nla_put_failure;
4609 		rtm->rtm_dst_len = 128;
4610 	} else if (rtm->rtm_dst_len)
4611 		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
4612 			goto nla_put_failure;
4613 #ifdef CONFIG_IPV6_SUBTREES
4614 	if (src) {
4615 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4616 			goto nla_put_failure;
4617 		rtm->rtm_src_len = 128;
4618 	} else if (rtm->rtm_src_len &&
4619 		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
4620 		goto nla_put_failure;
4621 #endif
4622 	if (iif) {
4623 #ifdef CONFIG_IPV6_MROUTE
4624 		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
4625 			int err = ip6mr_get_route(net, skb, rtm, portid);
4626 
4627 			if (err == 0)
4628 				return 0;
4629 			if (err < 0)
4630 				goto nla_put_failure;
4631 		} else
4632 #endif
4633 			if (nla_put_u32(skb, RTA_IIF, iif))
4634 				goto nla_put_failure;
4635 	} else if (dest) {
4636 		struct in6_addr saddr_buf;
4637 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4638 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4639 			goto nla_put_failure;
4640 	}
4641 
4642 	if (rt->fib6_prefsrc.plen) {
4643 		struct in6_addr saddr_buf;
4644 		saddr_buf = rt->fib6_prefsrc.addr;
4645 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4646 			goto nla_put_failure;
4647 	}
4648 
4649 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4650 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4651 		goto nla_put_failure;
4652 
4653 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4654 		goto nla_put_failure;
4655 
4656 	/* For multipath routes, walk the siblings list and add
4657 	 * each as a nexthop within RTA_MULTIPATH.
4658 	 */
4659 	if (rt->fib6_nsiblings) {
4660 		struct fib6_info *sibling, *next_sibling;
4661 		struct nlattr *mp;
4662 
4663 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4664 		if (!mp)
4665 			goto nla_put_failure;
4666 
4667 		if (rt6_add_nexthop(skb, rt) < 0)
4668 			goto nla_put_failure;
4669 
4670 		list_for_each_entry_safe(sibling, next_sibling,
4671 					 &rt->fib6_siblings, fib6_siblings) {
4672 			if (rt6_add_nexthop(skb, sibling) < 0)
4673 				goto nla_put_failure;
4674 		}
4675 
4676 		nla_nest_end(skb, mp);
4677 	} else {
4678 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4679 			goto nla_put_failure;
4680 	}
4681 
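	/* Report the remaining lifetime; rtnl_put_cacheinfo() converts
	 * the jiffies delta to clock_t for userspace.
	 */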
4682 	if (rt->fib6_flags & RTF_EXPIRES) {
4683 		expires = dst ? dst->expires : rt->expires;
4684 		expires -= jiffies;
4685 	}
4686 
4687 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4688 		goto nla_put_failure;
4689 
4690 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
4691 		goto nla_put_failure;
4692 
4694 	nlmsg_end(skb, nlh);
4695 	return 0;
4696 
4697 nla_put_failure:
4698 	nlmsg_cancel(skb, nlh);
4699 	return -EMSGSIZE;
4700 }
4701 
4702 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4703 {
4704 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4705 	struct net *net = arg->net;
4706 
4707 	if (rt == net->ipv6.fib6_null_entry)
4708 		return 0;
4709 
4710 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4711 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4712 
4713 		/* user wants prefix routes only */
4714 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4715 		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4716 			/* success since this is not a prefix route */
4717 			return 1;
4718 		}
4719 	}
4720 
4721 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4722 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4723 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
4724 }
4725 
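/* RTM_GETROUTE handler: resolves a route for the given selectors and
 * replies with a single RTM_NEWROUTE. With RTM_F_FIB_MATCH set (e.g.
 * "ip -6 route get ... fibmatch") the reply describes the matching FIB
 * entry rather than the resolved destination.
 */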
4726 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4727 			      struct netlink_ext_ack *extack)
4728 {
4729 	struct net *net = sock_net(in_skb->sk);
4730 	struct nlattr *tb[RTA_MAX+1];
4731 	int err, iif = 0, oif = 0;
4732 	struct fib6_info *from;
4733 	struct dst_entry *dst;
4734 	struct rt6_info *rt;
4735 	struct sk_buff *skb;
4736 	struct rtmsg *rtm;
4737 	struct flowi6 fl6;
4738 	bool fibmatch;
4739 
4740 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4741 			  extack);
4742 	if (err < 0)
4743 		goto errout;
4744 
4745 	err = -EINVAL;
4746 	memset(&fl6, 0, sizeof(fl6));
4747 	rtm = nlmsg_data(nlh);
4748 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4749 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4750 
4751 	if (tb[RTA_SRC]) {
4752 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4753 			goto errout;
4754 
4755 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4756 	}
4757 
4758 	if (tb[RTA_DST]) {
4759 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4760 			goto errout;
4761 
4762 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4763 	}
4764 
4765 	if (tb[RTA_IIF])
4766 		iif = nla_get_u32(tb[RTA_IIF]);
4767 
4768 	if (tb[RTA_OIF])
4769 		oif = nla_get_u32(tb[RTA_OIF]);
4770 
4771 	if (tb[RTA_MARK])
4772 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4773 
4774 	if (tb[RTA_UID])
4775 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4776 					   nla_get_u32(tb[RTA_UID]));
4777 	else
4778 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4779 
4780 	if (iif) {
4781 		struct net_device *dev;
4782 		int flags = 0;
4783 
4784 		rcu_read_lock();
4785 
4786 		dev = dev_get_by_index_rcu(net, iif);
4787 		if (!dev) {
4788 			rcu_read_unlock();
4789 			err = -ENODEV;
4790 			goto errout;
4791 		}
4792 
4793 		fl6.flowi6_iif = iif;
4794 
4795 		if (!ipv6_addr_any(&fl6.saddr))
4796 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4797 
4798 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4799 
4800 		rcu_read_unlock();
4801 	} else {
4802 		fl6.flowi6_oif = oif;
4803 
4804 		dst = ip6_route_output(net, NULL, &fl6);
4805 	}
4806 
4808 	rt = container_of(dst, struct rt6_info, dst);
4809 	if (rt->dst.error) {
4810 		err = rt->dst.error;
4811 		ip6_rt_put(rt);
4812 		goto errout;
4813 	}
4814 
4815 	if (rt == net->ipv6.ip6_null_entry) {
4816 		err = rt->dst.error;
4817 		ip6_rt_put(rt);
4818 		goto errout;
4819 	}
4820 
4821 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4822 	if (!skb) {
4823 		ip6_rt_put(rt);
4824 		err = -ENOBUFS;
4825 		goto errout;
4826 	}
4827 
4828 	skb_dst_set(skb, &rt->dst);
4829 
4830 	rcu_read_lock();
4831 	from = rcu_dereference(rt->from);
4832 
4833 	if (fibmatch)
4834 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
4835 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4836 				    nlh->nlmsg_seq, 0);
4837 	else
4838 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4839 				    &fl6.saddr, iif, RTM_NEWROUTE,
4840 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4841 				    0);
4842 	rcu_read_unlock();
4843 
4844 	if (err < 0) {
4845 		kfree_skb(skb);
4846 		goto errout;
4847 	}
4848 
4849 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4850 errout:
4851 	return err;
4852 }
4853 
4854 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
4855 		     unsigned int nlm_flags)
4856 {
4857 	struct sk_buff *skb;
4858 	struct net *net = info->nl_net;
4859 	u32 seq;
4860 	int err;
4861 
4862 	err = -ENOBUFS;
4863 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4864 
4865 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4866 	if (!skb)
4867 		goto errout;
4868 
4869 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4870 			    event, info->portid, seq, nlm_flags);
4871 	if (err < 0) {
4872 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4873 		WARN_ON(err == -EMSGSIZE);
4874 		kfree_skb(skb);
4875 		goto errout;
4876 	}
4877 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4878 		    info->nlh, gfp_any());
4879 	return;
4880 errout:
4881 	if (err < 0)
4882 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4883 }
4884 
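/* The per-netns null/prohibit/blackhole templates need a device for
 * their dst; they borrow the loopback device when it registers and
 * drop the reference again on NETDEV_UNREGISTER.
 */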
4885 static int ip6_route_dev_notify(struct notifier_block *this,
4886 				unsigned long event, void *ptr)
4887 {
4888 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4889 	struct net *net = dev_net(dev);
4890 
4891 	if (!(dev->flags & IFF_LOOPBACK))
4892 		return NOTIFY_OK;
4893 
4894 	if (event == NETDEV_REGISTER) {
4895 		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4896 		net->ipv6.ip6_null_entry->dst.dev = dev;
4897 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4898 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4899 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
4900 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4901 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
4902 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4903 #endif
4904 	} else if (event == NETDEV_UNREGISTER &&
4905 		    dev->reg_state != NETREG_UNREGISTERED) {
4906 		/* NETDEV_UNREGISTER can be fired multiple times by
4907 		 * netdev_wait_allrefs(). Make sure we only do this once.
4908 		 */
4909 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4910 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4911 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4912 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4913 #endif
4914 	}
4915 
4916 	return NOTIFY_OK;
4917 }
4918 
4919 /*
4920  *	/proc
4921  */
4922 
4923 #ifdef CONFIG_PROC_FS
4924 
4925 static const struct file_operations ipv6_route_proc_fops = {
4926 	.open		= ipv6_route_open,
4927 	.read		= seq_read,
4928 	.llseek		= seq_lseek,
4929 	.release	= seq_release_net,
4930 };
4931 
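/* Backs /proc/net/rt6_stats: seven space-separated hex counters, in
 * order fib nodes, route nodes, route allocations, route entries,
 * cached routes, dst entries and discarded routes.
 */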
4932 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4933 {
4934 	struct net *net = (struct net *)seq->private;
4935 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4936 		   net->ipv6.rt6_stats->fib_nodes,
4937 		   net->ipv6.rt6_stats->fib_route_nodes,
4938 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4939 		   net->ipv6.rt6_stats->fib_rt_entries,
4940 		   net->ipv6.rt6_stats->fib_rt_cache,
4941 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4942 		   net->ipv6.rt6_stats->fib_discarded_routes);
4943 
4944 	return 0;
4945 }
4946 
4947 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4948 {
4949 	return single_open_net(inode, file, rt6_stats_seq_show);
4950 }
4951 
4952 static const struct file_operations rt6_stats_seq_fops = {
4953 	.open	 = rt6_stats_seq_open,
4954 	.read	 = seq_read,
4955 	.llseek	 = seq_lseek,
4956 	.release = single_release_net,
4957 };
4958 #endif	/* CONFIG_PROC_FS */
4959 
4960 #ifdef CONFIG_SYSCTL
4961 
4962 static
4963 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4964 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4965 {
4966 	struct net *net;
4967 	int delay;
4968 	if (!write)
4969 		return -EINVAL;
4970 
4971 	net = (struct net *)ctl->extra1;
4972 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4973 	delay = net->ipv6.sysctl.flush_delay;
4974 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4975 	return 0;
4976 }
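/* Writing a delay to /proc/sys/net/ipv6/route/flush (e.g.
 * "echo 1 > flush") kicks off a garbage-collection pass over the
 * routing tables via fib6_run_gc().
 */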
4977 
4978 struct ctl_table ipv6_route_table_template[] = {
4979 	{
4980 		.procname	=	"flush",
4981 		.data		=	&init_net.ipv6.sysctl.flush_delay,
4982 		.maxlen		=	sizeof(int),
4983 		.mode		=	0200,
4984 		.proc_handler	=	ipv6_sysctl_rtcache_flush
4985 	},
4986 	{
4987 		.procname	=	"gc_thresh",
4988 		.data		=	&ip6_dst_ops_template.gc_thresh,
4989 		.maxlen		=	sizeof(int),
4990 		.mode		=	0644,
4991 		.proc_handler	=	proc_dointvec,
4992 	},
4993 	{
4994 		.procname	=	"max_size",
4995 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
4996 		.maxlen		=	sizeof(int),
4997 		.mode		=	0644,
4998 		.proc_handler	=	proc_dointvec,
4999 	},
5000 	{
5001 		.procname	=	"gc_min_interval",
5002 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5003 		.maxlen		=	sizeof(int),
5004 		.mode		=	0644,
5005 		.proc_handler	=	proc_dointvec_jiffies,
5006 	},
5007 	{
5008 		.procname	=	"gc_timeout",
5009 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5010 		.maxlen		=	sizeof(int),
5011 		.mode		=	0644,
5012 		.proc_handler	=	proc_dointvec_jiffies,
5013 	},
5014 	{
5015 		.procname	=	"gc_interval",
5016 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
5017 		.maxlen		=	sizeof(int),
5018 		.mode		=	0644,
5019 		.proc_handler	=	proc_dointvec_jiffies,
5020 	},
5021 	{
5022 		.procname	=	"gc_elasticity",
5023 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5024 		.maxlen		=	sizeof(int),
5025 		.mode		=	0644,
5026 		.proc_handler	=	proc_dointvec,
5027 	},
5028 	{
5029 		.procname	=	"mtu_expires",
5030 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5031 		.maxlen		=	sizeof(int),
5032 		.mode		=	0644,
5033 		.proc_handler	=	proc_dointvec_jiffies,
5034 	},
5035 	{
5036 		.procname	=	"min_adv_mss",
5037 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
5038 		.maxlen		=	sizeof(int),
5039 		.mode		=	0644,
5040 		.proc_handler	=	proc_dointvec,
5041 	},
5042 	{
5043 		.procname	=	"gc_min_interval_ms",
5044 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5045 		.maxlen		=	sizeof(int),
5046 		.mode		=	0644,
5047 		.proc_handler	=	proc_dointvec_ms_jiffies,
5048 	},
5049 	{ }
5050 };
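/* ipv6_route_sysctl_init() below re-points each entry's .data at the
 * per-netns copy by array index, so the order of this template must
 * stay in sync with those assignments.
 */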
5051 
5052 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5053 {
5054 	struct ctl_table *table;
5055 
5056 	table = kmemdup(ipv6_route_table_template,
5057 			sizeof(ipv6_route_table_template),
5058 			GFP_KERNEL);
5059 
5060 	if (table) {
5061 		table[0].data = &net->ipv6.sysctl.flush_delay;
5062 		table[0].extra1 = net;
5063 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5064 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5065 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5066 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5067 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5068 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5069 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5070 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5071 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5072 
5073 		/* Don't export sysctls to unprivileged users */
5074 		if (net->user_ns != &init_user_ns)
5075 			table[0].procname = NULL;
5076 	}
5077 
5078 	return table;
5079 }
5080 #endif
5081 
5082 static int __net_init ip6_route_net_init(struct net *net)
5083 {
5084 	int ret = -ENOMEM;
5085 
5086 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5087 	       sizeof(net->ipv6.ip6_dst_ops));
5088 
5089 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5090 		goto out_ip6_dst_ops;
5091 
5092 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5093 					    sizeof(*net->ipv6.fib6_null_entry),
5094 					    GFP_KERNEL);
5095 	if (!net->ipv6.fib6_null_entry)
5096 		goto out_ip6_dst_entries;
5097 
5098 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5099 					   sizeof(*net->ipv6.ip6_null_entry),
5100 					   GFP_KERNEL);
5101 	if (!net->ipv6.ip6_null_entry)
5102 		goto out_fib6_null_entry;
5103 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5104 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5105 			 ip6_template_metrics, true);
5106 
5107 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5108 	net->ipv6.fib6_has_custom_rules = false;
5109 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5110 					       sizeof(*net->ipv6.ip6_prohibit_entry),
5111 					       GFP_KERNEL);
5112 	if (!net->ipv6.ip6_prohibit_entry)
5113 		goto out_ip6_null_entry;
5114 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5115 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5116 			 ip6_template_metrics, true);
5117 
5118 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5119 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
5120 					       GFP_KERNEL);
5121 	if (!net->ipv6.ip6_blk_hole_entry)
5122 		goto out_ip6_prohibit_entry;
5123 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5124 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5125 			 ip6_template_metrics, true);
5126 #endif
5127 
5128 	net->ipv6.sysctl.flush_delay = 0;
5129 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5130 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5131 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5132 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5133 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5134 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5135 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5136 
5137 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
5138 
5139 	ret = 0;
5140 out:
5141 	return ret;
5142 
5143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5144 out_ip6_prohibit_entry:
5145 	kfree(net->ipv6.ip6_prohibit_entry);
5146 out_ip6_null_entry:
5147 	kfree(net->ipv6.ip6_null_entry);
5148 #endif
5149 out_fib6_null_entry:
5150 	kfree(net->ipv6.fib6_null_entry);
5151 out_ip6_dst_entries:
5152 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5153 out_ip6_dst_ops:
5154 	goto out;
5155 }
5156 
5157 static void __net_exit ip6_route_net_exit(struct net *net)
5158 {
5159 	kfree(net->ipv6.fib6_null_entry);
5160 	kfree(net->ipv6.ip6_null_entry);
5161 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5162 	kfree(net->ipv6.ip6_prohibit_entry);
5163 	kfree(net->ipv6.ip6_blk_hole_entry);
5164 #endif
5165 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5166 }
5167 
5168 static int __net_init ip6_route_net_init_late(struct net *net)
5169 {
5170 #ifdef CONFIG_PROC_FS
5171 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5172 	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
5173 #endif
5174 	return 0;
5175 }
5176 
5177 static void __net_exit ip6_route_net_exit_late(struct net *net)
5178 {
5179 #ifdef CONFIG_PROC_FS
5180 	remove_proc_entry("ipv6_route", net->proc_net);
5181 	remove_proc_entry("rt6_stats", net->proc_net);
5182 #endif
5183 }
5184 
5185 static struct pernet_operations ip6_route_net_ops = {
5186 	.init = ip6_route_net_init,
5187 	.exit = ip6_route_net_exit,
5188 };
5189 
5190 static int __net_init ipv6_inetpeer_init(struct net *net)
5191 {
5192 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5193 
5194 	if (!bp)
5195 		return -ENOMEM;
5196 	inet_peer_base_init(bp);
5197 	net->ipv6.peers = bp;
5198 	return 0;
5199 }
5200 
5201 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5202 {
5203 	struct inet_peer_base *bp = net->ipv6.peers;
5204 
5205 	net->ipv6.peers = NULL;
5206 	inetpeer_invalidate_tree(bp);
5207 	kfree(bp);
5208 }
5209 
5210 static struct pernet_operations ipv6_inetpeer_ops = {
5211 	.init	=	ipv6_inetpeer_init,
5212 	.exit	=	ipv6_inetpeer_exit,
5213 };
5214 
5215 static struct pernet_operations ip6_route_net_late_ops = {
5216 	.init = ip6_route_net_init_late,
5217 	.exit = ip6_route_net_exit_late,
5218 };
5219 
5220 static struct notifier_block ip6_route_dev_notifier = {
5221 	.notifier_call = ip6_route_dev_notify,
5222 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5223 };
5224 
5225 void __init ip6_route_init_special_entries(void)
5226 {
5227 	/* Registration of the loopback device happens before this code
5228 	 * runs, so the loopback reference in rt6_info is not taken
5229 	 * automatically; take it manually for init_net. */
5230 	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5231 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5232 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5233 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5234 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5235 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5236 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5237 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5238 #endif
5239 }
5240 
5241 int __init ip6_route_init(void)
5242 {
5243 	int ret;
5244 	int cpu;
5245 
5246 	ret = -ENOMEM;
5247 	ip6_dst_ops_template.kmem_cachep =
5248 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5249 				  SLAB_HWCACHE_ALIGN, NULL);
5250 	if (!ip6_dst_ops_template.kmem_cachep)
5251 		goto out;
5252 
5253 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
5254 	if (ret)
5255 		goto out_kmem_cache;
5256 
5257 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5258 	if (ret)
5259 		goto out_dst_entries;
5260 
5261 	ret = register_pernet_subsys(&ip6_route_net_ops);
5262 	if (ret)
5263 		goto out_register_inetpeer;
5264 
5265 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5266 
5267 	ret = fib6_init();
5268 	if (ret)
5269 		goto out_register_subsys;
5270 
5271 	ret = xfrm6_init();
5272 	if (ret)
5273 		goto out_fib6_init;
5274 
5275 	ret = fib6_rules_init();
5276 	if (ret)
5277 		goto xfrm6_init;
5278 
5279 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5280 	if (ret)
5281 		goto fib6_rules_init;
5282 
5283 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5284 				   inet6_rtm_newroute, NULL, 0);
5285 	if (ret < 0)
5286 		goto out_register_late_subsys;
5287 
5288 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5289 				   inet6_rtm_delroute, NULL, 0);
5290 	if (ret < 0)
5291 		goto out_register_late_subsys;
5292 
5293 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5294 				   inet6_rtm_getroute, NULL,
5295 				   RTNL_FLAG_DOIT_UNLOCKED);
5296 	if (ret < 0)
5297 		goto out_register_late_subsys;
5298 
5299 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5300 	if (ret)
5301 		goto out_register_late_subsys;
5302 
5303 	for_each_possible_cpu(cpu) {
5304 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5305 
5306 		INIT_LIST_HEAD(&ul->head);
5307 		spin_lock_init(&ul->lock);
5308 	}
5309 
5310 out:
5311 	return ret;
5312 
5313 out_register_late_subsys:
5314 	rtnl_unregister_all(PF_INET6);
5315 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5316 fib6_rules_init:
5317 	fib6_rules_cleanup();
5318 xfrm6_init:
5319 	xfrm6_fini();
5320 out_fib6_init:
5321 	fib6_gc_cleanup();
5322 out_register_subsys:
5323 	unregister_pernet_subsys(&ip6_route_net_ops);
5324 out_register_inetpeer:
5325 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5326 out_dst_entries:
5327 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5328 out_kmem_cache:
5329 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5330 	goto out;
5331 }
5332 
5333 void ip6_route_cleanup(void)
5334 {
5335 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5336 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5337 	fib6_rules_cleanup();
5338 	xfrm6_fini();
5339 	fib6_gc_cleanup();
5340 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5341 	unregister_pernet_subsys(&ip6_route_net_ops);
5342 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5343 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5344 }
5345