xref: /linux/net/ipv6/route.c (revision c411ed854584a71b0e86ac3019b60e4789d88086)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <linux/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/* Scores returned by rt6_check_neigh() and consumed by route scoring;
 * negative values are distinct failure modes (see find_match()).
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route unusable, skip it */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; round-robin */
	RT6_NUD_SUCCEED = 1		/* next hop (probably) reachable */
};
79 
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 static size_t rt6_nlmsg_size(struct rt6_info *rt);
102 static int rt6_fill_node(struct net *net,
103 			 struct sk_buff *skb, struct rt6_info *rt,
104 			 struct in6_addr *dst, struct in6_addr *src,
105 			 int iif, int type, u32 portid, u32 seq,
106 			 unsigned int flags);
107 
108 #ifdef CONFIG_IPV6_ROUTE_INFO
109 static struct rt6_info *rt6_add_route_info(struct net *net,
110 					   const struct in6_addr *prefix, int prefixlen,
111 					   const struct in6_addr *gwaddr,
112 					   struct net_device *dev,
113 					   unsigned int pref);
114 static struct rt6_info *rt6_get_route_info(struct net *net,
115 					   const struct in6_addr *prefix, int prefixlen,
116 					   const struct in6_addr *gwaddr,
117 					   struct net_device *dev);
118 #endif
119 
/* Per-cpu list of RTF_CACHE routes that are not owned by the fib6 tree
 * (see ip6_pol_route()), so they can be rewired when their device is
 * unregistered; see rt6_uncached_list_flush_dev().
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126 
/* Link @rt onto this cpu's uncached list so device-down handling can
 * find it later.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	/* remember which cpu's list was used; deletion may run on another cpu */
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
137 
138 static void rt6_uncached_list_del(struct rt6_info *rt)
139 {
140 	if (!list_empty(&rt->rt6i_uncached)) {
141 		struct uncached_list *ul = rt->rt6i_uncached_list;
142 
143 		spin_lock_bh(&ul->lock);
144 		list_del(&rt->rt6i_uncached);
145 		spin_unlock_bh(&ul->lock);
146 	}
147 }
148 
/* On device unregister, repoint every uncached route referencing @dev
 * (either via its inet6_dev or its dst.dev) at the loopback device so
 * the route remains safe to use.  Nothing to do when loopback itself
 * is the device going away.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
180 
/* Per-cpu copies share metrics with the route they were cloned from:
 * write through to the parent dst (dst.from).
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
185 
186 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187 {
188 	struct rt6_info *rt = (struct rt6_info *)dst;
189 
190 	if (rt->rt6i_flags & RTF_PCPU)
191 		return rt6_pcpu_cow_metrics(rt);
192 	else if (rt->rt6i_flags & RTF_CACHE)
193 		return NULL;
194 	else
195 		return dst_cow_metrics_generic(dst, old);
196 }
197 
198 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 					     struct sk_buff *skb,
200 					     const void *daddr)
201 {
202 	struct in6_addr *p = &rt->rt6i_gateway;
203 
204 	if (!ipv6_addr_any(p))
205 		return (const void *) p;
206 	else if (skb)
207 		return &ipv6_hdr(skb)->daddr;
208 	return daddr;
209 }
210 
/* dst_ops->neigh_lookup: find (or create in nd_tbl) the neighbour
 * entry for the next hop chosen by choose_neigh_daddr().
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
224 
225 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226 {
227 	struct net_device *dev = dst->dev;
228 	struct rt6_info *rt = (struct rt6_info *)dst;
229 
230 	daddr = choose_neigh_daddr(rt, NULL, daddr);
231 	if (!daddr)
232 		return;
233 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 		return;
235 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 		return;
237 	__ipv6_confirm_neigh(dev, daddr);
238 }
239 
/* Template for per-namespace net->ipv6.ip6_dst_ops (see __ip6_dst_alloc()). */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
258 
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260 {
261 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262 
263 	return mtu ? : dst->dev->mtu;
264 }
265 
/* Blackhole dsts deliberately ignore PMTU updates; intentional no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
270 
/* Blackhole dsts deliberately ignore redirects; intentional no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
275 
/* dst_ops for blackhole dsts: PMTU/redirect callbacks are no-ops. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
287 
/* Metrics for the template routes below; only hoplimit is set (to 0). */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
291 
/* Template for the per-namespace null entry: returned when no route
 * matches, discarding packets with -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
306 
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308 
/* Template for the prohibit entry: rejects packets with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
323 
/* Template for the blackhole entry: silently discards with -EINVAL. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
338 
339 #endif
340 
/* Initialise the rt6_info-specific part of a freshly allocated route. */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* zero everything past the embedded dst_entry */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
349 
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	/* dst_alloc() only initialised the dst part; clear the rest */
	if (rt)
		rt6_info_init(rt);

	return rt;
}
363 
364 struct rt6_info *ip6_dst_alloc(struct net *net,
365 			       struct net_device *dev,
366 			       int flags)
367 {
368 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
369 
370 	if (rt) {
371 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 		if (rt->rt6i_pcpu) {
373 			int cpu;
374 
375 			for_each_possible_cpu(cpu) {
376 				struct rt6_info **p;
377 
378 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 				/* no one shares rt */
380 				*p =  NULL;
381 			}
382 		} else {
383 			dst_release_immediate(&rt->dst);
384 			return NULL;
385 		}
386 	}
387 
388 	return rt;
389 }
390 EXPORT_SYMBOL(ip6_dst_alloc);
391 
/* dst_ops->destroy: drop everything a rt6_info holds — metrics, the
 * per-cpu copy array, uncached-list membership, the inet6_dev
 * reference and the parent dst (dst->from) reference.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}
411 
/* dst_ops->ifdown: when @dev goes down, move the route's inet6_dev
 * reference over to the namespace's loopback device.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
428 
429 static bool __rt6_check_expired(const struct rt6_info *rt)
430 {
431 	if (rt->rt6i_flags & RTF_EXPIRES)
432 		return time_after(jiffies, rt->dst.expires);
433 	else
434 		return false;
435 }
436 
437 static bool rt6_check_expired(const struct rt6_info *rt)
438 {
439 	if (rt->rt6i_flags & RTF_EXPIRES) {
440 		if (time_after(jiffies, rt->dst.expires))
441 			return true;
442 	} else if (rt->dst.from) {
443 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
444 	}
445 	return false;
446 }
447 
448 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
449 					     struct flowi6 *fl6, int oif,
450 					     int strict)
451 {
452 	struct rt6_info *sibling, *next_sibling;
453 	int route_choosen;
454 
455 	/* We might have already computed the hash for ICMPv6 errors. In such
456 	 * case it will always be non-zero. Otherwise now is the time to do it.
457 	 */
458 	if (!fl6->mp_hash)
459 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
460 
461 	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
462 	/* Don't change the route, if route_choosen == 0
463 	 * (siblings does not include ourself)
464 	 */
465 	if (route_choosen)
466 		list_for_each_entry_safe(sibling, next_sibling,
467 				&match->rt6i_siblings, rt6i_siblings) {
468 			route_choosen--;
469 			if (route_choosen == 0) {
470 				if (rt6_score_route(sibling, oif, strict) < 0)
471 					break;
472 				match = sibling;
473 				break;
474 			}
475 		}
476 	return match;
477 }
478 
479 /*
480  *	Route lookup. Any table->tb6_lock is implied.
481  */
482 
/* Walk the leaf chain starting at @rt and return the first entry whose
 * device matches @oif (or, without an oif, whose device owns @saddr).
 * A loopback route bound to @oif is remembered as a fallback.  With
 * RT6_LOOKUP_F_IFACE set and no match at all, the null entry is
 * returned; otherwise the head of the chain is.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to constrain on: first entry wins */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
529 
530 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request; see rt6_probe_deferred().
 * Holds a reference on @dev until the work runs.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};
536 
/* Workqueue handler: send a neighbour solicitation to the probe target
 * via its solicited-node multicast address, then drop the device
 * reference and free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}
548 
/* Schedule a deferred neighbour solicitation towards @rt's gateway when
 * its reachability is unknown, rate-limited via rtr_probe_interval and
 * __neigh_set_probe_once().  Only gateway routes are probed.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		/* recheck state under the neigh lock before committing */
		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry at all: always worth probing */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
595 #else
596 static inline void rt6_probe(struct rt6_info *rt)
597 {
598 }
599 #endif
600 
601 /*
602  * Default Router Selection (RFC 2461 6.3.6)
603  */
604 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
605 {
606 	struct net_device *dev = rt->dst.dev;
607 	if (!oif || dev->ifindex == oif)
608 		return 2;
609 	if ((dev->flags & IFF_LOOPBACK) &&
610 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
611 		return 1;
612 	return 0;
613 }
614 
/* Classify next-hop reachability of @rt for route scoring; routes
 * without a gateway always succeed.  See enum rt6_nud_state for the
 * meaning of the return values.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preference, tolerate anything but NUD_FAILED */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
645 
/* Combine device match, router preference (when configured) and
 * neighbour reachability into a route score.  Negative values are
 * failures (see enum rt6_nud_state); higher is better.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* router preference occupies the bits above the device score */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
664 
/* Score @rt and return whichever of @rt/@match scores higher, tracking
 * the best score in *mpri.  Expired routes and link-down routes (when
 * configured to be ignored) are skipped; *do_rr is set when the
 * winning route asks for round-robin fallback.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
702 
/* Find the best-scoring route with @metric among @fn's leaf entries,
 * scanning in round-robin order: from @rr_head to the end of the
 * chain, then from the leaf head up to @rr_head.  The first entry seen
 * with a different metric is remembered in @cont; when nothing at
 * @metric matched, the remaining entries from @cont on are considered.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
739 
/* Select the route to use from fib6 node @fn, starting at the
 * round-robin pointer (fn->rr_ptr) and advancing it when the chosen
 * route requested round-robin.  Returns the null entry when nothing
 * matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
767 
768 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
769 {
770 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
771 }
772 
773 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option received in a Router
 * Advertisement: validate option and prefix lengths, then add, refresh
 * or (on zero lifetime) delete the corresponding RTF_ROUTEINFO route
 * for gateway @gwaddr on @dev.  Returns 0 or -EINVAL on a malformed
 * option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* zero-length prefix denotes the default route for this router */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
847 #endif
848 
/* Walk up the fib6 tree from @fn until a node carrying route info
 * (RTN_RTINFO) is found, descending into source-address subtrees on
 * the way.  Returns NULL once the tree root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
865 
/* Simple flow lookup in @table (no cache/pcpu cloning): find the fib6
 * node for the flow, apply device and multipath matching, and
 * backtrack up the tree on a null-entry result.  Returns a held dst
 * (possibly the null entry).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
893 
/* Policy-routing entry point wrapping ip6_pol_route_lookup(). */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
900 
901 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
902 			    const struct in6_addr *saddr, int oif, int strict)
903 {
904 	struct flowi6 fl6 = {
905 		.flowi6_oif = oif,
906 		.daddr = *daddr,
907 	};
908 	struct dst_entry *dst;
909 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
910 
911 	if (saddr) {
912 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
913 		flags |= RT6_LOOKUP_F_HAS_SADDR;
914 	}
915 
916 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
917 	if (dst->error == 0)
918 		return (struct rt6_info *) dst;
919 
920 	dst_release(dst);
921 
922 	return NULL;
923 }
924 EXPORT_SYMBOL(rt6_lookup);
925 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */
931 
/* Insert @rt into its fib6 table under the table write lock; returns
 * the fib6_add() result.
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
946 
/* Insert @rt with default netlink info, taking the fib6-tree dst
 * reference on the caller's behalf.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
956 
/* called with rcu_lock held */
/* Pick the device a copy of @rt should use; local routes are rewired
 * to the l3mdev master or loopback as described below.
 */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	if (rt->rt6i_flags & RTF_LOCAL) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}
979 
/* Create an RTF_CACHE host-route clone (plen 128) of @ort for @daddr.
 * Clones of clones are re-based on the original route (ort->dst.from).
 * Returns the new route or NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(ort);
	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
	rcu_read_unlock();
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
1022 
/* Allocate an RTF_PCPU copy of @rt for use as a per-cpu cached route.
 * Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
	rcu_read_unlock();
	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
1039 
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Fetch this cpu's cached copy of @rt, if one was published; returns
 * it with a dst reference held, or NULL.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1054 
/* Create a per-cpu copy of @rt and publish it in this cpu's slot with
 * cmpxchg; on allocation failure the null entry is returned, and when
 * @rt was removed from the tree meanwhile, @rt itself is.  Always
 * returns a route with a dst reference held.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_release_immediate(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_release_immediate(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1092 
/* Core policy-routing lookup shared by the input and output paths.
 *
 * Walks @table under tb6_lock, selects a route for @fl6 honouring
 * @oif and the strictness @flags, and returns it with a reference
 * held.  Depending on what was found the caller gets back:
 *   - the null entry or an RTF_CACHE clone straight from the tree,
 *   - a fresh uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH case), or
 *   - a per-cpu copy of the tree route.
 * Never returns NULL: on total miss the net's ip6_null_entry is
 * returned (refcount bumped) so callers can rely on dst.error.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* With forwarding globally off, prefer (probably) reachable
	 * routers first; this bit is cleared below if nothing matches.
	 */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* Miss at this node: back up the tree, and as a last
		 * resort retry from the original node without the
		 * reachability requirement.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1195 
1196 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1197 					    struct flowi6 *fl6, int flags)
1198 {
1199 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1200 }
1201 
1202 struct dst_entry *ip6_route_input_lookup(struct net *net,
1203 					 struct net_device *dev,
1204 					 struct flowi6 *fl6, int flags)
1205 {
1206 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1207 		flags |= RT6_LOOKUP_F_IFACE;
1208 
1209 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1210 }
1211 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1212 
/* Extract the L3 flow keys used for multipath hashing from @skb.
 *
 * For ICMPv6 error messages the keys are taken from the offending
 * (inner) IPv6 header embedded in the ICMP payload, so errors hash to
 * the same nexthop as the flow that triggered them; otherwise the
 * outer header is used.
 */
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	icmph = icmp6_hdr(skb);
	/* Only ICMPv6 error types carry the original packet's header */
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	/* Inner header may be paged/truncated; copy via _inner_iph */
	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	memset(keys, 0, sizeof(*keys));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->addrs.v6addrs.src = key_iph->saddr;
	keys->addrs.v6addrs.dst = key_iph->daddr;
	keys->tags.flow_label = ip6_flowinfo(key_iph);
	keys->basic.ip_proto = key_iph->nexthdr;
}
1247 
1248 /* if skb is set it will be used and fl6 can be NULL */
1249 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1250 {
1251 	struct flow_keys hash_keys;
1252 
1253 	if (skb) {
1254 		ip6_multipath_l3_keys(skb, &hash_keys);
1255 		return flow_hash_from_keys(&hash_keys);
1256 	}
1257 
1258 	return get_hash_from_flowi6(fl6);
1259 }
1260 
/* Route an incoming packet: build a flow key from its IPv6 header
 * (plus tunnel metadata and multipath hash where relevant) and attach
 * the looked-up dst to @skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	/* For metadata (collect_md) tunnels, key the lookup on the
	 * received tunnel id as well.
	 */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	/* ICMPv6 errors hash on the embedded flow (see
	 * ip6_multipath_l3_keys()) so they follow the same ECMP
	 * nexthop as the original traffic.
	 */
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1284 
1285 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1286 					     struct flowi6 *fl6, int flags)
1287 {
1288 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1289 }
1290 
/* Output-path route lookup for locally generated traffic.
 *
 * Link-scope destinations are first offered to the l3mdev (VRF)
 * layer; otherwise a policy lookup runs with strictness flags derived
 * from the socket binding and the presence of a source address.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	/* Mark the flow as locally originated */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* No saddr yet: let the socket's address-selection
		 * preferences (IPV6_ADDR_PREFERENCES) steer selection.
		 */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1319 
/* Replace @dst_orig with a blackhole dst: a loopback-bound copy that
 * discards all traffic but keeps the original's metrics and
 * addressing so callers can still query it (used e.g. while an xfrm
 * bundle is being resolved).  Consumes the reference on @dst_orig.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		/* The copy is not a per-cpu route even if the original was */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1352 
1353 /*
1354  *	Destination cache support functions
1355  */
1356 
1357 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1358 {
1359 	if (rt->dst.from &&
1360 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1361 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1362 }
1363 
1364 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1365 {
1366 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1367 		return NULL;
1368 
1369 	if (rt6_check_expired(rt))
1370 		return NULL;
1371 
1372 	return &rt->dst;
1373 }
1374 
1375 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1376 {
1377 	if (!__rt6_check_expired(rt) &&
1378 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1379 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1380 		return &rt->dst;
1381 	else
1382 		return NULL;
1383 }
1384 
/* dst_ops->check hook: decide whether a cached dst is still usable
 * for generation @cookie.  Returns the dst if valid, NULL otherwise.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	/* Per-cpu copies and uncached clones borrow validity from the
	 * route they were made from (dst.from); everything else is
	 * validated against its own fib6 node.
	 */
	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1404 
/* dst_ops->negative_advice hook: a socket suspects its cached route
 * is stale.  Expired RTF_CACHE clones are deleted from the tree; any
 * non-cache route is released outright so the caller re-looks-up.
 * Returns the dst to keep using, or NULL.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes our reference */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1422 
/* dst_ops->link_failure hook: delivery over this route failed.
 * Report unreachability to the sender and invalidate the route:
 * cached clones are deleted, default routes get their fib6 node's
 * sernum poisoned so every cached user fails rt6_check().
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Only delete if we can still get a reference */
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* -1 can never match a lookup cookie, forcing a
			 * fresh lookup for everyone caching under this node
			 */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1439 
1440 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1441 {
1442 	struct net *net = dev_net(rt->dst.dev);
1443 
1444 	rt->rt6i_flags |= RTF_MODIFIED;
1445 	rt->rt6i_pmtu = mtu;
1446 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1447 }
1448 
1449 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1450 {
1451 	return !(rt->rt6i_flags & RTF_CACHE) &&
1452 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1453 }
1454 
/* Core PMTU-update handler shared by the dst_ops hook and the
 * ip6_update_pmtu() entry points.
 *
 * @iph or @sk supply the flow addresses (either may be NULL).  If the
 * matched route must not carry a PMTU directly, a RTF_CACHE clone
 * holding the new MTU is created and inserted instead.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	/* Administratively locked MTU must not be changed */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	/* A PMTU message is evidence the path (and neighbour) works */
	dst_confirm_neigh(dst, daddr);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
			/* Release the reference taken in
			 * ip6_rt_cache_alloc()
			 */
			dst_release(&nrt6->dst);
		}
	}
}
1504 
1505 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1506 			       struct sk_buff *skb, u32 mtu)
1507 {
1508 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1509 }
1510 
1511 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1512 		     int oif, u32 mark, kuid_t uid)
1513 {
1514 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1515 	struct dst_entry *dst;
1516 	struct flowi6 fl6;
1517 
1518 	memset(&fl6, 0, sizeof(fl6));
1519 	fl6.flowi6_oif = oif;
1520 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1521 	fl6.daddr = iph->daddr;
1522 	fl6.saddr = iph->saddr;
1523 	fl6.flowlabel = ip6_flowinfo(iph);
1524 	fl6.flowi6_uid = uid;
1525 
1526 	dst = ip6_route_output(net, NULL, &fl6);
1527 	if (!dst->error)
1528 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1529 	dst_release(dst);
1530 }
1531 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1532 
/* Socket-aware PMTU update: apply the new MTU for the packet in @skb,
 * then, if the socket's cached dst has become invalid as a result,
 * refresh it (for connected datagram sockets).
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* Nothing more to do if the cached dst is absent or still valid */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1551 
/* Handle redirects */

/* Extended flow key for redirect lookups: a flowi6 followed by the
 * advertising router's address.  __ip6_route_redirect() casts the
 * flowi6 back to this struct (fl6 must stay the first member) to
 * check the redirect came from the route's current nexthop.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1557 
/* fib6_rule_lookup() callback for redirect processing: find the route
 * the redirected flow is currently using so the caller can act only
 * on redirects sent by that route's nexthop.  Returns a held route
 * (possibly the null entry).
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		/* Only a route whose gateway is the advertising router
		 * may be updated by this redirect.
		 */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* Nothing matched at this node: walk back up the tree */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1615 
1616 static struct dst_entry *ip6_route_redirect(struct net *net,
1617 					const struct flowi6 *fl6,
1618 					const struct in6_addr *gateway)
1619 {
1620 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1621 	struct ip6rd_flowi rdfl;
1622 
1623 	rdfl.fl6 = *fl6;
1624 	rdfl.gateway = *gateway;
1625 
1626 	return fib6_rule_lookup(net, &rdfl.fl6,
1627 				flags, __ip6_route_redirect);
1628 }
1629 
1630 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1631 		  kuid_t uid)
1632 {
1633 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1634 	struct dst_entry *dst;
1635 	struct flowi6 fl6;
1636 
1637 	memset(&fl6, 0, sizeof(fl6));
1638 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1639 	fl6.flowi6_oif = oif;
1640 	fl6.flowi6_mark = mark;
1641 	fl6.daddr = iph->daddr;
1642 	fl6.saddr = iph->saddr;
1643 	fl6.flowlabel = ip6_flowinfo(iph);
1644 	fl6.flowi6_uid = uid;
1645 
1646 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1647 	rt6_do_redirect(dst, NULL, skb);
1648 	dst_release(dst);
1649 }
1650 EXPORT_SYMBOL_GPL(ip6_redirect);
1651 
1652 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1653 			    u32 mark)
1654 {
1655 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1656 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1657 	struct dst_entry *dst;
1658 	struct flowi6 fl6;
1659 
1660 	memset(&fl6, 0, sizeof(fl6));
1661 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1662 	fl6.flowi6_oif = oif;
1663 	fl6.flowi6_mark = mark;
1664 	fl6.daddr = msg->dest;
1665 	fl6.saddr = iph->daddr;
1666 	fl6.flowi6_uid = sock_net_uid(net, NULL);
1667 
1668 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1669 	rt6_do_redirect(dst, NULL, skb);
1670 	dst_release(dst);
1671 }
1672 
1673 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1674 {
1675 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1676 		     sk->sk_uid);
1677 }
1678 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1679 
1680 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1681 {
1682 	struct net_device *dev = dst->dev;
1683 	unsigned int mtu = dst_mtu(dst);
1684 	struct net *net = dev_net(dev);
1685 
1686 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1687 
1688 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1689 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1690 
1691 	/*
1692 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1693 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1694 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1695 	 * rely only on pmtu discovery"
1696 	 */
1697 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1698 		mtu = IPV6_MAXPLEN;
1699 	return mtu;
1700 }
1701 
/* dst_ops->mtu hook: effective MTU for this route.  Preference order:
 * learned per-route PMTU, explicit RTAX_MTU metric, the egress
 * device's IPv6 MTU (IPV6_MIN_MTU if the device has no inet6_dev);
 * clamped to IP6_MAX_MTU and reduced by any lightweight-tunnel
 * encapsulation headroom.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1728 
/* Allocate a dedicated, non-fib dst for sending an ICMPv6/NDISC
 * packet towards fl6->daddr via @dev.  The dst is tracked on the
 * uncached list so device teardown can release it.  Returns the
 * (possibly xfrm-wrapped) dst, or an ERR_PTR() on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	/* NOTE(review): hoplimit metric 0 appears to mean "use the
	 * per-device/namespace default" — confirm against
	 * ip6_dst_hoplimit().
	 */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_ifdown() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1765 
/* dst_ops->gc hook, invoked from dst allocation when the dst entry
 * count exceeds gc_thresh.  Runs fib6 garbage collection, rate
 * limited by ip6_rt_gc_min_interval, and returns non-zero when the
 * table is still over ip6_rt_max_size (which makes the pending
 * allocation fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* ip6_rt_gc_expire adapts how aggressive successive GC passes
	 * are (it is the expiry argument to fib6_run_gc()); it is
	 * reset to half the gc timeout once we drop below gc_thresh.
	 */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponentially decay the adjustment between invocations */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1790 
/* Translate the RTA_METRICS netlink attribute in @cfg into an
 * mx6_config (metrics array plus validity bitmap) for fib insertion.
 *
 * Allocates mxc->mx on success; the caller frees it (see
 * ip6_route_add()).  Assumes mxc->mx_valid starts zeroed, as set up
 * by the caller's initializer.  Returns 0, -ENOMEM, or -EINVAL for a
 * malformed attribute.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			/* Congestion-control metric arrives as a name
			 * string; map it to the algorithm's key.
			 */
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		/* Metric slots are 1-based on the wire, 0-based here */
		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}
1845 
/* Resolve a new route's nexthop @gw_addr within the specific table
 * named in @cfg (instead of the full policy-rules lookup).  Returns a
 * held route, or NULL if the table does not exist or only the null
 * entry matched.
 */
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
					    struct fib6_config *cfg,
					    const struct in6_addr *gw_addr)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	struct rt6_info *rt;
	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;

	table = fib6_get_table(net, cfg->fc_table);
	if (!table)
		return NULL;

	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);

	/* if table lookup failed, fall back to full lookup */
	if (rt == net->ipv6.ip6_null_entry) {
		ip6_rt_put(rt);
		rt = NULL;
	}

	return rt;
}
1876 
/* Build (but do not insert) a struct rt6_info from a netlink/ioctl
 * route configuration.
 *
 * Validates @cfg, resolves the egress device and gateway, promotes
 * "true" loopback routes to reject routes, and wires up lwtunnel
 * state.  On success the returned route holds references on its
 * net_device and inet6_dev; on failure everything acquired so far is
 * dropped and an ERR_PTR() is returned.  Also normalizes @cfg
 * (fc_metric, fc_protocol, nl_net) as a side effect.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Without NLM_F_CREATE only an existing table should be used,
	 * but this is tolerated (with a warning) for compatibility.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination's address class */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* Interpose the tunnel handlers, keeping the originals
		 * reachable through the lwtstate.
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Each reject flavour gets its own error code and
		 * discard handlers.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: adopt the one the
				 * gateway resolves through.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* A gateway reachable only via another gateway
			 * (recursive nexthop) is rejected.
			 */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be configured on the device */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* rt takes over the dev/idev references held so far */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2166 
/* Create a route from @cfg and insert it into the fib, including any
 * netlink-supplied metrics.  Returns 0 or a negative errno; on an
 * error before insertion the half-built route is released here.
 */
int ip6_route_add(struct fib6_config *cfg,
		  struct netlink_ext_ack *extack)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg, extack);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	/* NOTE(review): rt is not released here even when insertion
	 * fails — __ip6_ins_rt() appears to consume the reference in
	 * both cases; confirm before changing this flow.
	 */
	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_release_immediate(&rt->dst);

	return err;
}
2196 
/* Remove @rt from its table under tb6_lock, notifying via @info.
 * Consumes the caller's reference on @rt.  The null entry cannot be
 * deleted (-ENOENT).
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2217 
2218 int ip6_del_rt(struct rt6_info *rt)
2219 {
2220 	struct nl_info info = {
2221 		.nl_net = dev_net(rt->dst.dev),
2222 	};
2223 	return __ip6_del_rt(rt, &info);
2224 }
2225 
/* Delete a multipath route: @rt plus, when fc_delete_all_nh is set,
 * all of its sibling nexthops, in one pass under the table lock.
 *
 * A single RTM_DELROUTE notification covering every hop is built
 * up-front; if that succeeds, per-hop notifications from fib6_del()
 * are suppressed via info->skip_notify and the combined skb is sent
 * after the lock is dropped.  The caller's reference on @rt is always
 * consumed.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				/* fall back to per-hop notifications */
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* send the combined notification outside the table lock */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
2277 
/* Delete the route described by @cfg from its FIB table.
 *
 * Locates the fib6_node for the dst/src prefixes and scans its leaf
 * chain for a route matching the optional ifindex, gateway, metric
 * and protocol filters.  On a match, a reference is taken and the
 * read lock dropped before handing off to __ip6_del_rt() (single hop
 * when a gateway was given) or __ip6_del_rt_siblings().
 *
 * Returns -ESRCH when no matching route exists.
 */
static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* cached clones are only removed when explicitly
			 * requested via RTF_CACHE in the config flags
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* hold the route; the delete helpers consume
			 * this reference after we drop the read lock
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2328 
/* Process an ICMPv6 Redirect (RFC 4861 section 8) for destination
 * cache entry @dst.
 *
 * Validates the redirect message (length, non-multicast destination,
 * link-local unicast target unless on-link, ND options), updates the
 * neighbour cache with the target's link-layer address, then installs
 * a cached clone of the route pointing at the new nexthop and fires a
 * NETEVENT_REDIRECT notification.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means "the destination is a neighbour":
	 * treat the redirect as on-link (no gateway)
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers must not accept redirects; hosts may opt out */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* the superseded cached entry is removed; the clone taken here
	 * is consumed by ip6_del_rt()
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
2451 
2452 /*
2453  *	Misc support functions
2454  */
2455 
/* Link @rt to its parent route @from: take a reference on @from's dst,
 * record it in rt->dst.from, and share @from's metrics read-only.
 * @from must not itself be derived from another route (BUG otherwise).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* expiry is now governed by the parent route */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2465 
/* Initialize @rt as a copy of @ort: duplicates the dst callbacks,
 * addresses, flags, metric, table and lwtunnel state, taking
 * references on the inet6_dev, the parent dst (via rt6_set_from())
 * and the lwtstate as needed.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	/* also shares ort's metrics and holds a ref on its dst */
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2487 
2488 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up a route previously installed from an RFC 4191 Route
 * Information Option: same prefix, same gateway, same device, with
 * both RTF_ROUTEINFO and RTF_GATEWAY set.
 *
 * Returns the route with a reference held, or NULL if not found.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* hand the caller a referenced route */
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2523 
2524 static struct rt6_info *rt6_add_route_info(struct net *net,
2525 					   const struct in6_addr *prefix, int prefixlen,
2526 					   const struct in6_addr *gwaddr,
2527 					   struct net_device *dev,
2528 					   unsigned int pref)
2529 {
2530 	struct fib6_config cfg = {
2531 		.fc_metric	= IP6_RT_PRIO_USER,
2532 		.fc_ifindex	= dev->ifindex,
2533 		.fc_dst_len	= prefixlen,
2534 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2535 				  RTF_UP | RTF_PREF(pref),
2536 		.fc_protocol = RTPROT_RA,
2537 		.fc_nlinfo.portid = 0,
2538 		.fc_nlinfo.nlh = NULL,
2539 		.fc_nlinfo.nl_net = net,
2540 	};
2541 
2542 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2543 	cfg.fc_dst = *prefix;
2544 	cfg.fc_gateway = *gwaddr;
2545 
2546 	/* We should treat it as a default route if prefix length is 0. */
2547 	if (!prefixlen)
2548 		cfg.fc_flags |= RTF_DEFAULT;
2549 
2550 	ip6_route_add(&cfg, NULL);
2551 
2552 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2553 }
2554 #endif
2555 
2556 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2557 {
2558 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2559 	struct rt6_info *rt;
2560 	struct fib6_table *table;
2561 
2562 	table = fib6_get_table(dev_net(dev), tb_id);
2563 	if (!table)
2564 		return NULL;
2565 
2566 	read_lock_bh(&table->tb6_lock);
2567 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2568 		if (dev == rt->dst.dev &&
2569 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2570 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2571 			break;
2572 	}
2573 	if (rt)
2574 		dst_hold(&rt->dst);
2575 	read_unlock_bh(&table->tb6_lock);
2576 	return rt;
2577 }
2578 
/* Install an RA-learned default route via gateway @gwaddr on @dev
 * with router preference @pref, and mark its table as holding a
 * default router.  Returns a held reference to the installed route
 * (NULL if the post-insert lookup fails).
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	if (!ip6_route_add(&cfg, NULL)) {
		struct fib6_table *table;

		/* flag the table so rt6_purge_dflt_routers() knows to
		 * scan it
		 */
		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}
2607 
/* Remove every RA-learned (default/addrconf) route from @table,
 * except on interfaces with accept_ra == 2 (accept RA even when
 * forwarding).  Deleting requires dropping the read lock, so the
 * scan restarts from the top after each removal.
 */
static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* hold the route; ip6_del_rt() consumes it */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
2627 
2628 void rt6_purge_dflt_routers(struct net *net)
2629 {
2630 	struct fib6_table *table;
2631 	struct hlist_head *head;
2632 	unsigned int h;
2633 
2634 	rcu_read_lock();
2635 
2636 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2637 		head = &net->ipv6.fib_table_hash[h];
2638 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2639 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2640 				__rt6_purge_dflt_routers(table);
2641 		}
2642 	}
2643 
2644 	rcu_read_unlock();
2645 }
2646 
2647 static void rtmsg_to_fib6_config(struct net *net,
2648 				 struct in6_rtmsg *rtmsg,
2649 				 struct fib6_config *cfg)
2650 {
2651 	memset(cfg, 0, sizeof(*cfg));
2652 
2653 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2654 			 : RT6_TABLE_MAIN;
2655 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2656 	cfg->fc_metric = rtmsg->rtmsg_metric;
2657 	cfg->fc_expires = rtmsg->rtmsg_info;
2658 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2659 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2660 	cfg->fc_flags = rtmsg->rtmsg_flags;
2661 
2662 	cfg->fc_nlinfo.nl_net = net;
2663 
2664 	cfg->fc_dst = rtmsg->rtmsg_dst;
2665 	cfg->fc_src = rtmsg->rtmsg_src;
2666 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2667 }
2668 
/* Legacy ioctl entry point for SIOCADDRT/SIOCDELRT.
 *
 * Requires CAP_NET_ADMIN in @net's user namespace.  Copies the
 * userspace in6_rtmsg, converts it to a fib6_config and performs the
 * add/delete under the RTNL lock.  Returns 0, -EPERM, -EFAULT, or
 * -EINVAL for unknown commands.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of bytes left
		 * uncopied; any non-zero result is a fault
		 */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2705 
2706 /*
2707  *	Drop the packet on the floor
2708  */
2709 
/* Common drop path for reject routes: bump the appropriate SNMP
 * counter, send an ICMPv6 Destination Unreachable with @code, and
 * free the skb.  An unspecified destination on input counts as an
 * address error rather than a no-route.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2732 
/* dst input handler for blackhole/unreachable routes */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2737 
2738 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2739 {
2740 	skb->dev = skb_dst(skb)->dev;
2741 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2742 }
2743 
/* dst input handler for prohibit routes */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2748 
2749 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2750 {
2751 	skb->dev = skb_dst(skb)->dev;
2752 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2753 }
2754 
2755 /*
2756  *	Allocate a dst for local (unicast / anycast) address.
2757  */
2758 
/* Allocate a host (/128) route for a local unicast or anycast
 * address.  The route delivers locally (ip6_input) and is placed in
 * the local table (or the l3mdev's table).  Takes a reference on
 * @idev; returns the new route or ERR_PTR(-ENOMEM).
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = idev->dev;
	struct rt6_info *rt;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_protocol = RTPROT_KERNEL;
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	return rt;
}
2794 
2795 /* remove deleted ip from prefsrc entries */
/* callback argument for fib6_remove_prefsrc() */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* namespace being cleaned */
	struct in6_addr *addr;	/* the preferred source address removed */
};
2801 
2802 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2803 {
2804 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2805 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2806 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2807 
2808 	if (((void *)rt->dst.dev == dev || !dev) &&
2809 	    rt != net->ipv6.ip6_null_entry &&
2810 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2811 		/* remove prefsrc entry */
2812 		rt->rt6i_prefsrc.plen = 0;
2813 	}
2814 	return 0;
2815 }
2816 
2817 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2818 {
2819 	struct net *net = dev_net(ifp->idev->dev);
2820 	struct arg_dev_net_ip adni = {
2821 		.dev = ifp->idev->dev,
2822 		.net = net,
2823 		.addr = &ifp->addr,
2824 	};
2825 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2826 }
2827 
2828 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2829 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2830 
2831 /* Remove routers and update dst entries when gateway turn into host. */
2832 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2833 {
2834 	struct in6_addr *gateway = (struct in6_addr *)arg;
2835 
2836 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2837 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2838 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2839 		return -1;
2840 	}
2841 	return 0;
2842 }
2843 
/* Walk all FIB tables in @net and clean out routes via @gateway,
 * which has ceased to be a router (see fib6_clean_tohost()).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2848 
/* callback argument for fib6_ifdown() */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;	/* namespace being cleaned */
};
2853 
2854 /* called with write lock held for table with rt */
/* called with write lock held for table with rt */
/* fib6_clean_all() callback for device-down: returns -1 to delete
 * @rt when it uses the dying device (or any device if adn->dev is
 * NULL).  Multipath members (rt6i_nsiblings != 0) are kept alive
 * when the device is merely down (not unregistering) and the
 * interface is configured to ignore routes on linkdown.
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry &&
	    (rt->rt6i_nsiblings == 0 ||
	     (dev && netdev_unregistering(dev)) ||
	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
		return -1;

	return 0;
}
2869 
2870 void rt6_ifdown(struct net *net, struct net_device *dev)
2871 {
2872 	struct arg_dev_net adn = {
2873 		.dev = dev,
2874 		.net = net,
2875 	};
2876 
2877 	fib6_clean_all(net, fib6_ifdown, &adn);
2878 	if (dev)
2879 		rt6_uncached_list_flush_dev(net, dev);
2880 }
2881 
/* callback argument for rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2886 
/* fib6_clean_all() callback: propagate a device MTU change to the
 * routes using that device.  Never deletes routes (always returns 0);
 * only adjusts the stored PMTU/RTAX_MTU metric.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2935 
2936 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2937 {
2938 	struct rt6_mtu_change_arg arg = {
2939 		.dev = dev,
2940 		.mtu = mtu,
2941 	};
2942 
2943 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2944 }
2945 
/* netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
};
2960 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 *
 * Validates attributes against rtm_ipv6_policy, maps the rtmsg header
 * fields and each RTA_* attribute onto fib6_config, and derives the
 * RTF_* flags (REJECT for unreachable-style types, LOCAL, CACHE,
 * GATEWAY, PREF, EXPIRES).  Returns 0 on success or a negative errno.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* reject-style route types all become RTF_REJECT; the specific
	 * behavior is kept in fc_type
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* only rtm_dst_len bits are meaningful; copy whole bytes */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		/* unknown preference values fall back to medium */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
3084 
/* one pending nexthop while building a multipath route */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route for this hop; NULL once inserted */
	struct fib6_config r_cfg;	/* per-hop config (for rollback/delete) */
	struct mx6_config mxc;		/* converted metrics for this hop */
	struct list_head next;		/* linkage on rt6_nh_list */
};
3091 
3092 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3093 {
3094 	struct rt6_nh *nh;
3095 
3096 	list_for_each_entry(nh, rt6_nh_list, next) {
3097 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3098 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3099 		        nh->r_cfg.fc_ifindex);
3100 	}
3101 }
3102 
/* Queue one nexthop route @rt (with its config @r_cfg) on
 * @rt6_nh_list for a pending multipath add.
 *
 * Returns -EEXIST if an equivalent nexthop is already queued,
 * -ENOMEM on allocation failure, the ip6_convert_metrics() error if
 * metric conversion fails, or 0 on success.  On success the list
 * entry takes over tracking of @rt; on failure the caller keeps
 * ownership of @rt.
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
3129 
3130 static void ip6_route_mpath_notify(struct rt6_info *rt,
3131 				   struct rt6_info *rt_last,
3132 				   struct nl_info *info,
3133 				   __u16 nlflags)
3134 {
3135 	/* if this is an APPEND route, then rt points to the first route
3136 	 * inserted and rt_last points to last route inserted. Userspace
3137 	 * wants a consistent dump of the route which starts at the first
3138 	 * nexthop. Since sibling routes are always added at the end of
3139 	 * the list, find the first sibling of the last route appended
3140 	 */
3141 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3142 		rt = list_first_entry(&rt_last->rt6i_siblings,
3143 				      struct rt6_info,
3144 				      rt6i_siblings);
3145 	}
3146 
3147 	if (rt)
3148 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3149 }
3150 
/* Add (or replace) a multipath route from an RTA_MULTIPATH attribute.
 *
 * Runs in three phases:
 *  1. parse each rtnexthop into its own rt6_info, queued on
 *     rt6_nh_list;
 *  2. insert the queued routes one by one, suppressing per-hop
 *     notifications so a single combined one can be sent at the end;
 *  3. on a mid-stream failure, notify what did get installed and
 *     delete it again so the operation is all-or-nothing.
 *
 * Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* per-hop config starts as a copy of the route-wide one */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->rt6_info;
		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->rt6_info;

		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	/* free never-inserted routes, their metrics, and the list nodes */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3281 
3282 static int ip6_route_multipath_del(struct fib6_config *cfg,
3283 				   struct netlink_ext_ack *extack)
3284 {
3285 	struct fib6_config r_cfg;
3286 	struct rtnexthop *rtnh;
3287 	int remaining;
3288 	int attrlen;
3289 	int err = 1, last_err = 0;
3290 
3291 	remaining = cfg->fc_mp_len;
3292 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3293 
3294 	/* Parse a Multipath Entry */
3295 	while (rtnh_ok(rtnh, remaining)) {
3296 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3297 		if (rtnh->rtnh_ifindex)
3298 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3299 
3300 		attrlen = rtnh_attrlen(rtnh);
3301 		if (attrlen > 0) {
3302 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3303 
3304 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3305 			if (nla) {
3306 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3307 				r_cfg.fc_flags |= RTF_GATEWAY;
3308 			}
3309 		}
3310 		err = ip6_route_del(&r_cfg, extack);
3311 		if (err)
3312 			last_err = err;
3313 
3314 		rtnh = rtnh_next(rtnh, &remaining);
3315 	}
3316 
3317 	return last_err;
3318 }
3319 
3320 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3321 			      struct netlink_ext_ack *extack)
3322 {
3323 	struct fib6_config cfg;
3324 	int err;
3325 
3326 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3327 	if (err < 0)
3328 		return err;
3329 
3330 	if (cfg.fc_mp)
3331 		return ip6_route_multipath_del(&cfg, extack);
3332 	else {
3333 		cfg.fc_delete_all_nh = 1;
3334 		return ip6_route_del(&cfg, extack);
3335 	}
3336 }
3337 
3338 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3339 			      struct netlink_ext_ack *extack)
3340 {
3341 	struct fib6_config cfg;
3342 	int err;
3343 
3344 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3345 	if (err < 0)
3346 		return err;
3347 
3348 	if (cfg.fc_mp)
3349 		return ip6_route_multipath_add(&cfg, extack);
3350 	else
3351 		return ip6_route_add(&cfg, extack);
3352 }
3353 
3354 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3355 {
3356 	int nexthop_len = 0;
3357 
3358 	if (rt->rt6i_nsiblings) {
3359 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3360 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3361 			    + nla_total_size(16) /* RTA_GATEWAY */
3362 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3363 
3364 		nexthop_len *= rt->rt6i_nsiblings;
3365 	}
3366 
3367 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3368 	       + nla_total_size(16) /* RTA_SRC */
3369 	       + nla_total_size(16) /* RTA_DST */
3370 	       + nla_total_size(16) /* RTA_GATEWAY */
3371 	       + nla_total_size(16) /* RTA_PREFSRC */
3372 	       + nla_total_size(4) /* RTA_TABLE */
3373 	       + nla_total_size(4) /* RTA_IIF */
3374 	       + nla_total_size(4) /* RTA_OIF */
3375 	       + nla_total_size(4) /* RTA_PRIORITY */
3376 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3377 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3378 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3379 	       + nla_total_size(1) /* RTA_PREF */
3380 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3381 	       + nexthop_len;
3382 }
3383 
/* Emit the nexthop attributes (gateway, oif, lwtunnel encap) of @rt into
 * @skb, and OR device/route state flags (RTNH_F_*) into *@flags so the
 * caller can store them in rtm_flags or rtnh_flags.
 * @skip_oif: suppress RTA_OIF; used for multipath nexthops where the
 * ifindex is already carried by the surrounding rtnexthop struct.
 * Returns 0 on success, -EMSGSIZE when @skb runs out of room.
 *
 * NOTE(review): rt->dst.dev is dereferenced unconditionally here but
 * NULL-checked further down — presumably always set on this path;
 * confirm before relying on it.
 */
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	/* flag nexthops whose device is down or has no carrier */
	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
		*flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
		*flags |= RTNH_F_OFFLOAD;

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3415 
/* Add one multipath next hop to @skb: reserve a bare rtnexthop header,
 * fill in the attributes via rt6_nexthop_info() (RTA_OIF suppressed —
 * the ifindex lives in the rtnexthop itself), then backpatch rtnh_len
 * once the final attribute position is known.
 * Returns 0 on success, -EMSGSIZE when @skb runs out of room.
 */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = 0;
	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;

	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3442 
/* Build one RTM_NEWROUTE/RTM_DELROUTE message for @rt into @skb.
 * @dst/@src: when non-NULL (route-get replies), report these as /128
 * addresses instead of the route's own prefix.
 * @iif: input interface index for route-get replies; 0 for dumps.
 * Returns 0 on success; -EMSGSIZE if @skb has no room, in which case
 * the partially built message is cancelled.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* map the reject reason (dst.error) onto the netlink route type */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* explicit @dst (route-get) overrides the route's own prefix */
	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/* 0: mroute filled the reply itself; <0: error;
			 * >0: fall through to the regular attributes
			 */
			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* a cached path MTU (rt6i_pmtu) overrides the metric's MTU slot */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3596 
3597 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3598 {
3599 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3600 	struct net *net = arg->net;
3601 
3602 	if (rt == net->ipv6.ip6_null_entry)
3603 		return 0;
3604 
3605 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3606 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3607 
3608 		/* user wants prefix routes only */
3609 		if (rtm->rtm_flags & RTM_F_PREFIX &&
3610 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3611 			/* success since this is not a prefix route */
3612 			return 1;
3613 		}
3614 	}
3615 
3616 	return rt6_fill_node(net,
3617 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3618 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3619 		     NLM_F_MULTI);
3620 }
3621 
/* RTM_GETROUTE doit handler: resolve a route for the request's
 * addresses/interface and unicast an RTM_NEWROUTE reply to the caller.
 * With RTM_F_FIB_MATCH set, look up and report the matching FIB entry
 * itself (ip6_route_lookup) rather than the fully resolved dst.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* input-path lookup: resolve as if a packet arrived on
		 * the given interface
		 */
		struct net_device *dev;
		int flags = 0;

		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		if (!fibmatch)
			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
		else
			dst = ip6_route_lookup(net, &fl6, 0);

		rcu_read_unlock();
	} else {
		/* output-path lookup */
		fl6.flowi6_oif = oif;

		if (!fibmatch)
			dst = ip6_route_output(net, NULL, &fl6);
		else
			dst = ip6_route_lookup(net, &fl6, 0);
	}


	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	/* NOTE(review): the null entry is reported via its own dst.error
	 * here — presumably a "no route" errno from the template; confirm
	 * against ip6_null_entry_template.
	 */
	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* the reply skb owns the route reference from here on; freeing
	 * the skb drops it
	 */
	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3747 
/* Broadcast a route change (@event: RTM_NEWROUTE/RTM_DELROUTE) for @rt
 * to RTNLGRP_IPV6_ROUTE listeners.  On failure the error is recorded on
 * the group via rtnl_set_sk_err() so listeners see the loss.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* gfp_any(): pick an allocation mode suited to the current context */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3778 
/* Netdevice notifier: only acts on the loopback device.  On REGISTER,
 * attach loopback as the device of the per-netns special routes
 * (null/prohibit/blackhole); on UNREGISTER, drop those idev references.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	 } else if (event == NETDEV_UNREGISTER &&
		    dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
3811 
3812 /*
3813  *	/proc
3814  */
3815 
3816 #ifdef CONFIG_PROC_FS
3817 
/* seq_file hooks for the per-netns /proc/net/ipv6_route entry; the open
 * handler is defined elsewhere in this file (not visible in this chunk).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3825 
/* /proc/net/rt6_stats: one line of hex counters in this order:
 * fib nodes, route nodes, rt allocs, rt entries, rt cache entries,
 * current dst entry count, discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3840 
/* Bind rt6_stats_seq_show() to a netns-aware single_open seq_file */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3845 
/* file_operations for the per-netns /proc/net/rt6_stats entry */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3853 #endif	/* CONFIG_PROC_FS */
3854 
3855 #ifdef CONFIG_SYSCTL
3856 
3857 static
3858 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3859 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3860 {
3861 	struct net *net;
3862 	int delay;
3863 	if (!write)
3864 		return -EINVAL;
3865 
3866 	net = (struct net *)ctl->extra1;
3867 	delay = net->ipv6.sysctl.flush_delay;
3868 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3869 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3870 	return 0;
3871 }
3872 
/* Template for the per-netns /proc/sys/net/ipv6/route/ table.
 * ipv6_route_sysctl_init() kmemdup()s this and repoints each entry's
 * ->data at the new netns, indexing by position — keep the entry order
 * here in sync with that function.  "flush" is write-only (mode 0200).
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3946 
/* Clone the sysctl template for a new netns and rewrite each entry's
 * ->data to point at that netns's own fields.  The numeric indices must
 * match the order of ipv6_route_table_template[] above.
 * Returns the table (caller frees) or NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* the flush handler derives its netns from ->extra1 */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3975 #endif
3976 
/* Per-netns setup: clone the dst_ops template, allocate the special
 * route entries (null, and with multiple tables also prohibit/blackhole)
 * and seed the route-GC sysctl defaults.  Error labels unwind strictly
 * in reverse order of setup.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* each special entry's dst.path points back at the entry itself */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* GC tunable defaults; exposed via ipv6_route_table_template */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
4049 
/* Per-netns teardown: free the special route entries and the dst entry
 * counter.  Counterpart of ip6_route_net_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
4059 
4060 static int __net_init ip6_route_net_init_late(struct net *net)
4061 {
4062 #ifdef CONFIG_PROC_FS
4063 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4064 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4065 #endif
4066 	return 0;
4067 }
4068 
/* Per-netns late teardown: remove the /proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
4076 
/* Core per-netns route state (registered early in ip6_route_init()) */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4081 
4082 static int __net_init ipv6_inetpeer_init(struct net *net)
4083 {
4084 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4085 
4086 	if (!bp)
4087 		return -ENOMEM;
4088 	inet_peer_base_init(bp);
4089 	net->ipv6.peers = bp;
4090 	return 0;
4091 }
4092 
4093 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4094 {
4095 	struct inet_peer_base *bp = net->ipv6.peers;
4096 
4097 	net->ipv6.peers = NULL;
4098 	inetpeer_invalidate_tree(bp);
4099 	kfree(bp);
4100 }
4101 
/* Per-netns inetpeer base lifecycle */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
4106 
/* Late per-netns state (/proc entries), registered after fib6 rules */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
4111 
/* Loopback-device notifier; priority is set relative to addrconf's
 * (ADDRCONF_NOTIFY_PRIORITY - 10) to order the two callbacks.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
4116 
/* Attach init_net's special route entries to its loopback device and
 * take the idev references that the NETDEV_REGISTER notifier would
 * normally take — loopback registers before the notifier exists.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
4131 
/* Subsystem init: create the rt6_info kmem cache, register the pernet
 * subsystems, fib6, xfrm6, fib6 rules, the rtnetlink route handlers and
 * the device notifier, then set up the per-cpu uncached-route lists.
 * The error labels unwind strictly in reverse order of registration.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the same cache as regular ones */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
			    RTNL_FLAG_DOIT_UNLOCKED))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
4213 
/* Subsystem teardown: undo ip6_route_init() in reverse order */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
4226