xref: /linux/net/ipv6/route.c (revision 26b433d0da062d6e19d75350c0171d3cf8ff560d)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <linux/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/* Neighbour-reachability verdicts used when scoring candidate routes.
 * Negative values are failures of increasing severity; consumed by
 * rt6_score_route() / find_match().
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route must not be used */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; do round-robin */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
79 
/* Forward declarations for the dst_ops callbacks and local helpers
 * defined later in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif

/* Per-cpu list of RTF_CACHE clones that are not owned by the fib6 tree;
 * entries are re-pointed at the loopback device when their device is
 * unregistered (see rt6_uncached_list_flush_dev()).
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126 
/* Link an uncached route onto this cpu's uncached list so it can be
 * found by rt6_uncached_list_flush_dev() on device removal.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	/* remember the owning list: the route may be freed on a different
	 * cpu than the one that added it
	 */
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
137 
/* Unlink an uncached route from its owning per-cpu list, if it was ever
 * added (rt6i_uncached stays empty for tree-owned routes).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		/* use the list recorded at add time, not this cpu's list */
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
148 
/* On device unregister, repoint every uncached clone that references
 * @dev at the netns loopback device so the clone can outlive @dev.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	/* loopback itself only goes away with the netns; nothing to move */
	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* NOTE(review): assumes rt_idev is non-NULL for
			 * every uncached entry -- TODO confirm
			 */
			if (rt_idev->dev == dev) {
				/* swap the idev reference for a loopback one */
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				/* swap the device reference likewise */
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
180 
/* A per-cpu clone shares metrics with its parent route (dst.from);
 * return the parent's writable metrics area.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
185 
186 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187 {
188 	struct rt6_info *rt = (struct rt6_info *)dst;
189 
190 	if (rt->rt6i_flags & RTF_PCPU)
191 		return rt6_pcpu_cow_metrics(rt);
192 	else if (rt->rt6i_flags & RTF_CACHE)
193 		return NULL;
194 	else
195 		return dst_cow_metrics_generic(dst, old);
196 }
197 
198 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 					     struct sk_buff *skb,
200 					     const void *daddr)
201 {
202 	struct in6_addr *p = &rt->rt6i_gateway;
203 
204 	if (!ipv6_addr_any(p))
205 		return (const void *) p;
206 	else if (skb)
207 		return &ipv6_hdr(skb)->daddr;
208 	return daddr;
209 }
210 
/* dst_ops->neigh_lookup hook: find (or create) the neighbour entry for
 * the next hop of this route.
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	/* no existing entry: allocate one in the ndisc table */
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
224 
/* dst_ops->confirm_neigh hook: mark the next hop's neighbour entry as
 * recently confirmed, skipping cases that never have neighbour state.
 */
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	/* no neighbour discovery on NOARP or loopback devices */
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	/* multicast destinations have no neighbour entries */
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}
239 
/* dst_ops used for regular IPv6 routes; copied per-netns into
 * net->ipv6.ip6_dst_ops.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
258 
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260 {
261 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262 
263 	return mtu ? : dst->dev->mtu;
264 }
265 
/* Blackhole dsts deliberately ignore PMTU updates ... */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

/* ... and redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

/* dst_ops for blackhole dsts (used e.g. by xfrm bundles); no gc and
 * no-op pmtu/redirect handling.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
287 
/* Template metrics for the special route entries below. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

/* Template for the per-netns "null" route: rejects everything with
 * -ENETUNREACH.  Returned when no route matches a lookup.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
306 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the "prohibit" route used by policy routing: rejects
 * traffic with -EACCES (ICMPv6 adm. prohibited).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" route: silently discards traffic. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
340 
/* Zero the rt6-specific tail of a freshly allocated rt6_info (dst_alloc()
 * already initialised the embedded dst_entry) and reset its list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* relies on dst being the first member of rt6_info: "dst + 1" is
	 * the first byte after the embedded dst_entry
	 */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
349 
/* Allocate a bare rt6_info with this netns' ip6_dst_ops; the caller is
 * responsible for the rt6i_pcpu area (see ip6_dst_alloc()).
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
363 
/* Allocate a rt6_info including its per-cpu clone pointer array.
 * Returns NULL if either allocation fails; on pcpu-allocation failure
 * the partially built dst is released immediately.
 */
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
391 
/* dst_ops->destroy hook: release everything a rt6_info owns -- metrics,
 * the pcpu clone array, uncached-list membership, the idev reference and
 * the reference on the parent route (dst.from).
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear before release so nobody sees a dangling from pointer */
	dst->from = NULL;
	dst_release(from);
}
411 
/* dst_ops->ifdown hook: when @dev goes down, move the route's idev
 * reference over to the netns loopback device so the dst stays usable.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
428 
429 static bool __rt6_check_expired(const struct rt6_info *rt)
430 {
431 	if (rt->rt6i_flags & RTF_EXPIRES)
432 		return time_after(jiffies, rt->dst.expires);
433 	else
434 		return false;
435 }
436 
437 static bool rt6_check_expired(const struct rt6_info *rt)
438 {
439 	if (rt->rt6i_flags & RTF_EXPIRES) {
440 		if (time_after(jiffies, rt->dst.expires))
441 			return true;
442 	} else if (rt->dst.from) {
443 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
444 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
445 	}
446 	return false;
447 }
448 
449 /* Multipath route selection:
450  *   Hash based function using packet header and flowlabel.
451  * Adapted from fib_info_hashfn()
452  */
453 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
454 			       const struct flowi6 *fl6)
455 {
456 	return get_hash_from_flowi6(fl6) % candidate_count;
457 }
458 
/* Pick one nexthop among @match and its ECMP siblings, based on the
 * flow hash.  Falls back to @match itself when the chosen sibling does
 * not score as usable.
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	/* +1: the sibling list does not include @match itself */
	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				/* keep @match if the chosen sibling fails
				 * the device/reachability score
				 */
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
483 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the leaf chain starting at @rt and pick the entry that best
 * matches the requested output interface (@oif) or source address
 * (@saddr).  Returns the null entry when a strict interface match
 * (RT6_LOOKUP_F_IFACE) is required but none exists.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to filter on: first entry wins */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* remember a loopback route as fallback,
				 * preferring one whose idev matches oif
				 */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			/* no oif: match on the source address instead */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
534 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request; holds a device reference
 * until the work item runs.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/* Workqueue handler: send a neighbour solicitation to the probe target
 * and drop the resources taken in rt6_probe().
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

/* Schedule a (rate-limited) reachability probe for the gateway of a
 * router route whose neighbour state is not currently valid.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* recheck under the neigh lock and rate-limit using the
		 * per-interface rtr_probe_interval
		 */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet: always worth probing */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* reference dropped in rt6_probe_deferred() */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Without router preferences, router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
605 
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
/* Score how well rt's device matches @oif: 2 = exact match (or no oif
 * requested), 1 = loopback route whose idev matches oif, 0 = mismatch.
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}
619 
/* Classify the reachability of rt's gateway neighbour; routes without a
 * gateway (or RTF_NONEXTHOP) always succeed.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preferences, anything short of NUD_FAILED
		 * still counts as usable
		 */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* no entry: optimistic with router prefs, otherwise ask
		 * the caller to round-robin
		 */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
650 
/* Combined route score: device match (low bits), router preference
 * (bits 2+) and, when RT6_LOOKUP_F_REACHABLE is requested, neighbour
 * reachability.  Negative return values are rt6_nud_state failures.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* router preference outweighs the device score */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
669 
/* Compare @rt against the best candidate so far (@match / @*mpri) and
 * return the better of the two.  Sets *do_rr when round-robin selection
 * should rotate past the current rr_ptr.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	/* optionally skip routes whose device lost carrier */
	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
707 
/* Scan the leaf chain of @fn, starting at @rr_head and wrapping around,
 * for the best route with the given @metric.  Routes with a different
 * metric (collected in @cont) are only considered when nothing at the
 * primary metric matched.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from rr_head to end of chain */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: wrap around from the chain head up to rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fallback: consider the higher-metric remainder of the chain */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
744 
/* Select the route to use from fib6 node @fn, honouring the node's
 * round-robin pointer, and advance that pointer when find_rr_leaf()
 * asked for rotation.  Returns the null entry when nothing matched.
 * Called with tb6_lock held.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
772 
773 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
774 {
775 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
776 }
777 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (RFC 4191) received in a Router
 * Advertisement on @dev from router @gwaddr: add, refresh or delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 on success or -EINVAL
 * for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need the full option body */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix means "default route via this router" */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
853 
/* Walk back up the fib6 tree from @fn looking for the next node that
 * carries route info, descending into source-routed subtrees where they
 * exist.  Returns NULL once the tree root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		/* re-enter the parent's subtree unless we just came out of it */
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
870 
/* Simple (non-caching) policy-routing lookup in @table: find the node,
 * filter by device/source, apply multipath selection, and backtrack on
 * a null-entry miss.  Takes a reference on the returned route.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* only hash across siblings when no output device was requested */
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	/* takes the reference returned to the caller */
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
898 
/* Public wrapper: run a policy-rule lookup using the simple
 * (non-caching) ip6_pol_route_lookup resolver.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
905 
906 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
907 			    const struct in6_addr *saddr, int oif, int strict)
908 {
909 	struct flowi6 fl6 = {
910 		.flowi6_oif = oif,
911 		.daddr = *daddr,
912 	};
913 	struct dst_entry *dst;
914 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
915 
916 	if (saddr) {
917 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
918 		flags |= RT6_LOOKUP_F_HAS_SADDR;
919 	}
920 
921 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
922 	if (dst->error == 0)
923 		return (struct rt6_info *) dst;
924 
925 	dst_release(dst);
926 
927 	return NULL;
928 }
929 EXPORT_SYMBOL(rt6_lookup);
930 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes new route entry, the addition fails by any reason the
 * route is released.
 * Caller must hold dst before calling it.
 */

/* Insert @rt into its fib6 table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
951 
/* Insert @rt with default netlink info and no extra metrics; takes the
 * reference that the fib6 tree will own.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
961 
/* Create an RTF_CACHE clone of @ort for the exact destination @daddr
 * (and, with subtrees, source @saddr).  Returns NULL on allocation
 * failure; the new dst carries its own reference.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	/* always clone from the tree-owned parent, not from a clone */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	/* pin the clone to the exact destination */
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
1001 
/* Allocate an RTF_PCPU clone of @rt (one cached copy per cpu); returns
 * NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
1016 
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Return this cpu's cached clone of @rt (with a fresh reference), or
 * NULL when none has been created yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1031 
/* Create (or adopt a concurrently created) per-cpu clone of @rt and
 * install it in rt->rt6i_pcpu.  Falls back to the null entry on
 * allocation failure, or to @rt itself when the route has already been
 * removed from the tree.  Always returns a held dst.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		/* cmpxchg resolves the race with a concurrent creator */
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_release_immediate(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_release_immediate(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1069 
/* Full policy-routing lookup in @table: select the best route, retrying
 * without the reachability requirement when nothing reachable exists,
 * then hand back either the route itself (null/RTF_CACHE), a new
 * uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH without a gateway), or
 * a per-cpu clone.  Always returns a held dst.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* hosts (non-forwarding) prefer reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* null entry or an existing cache clone: use it directly */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1172 
1173 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1174 					    struct flowi6 *fl6, int flags)
1175 {
1176 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1177 }
1178 
1179 struct dst_entry *ip6_route_input_lookup(struct net *net,
1180 					 struct net_device *dev,
1181 					 struct flowi6 *fl6, int flags)
1182 {
1183 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1184 		flags |= RT6_LOOKUP_F_IFACE;
1185 
1186 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1187 }
1188 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1189 
1190 void ip6_route_input(struct sk_buff *skb)
1191 {
1192 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1193 	struct net *net = dev_net(skb->dev);
1194 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1195 	struct ip_tunnel_info *tun_info;
1196 	struct flowi6 fl6 = {
1197 		.flowi6_iif = skb->dev->ifindex,
1198 		.daddr = iph->daddr,
1199 		.saddr = iph->saddr,
1200 		.flowlabel = ip6_flowinfo(iph),
1201 		.flowi6_mark = skb->mark,
1202 		.flowi6_proto = iph->nexthdr,
1203 	};
1204 
1205 	tun_info = skb_tunnel_info(skb);
1206 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1207 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1208 	skb_dst_drop(skb);
1209 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1210 }
1211 
1212 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1213 					     struct flowi6 *fl6, int flags)
1214 {
1215 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1216 }
1217 
1218 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1219 					 struct flowi6 *fl6, int flags)
1220 {
1221 	bool any_src;
1222 
1223 	if (rt6_need_strict(&fl6->daddr)) {
1224 		struct dst_entry *dst;
1225 
1226 		dst = l3mdev_link_scope_lookup(net, fl6);
1227 		if (dst)
1228 			return dst;
1229 	}
1230 
1231 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1232 
1233 	any_src = ipv6_addr_any(&fl6->saddr);
1234 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1235 	    (fl6->flowi6_oif && any_src))
1236 		flags |= RT6_LOOKUP_F_IFACE;
1237 
1238 	if (!any_src)
1239 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1240 	else if (sk)
1241 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1242 
1243 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1244 }
1245 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1246 
1247 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1248 {
1249 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1250 	struct net_device *loopback_dev = net->loopback_dev;
1251 	struct dst_entry *new = NULL;
1252 
1253 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1254 		       DST_OBSOLETE_NONE, 0);
1255 	if (rt) {
1256 		rt6_info_init(rt);
1257 
1258 		new = &rt->dst;
1259 		new->__use = 1;
1260 		new->input = dst_discard;
1261 		new->output = dst_discard_out;
1262 
1263 		dst_copy_metrics(new, &ort->dst);
1264 
1265 		rt->rt6i_idev = in6_dev_get(loopback_dev);
1266 		rt->rt6i_gateway = ort->rt6i_gateway;
1267 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1268 		rt->rt6i_metric = 0;
1269 
1270 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1271 #ifdef CONFIG_IPV6_SUBTREES
1272 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1273 #endif
1274 	}
1275 
1276 	dst_release(dst_orig);
1277 	return new ? new : ERR_PTR(-ENOMEM);
1278 }
1279 
1280 /*
1281  *	Destination cache support functions
1282  */
1283 
1284 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1285 {
1286 	if (rt->dst.from &&
1287 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1288 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1289 }
1290 
1291 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1292 {
1293 	u32 rt_cookie = 0;
1294 
1295 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1296 		return NULL;
1297 
1298 	if (rt6_check_expired(rt))
1299 		return NULL;
1300 
1301 	return &rt->dst;
1302 }
1303 
1304 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1305 {
1306 	if (!__rt6_check_expired(rt) &&
1307 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1308 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1309 		return &rt->dst;
1310 	else
1311 		return NULL;
1312 }
1313 
1314 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1315 {
1316 	struct rt6_info *rt;
1317 
1318 	rt = (struct rt6_info *) dst;
1319 
1320 	/* All IPV6 dsts are created with ->obsolete set to the value
1321 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1322 	 * into this function always.
1323 	 */
1324 
1325 	rt6_dst_from_metrics_check(rt);
1326 
1327 	if (rt->rt6i_flags & RTF_PCPU ||
1328 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1329 		return rt6_dst_from_check(rt, cookie);
1330 	else
1331 		return rt6_check(rt, cookie);
1332 }
1333 
1334 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1335 {
1336 	struct rt6_info *rt = (struct rt6_info *) dst;
1337 
1338 	if (rt) {
1339 		if (rt->rt6i_flags & RTF_CACHE) {
1340 			if (rt6_check_expired(rt)) {
1341 				ip6_del_rt(rt);
1342 				dst = NULL;
1343 			}
1344 		} else {
1345 			dst_release(dst);
1346 			dst = NULL;
1347 		}
1348 	}
1349 	return dst;
1350 }
1351 
/* dst->ops->link_failure: the neighbour layer reports the nexthop dead.
 * Send an address-unreachable ICMP back to the sender and make sure the
 * failing route is not reused.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Cached clone: remove it from the tree.
			 * dst_hold_safe() guards against racing with a free.
			 */
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			/* Default route: poison the node's serial number so
			 * subsequent cookie validation fails and users
			 * re-resolve instead of reusing this dst.
			 */
			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
1374 
1375 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1376 {
1377 	struct net *net = dev_net(rt->dst.dev);
1378 
1379 	rt->rt6i_flags |= RTF_MODIFIED;
1380 	rt->rt6i_pmtu = mtu;
1381 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1382 }
1383 
1384 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1385 {
1386 	return !(rt->rt6i_flags & RTF_CACHE) &&
1387 		(rt->rt6i_flags & RTF_PCPU ||
1388 		 rcu_access_pointer(rt->rt6i_node));
1389 }
1390 
/* Apply a path-MTU update to @dst.  The addresses used to key a PMTU
 * exception come from the offending packet's header (@iph) if given,
 * else from the socket, else none.  When a per-destination exception is
 * needed (rt not directly cacheable), an RTF_CACHE clone carrying the
 * new MTU is created and inserted into the tree.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	/* Local routes and locked-MTU routes never take PMTU updates. */
	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	dst_confirm_neigh(dst, daddr);
	/* Never shrink below the IPv6 minimum MTU, and only accept
	 * reductions of the current path MTU.
	 */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
			/* Release the reference taken in
			 * ip6_rt_cache_alloc()
			 */
			dst_release(&nrt6->dst);
		}
	}
}
1440 
1441 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1442 			       struct sk_buff *skb, u32 mtu)
1443 {
1444 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1445 }
1446 
1447 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1448 		     int oif, u32 mark, kuid_t uid)
1449 {
1450 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1451 	struct dst_entry *dst;
1452 	struct flowi6 fl6;
1453 
1454 	memset(&fl6, 0, sizeof(fl6));
1455 	fl6.flowi6_oif = oif;
1456 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1457 	fl6.daddr = iph->daddr;
1458 	fl6.saddr = iph->saddr;
1459 	fl6.flowlabel = ip6_flowinfo(iph);
1460 	fl6.flowi6_uid = uid;
1461 
1462 	dst = ip6_route_output(net, NULL, &fl6);
1463 	if (!dst->error)
1464 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1465 	dst_release(dst);
1466 }
1467 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1468 
/* Socket-aware PMTU update: apply the MTU to the route, then, if the
 * socket's cached dst no longer validates against its stored cookie,
 * refresh the connected datagram socket's dst under the socket lock.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* Done unless the cached dst exists, is marked obsolete, and
	 * fails its ->check() against the socket's dst cookie.
	 */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1487 
/* Handle redirects: a flowi6 extended with the redirecting router's
 * address, so __ip6_route_redirect() can match routes against the
 * gateway that sent the redirect.  Must start with the flowi6 so it can
 * be passed where a plain flowi6 is expected and cast back.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1493 
/* fib6_rule_lookup() callback for redirect processing: find the route
 * currently used for the destination and verify the redirect came from
 * its gateway.  @fl6 is really an ip6rd_flowi carrying the advertising
 * router's address.  Returns a held rt6_info (possibly the null entry).
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/* Accept only a live gateway route out the interface the
		 * redirect arrived on, whose gateway is the sender.
		 */
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* Nothing matched at this node: backtrack to a shorter prefix. */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1551 
1552 static struct dst_entry *ip6_route_redirect(struct net *net,
1553 					const struct flowi6 *fl6,
1554 					const struct in6_addr *gateway)
1555 {
1556 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1557 	struct ip6rd_flowi rdfl;
1558 
1559 	rdfl.fl6 = *fl6;
1560 	rdfl.gateway = *gateway;
1561 
1562 	return fib6_rule_lookup(net, &rdfl.fl6,
1563 				flags, __ip6_route_redirect);
1564 }
1565 
1566 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1567 		  kuid_t uid)
1568 {
1569 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1570 	struct dst_entry *dst;
1571 	struct flowi6 fl6;
1572 
1573 	memset(&fl6, 0, sizeof(fl6));
1574 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1575 	fl6.flowi6_oif = oif;
1576 	fl6.flowi6_mark = mark;
1577 	fl6.daddr = iph->daddr;
1578 	fl6.saddr = iph->saddr;
1579 	fl6.flowlabel = ip6_flowinfo(iph);
1580 	fl6.flowi6_uid = uid;
1581 
1582 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1583 	rt6_do_redirect(dst, NULL, skb);
1584 	dst_release(dst);
1585 }
1586 EXPORT_SYMBOL_GPL(ip6_redirect);
1587 
1588 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1589 			    u32 mark)
1590 {
1591 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1592 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1593 	struct dst_entry *dst;
1594 	struct flowi6 fl6;
1595 
1596 	memset(&fl6, 0, sizeof(fl6));
1597 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1598 	fl6.flowi6_oif = oif;
1599 	fl6.flowi6_mark = mark;
1600 	fl6.daddr = msg->dest;
1601 	fl6.saddr = iph->daddr;
1602 	fl6.flowi6_uid = sock_net_uid(net, NULL);
1603 
1604 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1605 	rt6_do_redirect(dst, NULL, skb);
1606 	dst_release(dst);
1607 }
1608 
1609 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1610 {
1611 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1612 		     sk->sk_uid);
1613 }
1614 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1615 
1616 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1617 {
1618 	struct net_device *dev = dst->dev;
1619 	unsigned int mtu = dst_mtu(dst);
1620 	struct net *net = dev_net(dev);
1621 
1622 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1623 
1624 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1625 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1626 
1627 	/*
1628 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1629 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1630 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1631 	 * rely only on pmtu discovery"
1632 	 */
1633 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1634 		mtu = IPV6_MAXPLEN;
1635 	return mtu;
1636 }
1637 
1638 static unsigned int ip6_mtu(const struct dst_entry *dst)
1639 {
1640 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1641 	unsigned int mtu = rt->rt6i_pmtu;
1642 	struct inet6_dev *idev;
1643 
1644 	if (mtu)
1645 		goto out;
1646 
1647 	mtu = dst_metric_raw(dst, RTAX_MTU);
1648 	if (mtu)
1649 		goto out;
1650 
1651 	mtu = IPV6_MIN_MTU;
1652 
1653 	rcu_read_lock();
1654 	idev = __in6_dev_get(dst->dev);
1655 	if (idev)
1656 		mtu = idev->cnf.mtu6;
1657 	rcu_read_unlock();
1658 
1659 out:
1660 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1661 
1662 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1663 }
1664 
/* Allocate a throw-away host route for sending an ICMPv6/ND packet to
 * fl6->daddr out @dev.  The route is never inserted into the fib6 tree;
 * it goes on the uncached list so device teardown can release it.
 * Returns the (possibly xfrm-wrapped) dst or an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		/* Drop the idev reference taken above on failure. */
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	/* rt6i_idev takes over the idev reference acquired above. */
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_ifdown() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1701 
/* dst_ops->gc: garbage-collect IPv6 dst entries.  Skips work when the
 * last GC ran recently and the entry count is under rt_max_size;
 * otherwise runs fib6_run_gc() with an adaptively growing/decaying
 * expire parameter.  Returns nonzero when the table is still over
 * rt_max_size (i.e. allocation pressure remains).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* Each pass GC gets more aggressive (larger expire argument). */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponentially decay the aggressiveness between invocations. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1726 
1727 static int ip6_convert_metrics(struct mx6_config *mxc,
1728 			       const struct fib6_config *cfg)
1729 {
1730 	bool ecn_ca = false;
1731 	struct nlattr *nla;
1732 	int remaining;
1733 	u32 *mp;
1734 
1735 	if (!cfg->fc_mx)
1736 		return 0;
1737 
1738 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1739 	if (unlikely(!mp))
1740 		return -ENOMEM;
1741 
1742 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1743 		int type = nla_type(nla);
1744 		u32 val;
1745 
1746 		if (!type)
1747 			continue;
1748 		if (unlikely(type > RTAX_MAX))
1749 			goto err;
1750 
1751 		if (type == RTAX_CC_ALGO) {
1752 			char tmp[TCP_CA_NAME_MAX];
1753 
1754 			nla_strlcpy(tmp, nla, sizeof(tmp));
1755 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1756 			if (val == TCP_CA_UNSPEC)
1757 				goto err;
1758 		} else {
1759 			val = nla_get_u32(nla);
1760 		}
1761 		if (type == RTAX_HOPLIMIT && val > 255)
1762 			val = 255;
1763 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1764 			goto err;
1765 
1766 		mp[type - 1] = val;
1767 		__set_bit(type - 1, mxc->mx_valid);
1768 	}
1769 
1770 	if (ecn_ca) {
1771 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1772 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1773 	}
1774 
1775 	mxc->mx = mp;
1776 	return 0;
1777  err:
1778 	kfree(mp);
1779 	return -EINVAL;
1780 }
1781 
1782 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1783 					    struct fib6_config *cfg,
1784 					    const struct in6_addr *gw_addr)
1785 {
1786 	struct flowi6 fl6 = {
1787 		.flowi6_oif = cfg->fc_ifindex,
1788 		.daddr = *gw_addr,
1789 		.saddr = cfg->fc_prefsrc,
1790 	};
1791 	struct fib6_table *table;
1792 	struct rt6_info *rt;
1793 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1794 
1795 	table = fib6_get_table(net, cfg->fc_table);
1796 	if (!table)
1797 		return NULL;
1798 
1799 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1800 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1801 
1802 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1803 
1804 	/* if table lookup failed, fall back to full lookup */
1805 	if (rt == net->ipv6.ip6_null_entry) {
1806 		ip6_rt_put(rt);
1807 		rt = NULL;
1808 	}
1809 
1810 	return rt;
1811 }
1812 
/* Build (but do not insert) a new rt6_info from a netlink/ioctl route
 * config.  Validates prefixes, resolves the egress device and gateway,
 * and promotes loopback/reject configs to reject routes.  On success
 * the returned route holds references on its device and inet6_dev;
 * on failure all acquired references are dropped and an ERR_PTR with a
 * message in @extack is returned.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		/* Takes references on dev and idev; released at out: on
		 * error, otherwise owned by the returned route.
		 */
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination's address class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		/* Lightweight tunnel encapsulation: wrap input/output. */
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Error code and handlers chosen by the reject subtype. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			/* Resolve the gateway through the configured table
			 * first, falling back to a full lookup.
			 */
			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Adopt the device the gateway resolves to. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* The route now owns the dev/idev references taken above. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2102 
2103 int ip6_route_add(struct fib6_config *cfg,
2104 		  struct netlink_ext_ack *extack)
2105 {
2106 	struct mx6_config mxc = { .mx = NULL, };
2107 	struct rt6_info *rt;
2108 	int err;
2109 
2110 	rt = ip6_route_info_create(cfg, extack);
2111 	if (IS_ERR(rt)) {
2112 		err = PTR_ERR(rt);
2113 		rt = NULL;
2114 		goto out;
2115 	}
2116 
2117 	err = ip6_convert_metrics(&mxc, cfg);
2118 	if (err)
2119 		goto out;
2120 
2121 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2122 
2123 	kfree(mxc.mx);
2124 
2125 	return err;
2126 out:
2127 	if (rt)
2128 		dst_release_immediate(&rt->dst);
2129 
2130 	return err;
2131 }
2132 
2133 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2134 {
2135 	int err;
2136 	struct fib6_table *table;
2137 	struct net *net = dev_net(rt->dst.dev);
2138 
2139 	if (rt == net->ipv6.ip6_null_entry) {
2140 		err = -ENOENT;
2141 		goto out;
2142 	}
2143 
2144 	table = rt->rt6i_table;
2145 	write_lock_bh(&table->tb6_lock);
2146 	err = fib6_del(rt, info);
2147 	write_unlock_bh(&table->tb6_lock);
2148 
2149 out:
2150 	ip6_rt_put(rt);
2151 	return err;
2152 }
2153 
2154 int ip6_del_rt(struct rt6_info *rt)
2155 {
2156 	struct nl_info info = {
2157 		.nl_net = dev_net(rt->dst.dev),
2158 	};
2159 	return __ip6_del_rt(rt, &info);
2160 }
2161 
/* Delete @rt and, when requested (fc_delete_all_nh), all its multipath
 * siblings, emitting a single RTM_DELROUTE notification covering every
 * hop.  Consumes the caller's reference on @rt.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			/* Build the message before deletion while the hop
			 * list is intact; suppress per-hop notifications.
			 */
			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* Send the combined notification outside the table lock. */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
2213 
/* Delete the route matching @cfg (destination/source prefix, and any of
 * ifindex, gateway, metric, protocol that the caller specified).
 * Returns 0 on success, -ESRCH when nothing matches, or the FIB-table
 * error via @extack.
 */
static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cached clones are only deleted when explicitly
			 * requested via RTF_CACHE in the config flags.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* Hold the match and drop the read lock before the
			 * delete helpers take the write lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2264 
/* Process an ICMPv6 Redirect (RFC 4861 sec. 8) received for @dst:
 * validate the message, update the neighbour cache with the new
 * first hop, and install a cached route clone pointing at it.
 * @skb's transport header must point at the ICMPv6 redirect message.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination itself is on-link;
	 * otherwise the target must be a link-local router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers must not accept redirects; admin may disable them */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	/* clone a host route via the new nexthop */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* the old cached entry is superseded; drop it */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
2387 
2388 /*
2389  *	Misc support functions
2390  */
2391 
2392 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2393 {
2394 	BUG_ON(from->dst.from);
2395 
2396 	rt->rt6i_flags &= ~RTF_EXPIRES;
2397 	dst_hold(&from->dst);
2398 	rt->dst.from = &from->dst;
2399 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2400 }
2401 
/* Initialise @rt as a copy of @ort, sharing metrics (via rt6_set_from)
 * and taking a reference on the inet6 device.  Note: rt6i_flags must be
 * assigned before rt6_set_from(), which clears RTF_EXPIRES.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2423 
2424 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up an RA-learned (RTF_ROUTEINFO) route for @prefix/@prefixlen via
 * @gwaddr on @dev.  Returns the route with a reference held, or NULL if
 * no such route exists.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* caller gets a reference */
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2459 
2460 static struct rt6_info *rt6_add_route_info(struct net *net,
2461 					   const struct in6_addr *prefix, int prefixlen,
2462 					   const struct in6_addr *gwaddr,
2463 					   struct net_device *dev,
2464 					   unsigned int pref)
2465 {
2466 	struct fib6_config cfg = {
2467 		.fc_metric	= IP6_RT_PRIO_USER,
2468 		.fc_ifindex	= dev->ifindex,
2469 		.fc_dst_len	= prefixlen,
2470 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2471 				  RTF_UP | RTF_PREF(pref),
2472 		.fc_protocol = RTPROT_RA,
2473 		.fc_nlinfo.portid = 0,
2474 		.fc_nlinfo.nlh = NULL,
2475 		.fc_nlinfo.nl_net = net,
2476 	};
2477 
2478 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2479 	cfg.fc_dst = *prefix;
2480 	cfg.fc_gateway = *gwaddr;
2481 
2482 	/* We should treat it as a default route if prefix length is 0. */
2483 	if (!prefixlen)
2484 		cfg.fc_flags |= RTF_DEFAULT;
2485 
2486 	ip6_route_add(&cfg, NULL);
2487 
2488 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2489 }
2490 #endif
2491 
2492 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2493 {
2494 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2495 	struct rt6_info *rt;
2496 	struct fib6_table *table;
2497 
2498 	table = fib6_get_table(dev_net(dev), tb_id);
2499 	if (!table)
2500 		return NULL;
2501 
2502 	read_lock_bh(&table->tb6_lock);
2503 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2504 		if (dev == rt->dst.dev &&
2505 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2506 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2507 			break;
2508 	}
2509 	if (rt)
2510 		dst_hold(&rt->dst);
2511 	read_unlock_bh(&table->tb6_lock);
2512 	return rt;
2513 }
2514 
2515 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2516 				     struct net_device *dev,
2517 				     unsigned int pref)
2518 {
2519 	struct fib6_config cfg = {
2520 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2521 		.fc_metric	= IP6_RT_PRIO_USER,
2522 		.fc_ifindex	= dev->ifindex,
2523 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2524 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2525 		.fc_protocol = RTPROT_RA,
2526 		.fc_nlinfo.portid = 0,
2527 		.fc_nlinfo.nlh = NULL,
2528 		.fc_nlinfo.nl_net = dev_net(dev),
2529 	};
2530 
2531 	cfg.fc_gateway = *gwaddr;
2532 
2533 	if (!ip6_route_add(&cfg, NULL)) {
2534 		struct fib6_table *table;
2535 
2536 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2537 		if (table)
2538 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2539 	}
2540 
2541 	return rt6_get_dflt_router(gwaddr, dev);
2542 }
2543 
/* Remove every RA-learned default route from @table, except on devices
 * with accept_ra == 2 (accept RAs even when forwarding).  The read lock
 * must be dropped before each ip6_del_rt(), so the walk restarts from
 * the root after every deletion.
 */
static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* hold across the lock drop; ip6_del_rt() consumes it */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
2563 
/* Purge RA-learned default routers from every FIB table in @net that is
 * flagged as holding one.  The table hash is walked under RCU.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();

	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
				__rt6_purge_dflt_routers(table);
		}
	}

	rcu_read_unlock();
}
2582 
2583 static void rtmsg_to_fib6_config(struct net *net,
2584 				 struct in6_rtmsg *rtmsg,
2585 				 struct fib6_config *cfg)
2586 {
2587 	memset(cfg, 0, sizeof(*cfg));
2588 
2589 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2590 			 : RT6_TABLE_MAIN;
2591 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2592 	cfg->fc_metric = rtmsg->rtmsg_metric;
2593 	cfg->fc_expires = rtmsg->rtmsg_info;
2594 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2595 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2596 	cfg->fc_flags = rtmsg->rtmsg_flags;
2597 
2598 	cfg->fc_nlinfo.nl_net = net;
2599 
2600 	cfg->fc_dst = rtmsg->rtmsg_dst;
2601 	cfg->fc_src = rtmsg->rtmsg_src;
2602 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2603 }
2604 
2605 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2606 {
2607 	struct fib6_config cfg;
2608 	struct in6_rtmsg rtmsg;
2609 	int err;
2610 
2611 	switch (cmd) {
2612 	case SIOCADDRT:		/* Add a route */
2613 	case SIOCDELRT:		/* Delete a route */
2614 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2615 			return -EPERM;
2616 		err = copy_from_user(&rtmsg, arg,
2617 				     sizeof(struct in6_rtmsg));
2618 		if (err)
2619 			return -EFAULT;
2620 
2621 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2622 
2623 		rtnl_lock();
2624 		switch (cmd) {
2625 		case SIOCADDRT:
2626 			err = ip6_route_add(&cfg, NULL);
2627 			break;
2628 		case SIOCDELRT:
2629 			err = ip6_route_del(&cfg, NULL);
2630 			break;
2631 		default:
2632 			err = -EINVAL;
2633 		}
2634 		rtnl_unlock();
2635 
2636 		return err;
2637 	}
2638 
2639 	return -EINVAL;
2640 }
2641 
2642 /*
2643  *	Drop the packet on the floor
2644  */
2645 
2646 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2647 {
2648 	int type;
2649 	struct dst_entry *dst = skb_dst(skb);
2650 	switch (ipstats_mib_noroutes) {
2651 	case IPSTATS_MIB_INNOROUTES:
2652 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2653 		if (type == IPV6_ADDR_ANY) {
2654 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2655 				      IPSTATS_MIB_INADDRERRORS);
2656 			break;
2657 		}
2658 		/* FALLTHROUGH */
2659 	case IPSTATS_MIB_OUTNOROUTES:
2660 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2661 			      ipstats_mib_noroutes);
2662 		break;
2663 	}
2664 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2665 	kfree_skb(skb);
2666 	return 0;
2667 }
2668 
/* dst input handler for blackhole routes: drop with "no route" ICMP */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2673 
/* dst output handler for blackhole routes: drop with "no route" ICMP */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2679 
/* dst input handler for prohibit routes: drop with "admin prohibited" */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2684 
/* dst output handler for prohibit routes: drop with "admin prohibited" */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2690 
2691 /*
2692  *	Allocate a dst for local (unicast / anycast) address.
2693  */
2694 
2695 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2696 				    const struct in6_addr *addr,
2697 				    bool anycast)
2698 {
2699 	u32 tb_id;
2700 	struct net *net = dev_net(idev->dev);
2701 	struct net_device *dev = net->loopback_dev;
2702 	struct rt6_info *rt;
2703 
2704 	/* use L3 Master device as loopback for host routes if device
2705 	 * is enslaved and address is not link local or multicast
2706 	 */
2707 	if (!rt6_need_strict(addr))
2708 		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2709 
2710 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2711 	if (!rt)
2712 		return ERR_PTR(-ENOMEM);
2713 
2714 	in6_dev_hold(idev);
2715 
2716 	rt->dst.flags |= DST_HOST;
2717 	rt->dst.input = ip6_input;
2718 	rt->dst.output = ip6_output;
2719 	rt->rt6i_idev = idev;
2720 
2721 	rt->rt6i_protocol = RTPROT_KERNEL;
2722 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2723 	if (anycast)
2724 		rt->rt6i_flags |= RTF_ANYCAST;
2725 	else
2726 		rt->rt6i_flags |= RTF_LOCAL;
2727 
2728 	rt->rt6i_gateway  = *addr;
2729 	rt->rt6i_dst.addr = *addr;
2730 	rt->rt6i_dst.plen = 128;
2731 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2732 	rt->rt6i_table = fib6_get_table(net, tb_id);
2733 
2734 	return rt;
2735 }
2736 
2737 /* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* namespace whose tables are walked */
	struct in6_addr *addr;	/* the preferred source address removed */
};
2743 
2744 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2745 {
2746 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2747 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2748 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2749 
2750 	if (((void *)rt->dst.dev == dev || !dev) &&
2751 	    rt != net->ipv6.ip6_null_entry &&
2752 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2753 		/* remove prefsrc entry */
2754 		rt->rt6i_prefsrc.plen = 0;
2755 	}
2756 	return 0;
2757 }
2758 
/* Called when address @ifp goes away: scrub it from the prefsrc of all
 * routes in its namespace.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2769 
2770 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2771 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2772 
2773 /* Remove routers and update dst entries when gateway turn into host. */
2774 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2775 {
2776 	struct in6_addr *gateway = (struct in6_addr *)arg;
2777 
2778 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2779 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2780 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2781 		return -1;
2782 	}
2783 	return 0;
2784 }
2785 
/* Drop routes via @gateway when it turns from router into host */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2790 
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;	/* namespace being cleaned */
};
2795 
2796 /* called with write lock held for table with rt */
2797 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2798 {
2799 	const struct arg_dev_net *adn = arg;
2800 	const struct net_device *dev = adn->dev;
2801 
2802 	if ((rt->dst.dev == dev || !dev) &&
2803 	    rt != adn->net->ipv6.ip6_null_entry &&
2804 	    (rt->rt6i_nsiblings == 0 ||
2805 	     (dev && netdev_unregistering(dev)) ||
2806 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2807 		return -1;
2808 
2809 	return 0;
2810 }
2811 
/* Remove all routes through @dev (all devices when @dev is NULL) and
 * flush the uncached-route list for it.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2823 
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2828 
/* fib6_clean_all() callback: propagate a device MTU change into route
 * metrics / cached PMTU values.  Always returns 0 (never deletes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2877 
/* Propagate a new device MTU into all route metrics for @dev's netns */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2887 
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE
 * messages in the AF_INET6 family.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
};
2902 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into @cfg.
 * Returns 0 on success or a negative errno (parse/validation failure).
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* reject-style route types all map onto RTF_REJECT */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		/* attribute must carry at least the prefix bytes */
		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE overrides rtm_table when present */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		/* unknown preference values fall back to medium */
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
3026 
/* One parsed nexthop of a multipath RTM_NEWROUTE request */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this hop */
	struct fib6_config r_cfg;	/* per-hop config (for error paths) */
	struct mx6_config mxc;		/* converted metrics */
	struct list_head next;		/* link in rt6_nh_list */
};
3033 
/* Warn about every nexthop of a failed multipath replace, so the admin
 * can audit which routes may now be inconsistent.
 */
static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
{
	struct rt6_nh *nh;

	list_for_each_entry(nh, rt6_nh_list, next) {
		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
		        nh->r_cfg.fc_ifindex);
	}
}
3044 
3045 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3046 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3047 {
3048 	struct rt6_nh *nh;
3049 	int err = -EEXIST;
3050 
3051 	list_for_each_entry(nh, rt6_nh_list, next) {
3052 		/* check if rt6_info already exists */
3053 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3054 			return err;
3055 	}
3056 
3057 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3058 	if (!nh)
3059 		return -ENOMEM;
3060 	nh->rt6_info = rt;
3061 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3062 	if (err) {
3063 		kfree(nh);
3064 		return err;
3065 	}
3066 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3067 	list_add_tail(&nh->next, rt6_nh_list);
3068 
3069 	return 0;
3070 }
3071 
3072 static void ip6_route_mpath_notify(struct rt6_info *rt,
3073 				   struct rt6_info *rt_last,
3074 				   struct nl_info *info,
3075 				   __u16 nlflags)
3076 {
3077 	/* if this is an APPEND route, then rt points to the first route
3078 	 * inserted and rt_last points to last route inserted. Userspace
3079 	 * wants a consistent dump of the route which starts at the first
3080 	 * nexthop. Since sibling routes are always added at the end of
3081 	 * the list, find the first sibling of the last route appended
3082 	 */
3083 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3084 		rt = list_first_entry(&rt_last->rt6i_siblings,
3085 				      struct rt6_info,
3086 				      rt6i_siblings);
3087 	}
3088 
3089 	if (rt)
3090 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3091 }
3092 
/* Add (or replace/append) a multipath route: parse each RTA_MULTIPATH
 * nexthop into its own rt6_info, insert them one by one, and send a
 * single combined netlink notification.  On a mid-insert failure the
 * hops already added are deleted again so the FIB stays consistent.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* not on the list yet; drop it here */
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->rt6_info;
		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->rt6_info;

		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3223 
3224 static int ip6_route_multipath_del(struct fib6_config *cfg,
3225 				   struct netlink_ext_ack *extack)
3226 {
3227 	struct fib6_config r_cfg;
3228 	struct rtnexthop *rtnh;
3229 	int remaining;
3230 	int attrlen;
3231 	int err = 1, last_err = 0;
3232 
3233 	remaining = cfg->fc_mp_len;
3234 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3235 
3236 	/* Parse a Multipath Entry */
3237 	while (rtnh_ok(rtnh, remaining)) {
3238 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3239 		if (rtnh->rtnh_ifindex)
3240 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3241 
3242 		attrlen = rtnh_attrlen(rtnh);
3243 		if (attrlen > 0) {
3244 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3245 
3246 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3247 			if (nla) {
3248 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3249 				r_cfg.fc_flags |= RTF_GATEWAY;
3250 			}
3251 		}
3252 		err = ip6_route_del(&r_cfg, extack);
3253 		if (err)
3254 			last_err = err;
3255 
3256 		rtnh = rtnh_next(rtnh, &remaining);
3257 	}
3258 
3259 	return last_err;
3260 }
3261 
3262 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3263 			      struct netlink_ext_ack *extack)
3264 {
3265 	struct fib6_config cfg;
3266 	int err;
3267 
3268 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3269 	if (err < 0)
3270 		return err;
3271 
3272 	if (cfg.fc_mp)
3273 		return ip6_route_multipath_del(&cfg, extack);
3274 	else {
3275 		cfg.fc_delete_all_nh = 1;
3276 		return ip6_route_del(&cfg, extack);
3277 	}
3278 }
3279 
3280 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3281 			      struct netlink_ext_ack *extack)
3282 {
3283 	struct fib6_config cfg;
3284 	int err;
3285 
3286 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3287 	if (err < 0)
3288 		return err;
3289 
3290 	if (cfg.fc_mp)
3291 		return ip6_route_multipath_add(&cfg, extack);
3292 	else
3293 		return ip6_route_add(&cfg, extack);
3294 }
3295 
3296 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3297 {
3298 	int nexthop_len = 0;
3299 
3300 	if (rt->rt6i_nsiblings) {
3301 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3302 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3303 			    + nla_total_size(16) /* RTA_GATEWAY */
3304 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3305 
3306 		nexthop_len *= rt->rt6i_nsiblings;
3307 	}
3308 
3309 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3310 	       + nla_total_size(16) /* RTA_SRC */
3311 	       + nla_total_size(16) /* RTA_DST */
3312 	       + nla_total_size(16) /* RTA_GATEWAY */
3313 	       + nla_total_size(16) /* RTA_PREFSRC */
3314 	       + nla_total_size(4) /* RTA_TABLE */
3315 	       + nla_total_size(4) /* RTA_IIF */
3316 	       + nla_total_size(4) /* RTA_OIF */
3317 	       + nla_total_size(4) /* RTA_PRIORITY */
3318 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3319 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3320 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3321 	       + nla_total_size(1) /* RTA_PREF */
3322 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3323 	       + nexthop_len;
3324 }
3325 
/* Emit the nexthop attributes of @rt (gateway, oif, lwtunnel encap) into
 * @skb and report link state through @flags.  With @skip_oif set, RTA_OIF
 * is omitted because the multipath encoding carries the ifindex in its
 * rtnexthop header instead.  Returns 0 or -EMSGSIZE on skb exhaustion.
 */
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	/* device down or without carrier: flag the nexthop as linkdown,
	 * and additionally as dead when the interface is configured to
	 * ignore such routes
	 */
	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
		*flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3354 
/* add multipath next hop */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	/* reserve the rtnexthop header first; its length field is
	 * back-patched below once the attributes have been appended
	 */
	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = 0;
	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;

	/* skip_oif=true: the ifindex above replaces RTA_OIF */
	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3381 
/* Fill one RTM_*ROUTE netlink message describing @rt.
 *
 * @dst/@src: when non-NULL (route-get replies), emitted as full /128
 *	addresses and override the FIB prefix lengths.
 * @iif: input interface of a route-get request; 0 for dumps/notifies.
 *
 * Returns 0 on success or -EMSGSIZE when @skb ran out of space, in
 * which case the partially built message is cancelled.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* map the error stored in a reject route to its route type */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* multicast destinations are answered by the multicast
		 * routing code; 0 means it filled the message itself
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* a cached path MTU overrides the RTAX_MTU metric */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	/* remaining lifetime in jiffies; 0 for permanent routes */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3535 
3536 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3537 {
3538 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3539 	struct net *net = arg->net;
3540 
3541 	if (rt == net->ipv6.ip6_null_entry)
3542 		return 0;
3543 
3544 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3545 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3546 
3547 		/* user wants prefix routes only */
3548 		if (rtm->rtm_flags & RTM_F_PREFIX &&
3549 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3550 			/* success since this is not a prefix route */
3551 			return 1;
3552 		}
3553 	}
3554 
3555 	return rt6_fill_node(net,
3556 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3557 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3558 		     NLM_F_MULTI);
3559 }
3560 
/* RTM_GETROUTE handler: resolve a route for the requested selectors and
 * unicast the answer to the requester.  With RTM_F_FIB_MATCH set, the
 * matching FIB entry is looked up directly instead of performing a full
 * input/output route lookup.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	/* any attribute failure below reports -EINVAL */
	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	/* input-path lookup when an input interface was given,
	 * output-path lookup otherwise
	 */
	if (iif) {
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		if (!fibmatch)
			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
	} else {
		fl6.flowi6_oif = oif;

		if (!fibmatch)
			dst = ip6_route_output(net, NULL, &fl6);
	}

	/* NOTE(review): the saddr-derived lookup flags computed above are
	 * not passed here in the fibmatch case — confirm that is intended
	 */
	if (fibmatch)
		dst = ip6_route_lookup(net, &fl6, 0);

	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* the skb now owns the route reference */
	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3679 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On failure the error is recorded
 * on the group so listeners can detect the lost event.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* rt6_nlmsg_size() is an upper bound for the fill below */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3710 
/* Netdevice notifier: keep the special routes (null entry and, with
 * multiple tables, prohibit/blackhole) wired to the netns loopback
 * device across its register/unregister events.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	/* only the loopback device backs the special routes */
	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	 } else if (event == NETDEV_UNREGISTER &&
		    dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
3743 
3744 /*
3745  *	/proc
3746  */
3747 
3748 #ifdef CONFIG_PROC_FS
3749 
/* /proc/net/ipv6_route seq_file operations; the open helper is defined
 * earlier in this file.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3757 
/* /proc/net/rt6_stats: one line of hex-formatted per-netns FIB counters. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3772 
/* open() for /proc/net/rt6_stats: single-shot, netns-aware seq file */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3777 
/* /proc/net/rt6_stats file operations */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3785 #endif	/* CONFIG_PROC_FS */
3786 
3787 #ifdef CONFIG_SYSCTL
3788 
3789 static
3790 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3791 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3792 {
3793 	struct net *net;
3794 	int delay;
3795 	if (!write)
3796 		return -EINVAL;
3797 
3798 	net = (struct net *)ctl->extra1;
3799 	delay = net->ipv6.sysctl.flush_delay;
3800 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3801 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3802 	return 0;
3803 }
3804 
/* Template for the per-netns net.ipv6.route.* sysctls.
 *
 * NOTE: entry order is load-bearing — ipv6_route_sysctl_init() patches
 * each entry's .data pointer by array index.  Keep both in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3878 
/* Clone the sysctl template for @net and point every entry's .data at
 * the netns-local variable.  Returns NULL on allocation failure; the
 * caller owns the returned table.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* indices must match the order in ipv6_route_table_template */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3907 #endif
3908 
/* Per-netns routing setup: dst ops, the special null (and, with multiple
 * tables, prohibit/blackhole) routes, and the sysctl defaults.  On any
 * failure, everything allocated so far is unwound via the goto chain.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* each special route is a template copy wired to this netns'
	 * dst_ops, with dst.path pointing back at the route itself
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* default values for the net.ipv6.route.* sysctls */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3980 
/* Per-netns teardown: frees what ip6_route_net_init() allocated.  The
 * special entries' rt6i_idev references are dropped separately in
 * ip6_route_dev_notify() on NETDEV_UNREGISTER.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3990 
3991 static int __net_init ip6_route_net_init_late(struct net *net)
3992 {
3993 #ifdef CONFIG_PROC_FS
3994 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3995 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3996 #endif
3997 	return 0;
3998 }
3999 
/* Late per-netns teardown: remove the /proc/net entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
4007 
/* Core per-netns setup/teardown for IPv6 routing */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4012 
4013 static int __net_init ipv6_inetpeer_init(struct net *net)
4014 {
4015 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4016 
4017 	if (!bp)
4018 		return -ENOMEM;
4019 	inet_peer_base_init(bp);
4020 	net->ipv6.peers = bp;
4021 	return 0;
4022 }
4023 
4024 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4025 {
4026 	struct inet_peer_base *bp = net->ipv6.peers;
4027 
4028 	net->ipv6.peers = NULL;
4029 	inetpeer_invalidate_tree(bp);
4030 	kfree(bp);
4031 }
4032 
/* Per-netns inet_peer base lifecycle */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
4037 
/* Late per-netns hooks (procfs entries) */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
4042 
/* Registered below addrconf's priority so addrconf is notified first —
 * see ADDRCONF_NOTIFY_PRIORITY.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
4047 
/* Wire init_net's special routes to the loopback device by hand. */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
4062 
/* Subsystem init: dst cache, pernet ops, FIB/xfrm/rules, rtnetlink
 * handlers, the netdev notifier and the per-cpu uncached route lists.
 * Each failure unwinds everything registered before it, in reverse
 * order, via the goto chain at the bottom.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the rt6_info slab */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* error unwinding: reverse order of the registrations above */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
4143 
/* Reverse of ip6_route_init(): tear everything down in strict reverse
 * order of registration.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
4156