xref: /linux/net/ipv6/route.c (revision 3c2f85b8ce8acee0502d61fb53015eabd7d4c8fb)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/* Neighbour-reachability verdicts returned by route scoring
 * (rt6_check_neigh()/rt6_score_route()).  Negative values are failures;
 * callers use them to skip a route, probe it, or rotate the router list.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* never select this route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour FAILED: worth probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* unknown: round-robin to next router */
	RT6_NUD_SUCCEED = 1		/* next hop (probably) reachable */
};
77 
78 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
79 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
81 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
82 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83 static void		ip6_dst_destroy(struct dst_entry *);
84 static void		ip6_dst_ifdown(struct dst_entry *,
85 				       struct net_device *dev, int how);
86 static int		 ip6_dst_gc(struct dst_ops *ops);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
90 static int		ip6_pkt_prohibit(struct sk_buff *skb);
91 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
92 static void		ip6_link_failure(struct sk_buff *skb);
93 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 					   struct sk_buff *skb, u32 mtu);
95 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 					struct sk_buff *skb);
97 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
98 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct net *net,
102 					   const struct in6_addr *prefix, int prefixlen,
103 					   const struct in6_addr *gwaddr, int ifindex,
104 					   unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net *net,
106 					   const struct in6_addr *prefix, int prefixlen,
107 					   const struct in6_addr *gwaddr, int ifindex);
108 #endif
109 
/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries; @lock protects
 * @head.  Entries are flushed to loopback on device unregister.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};
114 
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
/* Link @rt onto this CPU's uncached list and mark it DST_NOCACHE so the
 * dst core knows the fib6 tree does not own it.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;	/* remembered so _del finds the right lock */

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
128 
/* Unlink @rt from the uncached list it was added to, if any.  Safe to call
 * for routes that were never added (list head stays empty-initialised).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
139 
/* Device teardown: re-point every uncached route that references @dev at
 * the namespace loopback device so the route stays usable until released.
 * Both the inet6_dev and the dst.dev references are migrated, with the
 * usual hold-new-then-put-old refcount order.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	/* loopback itself never goes away before the netns does */
	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
171 
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174 	return dst_metrics_write_ptr(rt->dst.from);
175 }
176 
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179 	struct rt6_info *rt = (struct rt6_info *)dst;
180 
181 	if (rt->rt6i_flags & RTF_PCPU)
182 		return rt6_pcpu_cow_metrics(rt);
183 	else if (rt->rt6i_flags & RTF_CACHE)
184 		return NULL;
185 	else
186 		return dst_cow_metrics_generic(dst, old);
187 }
188 
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190 					     struct sk_buff *skb,
191 					     const void *daddr)
192 {
193 	struct in6_addr *p = &rt->rt6i_gateway;
194 
195 	if (!ipv6_addr_any(p))
196 		return (const void *) p;
197 	else if (skb)
198 		return &ipv6_hdr(skb)->daddr;
199 	return daddr;
200 }
201 
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203 					  struct sk_buff *skb,
204 					  const void *daddr)
205 {
206 	struct rt6_info *rt = (struct rt6_info *) dst;
207 	struct neighbour *n;
208 
209 	daddr = choose_neigh_daddr(rt, skb, daddr);
210 	n = __ipv6_neigh_lookup(dst->dev, daddr);
211 	if (n)
212 		return n;
213 	return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215 
/* dst_ops for regular IPv6 routes; copied into each netns
 * (net->ipv6.ip6_dst_ops).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
233 
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237 
238 	return mtu ? : dst->dev->mtu;
239 }
240 
/* Blackhole routes deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
245 
/* Blackhole routes deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
250 
/* dst_ops for blackhole copies of routes (see the no-op pmtu/redirect
 * handlers above); no gc and no ifdown handling needed.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
262 
/* Metrics template for the special route entries below (hoplimit 0 means
 * "use the default").
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
266 
/* Template for the per-netns "null" route: matches when nothing else
 * does, discards packets with -ENETUNREACH.  The permanent refcount and
 * maximal metric keep it pinned and last-resort.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
281 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Per-netns "prohibit" route template: reject traffic with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Per-netns "blackhole" route template: silently discard traffic. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
315 
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* Zero everything past the embedded dst_entry; dst_alloc() has
	 * already initialised the dst part itself.
	 */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
324 
/* Allocate a bare rt6_info with this netns's ip6_dst_ops and zero the
 * rt6-specific part.  Returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
/* dst_ops.destroy: release everything a rt6_info holds — generic metrics,
 * the per-cpu clone array, uncached-list membership, the inet6_dev
 * reference, and the parent (dst.from) reference.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the pointer before dropping the parent reference */
	dst->from = NULL;
	dst_release(from);
}
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
417 /* Multipath route selection:
418  *   Hash based function using packet header and flowlabel.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Work only if this not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC6438 recommands to use flowlabel */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps, we need to tune, this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_choosen;
457 
458 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route, if route_choosen == 0
460 	 * (siblings does not include ourself)
461 	 */
462 	if (route_choosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_choosen--;
466 			if (route_choosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
475 
476 /*
477  *	Route lookup. Any table->tb6_lock is implied.
478  */
479 
480 static inline struct rt6_info *rt6_device_match(struct net *net,
481 						    struct rt6_info *rt,
482 						    const struct in6_addr *saddr,
483 						    int oif,
484 						    int flags)
485 {
486 	struct rt6_info *local = NULL;
487 	struct rt6_info *sprt;
488 
489 	if (!oif && ipv6_addr_any(saddr))
490 		goto out;
491 
492 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493 		struct net_device *dev = sprt->dst.dev;
494 
495 		if (oif) {
496 			if (dev->ifindex == oif)
497 				return sprt;
498 			if (dev->flags & IFF_LOOPBACK) {
499 				if (!sprt->rt6i_idev ||
500 				    sprt->rt6i_idev->dev->ifindex != oif) {
501 					if (flags & RT6_LOOKUP_F_IFACE && oif)
502 						continue;
503 					if (local && (!oif ||
504 						      local->rt6i_idev->dev->ifindex == oif))
505 						continue;
506 				}
507 				local = sprt;
508 			}
509 		} else {
510 			if (ipv6_chk_addr(net, saddr, dev,
511 					  flags & RT6_LOOKUP_F_IFACE))
512 				return sprt;
513 		}
514 	}
515 
516 	if (oif) {
517 		if (local)
518 			return local;
519 
520 		if (flags & RT6_LOOKUP_F_IFACE)
521 			return net->ipv6.ip6_null_entry;
522 	}
523 out:
524 	return rt;
525 }
526 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation work item for router reachability
 * probing; @dev is held (dev_hold) until the work runs.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* router address to probe */
	struct net_device *dev;
};
533 
/* Workqueue handler: send a neighbour solicitation to the probed
 * router's solicited-node multicast address, then drop the device
 * reference taken by rt6_probe() and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}
545 
/* Schedule a reachability probe for @rt's gateway if its neighbour entry
 * is missing or not NUD_VALID and the per-device probe interval has
 * elapsed.  The actual NS is sent from process context via
 * rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;	/* already reachable, nothing to do */

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check state under the lock and rate-limit by the
		 * per-device rtr_probe_interval
		 */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released in rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router Reachability Probing only exists with CONFIG_IPV6_ROUTER_PREF */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
/* Classify the reachability of @rt's next hop for route scoring.  Routes
 * without a gateway next hop always succeed.  For gateway routes the
 * neighbour cache decides; a missing entry succeeds when router
 * preference support is built in (the route will be probed), otherwise
 * it asks the caller to round-robin.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;	/* not failed yet: usable */
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
642 
/* Score @rt for router selection (higher is better).  Returns a negative
 * rt6_nud_state code when the route must not be used: device mismatch
 * under strict lookup, or a failing neighbour check when
 * RT6_LOOKUP_F_REACHABLE is requested.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* fold RA router-preference bits in above the device score */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
661 
/* Compare @rt against the current best candidate @match.  Returns the
 * better of the two, updating *mpri (best score so far) and *do_rr
 * (whether the caller should rotate the round-robin pointer).  Expired
 * routes and — when the device config says so — routes on carrier-less
 * links are skipped.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
698 
/* Scan @fn's routes that share @metric for the best match, starting at
 * the round-robin head @rr_head and wrapping around to the front of the
 * leaf list.  A route with a different metric ends each scan and is
 * remembered in @cont; if the preferred-metric run produced no match,
 * the remaining routes from @cont are scanned as a fallback.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* wrap-around: routes before rr_head with the same metric */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing in the preferred metric run: consider the rest */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
735 
/* Pick the best route from fib6 node @fn, honouring the node's
 * round-robin pointer (fn->rr_ptr) and advancing it when scoring asked
 * for rotation.  Falls back to ip6_null_entry when nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information Option received in a Router Advertisement
 * (RFC 4191): validate it, then add, refresh, or delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 on success or -EINVAL
 * for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 2 option units */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix means the announcing router itself */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
844 
/* Climb from @fn towards the tree root looking for the next node that
 * carries routes (RTN_RTINFO).  At each parent that roots a
 * source-address subtree we did not just come out of, descend into that
 * subtree with @saddr.  Returns NULL once the top-level root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
861 
/* Simple table lookup (no cloning, no per-cpu caching): find the best
 * matching route in @table, backtracking towards the root while only
 * the null entry matches.  Takes a reference on the returned route.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
886 
/* Policy-rule aware wrapper around ip6_pol_route_lookup(). */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason the
 * route is freed.  In any case, if the caller does not hold its own
 * reference, the route may be destroyed.
 */

/* Insert @rt into its table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
/* Create an RTF_CACHE host-route clone of @ort for destination @daddr
 * (and, with subtrees, source @saddr).  If @ort is itself a clone, the
 * copy is made from its parent instead.  Returns NULL on allocation
 * failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	/* the clone is always a full /128 host route */
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		/* anycast: daddr equals the (non-host) route's own prefix
		 * address
		 */
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
986 
987 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988 {
989 	struct rt6_info *pcpu_rt;
990 
991 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
992 				  rt->dst.dev, rt->dst.flags);
993 
994 	if (!pcpu_rt)
995 		return NULL;
996 	ip6_rt_copy_init(pcpu_rt, rt);
997 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 	pcpu_rt->rt6i_flags |= RTF_PCPU;
999 	return pcpu_rt;
1000 }
1001 
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Return this CPU's cached clone of @rt with a reference taken, or NULL
 * when no clone exists yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1016 
/* Create and install this CPU's per-cpu clone of @rt.  Handles three
 * races: allocation failure (returns the null entry), another context
 * installing a clone first (returns theirs), and @rt having been removed
 * from the fib6 tree meanwhile (returns @rt itself).  Always returns a
 * referenced route.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1054 
/* Core policy lookup in @table: select the best route for @fl6, retrying
 * without the reachability requirement if nothing reachable was found.
 * Depending on the result, return (a) the matched route directly for
 * null/RTF_CACHE entries, (b) an uncached RTF_CACHE clone when the
 * caller signalled FLOWI_FLAG_KNOWN_NH on a non-gateway route, or (c) a
 * per-cpu clone.  Always returns a referenced dst.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* non-forwarding hosts prefer reachable routers (RFC 4861) */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1144 
1145 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1146 					    struct flowi6 *fl6, int flags)
1147 {
1148 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1149 }
1150 
1151 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1152 						struct net_device *dev,
1153 						struct flowi6 *fl6, int flags)
1154 {
1155 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1156 		flags |= RT6_LOOKUP_F_IFACE;
1157 
1158 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1159 }
1160 
1161 void ip6_route_input(struct sk_buff *skb)
1162 {
1163 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1164 	struct net *net = dev_net(skb->dev);
1165 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1166 	struct ip_tunnel_info *tun_info;
1167 	struct flowi6 fl6 = {
1168 		.flowi6_iif = skb->dev->ifindex,
1169 		.daddr = iph->daddr,
1170 		.saddr = iph->saddr,
1171 		.flowlabel = ip6_flowinfo(iph),
1172 		.flowi6_mark = skb->mark,
1173 		.flowi6_proto = iph->nexthdr,
1174 	};
1175 
1176 	tun_info = skb_tunnel_info(skb);
1177 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1178 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1179 	skb_dst_drop(skb);
1180 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1181 }
1182 
1183 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1184 					     struct flowi6 *fl6, int flags)
1185 {
1186 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1187 }
1188 
1189 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1190 				    struct flowi6 *fl6)
1191 {
1192 	int flags = 0;
1193 
1194 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1195 
1196 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1197 	    fl6->flowi6_oif)
1198 		flags |= RT6_LOOKUP_F_IFACE;
1199 
1200 	if (!ipv6_addr_any(&fl6->saddr))
1201 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1202 	else if (sk)
1203 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1204 
1205 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1206 }
1207 EXPORT_SYMBOL(ip6_route_output);
1208 
/* Clone @dst_orig into a "blackhole" dst that silently discards all
 * traffic in both directions.  Consumes the caller's reference on
 * @dst_orig and returns the new dst, or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		/* Drop packets on both the input and output paths. */
		new->input = dst_discard;
		new->output = dst_discard_sk;

		/* Copy identifying state from the original route. */
		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		/* The clone is never a per-cpu route, even if @ort was. */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() while the caller still holds the
		 * reference set by dst_alloc() above — presumably hands the
		 * dst to the GC for eventual reclaim once released; confirm
		 * against the dst GC semantics for blackhole ops.
		 */
		dst_free(new);
	}

	/* We own the caller's reference on the original; drop it. */
	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1243 
1244 /*
1245  *	Destination cache support functions
1246  */
1247 
1248 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1249 {
1250 	if (rt->dst.from &&
1251 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1252 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1253 }
1254 
1255 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1256 {
1257 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1258 		return NULL;
1259 
1260 	if (rt6_check_expired(rt))
1261 		return NULL;
1262 
1263 	return &rt->dst;
1264 }
1265 
1266 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1267 {
1268 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1269 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1270 		return &rt->dst;
1271 	else
1272 		return NULL;
1273 }
1274 
1275 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1276 {
1277 	struct rt6_info *rt;
1278 
1279 	rt = (struct rt6_info *) dst;
1280 
1281 	/* All IPV6 dsts are created with ->obsolete set to the value
1282 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1283 	 * into this function always.
1284 	 */
1285 
1286 	rt6_dst_from_metrics_check(rt);
1287 
1288 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1289 		return rt6_dst_from_check(rt, cookie);
1290 	else
1291 		return rt6_check(rt, cookie);
1292 }
1293 
1294 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1295 {
1296 	struct rt6_info *rt = (struct rt6_info *) dst;
1297 
1298 	if (rt) {
1299 		if (rt->rt6i_flags & RTF_CACHE) {
1300 			if (rt6_check_expired(rt)) {
1301 				ip6_del_rt(rt);
1302 				dst = NULL;
1303 			}
1304 		} else {
1305 			dst_release(dst);
1306 			dst = NULL;
1307 		}
1308 	}
1309 	return dst;
1310 }
1311 
1312 static void ip6_link_failure(struct sk_buff *skb)
1313 {
1314 	struct rt6_info *rt;
1315 
1316 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1317 
1318 	rt = (struct rt6_info *) skb_dst(skb);
1319 	if (rt) {
1320 		if (rt->rt6i_flags & RTF_CACHE) {
1321 			dst_hold(&rt->dst);
1322 			ip6_del_rt(rt);
1323 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1324 			rt->rt6i_node->fn_sernum = -1;
1325 		}
1326 	}
1327 }
1328 
1329 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1330 {
1331 	struct net *net = dev_net(rt->dst.dev);
1332 
1333 	rt->rt6i_flags |= RTF_MODIFIED;
1334 	rt->rt6i_pmtu = mtu;
1335 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1336 }
1337 
/* Record a reduced path MTU for the route behind @dst.
 *
 * RTF_CACHE clones store the MTU directly.  For any other route a new
 * RTF_CACHE clone is created for the flow's addresses (taken from @iph
 * when present, otherwise from @sk) and inserted into the tree so
 * subsequent lookups see the reduced MTU.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	/* Local routes carry no path MTU state. */
	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	/* Clamp to the IPv6 minimum MTU, and ignore non-reductions. */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			/* No flow addresses available: nothing to clone. */
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1379 
1380 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1381 			       struct sk_buff *skb, u32 mtu)
1382 {
1383 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1384 }
1385 
1386 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1387 		     int oif, u32 mark)
1388 {
1389 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1390 	struct dst_entry *dst;
1391 	struct flowi6 fl6;
1392 
1393 	memset(&fl6, 0, sizeof(fl6));
1394 	fl6.flowi6_oif = oif;
1395 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1396 	fl6.daddr = iph->daddr;
1397 	fl6.saddr = iph->saddr;
1398 	fl6.flowlabel = ip6_flowinfo(iph);
1399 
1400 	dst = ip6_route_output(net, NULL, &fl6);
1401 	if (!dst->error)
1402 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1403 	dst_release(dst);
1404 }
1405 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1406 
1407 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1408 {
1409 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1410 			sk->sk_bound_dev_if, sk->sk_mark);
1411 }
1412 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1413 
/* Handle redirects.
 *
 * Piggy-backs the redirecting router's address onto a flowi6 so it can
 * travel through fib6_rule_lookup() to __ip6_route_redirect(), which
 * casts the flowi6 pointer back to this type.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* must stay first: cast to/from struct flowi6 */
	struct in6_addr gateway;	/* router that sent the redirect */
};
1419 
/* Find the route whose nexthop matches the router that sent a redirect
 * (rdfl->gateway) on the interface it arrived on.  Falls back to
 * ip6_null_entry when nothing matches.  The returned route carries a
 * reference.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	/* The gateway rides behind the flowi6 — see struct ip6rd_flowi. */
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	/* Scan the node's routes for a non-expired gateway route that
	 * exits via the redirect's interface and matches its source.
	 */
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* Nothing at this node: climb the tree and retry. */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1476 
1477 static struct dst_entry *ip6_route_redirect(struct net *net,
1478 					const struct flowi6 *fl6,
1479 					const struct in6_addr *gateway)
1480 {
1481 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1482 	struct ip6rd_flowi rdfl;
1483 
1484 	rdfl.fl6 = *fl6;
1485 	rdfl.gateway = *gateway;
1486 
1487 	return fib6_rule_lookup(net, &rdfl.fl6,
1488 				flags, __ip6_route_redirect);
1489 }
1490 
1491 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1492 {
1493 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1494 	struct dst_entry *dst;
1495 	struct flowi6 fl6;
1496 
1497 	memset(&fl6, 0, sizeof(fl6));
1498 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1499 	fl6.flowi6_oif = oif;
1500 	fl6.flowi6_mark = mark;
1501 	fl6.daddr = iph->daddr;
1502 	fl6.saddr = iph->saddr;
1503 	fl6.flowlabel = ip6_flowinfo(iph);
1504 
1505 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1506 	rt6_do_redirect(dst, NULL, skb);
1507 	dst_release(dst);
1508 }
1509 EXPORT_SYMBOL_GPL(ip6_redirect);
1510 
1511 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1512 			    u32 mark)
1513 {
1514 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1515 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1516 	struct dst_entry *dst;
1517 	struct flowi6 fl6;
1518 
1519 	memset(&fl6, 0, sizeof(fl6));
1520 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1521 	fl6.flowi6_oif = oif;
1522 	fl6.flowi6_mark = mark;
1523 	fl6.daddr = msg->dest;
1524 	fl6.saddr = iph->daddr;
1525 
1526 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1527 	rt6_do_redirect(dst, NULL, skb);
1528 	dst_release(dst);
1529 }
1530 
1531 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1532 {
1533 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1534 }
1535 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1536 
1537 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1538 {
1539 	struct net_device *dev = dst->dev;
1540 	unsigned int mtu = dst_mtu(dst);
1541 	struct net *net = dev_net(dev);
1542 
1543 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1544 
1545 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1546 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1547 
1548 	/*
1549 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1550 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1551 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1552 	 * rely only on pmtu discovery"
1553 	 */
1554 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1555 		mtu = IPV6_MAXPLEN;
1556 	return mtu;
1557 }
1558 
1559 static unsigned int ip6_mtu(const struct dst_entry *dst)
1560 {
1561 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1562 	unsigned int mtu = rt->rt6i_pmtu;
1563 	struct inet6_dev *idev;
1564 
1565 	if (mtu)
1566 		goto out;
1567 
1568 	mtu = dst_metric_raw(dst, RTAX_MTU);
1569 	if (mtu)
1570 		goto out;
1571 
1572 	mtu = IPV6_MIN_MTU;
1573 
1574 	rcu_read_lock();
1575 	idev = __in6_dev_get(dst->dev);
1576 	if (idev)
1577 		mtu = idev->cnf.mtu6;
1578 	rcu_read_unlock();
1579 
1580 out:
1581 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1582 }
1583 
/* Singly-linked list (via dst.next) of temporary dsts handed out by
 * icmp6_dst_alloc(); reclaimed by icmp6_dst_gc() / icmp6_clean_all()
 * under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1586 
/* Allocate a temporary host dst for an outgoing ICMPv6/NDISC packet.
 *
 * The dst is never inserted into the FIB; instead it is chained onto
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once its refcount
 * drops to zero.  Returns an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	/* One reference, owned by the caller. */
	atomic_set(&rt->dst.__refcnt, 1);
	/* Host route to fl6->daddr, with the destination as its own
	 * (on-link) gateway.
	 */
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain onto the reclaim list for later garbage collection. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	/* Kick the FIB GC so the entry is eventually reaped. */
	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1626 
1627 int icmp6_dst_gc(void)
1628 {
1629 	struct dst_entry *dst, **pprev;
1630 	int more = 0;
1631 
1632 	spin_lock_bh(&icmp6_dst_lock);
1633 	pprev = &icmp6_dst_gc_list;
1634 
1635 	while ((dst = *pprev) != NULL) {
1636 		if (!atomic_read(&dst->__refcnt)) {
1637 			*pprev = dst->next;
1638 			dst_free(dst);
1639 		} else {
1640 			pprev = &dst->next;
1641 			++more;
1642 		}
1643 	}
1644 
1645 	spin_unlock_bh(&icmp6_dst_lock);
1646 
1647 	return more;
1648 }
1649 
1650 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1651 			    void *arg)
1652 {
1653 	struct dst_entry *dst, **pprev;
1654 
1655 	spin_lock_bh(&icmp6_dst_lock);
1656 	pprev = &icmp6_dst_gc_list;
1657 	while ((dst = *pprev) != NULL) {
1658 		struct rt6_info *rt = (struct rt6_info *) dst;
1659 		if (func(rt, arg)) {
1660 			*pprev = dst->next;
1661 			dst_free(dst);
1662 		} else {
1663 			pprev = &dst->next;
1664 		}
1665 	}
1666 	spin_unlock_bh(&icmp6_dst_lock);
1667 }
1668 
/* dst_ops->gc callback for the IPv6 dst cache.
 *
 * Runs fib6_run_gc() when the minimum interval has elapsed or the
 * cache exceeds ip6_rt_max_size.  ip6_rt_gc_expire ramps up while
 * pressure persists and decays exponentially afterwards.  Returns
 * nonzero while the cache is still over ip6_rt_max_size.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Skip GC when we ran recently and are still under the cap. */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* Ramp up aggressiveness on sustained pressure. */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Decay aggressiveness by the configured elasticity. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1693 
1694 static int ip6_convert_metrics(struct mx6_config *mxc,
1695 			       const struct fib6_config *cfg)
1696 {
1697 	bool ecn_ca = false;
1698 	struct nlattr *nla;
1699 	int remaining;
1700 	u32 *mp;
1701 
1702 	if (!cfg->fc_mx)
1703 		return 0;
1704 
1705 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1706 	if (unlikely(!mp))
1707 		return -ENOMEM;
1708 
1709 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1710 		int type = nla_type(nla);
1711 		u32 val;
1712 
1713 		if (!type)
1714 			continue;
1715 		if (unlikely(type > RTAX_MAX))
1716 			goto err;
1717 
1718 		if (type == RTAX_CC_ALGO) {
1719 			char tmp[TCP_CA_NAME_MAX];
1720 
1721 			nla_strlcpy(tmp, nla, sizeof(tmp));
1722 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1723 			if (val == TCP_CA_UNSPEC)
1724 				goto err;
1725 		} else {
1726 			val = nla_get_u32(nla);
1727 		}
1728 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1729 			goto err;
1730 
1731 		mp[type - 1] = val;
1732 		__set_bit(type - 1, mxc->mx_valid);
1733 	}
1734 
1735 	if (ecn_ca) {
1736 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1737 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1738 	}
1739 
1740 	mxc->mx = mp;
1741 	return 0;
1742  err:
1743 	kfree(mp);
1744 	return -EINVAL;
1745 }
1746 
/* Build (but do not insert) an rt6_info from a netlink/ioctl route
 * configuration.  On success *rt_ret holds the new route, with device
 * and idev references taken; the caller is responsible for inserting
 * it into @cfg's table.  On error every reference acquired here is
 * dropped, *rt_ret is NULL, and a negative errno is returned.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing requires subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE, prefer an existing table, but fall back
	 * (with a warning) to creating one for compatibility.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Select the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		/* Lightweight-tunnel encapsulation: build the state and
		 * interpose the lwtunnel hooks where requested.
		 */
		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Pick the error code and packet handlers per reject type. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* A non-link-local gateway must itself be reachable
			 * via a direct (non-gatewayed) route.
			 */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Inherit device/idev from the gateway's
				 * route when none was specified.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	/* Undo everything acquired above; no route is returned. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
1996 
1997 int ip6_route_add(struct fib6_config *cfg)
1998 {
1999 	struct mx6_config mxc = { .mx = NULL, };
2000 	struct rt6_info *rt = NULL;
2001 	int err;
2002 
2003 	err = ip6_route_info_create(cfg, &rt);
2004 	if (err)
2005 		goto out;
2006 
2007 	err = ip6_convert_metrics(&mxc, cfg);
2008 	if (err)
2009 		goto out;
2010 
2011 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2012 
2013 	kfree(mxc.mx);
2014 
2015 	return err;
2016 out:
2017 	if (rt)
2018 		dst_free(&rt->dst);
2019 
2020 	return err;
2021 }
2022 
/* Unlink @rt from its FIB table, sending notifications via @info.
 * Consumes the caller's reference on @rt regardless of outcome.
 * The null entry and uncached (DST_NOCACHE) routes are refused.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	/* Drop the reference the caller passed in. */
	ip6_rt_put(rt);
	return err;
}
2044 
2045 int ip6_del_rt(struct rt6_info *rt)
2046 {
2047 	struct nl_info info = {
2048 		.nl_net = dev_net(rt->dst.dev),
2049 	};
2050 	return __ip6_del_rt(rt, &info);
2051 }
2052 
/* Delete the first route matching @cfg (prefix, plus any of ifindex,
 * gateway and metric that are set).  Returns -ESRCH when no route
 * matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cached clones are only deleted when explicitly
			 * requested via RTF_CACHE.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route across the unlock; __ip6_del_rt()
			 * consumes this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2094 
/* Process a received NDISC redirect for the route behind @dst:
 * validate the message, update the neighbour cache for the new first
 * hop, and install an RTF_CACHE clone routed via it.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination is on-link; otherwise the
	 * target must be a link-local unicast (router) address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Forwarding interfaces and those configured to ignore redirects
	 * drop the message.
	 */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Create a cached clone routed via the redirect target. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cached route is no longer needed. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2211 
2212 /*
2213  *	Misc support functions
2214  */
2215 
2216 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2217 {
2218 	BUG_ON(from->dst.from);
2219 
2220 	rt->rt6i_flags &= ~RTF_EXPIRES;
2221 	dst_hold(&from->dst);
2222 	rt->dst.from = &from->dst;
2223 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2224 }
2225 
/* Initialize the new copy @rt from @ort: handlers, keys, flags, and
 * the dst.from link (rt6_set_from takes a reference on @ort and shares
 * its metrics).
 *
 * Note: rt6i_flags is assigned before rt6_set_from(), which clears
 * RTF_EXPIRES in the just-copied flags — keep that order.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2247 
2248 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find the RTF_ROUTEINFO route for @prefix/@prefixlen learned from
 * router @gwaddr on interface @ifindex.  Returns the route with a
 * reference held, or NULL when absent.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	/* Match on interface, route-info flags, and advertising router. */
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2280 
/* Install a route learned from an RA route-information option and
 * return it (with a dst reference) via rt6_get_route_info().  A zero
 * prefix length is treated as a default route.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		/* kernel-originated: no netlink requester */
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	/* ip6_route_add() may fail (e.g. duplicate); the lookup below
	 * then simply returns the already-installed route or NULL.
	 */
	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
2309 #endif
2310 
/* Find the RA-learned default route (RTF_ADDRCONF|RTF_DEFAULT) through
 * gateway @addr on device @dev in the RT6_TABLE_DFLT table.  Returns
 * the route with a dst reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	/* default routes all hang off the table's root node */
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);	/* take ref before dropping the lock */
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2332 
/* Install a default route learned from a router advertisement via
 * gateway @gwaddr on @dev, with router preference @pref, and return it
 * (with a dst reference) via rt6_get_dflt_router().
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		/* kernel-originated: no netlink requester */
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	/* failures are handled by the lookup below returning NULL */
	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
2354 
/* Remove all RA-learned (RTF_DEFAULT/RTF_ADDRCONF) routes from the
 * default table, except on interfaces with accept_ra == 2 (which accept
 * RA routes even when forwarding).  The table lock must be dropped
 * around ip6_del_rt(), so the scan restarts after every deletion.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* hold the route across the unlock/delete */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2378 
/* Translate a legacy ioctl in6_rtmsg request into a fib6_config for
 * ip6_route_add()/ip6_route_del().  Legacy requests always target the
 * main table.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}
2399 
/* Handle the legacy SIOCADDRT/SIOCDELRT route ioctls.  Requires
 * CAP_NET_ADMIN in the netns; returns 0 on success or a negative errno.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns the number of uncopied bytes */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		/* FIB modifications are serialized under the RTNL */
		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2436 
2437 /*
2438  *	Drop the packet on the floor
2439  */
2440 
/* Drop @skb on a reject route: bump the appropriate SNMP counter, send
 * an ICMPv6 destination-unreachable with @code, and free the skb.
 * @ipstats_mib_noroutes selects the input vs. output no-route counter.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* unspecified destination is an address error,
			 * not a routing failure
			 */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2463 
/* dst input handler for blackhole routes on the input path */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2468 
/* dst output handler for blackhole routes on the output path */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;	/* ICMP error goes via the dst's dev */
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2474 
/* dst input handler for prohibit routes on the input path */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2479 
/* dst output handler for prohibit routes on the output path */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;	/* ICMP error goes via the dst's dev */
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2485 
2486 /*
2487  *	Allocate a dst for local (unicast / anycast) address.
2488  */
2489 
/* Allocate a host route (/128) for a local unicast or anycast address
 * @addr on @idev.  The route's device is the loopback device; the
 * caller receives it with refcount 1 and is responsible for inserting
 * it into the local table.  Returns ERR_PTR(-ENOMEM) on failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);	/* route keeps its own reference on idev */

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	/* local route: gateway field mirrors the destination itself */
	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2523 
/* Select a source address for @daddr: prefer the route's configured
 * preferred source (rt6i_prefsrc) when set, otherwise fall back to the
 * normal source-address selection on the route's device.  Returns 0 on
 * success with *@saddr filled in, or a negative errno.
 */
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
			const struct in6_addr *daddr,
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev =
		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
	int err = 0;
	if (rt && rt->rt6i_prefsrc.plen)
		*saddr = rt->rt6i_prefsrc.addr;
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}
2540 
2541 /* remove deleted ip from prefsrc entries */
/* argument bundle for the fib6_remove_prefsrc() tree walk */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device, or NULL for all */
	struct net *net;
	struct in6_addr *addr;	/* preferred-source address being removed */
};
2547 
/* fib6_clean_all() callback: clear the preferred-source setting on any
 * route that references the address being removed.  Always returns 0
 * (never asks the walker to delete the route itself).
 */
static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	/* NULL dev means "match any device" */
	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}
2562 
/* Called when address @ifp is removed: scrub it from the prefsrc field
 * of every route in the netns that referenced it.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2573 
2574 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2575 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2576 
2577 /* Remove routers and update dst entries when gateway turn into host. */
2578 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2579 {
2580 	struct in6_addr *gateway = (struct in6_addr *)arg;
2581 
2582 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2583 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2584 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2585 		return -1;
2586 	}
2587 	return 0;
2588 }
2589 
/* Purge routes through @gateway after it changed from router to host */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2594 
/* argument bundle for the fib6_ifdown() tree walk */
struct arg_dev_net {
	struct net_device *dev;	/* device going down, or NULL for all */
	struct net *net;
};
2599 
/* fib6_clean_all() callback: delete (return -1) every route on the
 * downed device, sparing only the netns null entry.  A NULL dev matches
 * all devices (netns teardown).
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry)
		return -1;

	return 0;
}
2611 
/* Remove all routes referencing @dev (or all devices if @dev is NULL)
 * from the FIB, the ICMP rate-limit dst cache and the uncached list.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2624 
/* argument bundle for the rt6_mtu_change_route() tree walk */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2629 
/* fib6_clean_all() callback: propagate a device MTU change into the
 * MTU metric / PMTU of routes on that device.  Always returns 0 (no
 * routes are deleted).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2677 
/* Walk the whole FIB and update route MTUs after @dev's MTU changed */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2687 
/* netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
};
2699 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE request into a fib6_config.
 * Returns 0 on success or a negative errno.  Note: fc_mx/fc_mp point
 * into the request skb, so @cfg must not outlive the message.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* reject-type routes all map to RTF_REJECT; fc_type keeps the
	 * specific variant for error-code selection at insert time
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* prefix attributes may be truncated to prefixlen bytes */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		/* unknown preference values degrade to MEDIUM (RFC 4191) */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2802 
/* per-nexthop bookkeeping while building a multipath route */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route to insert; NULLed once consumed */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request */
	struct mx6_config mxc;		/* parsed metrics (mxc.mx is kmalloc'ed) */
	struct list_head next;
};
2809 
2810 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2811 {
2812 	struct rt6_nh *nh;
2813 
2814 	list_for_each_entry(nh, rt6_nh_list, next) {
2815 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2816 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2817 		        nh->r_cfg.fc_ifindex);
2818 	}
2819 }
2820 
/* Append nexthop route @rt to @rt6_nh_list unless an equivalent entry
 * (same device, idev and gateway) is already queued.  On success the
 * list takes over @rt; returns -EEXIST on duplicate, -ENOMEM or a
 * metric-conversion error otherwise (caller still owns @rt then).
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	struct rt6_info *rtnh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		rtnh = nh->rt6_info;

		if (rtnh->dst.dev == rt->dst.dev &&
		    rtnh->rt6i_idev == rt->rt6i_idev &&
		    ipv6_addr_equal(&rtnh->rt6i_gateway,
				    &rt->rt6i_gateway))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
2853 
/* Install a multipath (RTA_MULTIPATH) route: build one rt6_info per
 * nexthop, then insert them one by one.  On insertion failure, routes
 * already added in this request are deleted again; for NLM_F_REPLACE
 * requests where some nexthops were already committed, the remaining
 * state is logged instead since the old route is gone.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		err = ip6_route_info_create(&r_cfg, &rt);
		if (err)
			goto cleanup;

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* list did not take ownership; free the route */
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	/* free unconsumed routes, metric arrays and list nodes */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
2953 
/* Delete each nexthop of a multipath route individually.  Deletion is
 * best-effort across all nexthops; the last error (if any) is returned.
 */
static int ip6_route_multipath_del(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2990 
/* RTM_DELROUTE handler: dispatch to the multipath or single-route
 * deletion path depending on whether RTA_MULTIPATH was supplied.
 */
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_del(&cfg);
	else
		return ip6_route_del(&cfg);
}
3005 
/* RTM_NEWROUTE handler: dispatch to the multipath or single-route
 * insertion path depending on whether RTA_MULTIPATH was supplied.
 */
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_add(&cfg);
	else
		return ip6_route_add(&cfg);
}
3020 
/* Worst-case netlink message size for one route, used to size the
 * notification skb in inet6_rt_notify().  Must account for every
 * attribute rt6_fill_node() can emit.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3038 
3039 static int rt6_fill_node(struct net *net,
3040 			 struct sk_buff *skb, struct rt6_info *rt,
3041 			 struct in6_addr *dst, struct in6_addr *src,
3042 			 int iif, int type, u32 portid, u32 seq,
3043 			 int prefix, int nowait, unsigned int flags)
3044 {
3045 	u32 metrics[RTAX_MAX];
3046 	struct rtmsg *rtm;
3047 	struct nlmsghdr *nlh;
3048 	long expires;
3049 	u32 table;
3050 
3051 	if (prefix) {	/* user wants prefix routes only */
3052 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3053 			/* success since this is not a prefix route */
3054 			return 1;
3055 		}
3056 	}
3057 
3058 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3059 	if (!nlh)
3060 		return -EMSGSIZE;
3061 
3062 	rtm = nlmsg_data(nlh);
3063 	rtm->rtm_family = AF_INET6;
3064 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3065 	rtm->rtm_src_len = rt->rt6i_src.plen;
3066 	rtm->rtm_tos = 0;
3067 	if (rt->rt6i_table)
3068 		table = rt->rt6i_table->tb6_id;
3069 	else
3070 		table = RT6_TABLE_UNSPEC;
3071 	rtm->rtm_table = table;
3072 	if (nla_put_u32(skb, RTA_TABLE, table))
3073 		goto nla_put_failure;
3074 	if (rt->rt6i_flags & RTF_REJECT) {
3075 		switch (rt->dst.error) {
3076 		case -EINVAL:
3077 			rtm->rtm_type = RTN_BLACKHOLE;
3078 			break;
3079 		case -EACCES:
3080 			rtm->rtm_type = RTN_PROHIBIT;
3081 			break;
3082 		case -EAGAIN:
3083 			rtm->rtm_type = RTN_THROW;
3084 			break;
3085 		default:
3086 			rtm->rtm_type = RTN_UNREACHABLE;
3087 			break;
3088 		}
3089 	}
3090 	else if (rt->rt6i_flags & RTF_LOCAL)
3091 		rtm->rtm_type = RTN_LOCAL;
3092 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3093 		rtm->rtm_type = RTN_LOCAL;
3094 	else
3095 		rtm->rtm_type = RTN_UNICAST;
3096 	rtm->rtm_flags = 0;
3097 	if (!netif_carrier_ok(rt->dst.dev)) {
3098 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3099 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3100 			rtm->rtm_flags |= RTNH_F_DEAD;
3101 	}
3102 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3103 	rtm->rtm_protocol = rt->rt6i_protocol;
3104 	if (rt->rt6i_flags & RTF_DYNAMIC)
3105 		rtm->rtm_protocol = RTPROT_REDIRECT;
3106 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3107 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3108 			rtm->rtm_protocol = RTPROT_RA;
3109 		else
3110 			rtm->rtm_protocol = RTPROT_KERNEL;
3111 	}
3112 
3113 	if (rt->rt6i_flags & RTF_CACHE)
3114 		rtm->rtm_flags |= RTM_F_CLONED;
3115 
3116 	if (dst) {
3117 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3118 			goto nla_put_failure;
3119 		rtm->rtm_dst_len = 128;
3120 	} else if (rtm->rtm_dst_len)
3121 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3122 			goto nla_put_failure;
3123 #ifdef CONFIG_IPV6_SUBTREES
3124 	if (src) {
3125 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3126 			goto nla_put_failure;
3127 		rtm->rtm_src_len = 128;
3128 	} else if (rtm->rtm_src_len &&
3129 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3130 		goto nla_put_failure;
3131 #endif
3132 	if (iif) {
3133 #ifdef CONFIG_IPV6_MROUTE
3134 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3135 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3136 			if (err <= 0) {
3137 				if (!nowait) {
3138 					if (err == 0)
3139 						return 0;
3140 					goto nla_put_failure;
3141 				} else {
3142 					if (err == -EMSGSIZE)
3143 						goto nla_put_failure;
3144 				}
3145 			}
3146 		} else
3147 #endif
3148 			if (nla_put_u32(skb, RTA_IIF, iif))
3149 				goto nla_put_failure;
3150 	} else if (dst) {
3151 		struct in6_addr saddr_buf;
3152 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3153 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3154 			goto nla_put_failure;
3155 	}
3156 
3157 	if (rt->rt6i_prefsrc.plen) {
3158 		struct in6_addr saddr_buf;
3159 		saddr_buf = rt->rt6i_prefsrc.addr;
3160 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3161 			goto nla_put_failure;
3162 	}
3163 
3164 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3165 	if (rt->rt6i_pmtu)
3166 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3167 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3168 		goto nla_put_failure;
3169 
3170 	if (rt->rt6i_flags & RTF_GATEWAY) {
3171 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3172 			goto nla_put_failure;
3173 	}
3174 
3175 	if (rt->dst.dev &&
3176 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3177 		goto nla_put_failure;
3178 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3179 		goto nla_put_failure;
3180 
3181 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3182 
3183 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3184 		goto nla_put_failure;
3185 
3186 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3187 		goto nla_put_failure;
3188 
3189 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3190 
3191 	nlmsg_end(skb, nlh);
3192 	return 0;
3193 
3194 nla_put_failure:
3195 	nlmsg_cancel(skb, nlh);
3196 	return -EMSGSIZE;
3197 }
3198 
/* fib6 dump callback: emit one route into the dump skb.  Honors the
 * RTM_F_PREFIX filter when the request carried a full rtmsg header.
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}
3215 
3216 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3217 {
3218 	struct net *net = sock_net(in_skb->sk);
3219 	struct nlattr *tb[RTA_MAX+1];
3220 	struct rt6_info *rt;
3221 	struct sk_buff *skb;
3222 	struct rtmsg *rtm;
3223 	struct flowi6 fl6;
3224 	int err, iif = 0, oif = 0;
3225 
3226 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3227 	if (err < 0)
3228 		goto errout;
3229 
3230 	err = -EINVAL;
3231 	memset(&fl6, 0, sizeof(fl6));
3232 
3233 	if (tb[RTA_SRC]) {
3234 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3235 			goto errout;
3236 
3237 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3238 	}
3239 
3240 	if (tb[RTA_DST]) {
3241 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3242 			goto errout;
3243 
3244 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3245 	}
3246 
3247 	if (tb[RTA_IIF])
3248 		iif = nla_get_u32(tb[RTA_IIF]);
3249 
3250 	if (tb[RTA_OIF])
3251 		oif = nla_get_u32(tb[RTA_OIF]);
3252 
3253 	if (tb[RTA_MARK])
3254 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3255 
3256 	if (iif) {
3257 		struct net_device *dev;
3258 		int flags = 0;
3259 
3260 		dev = __dev_get_by_index(net, iif);
3261 		if (!dev) {
3262 			err = -ENODEV;
3263 			goto errout;
3264 		}
3265 
3266 		fl6.flowi6_iif = iif;
3267 
3268 		if (!ipv6_addr_any(&fl6.saddr))
3269 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3270 
3271 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3272 							       flags);
3273 	} else {
3274 		fl6.flowi6_oif = oif;
3275 
3276 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3277 	}
3278 
3279 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3280 	if (!skb) {
3281 		ip6_rt_put(rt);
3282 		err = -ENOBUFS;
3283 		goto errout;
3284 	}
3285 
3286 	/* Reserve room for dummy headers, this skb can pass
3287 	   through good chunk of routing engine.
3288 	 */
3289 	skb_reset_mac_header(skb);
3290 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3291 
3292 	skb_dst_set(skb, &rt->dst);
3293 
3294 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3295 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3296 			    nlh->nlmsg_seq, 0, 0, 0);
3297 	if (err < 0) {
3298 		kfree_skb(skb);
3299 		goto errout;
3300 	}
3301 
3302 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3303 errout:
3304 	return err;
3305 }
3306 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  Allocation failures are reported
 * to listening sockets via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* rt6_nlmsg_size() is the worst case for this route */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3337 
/* Netdevice notifier: when a netns' loopback device registers, bind the
 * netns' special template routes (null / prohibit / blackhole) to it so
 * each has a valid output device and inet6_dev reference.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}
	/* NOTE(review): the in6_dev_get() references taken here are not
	 * dropped by this notifier (no NETDEV_UNREGISTER case) — confirm
	 * they are released elsewhere on netns/loopback teardown.
	 */

	return NOTIFY_OK;
}
3357 
3358 /*
3359  *	/proc
3360  */
3361 
3362 #ifdef CONFIG_PROC_FS
3363 
/* /proc/net/ipv6_route: seq_file dump of the routing table
 * (ipv6_route_open is defined earlier in this file).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3371 
3372 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3373 {
3374 	struct net *net = (struct net *)seq->private;
3375 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3376 		   net->ipv6.rt6_stats->fib_nodes,
3377 		   net->ipv6.rt6_stats->fib_route_nodes,
3378 		   net->ipv6.rt6_stats->fib_rt_alloc,
3379 		   net->ipv6.rt6_stats->fib_rt_entries,
3380 		   net->ipv6.rt6_stats->fib_rt_cache,
3381 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3382 		   net->ipv6.rt6_stats->fib_discarded_routes);
3383 
3384 	return 0;
3385 }
3386 
/* Open handler for /proc/net/rt6_stats: single-shot, netns-aware seq file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3391 
/* /proc/net/rt6_stats: per-netns FIB statistics (see rt6_stats_seq_show). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3399 #endif	/* CONFIG_PROC_FS */
3400 
3401 #ifdef CONFIG_SYSCTL
3402 
3403 static
3404 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3405 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3406 {
3407 	struct net *net;
3408 	int delay;
3409 	if (!write)
3410 		return -EINVAL;
3411 
3412 	net = (struct net *)ctl->extra1;
3413 	delay = net->ipv6.sysctl.flush_delay;
3414 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3415 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3416 	return 0;
3417 }
3418 
/* Template for the per-netns net.ipv6.route sysctl table.  The .data
 * pointers reference init_net defaults and are rewired to the owning
 * netns in ipv6_route_sysctl_init(); entry ORDER is load-bearing — the
 * numeric indexes there must match this array.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger handled by ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, but in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* sentinel: empty procname terminates the table */
};
3492 
/* Duplicate ipv6_route_table_template for a netns and point every entry's
 * .data at that netns' own storage.  Returns NULL on allocation failure
 * (the caller treats that as "no sysctl table").
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* Indexes must stay in sync with the template's entry order. */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* consumed by ipv6_sysctl_rtcache_flush() */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		/* An empty procname terminates a ctl_table, so clearing
		 * entry 0 effectively hides the whole table from non-init
		 * user namespaces.
		 */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3521 #endif
3522 
/* Per-netns setup for the IPv6 routing engine: clone the dst_ops template,
 * allocate per-netns copies of the special template routes, and install
 * the default sysctl/gc values.  Returns 0 or -ENOMEM, unwinding fully
 * on failure.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	/* Each netns gets its own dst_ops copy so entry accounting and gc
	 * state are per-namespace.
	 */
	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Per-netns "null" (unreachable) template route. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* With policy routing, also clone the prohibit and blackhole
	 * template routes.
	 */
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the net.ipv6.route.* sysctls (see template above). */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwinding, in reverse allocation order. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3594 
3595 static void __net_exit ip6_route_net_exit(struct net *net)
3596 {
3597 	kfree(net->ipv6.ip6_null_entry);
3598 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3599 	kfree(net->ipv6.ip6_prohibit_entry);
3600 	kfree(net->ipv6.ip6_blk_hole_entry);
3601 #endif
3602 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3603 }
3604 
3605 static int __net_init ip6_route_net_init_late(struct net *net)
3606 {
3607 #ifdef CONFIG_PROC_FS
3608 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3609 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3610 #endif
3611 	return 0;
3612 }
3613 
3614 static void __net_exit ip6_route_net_exit_late(struct net *net)
3615 {
3616 #ifdef CONFIG_PROC_FS
3617 	remove_proc_entry("ipv6_route", net->proc_net);
3618 	remove_proc_entry("rt6_stats", net->proc_net);
3619 #endif
3620 }
3621 
/* Core per-netns lifecycle of the IPv6 routing engine. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3626 
3627 static int __net_init ipv6_inetpeer_init(struct net *net)
3628 {
3629 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3630 
3631 	if (!bp)
3632 		return -ENOMEM;
3633 	inet_peer_base_init(bp);
3634 	net->ipv6.peers = bp;
3635 	return 0;
3636 }
3637 
3638 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3639 {
3640 	struct inet_peer_base *bp = net->ipv6.peers;
3641 
3642 	net->ipv6.peers = NULL;
3643 	inetpeer_invalidate_tree(bp);
3644 	kfree(bp);
3645 }
3646 
/* Per-netns lifecycle of the IPv6 inetpeer storage. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3651 
/* Late per-netns lifecycle: /proc entries (registered after the core ops). */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3656 
/* Netdevice event hook; see ip6_route_dev_notify() above. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3661 
3662 int __init ip6_route_init(void)
3663 {
3664 	int ret;
3665 	int cpu;
3666 
3667 	ret = -ENOMEM;
3668 	ip6_dst_ops_template.kmem_cachep =
3669 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3670 				  SLAB_HWCACHE_ALIGN, NULL);
3671 	if (!ip6_dst_ops_template.kmem_cachep)
3672 		goto out;
3673 
3674 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3675 	if (ret)
3676 		goto out_kmem_cache;
3677 
3678 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3679 	if (ret)
3680 		goto out_dst_entries;
3681 
3682 	ret = register_pernet_subsys(&ip6_route_net_ops);
3683 	if (ret)
3684 		goto out_register_inetpeer;
3685 
3686 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3687 
3688 	/* Registering of the loopback is done before this portion of code,
3689 	 * the loopback reference in rt6_info will not be taken, do it
3690 	 * manually for init_net */
3691 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3692 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3693   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3694 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3695 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3696 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3697 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3698   #endif
3699 	ret = fib6_init();
3700 	if (ret)
3701 		goto out_register_subsys;
3702 
3703 	ret = xfrm6_init();
3704 	if (ret)
3705 		goto out_fib6_init;
3706 
3707 	ret = fib6_rules_init();
3708 	if (ret)
3709 		goto xfrm6_init;
3710 
3711 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3712 	if (ret)
3713 		goto fib6_rules_init;
3714 
3715 	ret = -ENOBUFS;
3716 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3717 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3718 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3719 		goto out_register_late_subsys;
3720 
3721 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3722 	if (ret)
3723 		goto out_register_late_subsys;
3724 
3725 	for_each_possible_cpu(cpu) {
3726 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3727 
3728 		INIT_LIST_HEAD(&ul->head);
3729 		spin_lock_init(&ul->lock);
3730 	}
3731 
3732 out:
3733 	return ret;
3734 
3735 out_register_late_subsys:
3736 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3737 fib6_rules_init:
3738 	fib6_rules_cleanup();
3739 xfrm6_init:
3740 	xfrm6_fini();
3741 out_fib6_init:
3742 	fib6_gc_cleanup();
3743 out_register_subsys:
3744 	unregister_pernet_subsys(&ip6_route_net_ops);
3745 out_register_inetpeer:
3746 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3747 out_dst_entries:
3748 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3749 out_kmem_cache:
3750 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3751 	goto out;
3752 }
3753 
/* Full teardown of the IPv6 routing subsystem, strictly in the reverse
 * order of ip6_route_init().
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3766