xref: /linux/net/ipv6/route.c (revision 89e47d3b8a273b0eac21e4bf6d7fdb86b654fa16)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/*
 * Result of checking a route's next hop (see rt6_check_neigh()).
 * Every failure is negative; rt6_score_route() returns a negative result
 * to its caller unchanged.
 */
enum rt6_nud_state {
	/* find_match() drops the route from consideration */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is in NUD_FAILED state (CONFIG_IPV6_ROUTER_PREF builds) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* next hop is usable */
	RT6_NUD_SUCCEED		= 1,
};
74 
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sk_buff *skb);
88 static int		ip6_pkt_prohibit(struct sk_buff *skb);
89 static int		ip6_pkt_prohibit_out(struct sk_buff *skb);
90 static void		ip6_link_failure(struct sk_buff *skb);
91 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 					   struct sk_buff *skb, u32 mtu);
93 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 					struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net *net,
99 					   const struct in6_addr *prefix, int prefixlen,
100 					   const struct in6_addr *gwaddr, int ifindex,
101 					   unsigned int pref);
102 static struct rt6_info *rt6_get_route_info(struct net *net,
103 					   const struct in6_addr *prefix, int prefixlen,
104 					   const struct in6_addr *gwaddr, int ifindex);
105 #endif
106 
107 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
108 {
109 	struct rt6_info *rt = (struct rt6_info *) dst;
110 	struct inet_peer *peer;
111 	u32 *p = NULL;
112 
113 	if (!(rt->dst.flags & DST_HOST))
114 		return NULL;
115 
116 	peer = rt6_get_peer_create(rt);
117 	if (peer) {
118 		u32 *old_p = __DST_METRICS_PTR(old);
119 		unsigned long prev, new;
120 
121 		p = peer->metrics;
122 		if (inet_metrics_new(peer))
123 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
124 
125 		new = (unsigned long) p;
126 		prev = cmpxchg(&dst->_metrics, old, new);
127 
128 		if (prev != old) {
129 			p = __DST_METRICS_PTR(prev);
130 			if (prev & DST_METRICS_READ_ONLY)
131 				p = NULL;
132 		}
133 	}
134 	return p;
135 }
136 
137 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
138 					     struct sk_buff *skb,
139 					     const void *daddr)
140 {
141 	struct in6_addr *p = &rt->rt6i_gateway;
142 
143 	if (!ipv6_addr_any(p))
144 		return (const void *) p;
145 	else if (skb)
146 		return &ipv6_hdr(skb)->daddr;
147 	return daddr;
148 }
149 
150 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
151 					  struct sk_buff *skb,
152 					  const void *daddr)
153 {
154 	struct rt6_info *rt = (struct rt6_info *) dst;
155 	struct neighbour *n;
156 
157 	daddr = choose_neigh_daddr(rt, skb, daddr);
158 	n = __ipv6_neigh_lookup(dst->dev, daddr);
159 	if (n)
160 		return n;
161 	return neigh_create(&nd_tbl, daddr, dst->dev);
162 }
163 
164 static struct dst_ops ip6_dst_ops_template = {
165 	.family			=	AF_INET6,
166 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
167 	.gc			=	ip6_dst_gc,
168 	.gc_thresh		=	1024,
169 	.check			=	ip6_dst_check,
170 	.default_advmss		=	ip6_default_advmss,
171 	.mtu			=	ip6_mtu,
172 	.cow_metrics		=	ipv6_cow_metrics,
173 	.destroy		=	ip6_dst_destroy,
174 	.ifdown			=	ip6_dst_ifdown,
175 	.negative_advice	=	ip6_negative_advice,
176 	.link_failure		=	ip6_link_failure,
177 	.update_pmtu		=	ip6_rt_update_pmtu,
178 	.redirect		=	rt6_do_redirect,
179 	.local_out		=	__ip6_local_out,
180 	.neigh_lookup		=	ip6_neigh_lookup,
181 };
182 
183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
184 {
185 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186 
187 	return mtu ? : dst->dev->mtu;
188 }
189 
190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
191 					 struct sk_buff *skb, u32 mtu)
192 {
193 }
194 
/* dst_ops->redirect hook for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
199 
200 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
201 					 unsigned long old)
202 {
203 	return NULL;
204 }
205 
206 static struct dst_ops ip6_dst_blackhole_ops = {
207 	.family			=	AF_INET6,
208 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
209 	.destroy		=	ip6_dst_destroy,
210 	.check			=	ip6_dst_check,
211 	.mtu			=	ip6_blackhole_mtu,
212 	.default_advmss		=	ip6_default_advmss,
213 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
214 	.redirect		=	ip6_rt_blackhole_redirect,
215 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
216 	.neigh_lookup		=	ip6_neigh_lookup,
217 };
218 
219 static const u32 ip6_template_metrics[RTAX_MAX] = {
220 	[RTAX_HOPLIMIT - 1] = 0,
221 };
222 
223 static const struct rt6_info ip6_null_entry_template = {
224 	.dst = {
225 		.__refcnt	= ATOMIC_INIT(1),
226 		.__use		= 1,
227 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
228 		.error		= -ENETUNREACH,
229 		.input		= ip6_pkt_discard,
230 		.output		= ip6_pkt_discard_out,
231 	},
232 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
233 	.rt6i_protocol  = RTPROT_KERNEL,
234 	.rt6i_metric	= ~(u32) 0,
235 	.rt6i_ref	= ATOMIC_INIT(1),
236 };
237 
238 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
239 
240 static const struct rt6_info ip6_prohibit_entry_template = {
241 	.dst = {
242 		.__refcnt	= ATOMIC_INIT(1),
243 		.__use		= 1,
244 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
245 		.error		= -EACCES,
246 		.input		= ip6_pkt_prohibit,
247 		.output		= ip6_pkt_prohibit_out,
248 	},
249 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
250 	.rt6i_protocol  = RTPROT_KERNEL,
251 	.rt6i_metric	= ~(u32) 0,
252 	.rt6i_ref	= ATOMIC_INIT(1),
253 };
254 
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256 	.dst = {
257 		.__refcnt	= ATOMIC_INIT(1),
258 		.__use		= 1,
259 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
260 		.error		= -EINVAL,
261 		.input		= dst_discard,
262 		.output		= dst_discard,
263 	},
264 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
265 	.rt6i_protocol  = RTPROT_KERNEL,
266 	.rt6i_metric	= ~(u32) 0,
267 	.rt6i_ref	= ATOMIC_INIT(1),
268 };
269 
270 #endif
271 
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 					     struct net_device *dev,
275 					     int flags,
276 					     struct fib6_table *table)
277 {
278 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 					0, DST_OBSOLETE_FORCE_CHK, flags);
280 
281 	if (rt) {
282 		struct dst_entry *dst = &rt->dst;
283 
284 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 		rt->rt6i_genid = rt_genid_ipv6(net);
287 		INIT_LIST_HEAD(&rt->rt6i_siblings);
288 	}
289 	return rt;
290 }
291 
292 static void ip6_dst_destroy(struct dst_entry *dst)
293 {
294 	struct rt6_info *rt = (struct rt6_info *)dst;
295 	struct inet6_dev *idev = rt->rt6i_idev;
296 	struct dst_entry *from = dst->from;
297 
298 	if (!(rt->dst.flags & DST_HOST))
299 		dst_destroy_metrics_generic(dst);
300 
301 	if (idev) {
302 		rt->rt6i_idev = NULL;
303 		in6_dev_put(idev);
304 	}
305 
306 	dst->from = NULL;
307 	dst_release(from);
308 
309 	if (rt6_has_peer(rt)) {
310 		struct inet_peer *peer = rt6_peer_ptr(rt);
311 		inet_putpeer(peer);
312 	}
313 }
314 
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 	struct inet_peer_base *base;
318 	struct inet_peer *peer;
319 
320 	base = inetpeer_base_ptr(rt->_rt6i_peer);
321 	if (!base)
322 		return;
323 
324 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 	if (peer) {
326 		if (!rt6_set_peer(rt, peer))
327 			inet_putpeer(peer);
328 	}
329 }
330 
331 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
332 			   int how)
333 {
334 	struct rt6_info *rt = (struct rt6_info *)dst;
335 	struct inet6_dev *idev = rt->rt6i_idev;
336 	struct net_device *loopback_dev =
337 		dev_net(dev)->loopback_dev;
338 
339 	if (dev != loopback_dev) {
340 		if (idev && idev->dev == dev) {
341 			struct inet6_dev *loopback_idev =
342 				in6_dev_get(loopback_dev);
343 			if (loopback_idev) {
344 				rt->rt6i_idev = loopback_idev;
345 				in6_dev_put(idev);
346 			}
347 		}
348 	}
349 }
350 
351 static bool rt6_check_expired(const struct rt6_info *rt)
352 {
353 	if (rt->rt6i_flags & RTF_EXPIRES) {
354 		if (time_after(jiffies, rt->dst.expires))
355 			return true;
356 	} else if (rt->dst.from) {
357 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
358 	}
359 	return false;
360 }
361 
362 static bool rt6_need_strict(const struct in6_addr *daddr)
363 {
364 	return ipv6_addr_type(daddr) &
365 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
366 }
367 
368 /* Multipath route selection:
369  *   Hash based function using packet header and flowlabel.
370  * Adapted from fib_info_hashfn()
371  */
372 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
373 			       const struct flowi6 *fl6)
374 {
375 	unsigned int val = fl6->flowi6_proto;
376 
377 	val ^= ipv6_addr_hash(&fl6->daddr);
378 	val ^= ipv6_addr_hash(&fl6->saddr);
379 
380 	/* Work only if this not encapsulated */
381 	switch (fl6->flowi6_proto) {
382 	case IPPROTO_UDP:
383 	case IPPROTO_TCP:
384 	case IPPROTO_SCTP:
385 		val ^= (__force u16)fl6->fl6_sport;
386 		val ^= (__force u16)fl6->fl6_dport;
387 		break;
388 
389 	case IPPROTO_ICMPV6:
390 		val ^= (__force u16)fl6->fl6_icmp_type;
391 		val ^= (__force u16)fl6->fl6_icmp_code;
392 		break;
393 	}
394 	/* RFC6438 recommands to use flowlabel */
395 	val ^= (__force u32)fl6->flowlabel;
396 
397 	/* Perhaps, we need to tune, this function? */
398 	val = val ^ (val >> 7) ^ (val >> 12);
399 	return val % candidate_count;
400 }
401 
402 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
403 					     struct flowi6 *fl6, int oif,
404 					     int strict)
405 {
406 	struct rt6_info *sibling, *next_sibling;
407 	int route_choosen;
408 
409 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
410 	/* Don't change the route, if route_choosen == 0
411 	 * (siblings does not include ourself)
412 	 */
413 	if (route_choosen)
414 		list_for_each_entry_safe(sibling, next_sibling,
415 				&match->rt6i_siblings, rt6i_siblings) {
416 			route_choosen--;
417 			if (route_choosen == 0) {
418 				if (rt6_score_route(sibling, oif, strict) < 0)
419 					break;
420 				match = sibling;
421 				break;
422 			}
423 		}
424 	return match;
425 }
426 
427 /*
428  *	Route lookup. Any table->tb6_lock is implied.
429  */
430 
431 static inline struct rt6_info *rt6_device_match(struct net *net,
432 						    struct rt6_info *rt,
433 						    const struct in6_addr *saddr,
434 						    int oif,
435 						    int flags)
436 {
437 	struct rt6_info *local = NULL;
438 	struct rt6_info *sprt;
439 
440 	if (!oif && ipv6_addr_any(saddr))
441 		goto out;
442 
443 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
444 		struct net_device *dev = sprt->dst.dev;
445 
446 		if (oif) {
447 			if (dev->ifindex == oif)
448 				return sprt;
449 			if (dev->flags & IFF_LOOPBACK) {
450 				if (!sprt->rt6i_idev ||
451 				    sprt->rt6i_idev->dev->ifindex != oif) {
452 					if (flags & RT6_LOOKUP_F_IFACE && oif)
453 						continue;
454 					if (local && (!oif ||
455 						      local->rt6i_idev->dev->ifindex == oif))
456 						continue;
457 				}
458 				local = sprt;
459 			}
460 		} else {
461 			if (ipv6_chk_addr(net, saddr, dev,
462 					  flags & RT6_LOOKUP_F_IFACE))
463 				return sprt;
464 		}
465 	}
466 
467 	if (oif) {
468 		if (local)
469 			return local;
470 
471 		if (flags & RT6_LOOKUP_F_IFACE)
472 			return net->ipv6.ip6_null_entry;
473 	}
474 out:
475 	return rt;
476 }
477 
478 #ifdef CONFIG_IPV6_ROUTER_PREF
479 struct __rt6_probe_work {
480 	struct work_struct work;
481 	struct in6_addr target;
482 	struct net_device *dev;
483 };
484 
485 static void rt6_probe_deferred(struct work_struct *w)
486 {
487 	struct in6_addr mcaddr;
488 	struct __rt6_probe_work *work =
489 		container_of(w, struct __rt6_probe_work, work);
490 
491 	addrconf_addr_solict_mult(&work->target, &mcaddr);
492 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
493 	dev_put(work->dev);
494 	kfree(w);
495 }
496 
497 static void rt6_probe(struct rt6_info *rt)
498 {
499 	struct neighbour *neigh;
500 	/*
501 	 * Okay, this does not seem to be appropriate
502 	 * for now, however, we need to check if it
503 	 * is really so; aka Router Reachability Probing.
504 	 *
505 	 * Router Reachability Probe MUST be rate-limited
506 	 * to no more than one per minute.
507 	 */
508 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
509 		return;
510 	rcu_read_lock_bh();
511 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
512 	if (neigh) {
513 		write_lock(&neigh->lock);
514 		if (neigh->nud_state & NUD_VALID)
515 			goto out;
516 	}
517 
518 	if (!neigh ||
519 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
520 		struct __rt6_probe_work *work;
521 
522 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
523 
524 		if (neigh && work)
525 			__neigh_set_probe_once(neigh);
526 
527 		if (neigh)
528 			write_unlock(&neigh->lock);
529 
530 		if (work) {
531 			INIT_WORK(&work->work, rt6_probe_deferred);
532 			work->target = rt->rt6i_gateway;
533 			dev_hold(rt->dst.dev);
534 			work->dev = rt->dst.dev;
535 			schedule_work(&work->work);
536 		}
537 	} else {
538 out:
539 		write_unlock(&neigh->lock);
540 	}
541 	rcu_read_unlock_bh();
542 }
543 #else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
547 #endif
548 
549 /*
550  * Default Router Selection (RFC 2461 6.3.6)
551  */
552 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
553 {
554 	struct net_device *dev = rt->dst.dev;
555 	if (!oif || dev->ifindex == oif)
556 		return 2;
557 	if ((dev->flags & IFF_LOOPBACK) &&
558 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
559 		return 1;
560 	return 0;
561 }
562 
563 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
564 {
565 	struct neighbour *neigh;
566 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
567 
568 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
569 	    !(rt->rt6i_flags & RTF_GATEWAY))
570 		return RT6_NUD_SUCCEED;
571 
572 	rcu_read_lock_bh();
573 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
574 	if (neigh) {
575 		read_lock(&neigh->lock);
576 		if (neigh->nud_state & NUD_VALID)
577 			ret = RT6_NUD_SUCCEED;
578 #ifdef CONFIG_IPV6_ROUTER_PREF
579 		else if (!(neigh->nud_state & NUD_FAILED))
580 			ret = RT6_NUD_SUCCEED;
581 		else
582 			ret = RT6_NUD_FAIL_PROBE;
583 #endif
584 		read_unlock(&neigh->lock);
585 	} else {
586 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
587 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
588 	}
589 	rcu_read_unlock_bh();
590 
591 	return ret;
592 }
593 
594 static int rt6_score_route(struct rt6_info *rt, int oif,
595 			   int strict)
596 {
597 	int m;
598 
599 	m = rt6_check_dev(rt, oif);
600 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
601 		return RT6_NUD_FAIL_HARD;
602 #ifdef CONFIG_IPV6_ROUTER_PREF
603 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
604 #endif
605 	if (strict & RT6_LOOKUP_F_REACHABLE) {
606 		int n = rt6_check_neigh(rt);
607 		if (n < 0)
608 			return n;
609 	}
610 	return m;
611 }
612 
613 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
614 				   int *mpri, struct rt6_info *match,
615 				   bool *do_rr)
616 {
617 	int m;
618 	bool match_do_rr = false;
619 
620 	if (rt6_check_expired(rt))
621 		goto out;
622 
623 	m = rt6_score_route(rt, oif, strict);
624 	if (m == RT6_NUD_FAIL_DO_RR) {
625 		match_do_rr = true;
626 		m = 0; /* lowest valid score */
627 	} else if (m == RT6_NUD_FAIL_HARD) {
628 		goto out;
629 	}
630 
631 	if (strict & RT6_LOOKUP_F_REACHABLE)
632 		rt6_probe(rt);
633 
634 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
635 	if (m > *mpri) {
636 		*do_rr = match_do_rr;
637 		*mpri = m;
638 		match = rt;
639 	}
640 out:
641 	return match;
642 }
643 
644 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
645 				     struct rt6_info *rr_head,
646 				     u32 metric, int oif, int strict,
647 				     bool *do_rr)
648 {
649 	struct rt6_info *rt, *match;
650 	int mpri = -1;
651 
652 	match = NULL;
653 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
654 	     rt = rt->dst.rt6_next)
655 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
656 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
657 	     rt = rt->dst.rt6_next)
658 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
659 
660 	return match;
661 }
662 
663 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
664 {
665 	struct rt6_info *match, *rt0;
666 	struct net *net;
667 	bool do_rr = false;
668 
669 	rt0 = fn->rr_ptr;
670 	if (!rt0)
671 		fn->rr_ptr = rt0 = fn->leaf;
672 
673 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
674 			     &do_rr);
675 
676 	if (do_rr) {
677 		struct rt6_info *next = rt0->dst.rt6_next;
678 
679 		/* no entries matched; do round-robin */
680 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
681 			next = fn->leaf;
682 
683 		if (next != rt0)
684 			fn->rr_ptr = next;
685 	}
686 
687 	net = dev_net(rt0->dst.dev);
688 	return match ? match : net->ipv6.ip6_null_entry;
689 }
690 
691 #ifdef CONFIG_IPV6_ROUTE_INFO
692 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
693 		  const struct in6_addr *gwaddr)
694 {
695 	struct net *net = dev_net(dev);
696 	struct route_info *rinfo = (struct route_info *) opt;
697 	struct in6_addr prefix_buf, *prefix;
698 	unsigned int pref;
699 	unsigned long lifetime;
700 	struct rt6_info *rt;
701 
702 	if (len < sizeof(struct route_info)) {
703 		return -EINVAL;
704 	}
705 
706 	/* Sanity check for prefix_len and length */
707 	if (rinfo->length > 3) {
708 		return -EINVAL;
709 	} else if (rinfo->prefix_len > 128) {
710 		return -EINVAL;
711 	} else if (rinfo->prefix_len > 64) {
712 		if (rinfo->length < 2) {
713 			return -EINVAL;
714 		}
715 	} else if (rinfo->prefix_len > 0) {
716 		if (rinfo->length < 1) {
717 			return -EINVAL;
718 		}
719 	}
720 
721 	pref = rinfo->route_pref;
722 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
723 		return -EINVAL;
724 
725 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
726 
727 	if (rinfo->length == 3)
728 		prefix = (struct in6_addr *)rinfo->prefix;
729 	else {
730 		/* this function is safe */
731 		ipv6_addr_prefix(&prefix_buf,
732 				 (struct in6_addr *)rinfo->prefix,
733 				 rinfo->prefix_len);
734 		prefix = &prefix_buf;
735 	}
736 
737 	if (rinfo->prefix_len == 0)
738 		rt = rt6_get_dflt_router(gwaddr, dev);
739 	else
740 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
741 					gwaddr, dev->ifindex);
742 
743 	if (rt && !lifetime) {
744 		ip6_del_rt(rt);
745 		rt = NULL;
746 	}
747 
748 	if (!rt && lifetime)
749 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
750 					pref);
751 	else if (rt)
752 		rt->rt6i_flags = RTF_ROUTEINFO |
753 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
754 
755 	if (rt) {
756 		if (!addrconf_finite_timeout(lifetime))
757 			rt6_clean_expires(rt);
758 		else
759 			rt6_set_expires(rt, jiffies + HZ * lifetime);
760 
761 		ip6_rt_put(rt);
762 	}
763 	return 0;
764 }
765 #endif
766 
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Does nothing unless the enclosing function's 'rt' is the null entry of
 * __net.  Otherwise 'fn' is moved towards the root -- through the
 * parent's source-address subtree, looked up with saddr, when the parent
 * has one and fn is not already that subtree -- until either a node
 * flagged RTN_RTINFO is reached (jump to 'restart') or the walk is at a
 * node flagged RTN_TL_ROOT (jump to 'out').
 *
 * The enclosing function must provide the variables 'rt' and 'fn' and
 * the labels 'restart' and 'out'.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
784 
785 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
786 					     struct fib6_table *table,
787 					     struct flowi6 *fl6, int flags)
788 {
789 	struct fib6_node *fn;
790 	struct rt6_info *rt;
791 
792 	read_lock_bh(&table->tb6_lock);
793 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
794 restart:
795 	rt = fn->leaf;
796 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
797 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
798 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
799 	BACKTRACK(net, &fl6->saddr);
800 out:
801 	dst_use(&rt->dst, jiffies);
802 	read_unlock_bh(&table->tb6_lock);
803 	return rt;
804 
805 }
806 
807 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
808 				    int flags)
809 {
810 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
811 }
812 EXPORT_SYMBOL_GPL(ip6_route_lookup);
813 
814 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
815 			    const struct in6_addr *saddr, int oif, int strict)
816 {
817 	struct flowi6 fl6 = {
818 		.flowi6_oif = oif,
819 		.daddr = *daddr,
820 	};
821 	struct dst_entry *dst;
822 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
823 
824 	if (saddr) {
825 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
826 		flags |= RT6_LOOKUP_F_HAS_SADDR;
827 	}
828 
829 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
830 	if (dst->error == 0)
831 		return (struct rt6_info *) dst;
832 
833 	dst_release(dst);
834 
835 	return NULL;
836 }
837 
838 EXPORT_SYMBOL(rt6_lookup);
839 
840 /* ip6_ins_rt is called with FREE table->tb6_lock.
841    It takes new route entry, the addition fails by any reason the
842    route is freed. In any case, if caller does not hold it, it may
843    be destroyed.
844  */
845 
846 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
847 {
848 	int err;
849 	struct fib6_table *table;
850 
851 	table = rt->rt6i_table;
852 	write_lock_bh(&table->tb6_lock);
853 	err = fib6_add(&table->tb6_root, rt, info);
854 	write_unlock_bh(&table->tb6_lock);
855 
856 	return err;
857 }
858 
859 int ip6_ins_rt(struct rt6_info *rt)
860 {
861 	struct nl_info info = {
862 		.nl_net = dev_net(rt->dst.dev),
863 	};
864 	return __ip6_ins_rt(rt, &info);
865 }
866 
867 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
868 				      const struct in6_addr *daddr,
869 				      const struct in6_addr *saddr)
870 {
871 	struct rt6_info *rt;
872 
873 	/*
874 	 *	Clone the route.
875 	 */
876 
877 	rt = ip6_rt_copy(ort, daddr);
878 
879 	if (rt) {
880 		if (ort->rt6i_dst.plen != 128 &&
881 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
882 			rt->rt6i_flags |= RTF_ANYCAST;
883 
884 		rt->rt6i_flags |= RTF_CACHE;
885 
886 #ifdef CONFIG_IPV6_SUBTREES
887 		if (rt->rt6i_src.plen && saddr) {
888 			rt->rt6i_src.addr = *saddr;
889 			rt->rt6i_src.plen = 128;
890 		}
891 #endif
892 	}
893 
894 	return rt;
895 }
896 
897 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
898 					const struct in6_addr *daddr)
899 {
900 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
901 
902 	if (rt)
903 		rt->rt6i_flags |= RTF_CACHE;
904 	return rt;
905 }
906 
907 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
908 				      struct flowi6 *fl6, int flags)
909 {
910 	struct fib6_node *fn;
911 	struct rt6_info *rt, *nrt;
912 	int strict = 0;
913 	int attempts = 3;
914 	int err;
915 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
916 
917 	strict |= flags & RT6_LOOKUP_F_IFACE;
918 
919 relookup:
920 	read_lock_bh(&table->tb6_lock);
921 
922 restart_2:
923 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
924 
925 restart:
926 	rt = rt6_select(fn, oif, strict | reachable);
927 	if (rt->rt6i_nsiblings)
928 		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
929 	BACKTRACK(net, &fl6->saddr);
930 	if (rt == net->ipv6.ip6_null_entry ||
931 	    rt->rt6i_flags & RTF_CACHE)
932 		goto out;
933 
934 	dst_hold(&rt->dst);
935 	read_unlock_bh(&table->tb6_lock);
936 
937 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
938 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
939 	else if (!(rt->dst.flags & DST_HOST))
940 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
941 	else
942 		goto out2;
943 
944 	ip6_rt_put(rt);
945 	rt = nrt ? : net->ipv6.ip6_null_entry;
946 
947 	dst_hold(&rt->dst);
948 	if (nrt) {
949 		err = ip6_ins_rt(nrt);
950 		if (!err)
951 			goto out2;
952 	}
953 
954 	if (--attempts <= 0)
955 		goto out2;
956 
957 	/*
958 	 * Race condition! In the gap, when table->tb6_lock was
959 	 * released someone could insert this route.  Relookup.
960 	 */
961 	ip6_rt_put(rt);
962 	goto relookup;
963 
964 out:
965 	if (reachable) {
966 		reachable = 0;
967 		goto restart_2;
968 	}
969 	dst_hold(&rt->dst);
970 	read_unlock_bh(&table->tb6_lock);
971 out2:
972 	rt->dst.lastuse = jiffies;
973 	rt->dst.__use++;
974 
975 	return rt;
976 }
977 
978 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
979 					    struct flowi6 *fl6, int flags)
980 {
981 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
982 }
983 
984 static struct dst_entry *ip6_route_input_lookup(struct net *net,
985 						struct net_device *dev,
986 						struct flowi6 *fl6, int flags)
987 {
988 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
989 		flags |= RT6_LOOKUP_F_IFACE;
990 
991 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
992 }
993 
994 void ip6_route_input(struct sk_buff *skb)
995 {
996 	const struct ipv6hdr *iph = ipv6_hdr(skb);
997 	struct net *net = dev_net(skb->dev);
998 	int flags = RT6_LOOKUP_F_HAS_SADDR;
999 	struct flowi6 fl6 = {
1000 		.flowi6_iif = skb->dev->ifindex,
1001 		.daddr = iph->daddr,
1002 		.saddr = iph->saddr,
1003 		.flowlabel = ip6_flowinfo(iph),
1004 		.flowi6_mark = skb->mark,
1005 		.flowi6_proto = iph->nexthdr,
1006 	};
1007 
1008 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1009 }
1010 
1011 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1012 					     struct flowi6 *fl6, int flags)
1013 {
1014 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1015 }
1016 
1017 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1018 				    struct flowi6 *fl6)
1019 {
1020 	int flags = 0;
1021 
1022 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1023 
1024 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1025 		flags |= RT6_LOOKUP_F_IFACE;
1026 
1027 	if (!ipv6_addr_any(&fl6->saddr))
1028 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1029 	else if (sk)
1030 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1031 
1032 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1033 }
1034 
1035 EXPORT_SYMBOL(ip6_route_output);
1036 
1037 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1038 {
1039 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1040 	struct dst_entry *new = NULL;
1041 
1042 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1043 	if (rt) {
1044 		new = &rt->dst;
1045 
1046 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1047 		rt6_init_peer(rt, net->ipv6.peers);
1048 
1049 		new->__use = 1;
1050 		new->input = dst_discard;
1051 		new->output = dst_discard;
1052 
1053 		if (dst_metrics_read_only(&ort->dst))
1054 			new->_metrics = ort->dst._metrics;
1055 		else
1056 			dst_copy_metrics(new, &ort->dst);
1057 		rt->rt6i_idev = ort->rt6i_idev;
1058 		if (rt->rt6i_idev)
1059 			in6_dev_hold(rt->rt6i_idev);
1060 
1061 		rt->rt6i_gateway = ort->rt6i_gateway;
1062 		rt->rt6i_flags = ort->rt6i_flags;
1063 		rt->rt6i_metric = 0;
1064 
1065 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1066 #ifdef CONFIG_IPV6_SUBTREES
1067 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1068 #endif
1069 
1070 		dst_free(new);
1071 	}
1072 
1073 	dst_release(dst_orig);
1074 	return new ? new : ERR_PTR(-ENOMEM);
1075 }
1076 
1077 /*
1078  *	Destination cache support functions
1079  */
1080 
1081 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1082 {
1083 	struct rt6_info *rt;
1084 
1085 	rt = (struct rt6_info *) dst;
1086 
1087 	/* All IPV6 dsts are created with ->obsolete set to the value
1088 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1089 	 * into this function always.
1090 	 */
1091 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1092 		return NULL;
1093 
1094 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1095 		return NULL;
1096 
1097 	if (rt6_check_expired(rt))
1098 		return NULL;
1099 
1100 	return dst;
1101 }
1102 
1103 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1104 {
1105 	struct rt6_info *rt = (struct rt6_info *) dst;
1106 
1107 	if (rt) {
1108 		if (rt->rt6i_flags & RTF_CACHE) {
1109 			if (rt6_check_expired(rt)) {
1110 				ip6_del_rt(rt);
1111 				dst = NULL;
1112 			}
1113 		} else {
1114 			dst_release(dst);
1115 			dst = NULL;
1116 		}
1117 	}
1118 	return dst;
1119 }
1120 
1121 static void ip6_link_failure(struct sk_buff *skb)
1122 {
1123 	struct rt6_info *rt;
1124 
1125 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1126 
1127 	rt = (struct rt6_info *) skb_dst(skb);
1128 	if (rt) {
1129 		if (rt->rt6i_flags & RTF_CACHE) {
1130 			dst_hold(&rt->dst);
1131 			if (ip6_del_rt(rt))
1132 				dst_free(&rt->dst);
1133 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1134 			rt->rt6i_node->fn_sernum = -1;
1135 		}
1136 	}
1137 }
1138 
1139 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1140 			       struct sk_buff *skb, u32 mtu)
1141 {
1142 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1143 
1144 	dst_confirm(dst);
1145 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1146 		struct net *net = dev_net(dst->dev);
1147 
1148 		rt6->rt6i_flags |= RTF_MODIFIED;
1149 		if (mtu < IPV6_MIN_MTU) {
1150 			u32 features = dst_metric(dst, RTAX_FEATURES);
1151 			mtu = IPV6_MIN_MTU;
1152 			features |= RTAX_FEATURE_ALLFRAG;
1153 			dst_metric_set(dst, RTAX_FEATURES, features);
1154 		}
1155 		dst_metric_set(dst, RTAX_MTU, mtu);
1156 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1157 	}
1158 }
1159 
1160 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1161 		     int oif, u32 mark)
1162 {
1163 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1164 	struct dst_entry *dst;
1165 	struct flowi6 fl6;
1166 
1167 	memset(&fl6, 0, sizeof(fl6));
1168 	fl6.flowi6_oif = oif;
1169 	fl6.flowi6_mark = mark;
1170 	fl6.daddr = iph->daddr;
1171 	fl6.saddr = iph->saddr;
1172 	fl6.flowlabel = ip6_flowinfo(iph);
1173 
1174 	dst = ip6_route_output(net, NULL, &fl6);
1175 	if (!dst->error)
1176 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1177 	dst_release(dst);
1178 }
1179 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1180 
1181 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1182 {
1183 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1184 			sk->sk_bound_dev_if, sk->sk_mark);
1185 }
1186 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1187 
1188 /* Handle redirects */
1189 struct ip6rd_flowi {
1190 	struct flowi6 fl6;
1191 	struct in6_addr gateway;
1192 };
1193 
1194 static struct rt6_info *__ip6_route_redirect(struct net *net,
1195 					     struct fib6_table *table,
1196 					     struct flowi6 *fl6,
1197 					     int flags)
1198 {
1199 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1200 	struct rt6_info *rt;
1201 	struct fib6_node *fn;
1202 
1203 	/* Get the "current" route for this destination and
1204 	 * check if the redirect has come from approriate router.
1205 	 *
1206 	 * RFC 4861 specifies that redirects should only be
1207 	 * accepted if they come from the nexthop to the target.
1208 	 * Due to the way the routes are chosen, this notion
1209 	 * is a bit fuzzy and one might need to check all possible
1210 	 * routes.
1211 	 */
1212 
1213 	read_lock_bh(&table->tb6_lock);
1214 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1215 restart:
1216 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1217 		if (rt6_check_expired(rt))
1218 			continue;
1219 		if (rt->dst.error)
1220 			break;
1221 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1222 			continue;
1223 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1224 			continue;
1225 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1226 			continue;
1227 		break;
1228 	}
1229 
1230 	if (!rt)
1231 		rt = net->ipv6.ip6_null_entry;
1232 	else if (rt->dst.error) {
1233 		rt = net->ipv6.ip6_null_entry;
1234 		goto out;
1235 	}
1236 	BACKTRACK(net, &fl6->saddr);
1237 out:
1238 	dst_hold(&rt->dst);
1239 
1240 	read_unlock_bh(&table->tb6_lock);
1241 
1242 	return rt;
1243 };
1244 
1245 static struct dst_entry *ip6_route_redirect(struct net *net,
1246 					const struct flowi6 *fl6,
1247 					const struct in6_addr *gateway)
1248 {
1249 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1250 	struct ip6rd_flowi rdfl;
1251 
1252 	rdfl.fl6 = *fl6;
1253 	rdfl.gateway = *gateway;
1254 
1255 	return fib6_rule_lookup(net, &rdfl.fl6,
1256 				flags, __ip6_route_redirect);
1257 }
1258 
1259 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1260 {
1261 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1262 	struct dst_entry *dst;
1263 	struct flowi6 fl6;
1264 
1265 	memset(&fl6, 0, sizeof(fl6));
1266 	fl6.flowi6_oif = oif;
1267 	fl6.flowi6_mark = mark;
1268 	fl6.daddr = iph->daddr;
1269 	fl6.saddr = iph->saddr;
1270 	fl6.flowlabel = ip6_flowinfo(iph);
1271 
1272 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1273 	rt6_do_redirect(dst, NULL, skb);
1274 	dst_release(dst);
1275 }
1276 EXPORT_SYMBOL_GPL(ip6_redirect);
1277 
1278 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1279 			    u32 mark)
1280 {
1281 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1282 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1283 	struct dst_entry *dst;
1284 	struct flowi6 fl6;
1285 
1286 	memset(&fl6, 0, sizeof(fl6));
1287 	fl6.flowi6_oif = oif;
1288 	fl6.flowi6_mark = mark;
1289 	fl6.daddr = msg->dest;
1290 	fl6.saddr = iph->daddr;
1291 
1292 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1293 	rt6_do_redirect(dst, NULL, skb);
1294 	dst_release(dst);
1295 }
1296 
1297 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1298 {
1299 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1300 }
1301 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1302 
1303 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1304 {
1305 	struct net_device *dev = dst->dev;
1306 	unsigned int mtu = dst_mtu(dst);
1307 	struct net *net = dev_net(dev);
1308 
1309 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1310 
1311 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1312 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1313 
1314 	/*
1315 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1316 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1317 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1318 	 * rely only on pmtu discovery"
1319 	 */
1320 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1321 		mtu = IPV6_MAXPLEN;
1322 	return mtu;
1323 }
1324 
1325 static unsigned int ip6_mtu(const struct dst_entry *dst)
1326 {
1327 	struct inet6_dev *idev;
1328 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1329 
1330 	if (mtu)
1331 		return mtu;
1332 
1333 	mtu = IPV6_MIN_MTU;
1334 
1335 	rcu_read_lock();
1336 	idev = __in6_dev_get(dst->dev);
1337 	if (idev)
1338 		mtu = idev->cnf.mtu6;
1339 	rcu_read_unlock();
1340 
1341 	return mtu;
1342 }
1343 
1344 static struct dst_entry *icmp6_dst_gc_list;
1345 static DEFINE_SPINLOCK(icmp6_dst_lock);
1346 
1347 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1348 				  struct flowi6 *fl6)
1349 {
1350 	struct dst_entry *dst;
1351 	struct rt6_info *rt;
1352 	struct inet6_dev *idev = in6_dev_get(dev);
1353 	struct net *net = dev_net(dev);
1354 
1355 	if (unlikely(!idev))
1356 		return ERR_PTR(-ENODEV);
1357 
1358 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1359 	if (unlikely(!rt)) {
1360 		in6_dev_put(idev);
1361 		dst = ERR_PTR(-ENOMEM);
1362 		goto out;
1363 	}
1364 
1365 	rt->dst.flags |= DST_HOST;
1366 	rt->dst.output  = ip6_output;
1367 	atomic_set(&rt->dst.__refcnt, 1);
1368 	rt->rt6i_gateway  = fl6->daddr;
1369 	rt->rt6i_dst.addr = fl6->daddr;
1370 	rt->rt6i_dst.plen = 128;
1371 	rt->rt6i_idev     = idev;
1372 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1373 
1374 	spin_lock_bh(&icmp6_dst_lock);
1375 	rt->dst.next = icmp6_dst_gc_list;
1376 	icmp6_dst_gc_list = &rt->dst;
1377 	spin_unlock_bh(&icmp6_dst_lock);
1378 
1379 	fib6_force_start_gc(net);
1380 
1381 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1382 
1383 out:
1384 	return dst;
1385 }
1386 
1387 int icmp6_dst_gc(void)
1388 {
1389 	struct dst_entry *dst, **pprev;
1390 	int more = 0;
1391 
1392 	spin_lock_bh(&icmp6_dst_lock);
1393 	pprev = &icmp6_dst_gc_list;
1394 
1395 	while ((dst = *pprev) != NULL) {
1396 		if (!atomic_read(&dst->__refcnt)) {
1397 			*pprev = dst->next;
1398 			dst_free(dst);
1399 		} else {
1400 			pprev = &dst->next;
1401 			++more;
1402 		}
1403 	}
1404 
1405 	spin_unlock_bh(&icmp6_dst_lock);
1406 
1407 	return more;
1408 }
1409 
1410 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1411 			    void *arg)
1412 {
1413 	struct dst_entry *dst, **pprev;
1414 
1415 	spin_lock_bh(&icmp6_dst_lock);
1416 	pprev = &icmp6_dst_gc_list;
1417 	while ((dst = *pprev) != NULL) {
1418 		struct rt6_info *rt = (struct rt6_info *) dst;
1419 		if (func(rt, arg)) {
1420 			*pprev = dst->next;
1421 			dst_free(dst);
1422 		} else {
1423 			pprev = &dst->next;
1424 		}
1425 	}
1426 	spin_unlock_bh(&icmp6_dst_lock);
1427 }
1428 
1429 static int ip6_dst_gc(struct dst_ops *ops)
1430 {
1431 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1432 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1433 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1434 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1435 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1436 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1437 	int entries;
1438 
1439 	entries = dst_entries_get_fast(ops);
1440 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1441 	    entries <= rt_max_size)
1442 		goto out;
1443 
1444 	net->ipv6.ip6_rt_gc_expire++;
1445 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1446 	entries = dst_entries_get_slow(ops);
1447 	if (entries < ops->gc_thresh)
1448 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1449 out:
1450 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1451 	return entries > rt_max_size;
1452 }
1453 
1454 /*
1455  *
1456  */
1457 
1458 int ip6_route_add(struct fib6_config *cfg)
1459 {
1460 	int err;
1461 	struct net *net = cfg->fc_nlinfo.nl_net;
1462 	struct rt6_info *rt = NULL;
1463 	struct net_device *dev = NULL;
1464 	struct inet6_dev *idev = NULL;
1465 	struct fib6_table *table;
1466 	int addr_type;
1467 
1468 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1469 		return -EINVAL;
1470 #ifndef CONFIG_IPV6_SUBTREES
1471 	if (cfg->fc_src_len)
1472 		return -EINVAL;
1473 #endif
1474 	if (cfg->fc_ifindex) {
1475 		err = -ENODEV;
1476 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1477 		if (!dev)
1478 			goto out;
1479 		idev = in6_dev_get(dev);
1480 		if (!idev)
1481 			goto out;
1482 	}
1483 
1484 	if (cfg->fc_metric == 0)
1485 		cfg->fc_metric = IP6_RT_PRIO_USER;
1486 
1487 	err = -ENOBUFS;
1488 	if (cfg->fc_nlinfo.nlh &&
1489 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1490 		table = fib6_get_table(net, cfg->fc_table);
1491 		if (!table) {
1492 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1493 			table = fib6_new_table(net, cfg->fc_table);
1494 		}
1495 	} else {
1496 		table = fib6_new_table(net, cfg->fc_table);
1497 	}
1498 
1499 	if (!table)
1500 		goto out;
1501 
1502 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1503 
1504 	if (!rt) {
1505 		err = -ENOMEM;
1506 		goto out;
1507 	}
1508 
1509 	if (cfg->fc_flags & RTF_EXPIRES)
1510 		rt6_set_expires(rt, jiffies +
1511 				clock_t_to_jiffies(cfg->fc_expires));
1512 	else
1513 		rt6_clean_expires(rt);
1514 
1515 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1516 		cfg->fc_protocol = RTPROT_BOOT;
1517 	rt->rt6i_protocol = cfg->fc_protocol;
1518 
1519 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1520 
1521 	if (addr_type & IPV6_ADDR_MULTICAST)
1522 		rt->dst.input = ip6_mc_input;
1523 	else if (cfg->fc_flags & RTF_LOCAL)
1524 		rt->dst.input = ip6_input;
1525 	else
1526 		rt->dst.input = ip6_forward;
1527 
1528 	rt->dst.output = ip6_output;
1529 
1530 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1531 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1532 	if (rt->rt6i_dst.plen == 128)
1533 	       rt->dst.flags |= DST_HOST;
1534 
1535 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1536 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1537 		if (!metrics) {
1538 			err = -ENOMEM;
1539 			goto out;
1540 		}
1541 		dst_init_metrics(&rt->dst, metrics, 0);
1542 	}
1543 #ifdef CONFIG_IPV6_SUBTREES
1544 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1545 	rt->rt6i_src.plen = cfg->fc_src_len;
1546 #endif
1547 
1548 	rt->rt6i_metric = cfg->fc_metric;
1549 
1550 	/* We cannot add true routes via loopback here,
1551 	   they would result in kernel looping; promote them to reject routes
1552 	 */
1553 	if ((cfg->fc_flags & RTF_REJECT) ||
1554 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1555 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1556 	     !(cfg->fc_flags & RTF_LOCAL))) {
1557 		/* hold loopback dev/idev if we haven't done so. */
1558 		if (dev != net->loopback_dev) {
1559 			if (dev) {
1560 				dev_put(dev);
1561 				in6_dev_put(idev);
1562 			}
1563 			dev = net->loopback_dev;
1564 			dev_hold(dev);
1565 			idev = in6_dev_get(dev);
1566 			if (!idev) {
1567 				err = -ENODEV;
1568 				goto out;
1569 			}
1570 		}
1571 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1572 		switch (cfg->fc_type) {
1573 		case RTN_BLACKHOLE:
1574 			rt->dst.error = -EINVAL;
1575 			rt->dst.output = dst_discard;
1576 			rt->dst.input = dst_discard;
1577 			break;
1578 		case RTN_PROHIBIT:
1579 			rt->dst.error = -EACCES;
1580 			rt->dst.output = ip6_pkt_prohibit_out;
1581 			rt->dst.input = ip6_pkt_prohibit;
1582 			break;
1583 		case RTN_THROW:
1584 		default:
1585 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1586 					: -ENETUNREACH;
1587 			rt->dst.output = ip6_pkt_discard_out;
1588 			rt->dst.input = ip6_pkt_discard;
1589 			break;
1590 		}
1591 		goto install_route;
1592 	}
1593 
1594 	if (cfg->fc_flags & RTF_GATEWAY) {
1595 		const struct in6_addr *gw_addr;
1596 		int gwa_type;
1597 
1598 		gw_addr = &cfg->fc_gateway;
1599 		rt->rt6i_gateway = *gw_addr;
1600 		gwa_type = ipv6_addr_type(gw_addr);
1601 
1602 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1603 			struct rt6_info *grt;
1604 
1605 			/* IPv6 strictly inhibits using not link-local
1606 			   addresses as nexthop address.
1607 			   Otherwise, router will not able to send redirects.
1608 			   It is very good, but in some (rare!) circumstances
1609 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1610 			   some exceptions. --ANK
1611 			 */
1612 			err = -EINVAL;
1613 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1614 				goto out;
1615 
1616 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1617 
1618 			err = -EHOSTUNREACH;
1619 			if (!grt)
1620 				goto out;
1621 			if (dev) {
1622 				if (dev != grt->dst.dev) {
1623 					ip6_rt_put(grt);
1624 					goto out;
1625 				}
1626 			} else {
1627 				dev = grt->dst.dev;
1628 				idev = grt->rt6i_idev;
1629 				dev_hold(dev);
1630 				in6_dev_hold(grt->rt6i_idev);
1631 			}
1632 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1633 				err = 0;
1634 			ip6_rt_put(grt);
1635 
1636 			if (err)
1637 				goto out;
1638 		}
1639 		err = -EINVAL;
1640 		if (!dev || (dev->flags & IFF_LOOPBACK))
1641 			goto out;
1642 	}
1643 
1644 	err = -ENODEV;
1645 	if (!dev)
1646 		goto out;
1647 
1648 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1649 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1650 			err = -EINVAL;
1651 			goto out;
1652 		}
1653 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1654 		rt->rt6i_prefsrc.plen = 128;
1655 	} else
1656 		rt->rt6i_prefsrc.plen = 0;
1657 
1658 	rt->rt6i_flags = cfg->fc_flags;
1659 
1660 install_route:
1661 	if (cfg->fc_mx) {
1662 		struct nlattr *nla;
1663 		int remaining;
1664 
1665 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1666 			int type = nla_type(nla);
1667 
1668 			if (type) {
1669 				if (type > RTAX_MAX) {
1670 					err = -EINVAL;
1671 					goto out;
1672 				}
1673 
1674 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1675 			}
1676 		}
1677 	}
1678 
1679 	rt->dst.dev = dev;
1680 	rt->rt6i_idev = idev;
1681 	rt->rt6i_table = table;
1682 
1683 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1684 
1685 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1686 
1687 out:
1688 	if (dev)
1689 		dev_put(dev);
1690 	if (idev)
1691 		in6_dev_put(idev);
1692 	if (rt)
1693 		dst_free(&rt->dst);
1694 	return err;
1695 }
1696 
1697 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1698 {
1699 	int err;
1700 	struct fib6_table *table;
1701 	struct net *net = dev_net(rt->dst.dev);
1702 
1703 	if (rt == net->ipv6.ip6_null_entry) {
1704 		err = -ENOENT;
1705 		goto out;
1706 	}
1707 
1708 	table = rt->rt6i_table;
1709 	write_lock_bh(&table->tb6_lock);
1710 	err = fib6_del(rt, info);
1711 	write_unlock_bh(&table->tb6_lock);
1712 
1713 out:
1714 	ip6_rt_put(rt);
1715 	return err;
1716 }
1717 
1718 int ip6_del_rt(struct rt6_info *rt)
1719 {
1720 	struct nl_info info = {
1721 		.nl_net = dev_net(rt->dst.dev),
1722 	};
1723 	return __ip6_del_rt(rt, &info);
1724 }
1725 
1726 static int ip6_route_del(struct fib6_config *cfg)
1727 {
1728 	struct fib6_table *table;
1729 	struct fib6_node *fn;
1730 	struct rt6_info *rt;
1731 	int err = -ESRCH;
1732 
1733 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1734 	if (!table)
1735 		return err;
1736 
1737 	read_lock_bh(&table->tb6_lock);
1738 
1739 	fn = fib6_locate(&table->tb6_root,
1740 			 &cfg->fc_dst, cfg->fc_dst_len,
1741 			 &cfg->fc_src, cfg->fc_src_len);
1742 
1743 	if (fn) {
1744 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1745 			if (cfg->fc_ifindex &&
1746 			    (!rt->dst.dev ||
1747 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1748 				continue;
1749 			if (cfg->fc_flags & RTF_GATEWAY &&
1750 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1751 				continue;
1752 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1753 				continue;
1754 			dst_hold(&rt->dst);
1755 			read_unlock_bh(&table->tb6_lock);
1756 
1757 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1758 		}
1759 	}
1760 	read_unlock_bh(&table->tb6_lock);
1761 
1762 	return err;
1763 }
1764 
1765 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1766 {
1767 	struct net *net = dev_net(skb->dev);
1768 	struct netevent_redirect netevent;
1769 	struct rt6_info *rt, *nrt = NULL;
1770 	struct ndisc_options ndopts;
1771 	struct inet6_dev *in6_dev;
1772 	struct neighbour *neigh;
1773 	struct rd_msg *msg;
1774 	int optlen, on_link;
1775 	u8 *lladdr;
1776 
1777 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1778 	optlen -= sizeof(*msg);
1779 
1780 	if (optlen < 0) {
1781 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1782 		return;
1783 	}
1784 
1785 	msg = (struct rd_msg *)icmp6_hdr(skb);
1786 
1787 	if (ipv6_addr_is_multicast(&msg->dest)) {
1788 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1789 		return;
1790 	}
1791 
1792 	on_link = 0;
1793 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1794 		on_link = 1;
1795 	} else if (ipv6_addr_type(&msg->target) !=
1796 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1797 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1798 		return;
1799 	}
1800 
1801 	in6_dev = __in6_dev_get(skb->dev);
1802 	if (!in6_dev)
1803 		return;
1804 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1805 		return;
1806 
1807 	/* RFC2461 8.1:
1808 	 *	The IP source address of the Redirect MUST be the same as the current
1809 	 *	first-hop router for the specified ICMP Destination Address.
1810 	 */
1811 
1812 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1813 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1814 		return;
1815 	}
1816 
1817 	lladdr = NULL;
1818 	if (ndopts.nd_opts_tgt_lladdr) {
1819 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1820 					     skb->dev);
1821 		if (!lladdr) {
1822 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1823 			return;
1824 		}
1825 	}
1826 
1827 	rt = (struct rt6_info *) dst;
1828 	if (rt == net->ipv6.ip6_null_entry) {
1829 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1830 		return;
1831 	}
1832 
1833 	/* Redirect received -> path was valid.
1834 	 * Look, redirects are sent only in response to data packets,
1835 	 * so that this nexthop apparently is reachable. --ANK
1836 	 */
1837 	dst_confirm(&rt->dst);
1838 
1839 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1840 	if (!neigh)
1841 		return;
1842 
1843 	/*
1844 	 *	We have finally decided to accept it.
1845 	 */
1846 
1847 	neigh_update(neigh, lladdr, NUD_STALE,
1848 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1849 		     NEIGH_UPDATE_F_OVERRIDE|
1850 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1851 				     NEIGH_UPDATE_F_ISROUTER))
1852 		     );
1853 
1854 	nrt = ip6_rt_copy(rt, &msg->dest);
1855 	if (!nrt)
1856 		goto out;
1857 
1858 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1859 	if (on_link)
1860 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1861 
1862 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1863 
1864 	if (ip6_ins_rt(nrt))
1865 		goto out;
1866 
1867 	netevent.old = &rt->dst;
1868 	netevent.new = &nrt->dst;
1869 	netevent.daddr = &msg->dest;
1870 	netevent.neigh = neigh;
1871 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1872 
1873 	if (rt->rt6i_flags & RTF_CACHE) {
1874 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1875 		ip6_del_rt(rt);
1876 	}
1877 
1878 out:
1879 	neigh_release(neigh);
1880 }
1881 
1882 /*
1883  *	Misc support functions
1884  */
1885 
1886 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1887 				    const struct in6_addr *dest)
1888 {
1889 	struct net *net = dev_net(ort->dst.dev);
1890 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1891 					    ort->rt6i_table);
1892 
1893 	if (rt) {
1894 		rt->dst.input = ort->dst.input;
1895 		rt->dst.output = ort->dst.output;
1896 		rt->dst.flags |= DST_HOST;
1897 
1898 		rt->rt6i_dst.addr = *dest;
1899 		rt->rt6i_dst.plen = 128;
1900 		dst_copy_metrics(&rt->dst, &ort->dst);
1901 		rt->dst.error = ort->dst.error;
1902 		rt->rt6i_idev = ort->rt6i_idev;
1903 		if (rt->rt6i_idev)
1904 			in6_dev_hold(rt->rt6i_idev);
1905 		rt->dst.lastuse = jiffies;
1906 
1907 		if (ort->rt6i_flags & RTF_GATEWAY)
1908 			rt->rt6i_gateway = ort->rt6i_gateway;
1909 		else
1910 			rt->rt6i_gateway = *dest;
1911 		rt->rt6i_flags = ort->rt6i_flags;
1912 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1913 		    (RTF_DEFAULT | RTF_ADDRCONF))
1914 			rt6_set_from(rt, ort);
1915 		rt->rt6i_metric = 0;
1916 
1917 #ifdef CONFIG_IPV6_SUBTREES
1918 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1919 #endif
1920 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1921 		rt->rt6i_table = ort->rt6i_table;
1922 	}
1923 	return rt;
1924 }
1925 
1926 #ifdef CONFIG_IPV6_ROUTE_INFO
1927 static struct rt6_info *rt6_get_route_info(struct net *net,
1928 					   const struct in6_addr *prefix, int prefixlen,
1929 					   const struct in6_addr *gwaddr, int ifindex)
1930 {
1931 	struct fib6_node *fn;
1932 	struct rt6_info *rt = NULL;
1933 	struct fib6_table *table;
1934 
1935 	table = fib6_get_table(net, RT6_TABLE_INFO);
1936 	if (!table)
1937 		return NULL;
1938 
1939 	read_lock_bh(&table->tb6_lock);
1940 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1941 	if (!fn)
1942 		goto out;
1943 
1944 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1945 		if (rt->dst.dev->ifindex != ifindex)
1946 			continue;
1947 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1948 			continue;
1949 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1950 			continue;
1951 		dst_hold(&rt->dst);
1952 		break;
1953 	}
1954 out:
1955 	read_unlock_bh(&table->tb6_lock);
1956 	return rt;
1957 }
1958 
1959 static struct rt6_info *rt6_add_route_info(struct net *net,
1960 					   const struct in6_addr *prefix, int prefixlen,
1961 					   const struct in6_addr *gwaddr, int ifindex,
1962 					   unsigned int pref)
1963 {
1964 	struct fib6_config cfg = {
1965 		.fc_table	= RT6_TABLE_INFO,
1966 		.fc_metric	= IP6_RT_PRIO_USER,
1967 		.fc_ifindex	= ifindex,
1968 		.fc_dst_len	= prefixlen,
1969 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1970 				  RTF_UP | RTF_PREF(pref),
1971 		.fc_nlinfo.portid = 0,
1972 		.fc_nlinfo.nlh = NULL,
1973 		.fc_nlinfo.nl_net = net,
1974 	};
1975 
1976 	cfg.fc_dst = *prefix;
1977 	cfg.fc_gateway = *gwaddr;
1978 
1979 	/* We should treat it as a default route if prefix length is 0. */
1980 	if (!prefixlen)
1981 		cfg.fc_flags |= RTF_DEFAULT;
1982 
1983 	ip6_route_add(&cfg);
1984 
1985 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1986 }
1987 #endif
1988 
1989 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1990 {
1991 	struct rt6_info *rt;
1992 	struct fib6_table *table;
1993 
1994 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1995 	if (!table)
1996 		return NULL;
1997 
1998 	read_lock_bh(&table->tb6_lock);
1999 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
2000 		if (dev == rt->dst.dev &&
2001 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2002 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2003 			break;
2004 	}
2005 	if (rt)
2006 		dst_hold(&rt->dst);
2007 	read_unlock_bh(&table->tb6_lock);
2008 	return rt;
2009 }
2010 
2011 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2012 				     struct net_device *dev,
2013 				     unsigned int pref)
2014 {
2015 	struct fib6_config cfg = {
2016 		.fc_table	= RT6_TABLE_DFLT,
2017 		.fc_metric	= IP6_RT_PRIO_USER,
2018 		.fc_ifindex	= dev->ifindex,
2019 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2020 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2021 		.fc_nlinfo.portid = 0,
2022 		.fc_nlinfo.nlh = NULL,
2023 		.fc_nlinfo.nl_net = dev_net(dev),
2024 	};
2025 
2026 	cfg.fc_gateway = *gwaddr;
2027 
2028 	ip6_route_add(&cfg);
2029 
2030 	return rt6_get_dflt_router(gwaddr, dev);
2031 }
2032 
2033 void rt6_purge_dflt_routers(struct net *net)
2034 {
2035 	struct rt6_info *rt;
2036 	struct fib6_table *table;
2037 
2038 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2039 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2040 	if (!table)
2041 		return;
2042 
2043 restart:
2044 	read_lock_bh(&table->tb6_lock);
2045 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2046 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2047 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2048 			dst_hold(&rt->dst);
2049 			read_unlock_bh(&table->tb6_lock);
2050 			ip6_del_rt(rt);
2051 			goto restart;
2052 		}
2053 	}
2054 	read_unlock_bh(&table->tb6_lock);
2055 }
2056 
2057 static void rtmsg_to_fib6_config(struct net *net,
2058 				 struct in6_rtmsg *rtmsg,
2059 				 struct fib6_config *cfg)
2060 {
2061 	memset(cfg, 0, sizeof(*cfg));
2062 
2063 	cfg->fc_table = RT6_TABLE_MAIN;
2064 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2065 	cfg->fc_metric = rtmsg->rtmsg_metric;
2066 	cfg->fc_expires = rtmsg->rtmsg_info;
2067 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2068 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2069 	cfg->fc_flags = rtmsg->rtmsg_flags;
2070 
2071 	cfg->fc_nlinfo.nl_net = net;
2072 
2073 	cfg->fc_dst = rtmsg->rtmsg_dst;
2074 	cfg->fc_src = rtmsg->rtmsg_src;
2075 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2076 }
2077 
2078 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2079 {
2080 	struct fib6_config cfg;
2081 	struct in6_rtmsg rtmsg;
2082 	int err;
2083 
2084 	switch(cmd) {
2085 	case SIOCADDRT:		/* Add a route */
2086 	case SIOCDELRT:		/* Delete a route */
2087 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2088 			return -EPERM;
2089 		err = copy_from_user(&rtmsg, arg,
2090 				     sizeof(struct in6_rtmsg));
2091 		if (err)
2092 			return -EFAULT;
2093 
2094 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2095 
2096 		rtnl_lock();
2097 		switch (cmd) {
2098 		case SIOCADDRT:
2099 			err = ip6_route_add(&cfg);
2100 			break;
2101 		case SIOCDELRT:
2102 			err = ip6_route_del(&cfg);
2103 			break;
2104 		default:
2105 			err = -EINVAL;
2106 		}
2107 		rtnl_unlock();
2108 
2109 		return err;
2110 	}
2111 
2112 	return -EINVAL;
2113 }
2114 
2115 /*
2116  *	Drop the packet on the floor
2117  */
2118 
2119 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2120 {
2121 	int type;
2122 	struct dst_entry *dst = skb_dst(skb);
2123 	switch (ipstats_mib_noroutes) {
2124 	case IPSTATS_MIB_INNOROUTES:
2125 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2126 		if (type == IPV6_ADDR_ANY) {
2127 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2128 				      IPSTATS_MIB_INADDRERRORS);
2129 			break;
2130 		}
2131 		/* FALLTHROUGH */
2132 	case IPSTATS_MIB_OUTNOROUTES:
2133 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2134 			      ipstats_mib_noroutes);
2135 		break;
2136 	}
2137 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2138 	kfree_skb(skb);
2139 	return 0;
2140 }
2141 
2142 static int ip6_pkt_discard(struct sk_buff *skb)
2143 {
2144 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2145 }
2146 
2147 static int ip6_pkt_discard_out(struct sk_buff *skb)
2148 {
2149 	skb->dev = skb_dst(skb)->dev;
2150 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2151 }
2152 
2153 static int ip6_pkt_prohibit(struct sk_buff *skb)
2154 {
2155 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2156 }
2157 
2158 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2159 {
2160 	skb->dev = skb_dst(skb)->dev;
2161 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2162 }
2163 
2164 /*
2165  *	Allocate a dst for local (unicast / anycast) address.
2166  */
2167 
2168 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2169 				    const struct in6_addr *addr,
2170 				    bool anycast)
2171 {
2172 	struct net *net = dev_net(idev->dev);
2173 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2174 
2175 	if (!rt) {
2176 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2177 		return ERR_PTR(-ENOMEM);
2178 	}
2179 
2180 	in6_dev_hold(idev);
2181 
2182 	rt->dst.flags |= DST_HOST;
2183 	rt->dst.input = ip6_input;
2184 	rt->dst.output = ip6_output;
2185 	rt->rt6i_idev = idev;
2186 
2187 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2188 	if (anycast)
2189 		rt->rt6i_flags |= RTF_ANYCAST;
2190 	else
2191 		rt->rt6i_flags |= RTF_LOCAL;
2192 
2193 	rt->rt6i_gateway  = *addr;
2194 	rt->rt6i_dst.addr = *addr;
2195 	rt->rt6i_dst.plen = 128;
2196 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2197 
2198 	atomic_set(&rt->dst.__refcnt, 1);
2199 
2200 	return rt;
2201 }
2202 
2203 int ip6_route_get_saddr(struct net *net,
2204 			struct rt6_info *rt,
2205 			const struct in6_addr *daddr,
2206 			unsigned int prefs,
2207 			struct in6_addr *saddr)
2208 {
2209 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2210 	int err = 0;
2211 	if (rt->rt6i_prefsrc.plen)
2212 		*saddr = rt->rt6i_prefsrc.addr;
2213 	else
2214 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2215 					 daddr, prefs, saddr);
2216 	return err;
2217 }
2218 
2219 /* remove deleted ip from prefsrc entries */
2220 struct arg_dev_net_ip {
2221 	struct net_device *dev;
2222 	struct net *net;
2223 	struct in6_addr *addr;
2224 };
2225 
2226 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2227 {
2228 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2229 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2230 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2231 
2232 	if (((void *)rt->dst.dev == dev || !dev) &&
2233 	    rt != net->ipv6.ip6_null_entry &&
2234 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2235 		/* remove prefsrc entry */
2236 		rt->rt6i_prefsrc.plen = 0;
2237 	}
2238 	return 0;
2239 }
2240 
2241 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2242 {
2243 	struct net *net = dev_net(ifp->idev->dev);
2244 	struct arg_dev_net_ip adni = {
2245 		.dev = ifp->idev->dev,
2246 		.net = net,
2247 		.addr = &ifp->addr,
2248 	};
2249 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2250 }
2251 
2252 struct arg_dev_net {
2253 	struct net_device *dev;
2254 	struct net *net;
2255 };
2256 
2257 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2258 {
2259 	const struct arg_dev_net *adn = arg;
2260 	const struct net_device *dev = adn->dev;
2261 
2262 	if ((rt->dst.dev == dev || !dev) &&
2263 	    rt != adn->net->ipv6.ip6_null_entry)
2264 		return -1;
2265 
2266 	return 0;
2267 }
2268 
2269 void rt6_ifdown(struct net *net, struct net_device *dev)
2270 {
2271 	struct arg_dev_net adn = {
2272 		.dev = dev,
2273 		.net = net,
2274 	};
2275 
2276 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2277 	icmp6_clean_all(fib6_ifdown, &adn);
2278 }
2279 
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	/* device whose MTU changed; only routes on this device are updated */
	struct net_device *dev;
	/* new MTU value written into the matching routes' RTAX_MTU metric */
	unsigned int mtu;
};
2284 
2285 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2286 {
2287 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2288 	struct inet6_dev *idev;
2289 
2290 	/* In IPv6 pmtu discovery is not optional,
2291 	   so that RTAX_MTU lock cannot disable it.
2292 	   We still use this lock to block changes
2293 	   caused by addrconf/ndisc.
2294 	*/
2295 
2296 	idev = __in6_dev_get(arg->dev);
2297 	if (!idev)
2298 		return 0;
2299 
2300 	/* For administrative MTU increase, there is no way to discover
2301 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2302 	   Since RFC 1981 doesn't include administrative MTU increase
2303 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2304 	 */
2305 	/*
2306 	   If new MTU is less than route PMTU, this new MTU will be the
2307 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2308 	   decreases; if new MTU is greater than route PMTU, and the
2309 	   old MTU is the lowest MTU in the path, update the route PMTU
2310 	   to reflect the increase. In this case if the other nodes' MTU
2311 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2312 	   PMTU discouvery.
2313 	 */
2314 	if (rt->dst.dev == arg->dev &&
2315 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2316 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2317 	     (dst_mtu(&rt->dst) < arg->mtu &&
2318 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2319 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2320 	}
2321 	return 0;
2322 }
2323 
2324 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2325 {
2326 	struct rt6_mtu_change_arg arg = {
2327 		.dev = dev,
2328 		.mtu = mtu,
2329 	};
2330 
2331 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2332 }
2333 
2334 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2335 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2336 	[RTA_OIF]               = { .type = NLA_U32 },
2337 	[RTA_IIF]		= { .type = NLA_U32 },
2338 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2339 	[RTA_METRICS]           = { .type = NLA_NESTED },
2340 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2341 };
2342 
2343 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2344 			      struct fib6_config *cfg)
2345 {
2346 	struct rtmsg *rtm;
2347 	struct nlattr *tb[RTA_MAX+1];
2348 	int err;
2349 
2350 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2351 	if (err < 0)
2352 		goto errout;
2353 
2354 	err = -EINVAL;
2355 	rtm = nlmsg_data(nlh);
2356 	memset(cfg, 0, sizeof(*cfg));
2357 
2358 	cfg->fc_table = rtm->rtm_table;
2359 	cfg->fc_dst_len = rtm->rtm_dst_len;
2360 	cfg->fc_src_len = rtm->rtm_src_len;
2361 	cfg->fc_flags = RTF_UP;
2362 	cfg->fc_protocol = rtm->rtm_protocol;
2363 	cfg->fc_type = rtm->rtm_type;
2364 
2365 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2366 	    rtm->rtm_type == RTN_BLACKHOLE ||
2367 	    rtm->rtm_type == RTN_PROHIBIT ||
2368 	    rtm->rtm_type == RTN_THROW)
2369 		cfg->fc_flags |= RTF_REJECT;
2370 
2371 	if (rtm->rtm_type == RTN_LOCAL)
2372 		cfg->fc_flags |= RTF_LOCAL;
2373 
2374 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2375 	cfg->fc_nlinfo.nlh = nlh;
2376 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2377 
2378 	if (tb[RTA_GATEWAY]) {
2379 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2380 		cfg->fc_flags |= RTF_GATEWAY;
2381 	}
2382 
2383 	if (tb[RTA_DST]) {
2384 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2385 
2386 		if (nla_len(tb[RTA_DST]) < plen)
2387 			goto errout;
2388 
2389 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2390 	}
2391 
2392 	if (tb[RTA_SRC]) {
2393 		int plen = (rtm->rtm_src_len + 7) >> 3;
2394 
2395 		if (nla_len(tb[RTA_SRC]) < plen)
2396 			goto errout;
2397 
2398 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2399 	}
2400 
2401 	if (tb[RTA_PREFSRC])
2402 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2403 
2404 	if (tb[RTA_OIF])
2405 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2406 
2407 	if (tb[RTA_PRIORITY])
2408 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2409 
2410 	if (tb[RTA_METRICS]) {
2411 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2412 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2413 	}
2414 
2415 	if (tb[RTA_TABLE])
2416 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2417 
2418 	if (tb[RTA_MULTIPATH]) {
2419 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2420 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2421 	}
2422 
2423 	err = 0;
2424 errout:
2425 	return err;
2426 }
2427 
/*
 * Add or delete every nexthop listed in the RTA_MULTIPATH payload of @cfg.
 *
 * @cfg: parsed route request; fc_mp/fc_mp_len describe the rtnexthop list
 * @add: non-zero to add the nexthops, zero to delete them
 *
 * Each nexthop is handled as an independent single route built from a
 * copy of @cfg with the nexthop's ifindex and gateway patched in.
 * Returns 0 when every operation succeeded, otherwise the error of the
 * last operation that failed.
 *
 * NOTE(review): when an add fails, the rollback pass restarts at the
 * first nexthop and walks the whole list, so it also attempts to delete
 * nexthops that this call never added.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	/* (re)start at the first nexthop; also the entry of the rollback pass */
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		/* start from the shared request, then apply per-nexthop values */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			/*
			 * NOTE(review): the nested RTA_GATEWAY length is not
			 * checked here before copying up to 16 bytes.
			 */
			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2482 
2483 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2484 {
2485 	struct fib6_config cfg;
2486 	int err;
2487 
2488 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2489 	if (err < 0)
2490 		return err;
2491 
2492 	if (cfg.fc_mp)
2493 		return ip6_route_multipath(&cfg, 0);
2494 	else
2495 		return ip6_route_del(&cfg);
2496 }
2497 
2498 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2499 {
2500 	struct fib6_config cfg;
2501 	int err;
2502 
2503 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2504 	if (err < 0)
2505 		return err;
2506 
2507 	if (cfg.fc_mp)
2508 		return ip6_route_multipath(&cfg, 1);
2509 	else
2510 		return ip6_route_add(&cfg);
2511 }
2512 
2513 static inline size_t rt6_nlmsg_size(void)
2514 {
2515 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2516 	       + nla_total_size(16) /* RTA_SRC */
2517 	       + nla_total_size(16) /* RTA_DST */
2518 	       + nla_total_size(16) /* RTA_GATEWAY */
2519 	       + nla_total_size(16) /* RTA_PREFSRC */
2520 	       + nla_total_size(4) /* RTA_TABLE */
2521 	       + nla_total_size(4) /* RTA_IIF */
2522 	       + nla_total_size(4) /* RTA_OIF */
2523 	       + nla_total_size(4) /* RTA_PRIORITY */
2524 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2525 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2526 }
2527 
/*
 * Serialise route @rt into @skb as one rtnetlink route message.
 *
 * @net:    namespace, passed on to ip6mr_get_route()/ip6_route_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as RTA_DST with a /128 length instead of
 *          the route's own destination prefix
 * @src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index, or 0 when there is none
 * @type:   netlink message type to emit
 * @portid: netlink port id for the message header
 * @seq:    netlink sequence number for the message header
 * @prefix: non-zero to emit only routes flagged RTF_PREFIX_RT
 * @nowait: passed through to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 when @prefix filtering skipped the route, 0 when
 * ip6mr_get_route() returned 0 with @nowait clear, the value of
 * nlmsg_end() on success, or -EMSGSIZE when the message could not be
 * started or an attribute could not be added (the partial message is
 * cancelled in the latter case).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id is reported both in the header and as RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* derive the route type: reject routes map dst.error to an RTN_* */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* the stored protocol is overridden for redirect and addrconf routes */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* caller-supplied addresses are reported as host (/128) entries */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* multicast destinations are described by ip6mr_get_route() */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* no input interface: report the source address for @dst */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): when iif == 0, dst != NULL and the route has a
	 * prefsrc, RTA_PREFSRC has already been added just above, so the
	 * message ends up carrying the attribute twice.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* remaining lifetime in jiffies; 0 for routes without RTF_EXPIRES */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* drop the partially built message from the skb */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2672 
2673 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2674 {
2675 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2676 	int prefix;
2677 
2678 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2679 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2680 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2681 	} else
2682 		prefix = 0;
2683 
2684 	return rt6_fill_node(arg->net,
2685 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2686 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2687 		     prefix, 0, NLM_F_MULTI);
2688 }
2689 
2690 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2691 {
2692 	struct net *net = sock_net(in_skb->sk);
2693 	struct nlattr *tb[RTA_MAX+1];
2694 	struct rt6_info *rt;
2695 	struct sk_buff *skb;
2696 	struct rtmsg *rtm;
2697 	struct flowi6 fl6;
2698 	int err, iif = 0, oif = 0;
2699 
2700 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2701 	if (err < 0)
2702 		goto errout;
2703 
2704 	err = -EINVAL;
2705 	memset(&fl6, 0, sizeof(fl6));
2706 
2707 	if (tb[RTA_SRC]) {
2708 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2709 			goto errout;
2710 
2711 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2712 	}
2713 
2714 	if (tb[RTA_DST]) {
2715 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2716 			goto errout;
2717 
2718 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2719 	}
2720 
2721 	if (tb[RTA_IIF])
2722 		iif = nla_get_u32(tb[RTA_IIF]);
2723 
2724 	if (tb[RTA_OIF])
2725 		oif = nla_get_u32(tb[RTA_OIF]);
2726 
2727 	if (iif) {
2728 		struct net_device *dev;
2729 		int flags = 0;
2730 
2731 		dev = __dev_get_by_index(net, iif);
2732 		if (!dev) {
2733 			err = -ENODEV;
2734 			goto errout;
2735 		}
2736 
2737 		fl6.flowi6_iif = iif;
2738 
2739 		if (!ipv6_addr_any(&fl6.saddr))
2740 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2741 
2742 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2743 							       flags);
2744 	} else {
2745 		fl6.flowi6_oif = oif;
2746 
2747 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2748 	}
2749 
2750 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2751 	if (!skb) {
2752 		ip6_rt_put(rt);
2753 		err = -ENOBUFS;
2754 		goto errout;
2755 	}
2756 
2757 	/* Reserve room for dummy headers, this skb can pass
2758 	   through good chunk of routing engine.
2759 	 */
2760 	skb_reset_mac_header(skb);
2761 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2762 
2763 	skb_dst_set(skb, &rt->dst);
2764 
2765 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2766 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2767 			    nlh->nlmsg_seq, 0, 0, 0);
2768 	if (err < 0) {
2769 		kfree_skb(skb);
2770 		goto errout;
2771 	}
2772 
2773 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2774 errout:
2775 	return err;
2776 }
2777 
2778 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2779 {
2780 	struct sk_buff *skb;
2781 	struct net *net = info->nl_net;
2782 	u32 seq;
2783 	int err;
2784 
2785 	err = -ENOBUFS;
2786 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2787 
2788 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2789 	if (!skb)
2790 		goto errout;
2791 
2792 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2793 				event, info->portid, seq, 0, 0, 0);
2794 	if (err < 0) {
2795 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2796 		WARN_ON(err == -EMSGSIZE);
2797 		kfree_skb(skb);
2798 		goto errout;
2799 	}
2800 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2801 		    info->nlh, gfp_any());
2802 	return;
2803 errout:
2804 	if (err < 0)
2805 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2806 }
2807 
2808 static int ip6_route_dev_notify(struct notifier_block *this,
2809 				unsigned long event, void *ptr)
2810 {
2811 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2812 	struct net *net = dev_net(dev);
2813 
2814 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2815 		net->ipv6.ip6_null_entry->dst.dev = dev;
2816 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2817 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2818 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2819 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2820 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2821 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2822 #endif
2823 	}
2824 
2825 	return NOTIFY_OK;
2826 }
2827 
2828 /*
2829  *	/proc
2830  */
2831 
2832 #ifdef CONFIG_PROC_FS
2833 
/*
 * File operations for the "ipv6_route" proc entry created in
 * ip6_route_net_init_late(); opened through ipv6_route_open() and read
 * through the seq_file helpers.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2841 
2842 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2843 {
2844 	struct net *net = (struct net *)seq->private;
2845 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2846 		   net->ipv6.rt6_stats->fib_nodes,
2847 		   net->ipv6.rt6_stats->fib_route_nodes,
2848 		   net->ipv6.rt6_stats->fib_rt_alloc,
2849 		   net->ipv6.rt6_stats->fib_rt_entries,
2850 		   net->ipv6.rt6_stats->fib_rt_cache,
2851 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2852 		   net->ipv6.rt6_stats->fib_discarded_routes);
2853 
2854 	return 0;
2855 }
2856 
/*
 * open() handler for the "rt6_stats" proc entry: sets up a single-record
 * seq_file rendered by rt6_stats_seq_show() and returns the result of
 * single_open_net().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2861 
/*
 * File operations for the "rt6_stats" proc entry created in
 * ip6_route_net_init_late().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2869 #endif	/* CONFIG_PROC_FS */
2870 
2871 #ifdef CONFIG_SYSCTL
2872 
2873 static
2874 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2875 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2876 {
2877 	struct net *net;
2878 	int delay;
2879 	if (!write)
2880 		return -EINVAL;
2881 
2882 	net = (struct net *)ctl->extra1;
2883 	delay = net->ipv6.sysctl.flush_delay;
2884 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2885 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2886 	return 0;
2887 }
2888 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and repoints each
 * entry's .data at the namespace's own variables BY ARRAY INDEX
 * (table[0] .. table[9]); keep the order here and the indices there in
 * sync when adding, removing or reordering entries.
 *
 * "gc_min_interval" and "gc_min_interval_ms" expose the same variable
 * through different handlers (proc_dointvec_jiffies vs.
 * proc_dointvec_ms_jiffies).
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only (0200); handled by ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as "gc_min_interval", different handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* empty entry terminates the table */
};
2962 
2963 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2964 {
2965 	struct ctl_table *table;
2966 
2967 	table = kmemdup(ipv6_route_table_template,
2968 			sizeof(ipv6_route_table_template),
2969 			GFP_KERNEL);
2970 
2971 	if (table) {
2972 		table[0].data = &net->ipv6.sysctl.flush_delay;
2973 		table[0].extra1 = net;
2974 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2975 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2976 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2977 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2978 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2979 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2980 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2981 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2982 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2983 
2984 		/* Don't export sysctls to unprivileged users */
2985 		if (net->user_ns != &init_user_ns)
2986 			table[0].procname = NULL;
2987 	}
2988 
2989 	return table;
2990 }
2991 #endif
2992 
/*
 * Per-namespace init for IPv6 routing.
 *
 * Copies the dst ops template into @net, initialises its dst entry
 * accounting, duplicates the special route templates (null entry, plus
 * prohibit and blackhole entries with CONFIG_IPV6_MULTIPLE_TABLES) and
 * sets the sysctl defaults.
 *
 * Returns 0 on success or -ENOMEM on any failure; everything set up
 * before the failing step is released again.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* the copy must point at itself and at this namespace's dst ops */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; time values are expressed in jiffies via HZ */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* error unwinding, in reverse order of the setup above */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3064 
/*
 * Per-namespace teardown, counterpart of ip6_route_net_init(): frees the
 * namespace's copies of the special route entries and releases the dst
 * entry accounting set up with dst_entries_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3074 
/*
 * Late per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route"
 * and "rt6_stats" entries under the namespace's proc_net directory.
 * The proc_create() results are not checked; always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3083 
/*
 * Counterpart of ip6_route_net_init_late(): with CONFIG_PROC_FS, remove
 * the "ipv6_route" and "rt6_stats" proc entries of the namespace.
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3091 
/*
 * Per-namespace hooks for the core route state (dst ops, special route
 * entries, sysctl defaults); registered in ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3096 
3097 static int __net_init ipv6_inetpeer_init(struct net *net)
3098 {
3099 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3100 
3101 	if (!bp)
3102 		return -ENOMEM;
3103 	inet_peer_base_init(bp);
3104 	net->ipv6.peers = bp;
3105 	return 0;
3106 }
3107 
/*
 * Per-namespace teardown, counterpart of ipv6_inetpeer_init(): unpublish
 * the inet_peer base from the namespace, invalidate its tree and free it.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	/* clear the namespace's pointer before tearing the base down */
	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3116 
/*
 * Per-namespace hooks for the IPv6 inet_peer base; registered in
 * ip6_route_init() before ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3121 
/*
 * Per-namespace hooks that create/remove the route proc entries;
 * registered in ip6_route_init() after fib6_rules_init().
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3126 
/*
 * Netdevice notifier block that routes device events to
 * ip6_route_dev_notify(); registered as the last step of ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3131 
/*
 * One-time initialisation of the IPv6 routing front-end.
 *
 * Sets up, in order: the dst slab cache, the blackhole dst accounting,
 * the inetpeer and route per-namespace operations, the loopback binding
 * of init_net's special route entries, the FIB, xfrm6, the policy rules,
 * the late per-namespace operations (proc entries), the rtnetlink route
 * handlers and finally the netdevice notifier.
 *
 * Returns 0 on success or a negative error; on failure the steps already
 * completed are undone through the label chain at the bottom.
 *
 * NOTE(review): the error path neither drops the in6_dev_get() results
 * stored for init_net's special entries nor unregisters the rtnetlink
 * handlers when register_netdevice_notifier() fails -- no such call is
 * visible here; confirm whether that is intentional.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts are allocated from the same slab cache */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* any failing handler registration is reported as -ENOBUFS */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* error unwinding: each label undoes the step preceding the failure */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3215 
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late per-namespace operations, policy rules, xfrm6, the FIB, both
 * per-namespace operation sets, the blackhole dst accounting and the dst
 * slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * uses the opposite order; the rtnetlink handlers registered there are
 * not unregistered in this function.  Confirm both are intentional.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3228