xref: /linux/net/ipv6/route.c (revision 2d87650a3bf1b80f7d0d150ee1af3f8a89e5b7aa)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/*
 * Outcome of the next-hop neighbour check done by rt6_check_neigh().
 * The negative values are also returned as-is by rt6_score_route() in
 * place of a (non-negative) score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED		=  1,	/* next hop is considered usable */
};
74 
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sk_buff *skb);
88 static int		ip6_pkt_prohibit(struct sk_buff *skb);
89 static int		ip6_pkt_prohibit_out(struct sk_buff *skb);
90 static void		ip6_link_failure(struct sk_buff *skb);
91 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 					   struct sk_buff *skb, u32 mtu);
93 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 					struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Helpers used by rt6_route_rcv() to look up / create the route that
 * corresponds to a received Route Information option.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
106 
107 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
108 {
109 	struct rt6_info *rt = (struct rt6_info *) dst;
110 	struct inet_peer *peer;
111 	u32 *p = NULL;
112 
113 	if (!(rt->dst.flags & DST_HOST))
114 		return NULL;
115 
116 	peer = rt6_get_peer_create(rt);
117 	if (peer) {
118 		u32 *old_p = __DST_METRICS_PTR(old);
119 		unsigned long prev, new;
120 
121 		p = peer->metrics;
122 		if (inet_metrics_new(peer))
123 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
124 
125 		new = (unsigned long) p;
126 		prev = cmpxchg(&dst->_metrics, old, new);
127 
128 		if (prev != old) {
129 			p = __DST_METRICS_PTR(prev);
130 			if (prev & DST_METRICS_READ_ONLY)
131 				p = NULL;
132 		}
133 	}
134 	return p;
135 }
136 
137 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
138 					     struct sk_buff *skb,
139 					     const void *daddr)
140 {
141 	struct in6_addr *p = &rt->rt6i_gateway;
142 
143 	if (!ipv6_addr_any(p))
144 		return (const void *) p;
145 	else if (skb)
146 		return &ipv6_hdr(skb)->daddr;
147 	return daddr;
148 }
149 
150 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
151 					  struct sk_buff *skb,
152 					  const void *daddr)
153 {
154 	struct rt6_info *rt = (struct rt6_info *) dst;
155 	struct neighbour *n;
156 
157 	daddr = choose_neigh_daddr(rt, skb, daddr);
158 	n = __ipv6_neigh_lookup(dst->dev, daddr);
159 	if (n)
160 		return n;
161 	return neigh_create(&nd_tbl, daddr, dst->dev);
162 }
163 
164 static struct dst_ops ip6_dst_ops_template = {
165 	.family			=	AF_INET6,
166 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
167 	.gc			=	ip6_dst_gc,
168 	.gc_thresh		=	1024,
169 	.check			=	ip6_dst_check,
170 	.default_advmss		=	ip6_default_advmss,
171 	.mtu			=	ip6_mtu,
172 	.cow_metrics		=	ipv6_cow_metrics,
173 	.destroy		=	ip6_dst_destroy,
174 	.ifdown			=	ip6_dst_ifdown,
175 	.negative_advice	=	ip6_negative_advice,
176 	.link_failure		=	ip6_link_failure,
177 	.update_pmtu		=	ip6_rt_update_pmtu,
178 	.redirect		=	rt6_do_redirect,
179 	.local_out		=	__ip6_local_out,
180 	.neigh_lookup		=	ip6_neigh_lookup,
181 };
182 
183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
184 {
185 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186 
187 	return mtu ? : dst->dev->mtu;
188 }
189 
190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
191 					 struct sk_buff *skb, u32 mtu)
192 {
193 }
194 
/* dst_ops->redirect for blackhole routes: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
199 
200 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
201 					 unsigned long old)
202 {
203 	return NULL;
204 }
205 
206 static struct dst_ops ip6_dst_blackhole_ops = {
207 	.family			=	AF_INET6,
208 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
209 	.destroy		=	ip6_dst_destroy,
210 	.check			=	ip6_dst_check,
211 	.mtu			=	ip6_blackhole_mtu,
212 	.default_advmss		=	ip6_default_advmss,
213 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
214 	.redirect		=	ip6_rt_blackhole_redirect,
215 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
216 	.neigh_lookup		=	ip6_neigh_lookup,
217 };
218 
219 static const u32 ip6_template_metrics[RTAX_MAX] = {
220 	[RTAX_HOPLIMIT - 1] = 0,
221 };
222 
223 static const struct rt6_info ip6_null_entry_template = {
224 	.dst = {
225 		.__refcnt	= ATOMIC_INIT(1),
226 		.__use		= 1,
227 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
228 		.error		= -ENETUNREACH,
229 		.input		= ip6_pkt_discard,
230 		.output		= ip6_pkt_discard_out,
231 	},
232 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
233 	.rt6i_protocol  = RTPROT_KERNEL,
234 	.rt6i_metric	= ~(u32) 0,
235 	.rt6i_ref	= ATOMIC_INIT(1),
236 };
237 
238 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
239 
240 static const struct rt6_info ip6_prohibit_entry_template = {
241 	.dst = {
242 		.__refcnt	= ATOMIC_INIT(1),
243 		.__use		= 1,
244 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
245 		.error		= -EACCES,
246 		.input		= ip6_pkt_prohibit,
247 		.output		= ip6_pkt_prohibit_out,
248 	},
249 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
250 	.rt6i_protocol  = RTPROT_KERNEL,
251 	.rt6i_metric	= ~(u32) 0,
252 	.rt6i_ref	= ATOMIC_INIT(1),
253 };
254 
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256 	.dst = {
257 		.__refcnt	= ATOMIC_INIT(1),
258 		.__use		= 1,
259 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
260 		.error		= -EINVAL,
261 		.input		= dst_discard,
262 		.output		= dst_discard,
263 	},
264 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
265 	.rt6i_protocol  = RTPROT_KERNEL,
266 	.rt6i_metric	= ~(u32) 0,
267 	.rt6i_ref	= ATOMIC_INIT(1),
268 };
269 
270 #endif
271 
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 					     struct net_device *dev,
275 					     int flags,
276 					     struct fib6_table *table)
277 {
278 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 					0, DST_OBSOLETE_FORCE_CHK, flags);
280 
281 	if (rt) {
282 		struct dst_entry *dst = &rt->dst;
283 
284 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 		rt->rt6i_genid = rt_genid_ipv6(net);
287 		INIT_LIST_HEAD(&rt->rt6i_siblings);
288 	}
289 	return rt;
290 }
291 
292 static void ip6_dst_destroy(struct dst_entry *dst)
293 {
294 	struct rt6_info *rt = (struct rt6_info *)dst;
295 	struct inet6_dev *idev = rt->rt6i_idev;
296 	struct dst_entry *from = dst->from;
297 
298 	if (!(rt->dst.flags & DST_HOST))
299 		dst_destroy_metrics_generic(dst);
300 
301 	if (idev) {
302 		rt->rt6i_idev = NULL;
303 		in6_dev_put(idev);
304 	}
305 
306 	dst->from = NULL;
307 	dst_release(from);
308 
309 	if (rt6_has_peer(rt)) {
310 		struct inet_peer *peer = rt6_peer_ptr(rt);
311 		inet_putpeer(peer);
312 	}
313 }
314 
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 	struct inet_peer_base *base;
318 	struct inet_peer *peer;
319 
320 	base = inetpeer_base_ptr(rt->_rt6i_peer);
321 	if (!base)
322 		return;
323 
324 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 	if (peer) {
326 		if (!rt6_set_peer(rt, peer))
327 			inet_putpeer(peer);
328 	}
329 }
330 
331 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
332 			   int how)
333 {
334 	struct rt6_info *rt = (struct rt6_info *)dst;
335 	struct inet6_dev *idev = rt->rt6i_idev;
336 	struct net_device *loopback_dev =
337 		dev_net(dev)->loopback_dev;
338 
339 	if (dev != loopback_dev) {
340 		if (idev && idev->dev == dev) {
341 			struct inet6_dev *loopback_idev =
342 				in6_dev_get(loopback_dev);
343 			if (loopback_idev) {
344 				rt->rt6i_idev = loopback_idev;
345 				in6_dev_put(idev);
346 			}
347 		}
348 	}
349 }
350 
351 static bool rt6_check_expired(const struct rt6_info *rt)
352 {
353 	if (rt->rt6i_flags & RTF_EXPIRES) {
354 		if (time_after(jiffies, rt->dst.expires))
355 			return true;
356 	} else if (rt->dst.from) {
357 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
358 	}
359 	return false;
360 }
361 
362 static bool rt6_need_strict(const struct in6_addr *daddr)
363 {
364 	return ipv6_addr_type(daddr) &
365 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
366 }
367 
368 /* Multipath route selection:
369  *   Hash based function using packet header and flowlabel.
370  * Adapted from fib_info_hashfn()
371  */
372 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
373 			       const struct flowi6 *fl6)
374 {
375 	unsigned int val = fl6->flowi6_proto;
376 
377 	val ^= ipv6_addr_hash(&fl6->daddr);
378 	val ^= ipv6_addr_hash(&fl6->saddr);
379 
380 	/* Work only if this not encapsulated */
381 	switch (fl6->flowi6_proto) {
382 	case IPPROTO_UDP:
383 	case IPPROTO_TCP:
384 	case IPPROTO_SCTP:
385 		val ^= (__force u16)fl6->fl6_sport;
386 		val ^= (__force u16)fl6->fl6_dport;
387 		break;
388 
389 	case IPPROTO_ICMPV6:
390 		val ^= (__force u16)fl6->fl6_icmp_type;
391 		val ^= (__force u16)fl6->fl6_icmp_code;
392 		break;
393 	}
394 	/* RFC6438 recommands to use flowlabel */
395 	val ^= (__force u32)fl6->flowlabel;
396 
397 	/* Perhaps, we need to tune, this function? */
398 	val = val ^ (val >> 7) ^ (val >> 12);
399 	return val % candidate_count;
400 }
401 
402 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
403 					     struct flowi6 *fl6, int oif,
404 					     int strict)
405 {
406 	struct rt6_info *sibling, *next_sibling;
407 	int route_choosen;
408 
409 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
410 	/* Don't change the route, if route_choosen == 0
411 	 * (siblings does not include ourself)
412 	 */
413 	if (route_choosen)
414 		list_for_each_entry_safe(sibling, next_sibling,
415 				&match->rt6i_siblings, rt6i_siblings) {
416 			route_choosen--;
417 			if (route_choosen == 0) {
418 				if (rt6_score_route(sibling, oif, strict) < 0)
419 					break;
420 				match = sibling;
421 				break;
422 			}
423 		}
424 	return match;
425 }
426 
427 /*
428  *	Route lookup. Any table->tb6_lock is implied.
429  */
430 
431 static inline struct rt6_info *rt6_device_match(struct net *net,
432 						    struct rt6_info *rt,
433 						    const struct in6_addr *saddr,
434 						    int oif,
435 						    int flags)
436 {
437 	struct rt6_info *local = NULL;
438 	struct rt6_info *sprt;
439 
440 	if (!oif && ipv6_addr_any(saddr))
441 		goto out;
442 
443 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
444 		struct net_device *dev = sprt->dst.dev;
445 
446 		if (oif) {
447 			if (dev->ifindex == oif)
448 				return sprt;
449 			if (dev->flags & IFF_LOOPBACK) {
450 				if (!sprt->rt6i_idev ||
451 				    sprt->rt6i_idev->dev->ifindex != oif) {
452 					if (flags & RT6_LOOKUP_F_IFACE && oif)
453 						continue;
454 					if (local && (!oif ||
455 						      local->rt6i_idev->dev->ifindex == oif))
456 						continue;
457 				}
458 				local = sprt;
459 			}
460 		} else {
461 			if (ipv6_chk_addr(net, saddr, dev,
462 					  flags & RT6_LOOKUP_F_IFACE))
463 				return sprt;
464 		}
465 	}
466 
467 	if (oif) {
468 		if (local)
469 			return local;
470 
471 		if (flags & RT6_LOOKUP_F_IFACE)
472 			return net->ipv6.ip6_null_entry;
473 	}
474 out:
475 	return rt;
476 }
477 
478 #ifdef CONFIG_IPV6_ROUTER_PREF
479 struct __rt6_probe_work {
480 	struct work_struct work;
481 	struct in6_addr target;
482 	struct net_device *dev;
483 };
484 
485 static void rt6_probe_deferred(struct work_struct *w)
486 {
487 	struct in6_addr mcaddr;
488 	struct __rt6_probe_work *work =
489 		container_of(w, struct __rt6_probe_work, work);
490 
491 	addrconf_addr_solict_mult(&work->target, &mcaddr);
492 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
493 	dev_put(work->dev);
494 	kfree(w);
495 }
496 
497 static void rt6_probe(struct rt6_info *rt)
498 {
499 	struct neighbour *neigh;
500 	/*
501 	 * Okay, this does not seem to be appropriate
502 	 * for now, however, we need to check if it
503 	 * is really so; aka Router Reachability Probing.
504 	 *
505 	 * Router Reachability Probe MUST be rate-limited
506 	 * to no more than one per minute.
507 	 */
508 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
509 		return;
510 	rcu_read_lock_bh();
511 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
512 	if (neigh) {
513 		write_lock(&neigh->lock);
514 		if (neigh->nud_state & NUD_VALID)
515 			goto out;
516 	}
517 
518 	if (!neigh ||
519 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
520 		struct __rt6_probe_work *work;
521 
522 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
523 
524 		if (neigh && work)
525 			__neigh_set_probe_once(neigh);
526 
527 		if (neigh)
528 			write_unlock(&neigh->lock);
529 
530 		if (work) {
531 			INIT_WORK(&work->work, rt6_probe_deferred);
532 			work->target = rt->rt6i_gateway;
533 			dev_hold(rt->dst.dev);
534 			work->dev = rt->dst.dev;
535 			schedule_work(&work->work);
536 		}
537 	} else {
538 out:
539 		write_unlock(&neigh->lock);
540 	}
541 	rcu_read_unlock_bh();
542 }
543 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
547 #endif
548 
549 /*
550  * Default Router Selection (RFC 2461 6.3.6)
551  */
552 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
553 {
554 	struct net_device *dev = rt->dst.dev;
555 	if (!oif || dev->ifindex == oif)
556 		return 2;
557 	if ((dev->flags & IFF_LOOPBACK) &&
558 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
559 		return 1;
560 	return 0;
561 }
562 
563 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
564 {
565 	struct neighbour *neigh;
566 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
567 
568 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
569 	    !(rt->rt6i_flags & RTF_GATEWAY))
570 		return RT6_NUD_SUCCEED;
571 
572 	rcu_read_lock_bh();
573 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
574 	if (neigh) {
575 		read_lock(&neigh->lock);
576 		if (neigh->nud_state & NUD_VALID)
577 			ret = RT6_NUD_SUCCEED;
578 #ifdef CONFIG_IPV6_ROUTER_PREF
579 		else if (!(neigh->nud_state & NUD_FAILED))
580 			ret = RT6_NUD_SUCCEED;
581 		else
582 			ret = RT6_NUD_FAIL_PROBE;
583 #endif
584 		read_unlock(&neigh->lock);
585 	} else {
586 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
587 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
588 	}
589 	rcu_read_unlock_bh();
590 
591 	return ret;
592 }
593 
594 static int rt6_score_route(struct rt6_info *rt, int oif,
595 			   int strict)
596 {
597 	int m;
598 
599 	m = rt6_check_dev(rt, oif);
600 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
601 		return RT6_NUD_FAIL_HARD;
602 #ifdef CONFIG_IPV6_ROUTER_PREF
603 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
604 #endif
605 	if (strict & RT6_LOOKUP_F_REACHABLE) {
606 		int n = rt6_check_neigh(rt);
607 		if (n < 0)
608 			return n;
609 	}
610 	return m;
611 }
612 
613 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
614 				   int *mpri, struct rt6_info *match,
615 				   bool *do_rr)
616 {
617 	int m;
618 	bool match_do_rr = false;
619 
620 	if (rt6_check_expired(rt))
621 		goto out;
622 
623 	m = rt6_score_route(rt, oif, strict);
624 	if (m == RT6_NUD_FAIL_DO_RR) {
625 		match_do_rr = true;
626 		m = 0; /* lowest valid score */
627 	} else if (m == RT6_NUD_FAIL_HARD) {
628 		goto out;
629 	}
630 
631 	if (strict & RT6_LOOKUP_F_REACHABLE)
632 		rt6_probe(rt);
633 
634 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
635 	if (m > *mpri) {
636 		*do_rr = match_do_rr;
637 		*mpri = m;
638 		match = rt;
639 	}
640 out:
641 	return match;
642 }
643 
644 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
645 				     struct rt6_info *rr_head,
646 				     u32 metric, int oif, int strict,
647 				     bool *do_rr)
648 {
649 	struct rt6_info *rt, *match;
650 	int mpri = -1;
651 
652 	match = NULL;
653 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
654 	     rt = rt->dst.rt6_next)
655 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
656 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
657 	     rt = rt->dst.rt6_next)
658 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
659 
660 	return match;
661 }
662 
663 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
664 {
665 	struct rt6_info *match, *rt0;
666 	struct net *net;
667 	bool do_rr = false;
668 
669 	rt0 = fn->rr_ptr;
670 	if (!rt0)
671 		fn->rr_ptr = rt0 = fn->leaf;
672 
673 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
674 			     &do_rr);
675 
676 	if (do_rr) {
677 		struct rt6_info *next = rt0->dst.rt6_next;
678 
679 		/* no entries matched; do round-robin */
680 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
681 			next = fn->leaf;
682 
683 		if (next != rt0)
684 			fn->rr_ptr = next;
685 	}
686 
687 	net = dev_net(rt0->dst.dev);
688 	return match ? match : net->ipv6.ip6_null_entry;
689 }
690 
691 #ifdef CONFIG_IPV6_ROUTE_INFO
692 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
693 		  const struct in6_addr *gwaddr)
694 {
695 	struct net *net = dev_net(dev);
696 	struct route_info *rinfo = (struct route_info *) opt;
697 	struct in6_addr prefix_buf, *prefix;
698 	unsigned int pref;
699 	unsigned long lifetime;
700 	struct rt6_info *rt;
701 
702 	if (len < sizeof(struct route_info)) {
703 		return -EINVAL;
704 	}
705 
706 	/* Sanity check for prefix_len and length */
707 	if (rinfo->length > 3) {
708 		return -EINVAL;
709 	} else if (rinfo->prefix_len > 128) {
710 		return -EINVAL;
711 	} else if (rinfo->prefix_len > 64) {
712 		if (rinfo->length < 2) {
713 			return -EINVAL;
714 		}
715 	} else if (rinfo->prefix_len > 0) {
716 		if (rinfo->length < 1) {
717 			return -EINVAL;
718 		}
719 	}
720 
721 	pref = rinfo->route_pref;
722 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
723 		return -EINVAL;
724 
725 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
726 
727 	if (rinfo->length == 3)
728 		prefix = (struct in6_addr *)rinfo->prefix;
729 	else {
730 		/* this function is safe */
731 		ipv6_addr_prefix(&prefix_buf,
732 				 (struct in6_addr *)rinfo->prefix,
733 				 rinfo->prefix_len);
734 		prefix = &prefix_buf;
735 	}
736 
737 	if (rinfo->prefix_len == 0)
738 		rt = rt6_get_dflt_router(gwaddr, dev);
739 	else
740 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
741 					gwaddr, dev->ifindex);
742 
743 	if (rt && !lifetime) {
744 		ip6_del_rt(rt);
745 		rt = NULL;
746 	}
747 
748 	if (!rt && lifetime)
749 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
750 					pref);
751 	else if (rt)
752 		rt->rt6i_flags = RTF_ROUTEINFO |
753 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
754 
755 	if (rt) {
756 		if (!addrconf_finite_timeout(lifetime))
757 			rt6_clean_expires(rt);
758 		else
759 			rt6_set_expires(rt, jiffies + HZ * lifetime);
760 
761 		ip6_rt_put(rt);
762 	}
763 	return 0;
764 }
765 #endif
766 
/*
 * Backtrack towards the tree root after a failed lookup.
 *
 * Expands inside a function that provides the locals 'rt' and 'fn' and
 * the labels 'restart' and 'out'.  If 'rt' is the null entry, 'fn' is
 * moved up the tree (descending into a parent's source subtree via
 * fib6_lookup() when there is one that we did not come from) until a
 * node carrying routes (RTN_RTINFO) is found, then control jumps to
 * 'restart'.  Reaching the top-level root jumps to 'out' instead.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		for (;;) {						\
			struct fib6_node *pnode;			\
									\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pnode = fn->parent;				\
			if (FIB6_SUBTREE(pnode) &&			\
			    FIB6_SUBTREE(pnode) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(pnode),	\
						 NULL, saddr);		\
			else						\
				fn = pnode;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
784 
785 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
786 					     struct fib6_table *table,
787 					     struct flowi6 *fl6, int flags)
788 {
789 	struct fib6_node *fn;
790 	struct rt6_info *rt;
791 
792 	read_lock_bh(&table->tb6_lock);
793 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
794 restart:
795 	rt = fn->leaf;
796 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
797 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
798 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
799 	BACKTRACK(net, &fl6->saddr);
800 out:
801 	dst_use(&rt->dst, jiffies);
802 	read_unlock_bh(&table->tb6_lock);
803 	return rt;
804 
805 }
806 
807 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
808 				    int flags)
809 {
810 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
811 }
812 EXPORT_SYMBOL_GPL(ip6_route_lookup);
813 
814 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
815 			    const struct in6_addr *saddr, int oif, int strict)
816 {
817 	struct flowi6 fl6 = {
818 		.flowi6_oif = oif,
819 		.daddr = *daddr,
820 	};
821 	struct dst_entry *dst;
822 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
823 
824 	if (saddr) {
825 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
826 		flags |= RT6_LOOKUP_F_HAS_SADDR;
827 	}
828 
829 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
830 	if (dst->error == 0)
831 		return (struct rt6_info *) dst;
832 
833 	dst_release(dst);
834 
835 	return NULL;
836 }
837 
838 EXPORT_SYMBOL(rt6_lookup);
839 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
845 
846 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
847 {
848 	int err;
849 	struct fib6_table *table;
850 
851 	table = rt->rt6i_table;
852 	write_lock_bh(&table->tb6_lock);
853 	err = fib6_add(&table->tb6_root, rt, info);
854 	write_unlock_bh(&table->tb6_lock);
855 
856 	return err;
857 }
858 
859 int ip6_ins_rt(struct rt6_info *rt)
860 {
861 	struct nl_info info = {
862 		.nl_net = dev_net(rt->dst.dev),
863 	};
864 	return __ip6_ins_rt(rt, &info);
865 }
866 
867 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
868 				      const struct in6_addr *daddr,
869 				      const struct in6_addr *saddr)
870 {
871 	struct rt6_info *rt;
872 
873 	/*
874 	 *	Clone the route.
875 	 */
876 
877 	rt = ip6_rt_copy(ort, daddr);
878 
879 	if (rt) {
880 		if (ort->rt6i_dst.plen != 128 &&
881 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
882 			rt->rt6i_flags |= RTF_ANYCAST;
883 
884 		rt->rt6i_flags |= RTF_CACHE;
885 
886 #ifdef CONFIG_IPV6_SUBTREES
887 		if (rt->rt6i_src.plen && saddr) {
888 			rt->rt6i_src.addr = *saddr;
889 			rt->rt6i_src.plen = 128;
890 		}
891 #endif
892 	}
893 
894 	return rt;
895 }
896 
897 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
898 					const struct in6_addr *daddr)
899 {
900 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
901 
902 	if (rt)
903 		rt->rt6i_flags |= RTF_CACHE;
904 	return rt;
905 }
906 
907 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
908 				      struct flowi6 *fl6, int flags)
909 {
910 	struct fib6_node *fn;
911 	struct rt6_info *rt, *nrt;
912 	int strict = 0;
913 	int attempts = 3;
914 	int err;
915 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
916 
917 	strict |= flags & RT6_LOOKUP_F_IFACE;
918 
919 relookup:
920 	read_lock_bh(&table->tb6_lock);
921 
922 restart_2:
923 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
924 
925 restart:
926 	rt = rt6_select(fn, oif, strict | reachable);
927 	if (rt->rt6i_nsiblings)
928 		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
929 	BACKTRACK(net, &fl6->saddr);
930 	if (rt == net->ipv6.ip6_null_entry ||
931 	    rt->rt6i_flags & RTF_CACHE)
932 		goto out;
933 
934 	dst_hold(&rt->dst);
935 	read_unlock_bh(&table->tb6_lock);
936 
937 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
938 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
939 	else if (!(rt->dst.flags & DST_HOST))
940 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
941 	else
942 		goto out2;
943 
944 	ip6_rt_put(rt);
945 	rt = nrt ? : net->ipv6.ip6_null_entry;
946 
947 	dst_hold(&rt->dst);
948 	if (nrt) {
949 		err = ip6_ins_rt(nrt);
950 		if (!err)
951 			goto out2;
952 	}
953 
954 	if (--attempts <= 0)
955 		goto out2;
956 
957 	/*
958 	 * Race condition! In the gap, when table->tb6_lock was
959 	 * released someone could insert this route.  Relookup.
960 	 */
961 	ip6_rt_put(rt);
962 	goto relookup;
963 
964 out:
965 	if (reachable) {
966 		reachable = 0;
967 		goto restart_2;
968 	}
969 	dst_hold(&rt->dst);
970 	read_unlock_bh(&table->tb6_lock);
971 out2:
972 	rt->dst.lastuse = jiffies;
973 	rt->dst.__use++;
974 
975 	return rt;
976 }
977 
978 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
979 					    struct flowi6 *fl6, int flags)
980 {
981 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
982 }
983 
984 static struct dst_entry *ip6_route_input_lookup(struct net *net,
985 						struct net_device *dev,
986 						struct flowi6 *fl6, int flags)
987 {
988 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
989 		flags |= RT6_LOOKUP_F_IFACE;
990 
991 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
992 }
993 
994 void ip6_route_input(struct sk_buff *skb)
995 {
996 	const struct ipv6hdr *iph = ipv6_hdr(skb);
997 	struct net *net = dev_net(skb->dev);
998 	int flags = RT6_LOOKUP_F_HAS_SADDR;
999 	struct flowi6 fl6 = {
1000 		.flowi6_iif = skb->dev->ifindex,
1001 		.daddr = iph->daddr,
1002 		.saddr = iph->saddr,
1003 		.flowlabel = ip6_flowinfo(iph),
1004 		.flowi6_mark = skb->mark,
1005 		.flowi6_proto = iph->nexthdr,
1006 	};
1007 
1008 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1009 }
1010 
1011 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1012 					     struct flowi6 *fl6, int flags)
1013 {
1014 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1015 }
1016 
1017 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1018 				    struct flowi6 *fl6)
1019 {
1020 	int flags = 0;
1021 
1022 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1023 
1024 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1025 		flags |= RT6_LOOKUP_F_IFACE;
1026 
1027 	if (!ipv6_addr_any(&fl6->saddr))
1028 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1029 	else if (sk)
1030 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1031 
1032 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1033 }
1034 
1035 EXPORT_SYMBOL(ip6_route_output);
1036 
1037 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1038 {
1039 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1040 	struct dst_entry *new = NULL;
1041 
1042 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1043 	if (rt) {
1044 		new = &rt->dst;
1045 
1046 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1047 		rt6_init_peer(rt, net->ipv6.peers);
1048 
1049 		new->__use = 1;
1050 		new->input = dst_discard;
1051 		new->output = dst_discard;
1052 
1053 		if (dst_metrics_read_only(&ort->dst))
1054 			new->_metrics = ort->dst._metrics;
1055 		else
1056 			dst_copy_metrics(new, &ort->dst);
1057 		rt->rt6i_idev = ort->rt6i_idev;
1058 		if (rt->rt6i_idev)
1059 			in6_dev_hold(rt->rt6i_idev);
1060 
1061 		rt->rt6i_gateway = ort->rt6i_gateway;
1062 		rt->rt6i_flags = ort->rt6i_flags;
1063 		rt->rt6i_metric = 0;
1064 
1065 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1066 #ifdef CONFIG_IPV6_SUBTREES
1067 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1068 #endif
1069 
1070 		dst_free(new);
1071 	}
1072 
1073 	dst_release(dst_orig);
1074 	return new ? new : ERR_PTR(-ENOMEM);
1075 }
1076 
1077 /*
1078  *	Destination cache support functions
1079  */
1080 
1081 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1082 {
1083 	struct rt6_info *rt;
1084 
1085 	rt = (struct rt6_info *) dst;
1086 
1087 	/* All IPV6 dsts are created with ->obsolete set to the value
1088 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1089 	 * into this function always.
1090 	 */
1091 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1092 		return NULL;
1093 
1094 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1095 		return NULL;
1096 
1097 	if (rt6_check_expired(rt))
1098 		return NULL;
1099 
1100 	return dst;
1101 }
1102 
1103 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1104 {
1105 	struct rt6_info *rt = (struct rt6_info *) dst;
1106 
1107 	if (rt) {
1108 		if (rt->rt6i_flags & RTF_CACHE) {
1109 			if (rt6_check_expired(rt)) {
1110 				ip6_del_rt(rt);
1111 				dst = NULL;
1112 			}
1113 		} else {
1114 			dst_release(dst);
1115 			dst = NULL;
1116 		}
1117 	}
1118 	return dst;
1119 }
1120 
1121 static void ip6_link_failure(struct sk_buff *skb)
1122 {
1123 	struct rt6_info *rt;
1124 
1125 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1126 
1127 	rt = (struct rt6_info *) skb_dst(skb);
1128 	if (rt) {
1129 		if (rt->rt6i_flags & RTF_CACHE) {
1130 			dst_hold(&rt->dst);
1131 			if (ip6_del_rt(rt))
1132 				dst_free(&rt->dst);
1133 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1134 			rt->rt6i_node->fn_sernum = -1;
1135 		}
1136 	}
1137 }
1138 
1139 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1140 			       struct sk_buff *skb, u32 mtu)
1141 {
1142 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1143 
1144 	dst_confirm(dst);
1145 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1146 		struct net *net = dev_net(dst->dev);
1147 
1148 		rt6->rt6i_flags |= RTF_MODIFIED;
1149 		if (mtu < IPV6_MIN_MTU) {
1150 			u32 features = dst_metric(dst, RTAX_FEATURES);
1151 			mtu = IPV6_MIN_MTU;
1152 			features |= RTAX_FEATURE_ALLFRAG;
1153 			dst_metric_set(dst, RTAX_FEATURES, features);
1154 		}
1155 		dst_metric_set(dst, RTAX_MTU, mtu);
1156 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1157 	}
1158 }
1159 
1160 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1161 		     int oif, u32 mark)
1162 {
1163 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1164 	struct dst_entry *dst;
1165 	struct flowi6 fl6;
1166 
1167 	memset(&fl6, 0, sizeof(fl6));
1168 	fl6.flowi6_oif = oif;
1169 	fl6.flowi6_mark = mark;
1170 	fl6.daddr = iph->daddr;
1171 	fl6.saddr = iph->saddr;
1172 	fl6.flowlabel = ip6_flowinfo(iph);
1173 
1174 	dst = ip6_route_output(net, NULL, &fl6);
1175 	if (!dst->error)
1176 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1177 	dst_release(dst);
1178 }
1179 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1180 
1181 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1182 {
1183 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1184 			sk->sk_bound_dev_if, sk->sk_mark);
1185 }
1186 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1187 
1188 /* Handle redirects */
/*
 * Flow key extended with the address of the router that sent a redirect.
 *
 * ip6_route_redirect() passes &rdfl.fl6 to the rule lookup and
 * __ip6_route_redirect() casts that struct flowi6 pointer back to
 * struct ip6rd_flowi, so @fl6 has to stay the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow used for the lookup */
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
1193 
1194 static struct rt6_info *__ip6_route_redirect(struct net *net,
1195 					     struct fib6_table *table,
1196 					     struct flowi6 *fl6,
1197 					     int flags)
1198 {
1199 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1200 	struct rt6_info *rt;
1201 	struct fib6_node *fn;
1202 
1203 	/* Get the "current" route for this destination and
1204 	 * check if the redirect has come from approriate router.
1205 	 *
1206 	 * RFC 4861 specifies that redirects should only be
1207 	 * accepted if they come from the nexthop to the target.
1208 	 * Due to the way the routes are chosen, this notion
1209 	 * is a bit fuzzy and one might need to check all possible
1210 	 * routes.
1211 	 */
1212 
1213 	read_lock_bh(&table->tb6_lock);
1214 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1215 restart:
1216 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1217 		if (rt6_check_expired(rt))
1218 			continue;
1219 		if (rt->dst.error)
1220 			break;
1221 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1222 			continue;
1223 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1224 			continue;
1225 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1226 			continue;
1227 		break;
1228 	}
1229 
1230 	if (!rt)
1231 		rt = net->ipv6.ip6_null_entry;
1232 	else if (rt->dst.error) {
1233 		rt = net->ipv6.ip6_null_entry;
1234 		goto out;
1235 	}
1236 	BACKTRACK(net, &fl6->saddr);
1237 out:
1238 	dst_hold(&rt->dst);
1239 
1240 	read_unlock_bh(&table->tb6_lock);
1241 
1242 	return rt;
1243 };
1244 
1245 static struct dst_entry *ip6_route_redirect(struct net *net,
1246 					const struct flowi6 *fl6,
1247 					const struct in6_addr *gateway)
1248 {
1249 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1250 	struct ip6rd_flowi rdfl;
1251 
1252 	rdfl.fl6 = *fl6;
1253 	rdfl.gateway = *gateway;
1254 
1255 	return fib6_rule_lookup(net, &rdfl.fl6,
1256 				flags, __ip6_route_redirect);
1257 }
1258 
1259 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1260 {
1261 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1262 	struct dst_entry *dst;
1263 	struct flowi6 fl6;
1264 
1265 	memset(&fl6, 0, sizeof(fl6));
1266 	fl6.flowi6_oif = oif;
1267 	fl6.flowi6_mark = mark;
1268 	fl6.daddr = iph->daddr;
1269 	fl6.saddr = iph->saddr;
1270 	fl6.flowlabel = ip6_flowinfo(iph);
1271 
1272 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1273 	rt6_do_redirect(dst, NULL, skb);
1274 	dst_release(dst);
1275 }
1276 EXPORT_SYMBOL_GPL(ip6_redirect);
1277 
1278 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1279 			    u32 mark)
1280 {
1281 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1282 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1283 	struct dst_entry *dst;
1284 	struct flowi6 fl6;
1285 
1286 	memset(&fl6, 0, sizeof(fl6));
1287 	fl6.flowi6_oif = oif;
1288 	fl6.flowi6_mark = mark;
1289 	fl6.daddr = msg->dest;
1290 	fl6.saddr = iph->daddr;
1291 
1292 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1293 	rt6_do_redirect(dst, NULL, skb);
1294 	dst_release(dst);
1295 }
1296 
1297 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1298 {
1299 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1300 }
1301 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1302 
1303 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1304 {
1305 	struct net_device *dev = dst->dev;
1306 	unsigned int mtu = dst_mtu(dst);
1307 	struct net *net = dev_net(dev);
1308 
1309 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1310 
1311 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1312 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1313 
1314 	/*
1315 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1316 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1317 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1318 	 * rely only on pmtu discovery"
1319 	 */
1320 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1321 		mtu = IPV6_MAXPLEN;
1322 	return mtu;
1323 }
1324 
1325 static unsigned int ip6_mtu(const struct dst_entry *dst)
1326 {
1327 	struct inet6_dev *idev;
1328 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1329 
1330 	if (mtu)
1331 		return mtu;
1332 
1333 	mtu = IPV6_MIN_MTU;
1334 
1335 	rcu_read_lock();
1336 	idev = __in6_dev_get(dst->dev);
1337 	if (idev)
1338 		mtu = idev->cnf.mtu6;
1339 	rcu_read_unlock();
1340 
1341 	return mtu;
1342 }
1343 
/*
 * Singly linked list (through dst->next) of the entries created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and
 * free entries from it.  All accesses are done under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1346 
1347 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1348 				  struct flowi6 *fl6)
1349 {
1350 	struct dst_entry *dst;
1351 	struct rt6_info *rt;
1352 	struct inet6_dev *idev = in6_dev_get(dev);
1353 	struct net *net = dev_net(dev);
1354 
1355 	if (unlikely(!idev))
1356 		return ERR_PTR(-ENODEV);
1357 
1358 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1359 	if (unlikely(!rt)) {
1360 		in6_dev_put(idev);
1361 		dst = ERR_PTR(-ENOMEM);
1362 		goto out;
1363 	}
1364 
1365 	rt->dst.flags |= DST_HOST;
1366 	rt->dst.output  = ip6_output;
1367 	atomic_set(&rt->dst.__refcnt, 1);
1368 	rt->rt6i_gateway  = fl6->daddr;
1369 	rt->rt6i_dst.addr = fl6->daddr;
1370 	rt->rt6i_dst.plen = 128;
1371 	rt->rt6i_idev     = idev;
1372 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1373 
1374 	spin_lock_bh(&icmp6_dst_lock);
1375 	rt->dst.next = icmp6_dst_gc_list;
1376 	icmp6_dst_gc_list = &rt->dst;
1377 	spin_unlock_bh(&icmp6_dst_lock);
1378 
1379 	fib6_force_start_gc(net);
1380 
1381 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1382 
1383 out:
1384 	return dst;
1385 }
1386 
1387 int icmp6_dst_gc(void)
1388 {
1389 	struct dst_entry *dst, **pprev;
1390 	int more = 0;
1391 
1392 	spin_lock_bh(&icmp6_dst_lock);
1393 	pprev = &icmp6_dst_gc_list;
1394 
1395 	while ((dst = *pprev) != NULL) {
1396 		if (!atomic_read(&dst->__refcnt)) {
1397 			*pprev = dst->next;
1398 			dst_free(dst);
1399 		} else {
1400 			pprev = &dst->next;
1401 			++more;
1402 		}
1403 	}
1404 
1405 	spin_unlock_bh(&icmp6_dst_lock);
1406 
1407 	return more;
1408 }
1409 
1410 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1411 			    void *arg)
1412 {
1413 	struct dst_entry *dst, **pprev;
1414 
1415 	spin_lock_bh(&icmp6_dst_lock);
1416 	pprev = &icmp6_dst_gc_list;
1417 	while ((dst = *pprev) != NULL) {
1418 		struct rt6_info *rt = (struct rt6_info *) dst;
1419 		if (func(rt, arg)) {
1420 			*pprev = dst->next;
1421 			dst_free(dst);
1422 		} else {
1423 			pprev = &dst->next;
1424 		}
1425 	}
1426 	spin_unlock_bh(&icmp6_dst_lock);
1427 }
1428 
1429 static int ip6_dst_gc(struct dst_ops *ops)
1430 {
1431 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1432 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1433 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1434 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1435 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1436 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1437 	int entries;
1438 
1439 	entries = dst_entries_get_fast(ops);
1440 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1441 	    entries <= rt_max_size)
1442 		goto out;
1443 
1444 	net->ipv6.ip6_rt_gc_expire++;
1445 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1446 	entries = dst_entries_get_slow(ops);
1447 	if (entries < ops->gc_thresh)
1448 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1449 out:
1450 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1451 	return entries > rt_max_size;
1452 }
1453 
1454 /*
1455  *
1456  */
1457 
1458 int ip6_route_add(struct fib6_config *cfg)
1459 {
1460 	int err;
1461 	struct net *net = cfg->fc_nlinfo.nl_net;
1462 	struct rt6_info *rt = NULL;
1463 	struct net_device *dev = NULL;
1464 	struct inet6_dev *idev = NULL;
1465 	struct fib6_table *table;
1466 	int addr_type;
1467 
1468 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1469 		return -EINVAL;
1470 #ifndef CONFIG_IPV6_SUBTREES
1471 	if (cfg->fc_src_len)
1472 		return -EINVAL;
1473 #endif
1474 	if (cfg->fc_ifindex) {
1475 		err = -ENODEV;
1476 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1477 		if (!dev)
1478 			goto out;
1479 		idev = in6_dev_get(dev);
1480 		if (!idev)
1481 			goto out;
1482 	}
1483 
1484 	if (cfg->fc_metric == 0)
1485 		cfg->fc_metric = IP6_RT_PRIO_USER;
1486 
1487 	err = -ENOBUFS;
1488 	if (cfg->fc_nlinfo.nlh &&
1489 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1490 		table = fib6_get_table(net, cfg->fc_table);
1491 		if (!table) {
1492 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1493 			table = fib6_new_table(net, cfg->fc_table);
1494 		}
1495 	} else {
1496 		table = fib6_new_table(net, cfg->fc_table);
1497 	}
1498 
1499 	if (!table)
1500 		goto out;
1501 
1502 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1503 
1504 	if (!rt) {
1505 		err = -ENOMEM;
1506 		goto out;
1507 	}
1508 
1509 	if (cfg->fc_flags & RTF_EXPIRES)
1510 		rt6_set_expires(rt, jiffies +
1511 				clock_t_to_jiffies(cfg->fc_expires));
1512 	else
1513 		rt6_clean_expires(rt);
1514 
1515 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1516 		cfg->fc_protocol = RTPROT_BOOT;
1517 	rt->rt6i_protocol = cfg->fc_protocol;
1518 
1519 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1520 
1521 	if (addr_type & IPV6_ADDR_MULTICAST)
1522 		rt->dst.input = ip6_mc_input;
1523 	else if (cfg->fc_flags & RTF_LOCAL)
1524 		rt->dst.input = ip6_input;
1525 	else
1526 		rt->dst.input = ip6_forward;
1527 
1528 	rt->dst.output = ip6_output;
1529 
1530 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1531 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1532 	if (rt->rt6i_dst.plen == 128)
1533 	       rt->dst.flags |= DST_HOST;
1534 
1535 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1536 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1537 		if (!metrics) {
1538 			err = -ENOMEM;
1539 			goto out;
1540 		}
1541 		dst_init_metrics(&rt->dst, metrics, 0);
1542 	}
1543 #ifdef CONFIG_IPV6_SUBTREES
1544 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1545 	rt->rt6i_src.plen = cfg->fc_src_len;
1546 #endif
1547 
1548 	rt->rt6i_metric = cfg->fc_metric;
1549 
1550 	/* We cannot add true routes via loopback here,
1551 	   they would result in kernel looping; promote them to reject routes
1552 	 */
1553 	if ((cfg->fc_flags & RTF_REJECT) ||
1554 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1555 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1556 	     !(cfg->fc_flags & RTF_LOCAL))) {
1557 		/* hold loopback dev/idev if we haven't done so. */
1558 		if (dev != net->loopback_dev) {
1559 			if (dev) {
1560 				dev_put(dev);
1561 				in6_dev_put(idev);
1562 			}
1563 			dev = net->loopback_dev;
1564 			dev_hold(dev);
1565 			idev = in6_dev_get(dev);
1566 			if (!idev) {
1567 				err = -ENODEV;
1568 				goto out;
1569 			}
1570 		}
1571 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1572 		switch (cfg->fc_type) {
1573 		case RTN_BLACKHOLE:
1574 			rt->dst.error = -EINVAL;
1575 			rt->dst.output = dst_discard;
1576 			rt->dst.input = dst_discard;
1577 			break;
1578 		case RTN_PROHIBIT:
1579 			rt->dst.error = -EACCES;
1580 			rt->dst.output = ip6_pkt_prohibit_out;
1581 			rt->dst.input = ip6_pkt_prohibit;
1582 			break;
1583 		case RTN_THROW:
1584 		default:
1585 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1586 					: -ENETUNREACH;
1587 			rt->dst.output = ip6_pkt_discard_out;
1588 			rt->dst.input = ip6_pkt_discard;
1589 			break;
1590 		}
1591 		goto install_route;
1592 	}
1593 
1594 	if (cfg->fc_flags & RTF_GATEWAY) {
1595 		const struct in6_addr *gw_addr;
1596 		int gwa_type;
1597 
1598 		gw_addr = &cfg->fc_gateway;
1599 		rt->rt6i_gateway = *gw_addr;
1600 		gwa_type = ipv6_addr_type(gw_addr);
1601 
1602 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1603 			struct rt6_info *grt;
1604 
1605 			/* IPv6 strictly inhibits using not link-local
1606 			   addresses as nexthop address.
1607 			   Otherwise, router will not able to send redirects.
1608 			   It is very good, but in some (rare!) circumstances
1609 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1610 			   some exceptions. --ANK
1611 			 */
1612 			err = -EINVAL;
1613 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1614 				goto out;
1615 
1616 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1617 
1618 			err = -EHOSTUNREACH;
1619 			if (!grt)
1620 				goto out;
1621 			if (dev) {
1622 				if (dev != grt->dst.dev) {
1623 					ip6_rt_put(grt);
1624 					goto out;
1625 				}
1626 			} else {
1627 				dev = grt->dst.dev;
1628 				idev = grt->rt6i_idev;
1629 				dev_hold(dev);
1630 				in6_dev_hold(grt->rt6i_idev);
1631 			}
1632 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1633 				err = 0;
1634 			ip6_rt_put(grt);
1635 
1636 			if (err)
1637 				goto out;
1638 		}
1639 		err = -EINVAL;
1640 		if (!dev || (dev->flags & IFF_LOOPBACK))
1641 			goto out;
1642 	}
1643 
1644 	err = -ENODEV;
1645 	if (!dev)
1646 		goto out;
1647 
1648 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1649 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1650 			err = -EINVAL;
1651 			goto out;
1652 		}
1653 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1654 		rt->rt6i_prefsrc.plen = 128;
1655 	} else
1656 		rt->rt6i_prefsrc.plen = 0;
1657 
1658 	rt->rt6i_flags = cfg->fc_flags;
1659 
1660 install_route:
1661 	if (cfg->fc_mx) {
1662 		struct nlattr *nla;
1663 		int remaining;
1664 
1665 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1666 			int type = nla_type(nla);
1667 
1668 			if (type) {
1669 				if (type > RTAX_MAX) {
1670 					err = -EINVAL;
1671 					goto out;
1672 				}
1673 
1674 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1675 			}
1676 		}
1677 	}
1678 
1679 	rt->dst.dev = dev;
1680 	rt->rt6i_idev = idev;
1681 	rt->rt6i_table = table;
1682 
1683 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1684 
1685 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1686 
1687 out:
1688 	if (dev)
1689 		dev_put(dev);
1690 	if (idev)
1691 		in6_dev_put(idev);
1692 	if (rt)
1693 		dst_free(&rt->dst);
1694 	return err;
1695 }
1696 
1697 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1698 {
1699 	int err;
1700 	struct fib6_table *table;
1701 	struct net *net = dev_net(rt->dst.dev);
1702 
1703 	if (rt == net->ipv6.ip6_null_entry) {
1704 		err = -ENOENT;
1705 		goto out;
1706 	}
1707 
1708 	table = rt->rt6i_table;
1709 	write_lock_bh(&table->tb6_lock);
1710 	err = fib6_del(rt, info);
1711 	write_unlock_bh(&table->tb6_lock);
1712 
1713 out:
1714 	ip6_rt_put(rt);
1715 	return err;
1716 }
1717 
1718 int ip6_del_rt(struct rt6_info *rt)
1719 {
1720 	struct nl_info info = {
1721 		.nl_net = dev_net(rt->dst.dev),
1722 	};
1723 	return __ip6_del_rt(rt, &info);
1724 }
1725 
1726 static int ip6_route_del(struct fib6_config *cfg)
1727 {
1728 	struct fib6_table *table;
1729 	struct fib6_node *fn;
1730 	struct rt6_info *rt;
1731 	int err = -ESRCH;
1732 
1733 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1734 	if (!table)
1735 		return err;
1736 
1737 	read_lock_bh(&table->tb6_lock);
1738 
1739 	fn = fib6_locate(&table->tb6_root,
1740 			 &cfg->fc_dst, cfg->fc_dst_len,
1741 			 &cfg->fc_src, cfg->fc_src_len);
1742 
1743 	if (fn) {
1744 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1745 			if (cfg->fc_ifindex &&
1746 			    (!rt->dst.dev ||
1747 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1748 				continue;
1749 			if (cfg->fc_flags & RTF_GATEWAY &&
1750 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1751 				continue;
1752 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1753 				continue;
1754 			dst_hold(&rt->dst);
1755 			read_unlock_bh(&table->tb6_lock);
1756 
1757 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1758 		}
1759 	}
1760 	read_unlock_bh(&table->tb6_lock);
1761 
1762 	return err;
1763 }
1764 
1765 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1766 {
1767 	struct net *net = dev_net(skb->dev);
1768 	struct netevent_redirect netevent;
1769 	struct rt6_info *rt, *nrt = NULL;
1770 	struct ndisc_options ndopts;
1771 	struct inet6_dev *in6_dev;
1772 	struct neighbour *neigh;
1773 	struct rd_msg *msg;
1774 	int optlen, on_link;
1775 	u8 *lladdr;
1776 
1777 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1778 	optlen -= sizeof(*msg);
1779 
1780 	if (optlen < 0) {
1781 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1782 		return;
1783 	}
1784 
1785 	msg = (struct rd_msg *)icmp6_hdr(skb);
1786 
1787 	if (ipv6_addr_is_multicast(&msg->dest)) {
1788 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1789 		return;
1790 	}
1791 
1792 	on_link = 0;
1793 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1794 		on_link = 1;
1795 	} else if (ipv6_addr_type(&msg->target) !=
1796 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1797 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1798 		return;
1799 	}
1800 
1801 	in6_dev = __in6_dev_get(skb->dev);
1802 	if (!in6_dev)
1803 		return;
1804 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1805 		return;
1806 
1807 	/* RFC2461 8.1:
1808 	 *	The IP source address of the Redirect MUST be the same as the current
1809 	 *	first-hop router for the specified ICMP Destination Address.
1810 	 */
1811 
1812 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1813 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1814 		return;
1815 	}
1816 
1817 	lladdr = NULL;
1818 	if (ndopts.nd_opts_tgt_lladdr) {
1819 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1820 					     skb->dev);
1821 		if (!lladdr) {
1822 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1823 			return;
1824 		}
1825 	}
1826 
1827 	rt = (struct rt6_info *) dst;
1828 	if (rt == net->ipv6.ip6_null_entry) {
1829 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1830 		return;
1831 	}
1832 
1833 	/* Redirect received -> path was valid.
1834 	 * Look, redirects are sent only in response to data packets,
1835 	 * so that this nexthop apparently is reachable. --ANK
1836 	 */
1837 	dst_confirm(&rt->dst);
1838 
1839 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1840 	if (!neigh)
1841 		return;
1842 
1843 	/*
1844 	 *	We have finally decided to accept it.
1845 	 */
1846 
1847 	neigh_update(neigh, lladdr, NUD_STALE,
1848 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1849 		     NEIGH_UPDATE_F_OVERRIDE|
1850 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1851 				     NEIGH_UPDATE_F_ISROUTER))
1852 		     );
1853 
1854 	nrt = ip6_rt_copy(rt, &msg->dest);
1855 	if (!nrt)
1856 		goto out;
1857 
1858 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1859 	if (on_link)
1860 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1861 
1862 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1863 
1864 	if (ip6_ins_rt(nrt))
1865 		goto out;
1866 
1867 	netevent.old = &rt->dst;
1868 	netevent.new = &nrt->dst;
1869 	netevent.daddr = &msg->dest;
1870 	netevent.neigh = neigh;
1871 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1872 
1873 	if (rt->rt6i_flags & RTF_CACHE) {
1874 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1875 		ip6_del_rt(rt);
1876 	}
1877 
1878 out:
1879 	neigh_release(neigh);
1880 }
1881 
1882 /*
1883  *	Misc support functions
1884  */
1885 
1886 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1887 				    const struct in6_addr *dest)
1888 {
1889 	struct net *net = dev_net(ort->dst.dev);
1890 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1891 					    ort->rt6i_table);
1892 
1893 	if (rt) {
1894 		rt->dst.input = ort->dst.input;
1895 		rt->dst.output = ort->dst.output;
1896 		rt->dst.flags |= DST_HOST;
1897 
1898 		rt->rt6i_dst.addr = *dest;
1899 		rt->rt6i_dst.plen = 128;
1900 		dst_copy_metrics(&rt->dst, &ort->dst);
1901 		rt->dst.error = ort->dst.error;
1902 		rt->rt6i_idev = ort->rt6i_idev;
1903 		if (rt->rt6i_idev)
1904 			in6_dev_hold(rt->rt6i_idev);
1905 		rt->dst.lastuse = jiffies;
1906 
1907 		if (ort->rt6i_flags & RTF_GATEWAY)
1908 			rt->rt6i_gateway = ort->rt6i_gateway;
1909 		else
1910 			rt->rt6i_gateway = *dest;
1911 		rt->rt6i_flags = ort->rt6i_flags;
1912 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1913 		    (RTF_DEFAULT | RTF_ADDRCONF))
1914 			rt6_set_from(rt, ort);
1915 		rt->rt6i_metric = 0;
1916 
1917 #ifdef CONFIG_IPV6_SUBTREES
1918 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1919 #endif
1920 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1921 		rt->rt6i_table = ort->rt6i_table;
1922 	}
1923 	return rt;
1924 }
1925 
1926 #ifdef CONFIG_IPV6_ROUTE_INFO
1927 static struct rt6_info *rt6_get_route_info(struct net *net,
1928 					   const struct in6_addr *prefix, int prefixlen,
1929 					   const struct in6_addr *gwaddr, int ifindex)
1930 {
1931 	struct fib6_node *fn;
1932 	struct rt6_info *rt = NULL;
1933 	struct fib6_table *table;
1934 
1935 	table = fib6_get_table(net, RT6_TABLE_INFO);
1936 	if (!table)
1937 		return NULL;
1938 
1939 	read_lock_bh(&table->tb6_lock);
1940 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1941 	if (!fn)
1942 		goto out;
1943 
1944 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1945 		if (rt->dst.dev->ifindex != ifindex)
1946 			continue;
1947 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1948 			continue;
1949 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1950 			continue;
1951 		dst_hold(&rt->dst);
1952 		break;
1953 	}
1954 out:
1955 	read_unlock_bh(&table->tb6_lock);
1956 	return rt;
1957 }
1958 
1959 static struct rt6_info *rt6_add_route_info(struct net *net,
1960 					   const struct in6_addr *prefix, int prefixlen,
1961 					   const struct in6_addr *gwaddr, int ifindex,
1962 					   unsigned int pref)
1963 {
1964 	struct fib6_config cfg = {
1965 		.fc_table	= RT6_TABLE_INFO,
1966 		.fc_metric	= IP6_RT_PRIO_USER,
1967 		.fc_ifindex	= ifindex,
1968 		.fc_dst_len	= prefixlen,
1969 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1970 				  RTF_UP | RTF_PREF(pref),
1971 		.fc_nlinfo.portid = 0,
1972 		.fc_nlinfo.nlh = NULL,
1973 		.fc_nlinfo.nl_net = net,
1974 	};
1975 
1976 	cfg.fc_dst = *prefix;
1977 	cfg.fc_gateway = *gwaddr;
1978 
1979 	/* We should treat it as a default route if prefix length is 0. */
1980 	if (!prefixlen)
1981 		cfg.fc_flags |= RTF_DEFAULT;
1982 
1983 	ip6_route_add(&cfg);
1984 
1985 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1986 }
1987 #endif
1988 
1989 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1990 {
1991 	struct rt6_info *rt;
1992 	struct fib6_table *table;
1993 
1994 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1995 	if (!table)
1996 		return NULL;
1997 
1998 	read_lock_bh(&table->tb6_lock);
1999 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
2000 		if (dev == rt->dst.dev &&
2001 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2002 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2003 			break;
2004 	}
2005 	if (rt)
2006 		dst_hold(&rt->dst);
2007 	read_unlock_bh(&table->tb6_lock);
2008 	return rt;
2009 }
2010 
2011 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2012 				     struct net_device *dev,
2013 				     unsigned int pref)
2014 {
2015 	struct fib6_config cfg = {
2016 		.fc_table	= RT6_TABLE_DFLT,
2017 		.fc_metric	= IP6_RT_PRIO_USER,
2018 		.fc_ifindex	= dev->ifindex,
2019 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2020 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2021 		.fc_nlinfo.portid = 0,
2022 		.fc_nlinfo.nlh = NULL,
2023 		.fc_nlinfo.nl_net = dev_net(dev),
2024 	};
2025 
2026 	cfg.fc_gateway = *gwaddr;
2027 
2028 	ip6_route_add(&cfg);
2029 
2030 	return rt6_get_dflt_router(gwaddr, dev);
2031 }
2032 
2033 void rt6_purge_dflt_routers(struct net *net)
2034 {
2035 	struct rt6_info *rt;
2036 	struct fib6_table *table;
2037 
2038 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2039 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2040 	if (!table)
2041 		return;
2042 
2043 restart:
2044 	read_lock_bh(&table->tb6_lock);
2045 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2046 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2047 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2048 			dst_hold(&rt->dst);
2049 			read_unlock_bh(&table->tb6_lock);
2050 			ip6_del_rt(rt);
2051 			goto restart;
2052 		}
2053 	}
2054 	read_unlock_bh(&table->tb6_lock);
2055 }
2056 
2057 static void rtmsg_to_fib6_config(struct net *net,
2058 				 struct in6_rtmsg *rtmsg,
2059 				 struct fib6_config *cfg)
2060 {
2061 	memset(cfg, 0, sizeof(*cfg));
2062 
2063 	cfg->fc_table = RT6_TABLE_MAIN;
2064 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2065 	cfg->fc_metric = rtmsg->rtmsg_metric;
2066 	cfg->fc_expires = rtmsg->rtmsg_info;
2067 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2068 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2069 	cfg->fc_flags = rtmsg->rtmsg_flags;
2070 
2071 	cfg->fc_nlinfo.nl_net = net;
2072 
2073 	cfg->fc_dst = rtmsg->rtmsg_dst;
2074 	cfg->fc_src = rtmsg->rtmsg_src;
2075 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2076 }
2077 
2078 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2079 {
2080 	struct fib6_config cfg;
2081 	struct in6_rtmsg rtmsg;
2082 	int err;
2083 
2084 	switch(cmd) {
2085 	case SIOCADDRT:		/* Add a route */
2086 	case SIOCDELRT:		/* Delete a route */
2087 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2088 			return -EPERM;
2089 		err = copy_from_user(&rtmsg, arg,
2090 				     sizeof(struct in6_rtmsg));
2091 		if (err)
2092 			return -EFAULT;
2093 
2094 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2095 
2096 		rtnl_lock();
2097 		switch (cmd) {
2098 		case SIOCADDRT:
2099 			err = ip6_route_add(&cfg);
2100 			break;
2101 		case SIOCDELRT:
2102 			err = ip6_route_del(&cfg);
2103 			break;
2104 		default:
2105 			err = -EINVAL;
2106 		}
2107 		rtnl_unlock();
2108 
2109 		return err;
2110 	}
2111 
2112 	return -EINVAL;
2113 }
2114 
2115 /*
2116  *	Drop the packet on the floor
2117  */
2118 
2119 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2120 {
2121 	int type;
2122 	struct dst_entry *dst = skb_dst(skb);
2123 	switch (ipstats_mib_noroutes) {
2124 	case IPSTATS_MIB_INNOROUTES:
2125 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2126 		if (type == IPV6_ADDR_ANY) {
2127 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2128 				      IPSTATS_MIB_INADDRERRORS);
2129 			break;
2130 		}
2131 		/* FALLTHROUGH */
2132 	case IPSTATS_MIB_OUTNOROUTES:
2133 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2134 			      ipstats_mib_noroutes);
2135 		break;
2136 	}
2137 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2138 	kfree_skb(skb);
2139 	return 0;
2140 }
2141 
2142 static int ip6_pkt_discard(struct sk_buff *skb)
2143 {
2144 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2145 }
2146 
2147 static int ip6_pkt_discard_out(struct sk_buff *skb)
2148 {
2149 	skb->dev = skb_dst(skb)->dev;
2150 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2151 }
2152 
2153 static int ip6_pkt_prohibit(struct sk_buff *skb)
2154 {
2155 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2156 }
2157 
2158 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2159 {
2160 	skb->dev = skb_dst(skb)->dev;
2161 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2162 }
2163 
2164 /*
2165  *	Allocate a dst for local (unicast / anycast) address.
2166  */
2167 
2168 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2169 				    const struct in6_addr *addr,
2170 				    bool anycast)
2171 {
2172 	struct net *net = dev_net(idev->dev);
2173 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2174 					    DST_NOCOUNT, NULL);
2175 	if (!rt)
2176 		return ERR_PTR(-ENOMEM);
2177 
2178 	in6_dev_hold(idev);
2179 
2180 	rt->dst.flags |= DST_HOST;
2181 	rt->dst.input = ip6_input;
2182 	rt->dst.output = ip6_output;
2183 	rt->rt6i_idev = idev;
2184 
2185 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2186 	if (anycast)
2187 		rt->rt6i_flags |= RTF_ANYCAST;
2188 	else
2189 		rt->rt6i_flags |= RTF_LOCAL;
2190 
2191 	rt->rt6i_gateway  = *addr;
2192 	rt->rt6i_dst.addr = *addr;
2193 	rt->rt6i_dst.plen = 128;
2194 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2195 
2196 	atomic_set(&rt->dst.__refcnt, 1);
2197 
2198 	return rt;
2199 }
2200 
2201 int ip6_route_get_saddr(struct net *net,
2202 			struct rt6_info *rt,
2203 			const struct in6_addr *daddr,
2204 			unsigned int prefs,
2205 			struct in6_addr *saddr)
2206 {
2207 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2208 	int err = 0;
2209 	if (rt->rt6i_prefsrc.plen)
2210 		*saddr = rt->rt6i_prefsrc.addr;
2211 	else
2212 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2213 					 daddr, prefs, saddr);
2214 	return err;
2215 }
2216 
2217 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc.addr */
};
2223 
2224 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2225 {
2226 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2227 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2228 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2229 
2230 	if (((void *)rt->dst.dev == dev || !dev) &&
2231 	    rt != net->ipv6.ip6_null_entry &&
2232 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2233 		/* remove prefsrc entry */
2234 		rt->rt6i_prefsrc.plen = 0;
2235 	}
2236 	return 0;
2237 }
2238 
2239 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2240 {
2241 	struct net *net = dev_net(ifp->idev->dev);
2242 	struct arg_dev_net_ip adni = {
2243 		.dev = ifp->idev->dev,
2244 		.net = net,
2245 		.addr = &ifp->addr,
2246 	};
2247 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2248 }
2249 
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2254 
2255 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2256 {
2257 	const struct arg_dev_net *adn = arg;
2258 	const struct net_device *dev = adn->dev;
2259 
2260 	if ((rt->dst.dev == dev || !dev) &&
2261 	    rt != adn->net->ipv6.ip6_null_entry)
2262 		return -1;
2263 
2264 	return 0;
2265 }
2266 
2267 void rt6_ifdown(struct net *net, struct net_device *dev)
2268 {
2269 	struct arg_dev_net adn = {
2270 		.dev = dev,
2271 		.net = net,
2272 	};
2273 
2274 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2275 	icmp6_clean_all(fib6_ifdown, &adn);
2276 }
2277 
/* Walker argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2282 
2283 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2284 {
2285 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2286 	struct inet6_dev *idev;
2287 
2288 	/* In IPv6 pmtu discovery is not optional,
2289 	   so that RTAX_MTU lock cannot disable it.
2290 	   We still use this lock to block changes
2291 	   caused by addrconf/ndisc.
2292 	*/
2293 
2294 	idev = __in6_dev_get(arg->dev);
2295 	if (!idev)
2296 		return 0;
2297 
2298 	/* For administrative MTU increase, there is no way to discover
2299 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2300 	   Since RFC 1981 doesn't include administrative MTU increase
2301 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2302 	 */
2303 	/*
2304 	   If new MTU is less than route PMTU, this new MTU will be the
2305 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2306 	   decreases; if new MTU is greater than route PMTU, and the
2307 	   old MTU is the lowest MTU in the path, update the route PMTU
2308 	   to reflect the increase. In this case if the other nodes' MTU
2309 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2310 	   PMTU discouvery.
2311 	 */
2312 	if (rt->dst.dev == arg->dev &&
2313 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2314 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2315 	     (dst_mtu(&rt->dst) < arg->mtu &&
2316 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2317 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2318 	}
2319 	return 0;
2320 }
2321 
2322 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2323 {
2324 	struct rt6_mtu_change_arg arg = {
2325 		.dev = dev,
2326 		.mtu = mtu,
2327 	};
2328 
2329 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2330 }
2331 
2332 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2333 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2334 	[RTA_OIF]               = { .type = NLA_U32 },
2335 	[RTA_IIF]		= { .type = NLA_U32 },
2336 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2337 	[RTA_METRICS]           = { .type = NLA_NESTED },
2338 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2339 };
2340 
2341 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2342 			      struct fib6_config *cfg)
2343 {
2344 	struct rtmsg *rtm;
2345 	struct nlattr *tb[RTA_MAX+1];
2346 	int err;
2347 
2348 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2349 	if (err < 0)
2350 		goto errout;
2351 
2352 	err = -EINVAL;
2353 	rtm = nlmsg_data(nlh);
2354 	memset(cfg, 0, sizeof(*cfg));
2355 
2356 	cfg->fc_table = rtm->rtm_table;
2357 	cfg->fc_dst_len = rtm->rtm_dst_len;
2358 	cfg->fc_src_len = rtm->rtm_src_len;
2359 	cfg->fc_flags = RTF_UP;
2360 	cfg->fc_protocol = rtm->rtm_protocol;
2361 	cfg->fc_type = rtm->rtm_type;
2362 
2363 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2364 	    rtm->rtm_type == RTN_BLACKHOLE ||
2365 	    rtm->rtm_type == RTN_PROHIBIT ||
2366 	    rtm->rtm_type == RTN_THROW)
2367 		cfg->fc_flags |= RTF_REJECT;
2368 
2369 	if (rtm->rtm_type == RTN_LOCAL)
2370 		cfg->fc_flags |= RTF_LOCAL;
2371 
2372 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2373 	cfg->fc_nlinfo.nlh = nlh;
2374 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2375 
2376 	if (tb[RTA_GATEWAY]) {
2377 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2378 		cfg->fc_flags |= RTF_GATEWAY;
2379 	}
2380 
2381 	if (tb[RTA_DST]) {
2382 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2383 
2384 		if (nla_len(tb[RTA_DST]) < plen)
2385 			goto errout;
2386 
2387 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2388 	}
2389 
2390 	if (tb[RTA_SRC]) {
2391 		int plen = (rtm->rtm_src_len + 7) >> 3;
2392 
2393 		if (nla_len(tb[RTA_SRC]) < plen)
2394 			goto errout;
2395 
2396 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2397 	}
2398 
2399 	if (tb[RTA_PREFSRC])
2400 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2401 
2402 	if (tb[RTA_OIF])
2403 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2404 
2405 	if (tb[RTA_PRIORITY])
2406 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2407 
2408 	if (tb[RTA_METRICS]) {
2409 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2410 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2411 	}
2412 
2413 	if (tb[RTA_TABLE])
2414 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2415 
2416 	if (tb[RTA_MULTIPATH]) {
2417 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2418 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2419 	}
2420 
2421 	err = 0;
2422 errout:
2423 	return err;
2424 }
2425 
2426 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2427 {
2428 	struct fib6_config r_cfg;
2429 	struct rtnexthop *rtnh;
2430 	int remaining;
2431 	int attrlen;
2432 	int err = 0, last_err = 0;
2433 
2434 beginning:
2435 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2436 	remaining = cfg->fc_mp_len;
2437 
2438 	/* Parse a Multipath Entry */
2439 	while (rtnh_ok(rtnh, remaining)) {
2440 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2441 		if (rtnh->rtnh_ifindex)
2442 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2443 
2444 		attrlen = rtnh_attrlen(rtnh);
2445 		if (attrlen > 0) {
2446 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2447 
2448 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2449 			if (nla) {
2450 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2451 				r_cfg.fc_flags |= RTF_GATEWAY;
2452 			}
2453 		}
2454 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2455 		if (err) {
2456 			last_err = err;
2457 			/* If we are trying to remove a route, do not stop the
2458 			 * loop when ip6_route_del() fails (because next hop is
2459 			 * already gone), we should try to remove all next hops.
2460 			 */
2461 			if (add) {
2462 				/* If add fails, we should try to delete all
2463 				 * next hops that have been already added.
2464 				 */
2465 				add = 0;
2466 				goto beginning;
2467 			}
2468 		}
2469 		/* Because each route is added like a single route we remove
2470 		 * this flag after the first nexthop (if there is a collision,
2471 		 * we have already fail to add the first nexthop:
2472 		 * fib6_add_rt2node() has reject it).
2473 		 */
2474 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2475 		rtnh = rtnh_next(rtnh, &remaining);
2476 	}
2477 
2478 	return last_err;
2479 }
2480 
2481 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2482 {
2483 	struct fib6_config cfg;
2484 	int err;
2485 
2486 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2487 	if (err < 0)
2488 		return err;
2489 
2490 	if (cfg.fc_mp)
2491 		return ip6_route_multipath(&cfg, 0);
2492 	else
2493 		return ip6_route_del(&cfg);
2494 }
2495 
2496 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2497 {
2498 	struct fib6_config cfg;
2499 	int err;
2500 
2501 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2502 	if (err < 0)
2503 		return err;
2504 
2505 	if (cfg.fc_mp)
2506 		return ip6_route_multipath(&cfg, 1);
2507 	else
2508 		return ip6_route_add(&cfg);
2509 }
2510 
2511 static inline size_t rt6_nlmsg_size(void)
2512 {
2513 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2514 	       + nla_total_size(16) /* RTA_SRC */
2515 	       + nla_total_size(16) /* RTA_DST */
2516 	       + nla_total_size(16) /* RTA_GATEWAY */
2517 	       + nla_total_size(16) /* RTA_PREFSRC */
2518 	       + nla_total_size(4) /* RTA_TABLE */
2519 	       + nla_total_size(4) /* RTA_IIF */
2520 	       + nla_total_size(4) /* RTA_OIF */
2521 	       + nla_total_size(4) /* RTA_PRIORITY */
2522 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2523 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2524 }
2525 
2526 static int rt6_fill_node(struct net *net,
2527 			 struct sk_buff *skb, struct rt6_info *rt,
2528 			 struct in6_addr *dst, struct in6_addr *src,
2529 			 int iif, int type, u32 portid, u32 seq,
2530 			 int prefix, int nowait, unsigned int flags)
2531 {
2532 	struct rtmsg *rtm;
2533 	struct nlmsghdr *nlh;
2534 	long expires;
2535 	u32 table;
2536 
2537 	if (prefix) {	/* user wants prefix routes only */
2538 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2539 			/* success since this is not a prefix route */
2540 			return 1;
2541 		}
2542 	}
2543 
2544 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2545 	if (!nlh)
2546 		return -EMSGSIZE;
2547 
2548 	rtm = nlmsg_data(nlh);
2549 	rtm->rtm_family = AF_INET6;
2550 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2551 	rtm->rtm_src_len = rt->rt6i_src.plen;
2552 	rtm->rtm_tos = 0;
2553 	if (rt->rt6i_table)
2554 		table = rt->rt6i_table->tb6_id;
2555 	else
2556 		table = RT6_TABLE_UNSPEC;
2557 	rtm->rtm_table = table;
2558 	if (nla_put_u32(skb, RTA_TABLE, table))
2559 		goto nla_put_failure;
2560 	if (rt->rt6i_flags & RTF_REJECT) {
2561 		switch (rt->dst.error) {
2562 		case -EINVAL:
2563 			rtm->rtm_type = RTN_BLACKHOLE;
2564 			break;
2565 		case -EACCES:
2566 			rtm->rtm_type = RTN_PROHIBIT;
2567 			break;
2568 		case -EAGAIN:
2569 			rtm->rtm_type = RTN_THROW;
2570 			break;
2571 		default:
2572 			rtm->rtm_type = RTN_UNREACHABLE;
2573 			break;
2574 		}
2575 	}
2576 	else if (rt->rt6i_flags & RTF_LOCAL)
2577 		rtm->rtm_type = RTN_LOCAL;
2578 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2579 		rtm->rtm_type = RTN_LOCAL;
2580 	else
2581 		rtm->rtm_type = RTN_UNICAST;
2582 	rtm->rtm_flags = 0;
2583 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2584 	rtm->rtm_protocol = rt->rt6i_protocol;
2585 	if (rt->rt6i_flags & RTF_DYNAMIC)
2586 		rtm->rtm_protocol = RTPROT_REDIRECT;
2587 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2588 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2589 			rtm->rtm_protocol = RTPROT_RA;
2590 		else
2591 			rtm->rtm_protocol = RTPROT_KERNEL;
2592 	}
2593 
2594 	if (rt->rt6i_flags & RTF_CACHE)
2595 		rtm->rtm_flags |= RTM_F_CLONED;
2596 
2597 	if (dst) {
2598 		if (nla_put(skb, RTA_DST, 16, dst))
2599 			goto nla_put_failure;
2600 		rtm->rtm_dst_len = 128;
2601 	} else if (rtm->rtm_dst_len)
2602 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2603 			goto nla_put_failure;
2604 #ifdef CONFIG_IPV6_SUBTREES
2605 	if (src) {
2606 		if (nla_put(skb, RTA_SRC, 16, src))
2607 			goto nla_put_failure;
2608 		rtm->rtm_src_len = 128;
2609 	} else if (rtm->rtm_src_len &&
2610 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2611 		goto nla_put_failure;
2612 #endif
2613 	if (iif) {
2614 #ifdef CONFIG_IPV6_MROUTE
2615 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2616 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2617 			if (err <= 0) {
2618 				if (!nowait) {
2619 					if (err == 0)
2620 						return 0;
2621 					goto nla_put_failure;
2622 				} else {
2623 					if (err == -EMSGSIZE)
2624 						goto nla_put_failure;
2625 				}
2626 			}
2627 		} else
2628 #endif
2629 			if (nla_put_u32(skb, RTA_IIF, iif))
2630 				goto nla_put_failure;
2631 	} else if (dst) {
2632 		struct in6_addr saddr_buf;
2633 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2634 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2635 			goto nla_put_failure;
2636 	}
2637 
2638 	if (rt->rt6i_prefsrc.plen) {
2639 		struct in6_addr saddr_buf;
2640 		saddr_buf = rt->rt6i_prefsrc.addr;
2641 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2642 			goto nla_put_failure;
2643 	}
2644 
2645 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2646 		goto nla_put_failure;
2647 
2648 	if (rt->rt6i_flags & RTF_GATEWAY) {
2649 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2650 			goto nla_put_failure;
2651 	}
2652 
2653 	if (rt->dst.dev &&
2654 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2655 		goto nla_put_failure;
2656 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2657 		goto nla_put_failure;
2658 
2659 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2660 
2661 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2662 		goto nla_put_failure;
2663 
2664 	return nlmsg_end(skb, nlh);
2665 
2666 nla_put_failure:
2667 	nlmsg_cancel(skb, nlh);
2668 	return -EMSGSIZE;
2669 }
2670 
2671 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2672 {
2673 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2674 	int prefix;
2675 
2676 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2677 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2678 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2679 	} else
2680 		prefix = 0;
2681 
2682 	return rt6_fill_node(arg->net,
2683 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2684 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2685 		     prefix, 0, NLM_F_MULTI);
2686 }
2687 
2688 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2689 {
2690 	struct net *net = sock_net(in_skb->sk);
2691 	struct nlattr *tb[RTA_MAX+1];
2692 	struct rt6_info *rt;
2693 	struct sk_buff *skb;
2694 	struct rtmsg *rtm;
2695 	struct flowi6 fl6;
2696 	int err, iif = 0, oif = 0;
2697 
2698 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2699 	if (err < 0)
2700 		goto errout;
2701 
2702 	err = -EINVAL;
2703 	memset(&fl6, 0, sizeof(fl6));
2704 
2705 	if (tb[RTA_SRC]) {
2706 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2707 			goto errout;
2708 
2709 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2710 	}
2711 
2712 	if (tb[RTA_DST]) {
2713 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2714 			goto errout;
2715 
2716 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2717 	}
2718 
2719 	if (tb[RTA_IIF])
2720 		iif = nla_get_u32(tb[RTA_IIF]);
2721 
2722 	if (tb[RTA_OIF])
2723 		oif = nla_get_u32(tb[RTA_OIF]);
2724 
2725 	if (iif) {
2726 		struct net_device *dev;
2727 		int flags = 0;
2728 
2729 		dev = __dev_get_by_index(net, iif);
2730 		if (!dev) {
2731 			err = -ENODEV;
2732 			goto errout;
2733 		}
2734 
2735 		fl6.flowi6_iif = iif;
2736 
2737 		if (!ipv6_addr_any(&fl6.saddr))
2738 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2739 
2740 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2741 							       flags);
2742 	} else {
2743 		fl6.flowi6_oif = oif;
2744 
2745 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2746 	}
2747 
2748 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2749 	if (!skb) {
2750 		ip6_rt_put(rt);
2751 		err = -ENOBUFS;
2752 		goto errout;
2753 	}
2754 
2755 	/* Reserve room for dummy headers, this skb can pass
2756 	   through good chunk of routing engine.
2757 	 */
2758 	skb_reset_mac_header(skb);
2759 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2760 
2761 	skb_dst_set(skb, &rt->dst);
2762 
2763 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2764 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2765 			    nlh->nlmsg_seq, 0, 0, 0);
2766 	if (err < 0) {
2767 		kfree_skb(skb);
2768 		goto errout;
2769 	}
2770 
2771 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2772 errout:
2773 	return err;
2774 }
2775 
2776 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2777 {
2778 	struct sk_buff *skb;
2779 	struct net *net = info->nl_net;
2780 	u32 seq;
2781 	int err;
2782 
2783 	err = -ENOBUFS;
2784 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2785 
2786 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2787 	if (!skb)
2788 		goto errout;
2789 
2790 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2791 				event, info->portid, seq, 0, 0, 0);
2792 	if (err < 0) {
2793 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2794 		WARN_ON(err == -EMSGSIZE);
2795 		kfree_skb(skb);
2796 		goto errout;
2797 	}
2798 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2799 		    info->nlh, gfp_any());
2800 	return;
2801 errout:
2802 	if (err < 0)
2803 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2804 }
2805 
2806 static int ip6_route_dev_notify(struct notifier_block *this,
2807 				unsigned long event, void *ptr)
2808 {
2809 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2810 	struct net *net = dev_net(dev);
2811 
2812 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2813 		net->ipv6.ip6_null_entry->dst.dev = dev;
2814 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2815 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2816 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2817 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2818 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2819 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2820 #endif
2821 	}
2822 
2823 	return NOTIFY_OK;
2824 }
2825 
2826 /*
2827  *	/proc
2828  */
2829 
2830 #ifdef CONFIG_PROC_FS
2831 
/*
 * File operations of the per-namespace "ipv6_route" proc entry created in
 * ip6_route_net_init_late().  ipv6_route_open() is defined outside this
 * part of the file.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2839 
2840 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2841 {
2842 	struct net *net = (struct net *)seq->private;
2843 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2844 		   net->ipv6.rt6_stats->fib_nodes,
2845 		   net->ipv6.rt6_stats->fib_route_nodes,
2846 		   net->ipv6.rt6_stats->fib_rt_alloc,
2847 		   net->ipv6.rt6_stats->fib_rt_entries,
2848 		   net->ipv6.rt6_stats->fib_rt_cache,
2849 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2850 		   net->ipv6.rt6_stats->fib_discarded_routes);
2851 
2852 	return 0;
2853 }
2854 
2855 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2856 {
2857 	return single_open_net(inode, file, rt6_stats_seq_show);
2858 }
2859 
/*
 * File operations of the per-namespace "rt6_stats" proc entry created in
 * ip6_route_net_init_late().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2867 #endif	/* CONFIG_PROC_FS */
2868 
2869 #ifdef CONFIG_SYSCTL
2870 
2871 static
2872 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2873 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2874 {
2875 	struct net *net;
2876 	int delay;
2877 	if (!write)
2878 		return -EINVAL;
2879 
2880 	net = (struct net *)ctl->extra1;
2881 	delay = net->ipv6.sysctl.flush_delay;
2882 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2883 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2884 	return 0;
2885 }
2886 
/*
 * Template of the IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this table for each namespace and
 * rewrites the .data pointers by position (table[0] .. table[9]), so the
 * order of the entries below must stay in sync with that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* [0] write-only; triggers a garbage collection run */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [3] same variable as [9], accessed via the jiffies handler */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [9] same variable as [3], accessed via the ms_jiffies handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* empty entry terminates the table */
};
2960 
2961 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2962 {
2963 	struct ctl_table *table;
2964 
2965 	table = kmemdup(ipv6_route_table_template,
2966 			sizeof(ipv6_route_table_template),
2967 			GFP_KERNEL);
2968 
2969 	if (table) {
2970 		table[0].data = &net->ipv6.sysctl.flush_delay;
2971 		table[0].extra1 = net;
2972 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2973 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2974 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2975 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2976 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2977 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2978 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2979 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2980 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2981 
2982 		/* Don't export sysctls to unprivileged users */
2983 		if (net->user_ns != &init_user_ns)
2984 			table[0].procname = NULL;
2985 	}
2986 
2987 	return table;
2988 }
2989 #endif
2990 
2991 static int __net_init ip6_route_net_init(struct net *net)
2992 {
2993 	int ret = -ENOMEM;
2994 
2995 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2996 	       sizeof(net->ipv6.ip6_dst_ops));
2997 
2998 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2999 		goto out_ip6_dst_ops;
3000 
3001 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3002 					   sizeof(*net->ipv6.ip6_null_entry),
3003 					   GFP_KERNEL);
3004 	if (!net->ipv6.ip6_null_entry)
3005 		goto out_ip6_dst_entries;
3006 	net->ipv6.ip6_null_entry->dst.path =
3007 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3008 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3009 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3010 			 ip6_template_metrics, true);
3011 
3012 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3013 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3014 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3015 					       GFP_KERNEL);
3016 	if (!net->ipv6.ip6_prohibit_entry)
3017 		goto out_ip6_null_entry;
3018 	net->ipv6.ip6_prohibit_entry->dst.path =
3019 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3020 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3021 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3022 			 ip6_template_metrics, true);
3023 
3024 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3025 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3026 					       GFP_KERNEL);
3027 	if (!net->ipv6.ip6_blk_hole_entry)
3028 		goto out_ip6_prohibit_entry;
3029 	net->ipv6.ip6_blk_hole_entry->dst.path =
3030 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3031 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3032 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3033 			 ip6_template_metrics, true);
3034 #endif
3035 
3036 	net->ipv6.sysctl.flush_delay = 0;
3037 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3038 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3039 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3040 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3041 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3042 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3043 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3044 
3045 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3046 
3047 	ret = 0;
3048 out:
3049 	return ret;
3050 
3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052 out_ip6_prohibit_entry:
3053 	kfree(net->ipv6.ip6_prohibit_entry);
3054 out_ip6_null_entry:
3055 	kfree(net->ipv6.ip6_null_entry);
3056 #endif
3057 out_ip6_dst_entries:
3058 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3059 out_ip6_dst_ops:
3060 	goto out;
3061 }
3062 
3063 static void __net_exit ip6_route_net_exit(struct net *net)
3064 {
3065 	kfree(net->ipv6.ip6_null_entry);
3066 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3067 	kfree(net->ipv6.ip6_prohibit_entry);
3068 	kfree(net->ipv6.ip6_blk_hole_entry);
3069 #endif
3070 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3071 }
3072 
3073 static int __net_init ip6_route_net_init_late(struct net *net)
3074 {
3075 #ifdef CONFIG_PROC_FS
3076 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3077 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3078 #endif
3079 	return 0;
3080 }
3081 
3082 static void __net_exit ip6_route_net_exit_late(struct net *net)
3083 {
3084 #ifdef CONFIG_PROC_FS
3085 	remove_proc_entry("ipv6_route", net->proc_net);
3086 	remove_proc_entry("rt6_stats", net->proc_net);
3087 #endif
3088 }
3089 
/*
 * Per-namespace set-up/teardown of the routing core (dst ops, special
 * route entries, sysctl defaults); registered from ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3094 
3095 static int __net_init ipv6_inetpeer_init(struct net *net)
3096 {
3097 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3098 
3099 	if (!bp)
3100 		return -ENOMEM;
3101 	inet_peer_base_init(bp);
3102 	net->ipv6.peers = bp;
3103 	return 0;
3104 }
3105 
3106 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3107 {
3108 	struct inet_peer_base *bp = net->ipv6.peers;
3109 
3110 	net->ipv6.peers = NULL;
3111 	inetpeer_invalidate_tree(bp);
3112 	kfree(bp);
3113 }
3114 
/*
 * Per-namespace allocation/release of the IPv6 inet_peer base; registered
 * from ip6_route_init() before ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3119 
/*
 * Per-namespace creation/removal of the route proc entries; registered
 * from ip6_route_init() after fib6, xfrm6 and the fib6 rules are set up.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3124 
/*
 * Netdevice notifier that binds the special route entries to a newly
 * registered loopback device (see ip6_route_dev_notify()).
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3129 
/*
 * One-time initialisation of the IPv6 routing subsystem.
 *
 * Sets up, in order: the rt6_info slab cache, the blackhole dst ops
 * accounting, the inetpeer and route pernet operations, the loopback
 * binding of init_net's special entries, fib6, xfrm6, the fib6 rules, the
 * late pernet operations (proc entries), the rtnetlink route handlers and
 * the netdevice notifier.
 *
 * Returns 0 on success or a negative errno.  On failure the steps already
 * completed are undone through the label chain at the end of the function.
 * NOTE(review): rtnetlink handlers that were registered before a later
 * failure are not unregistered by that chain.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole dst ops share the rt6_info slab cache. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Any failing handler registration is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding: each label undoes the step completed before the
	 * one that failed, then falls through to the earlier ones.
	 */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3213 
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * late pernet operations, the fib6 rules, xfrm6, the fib6 garbage
 * collector, both pernet subsystems, the blackhole dst accounting and
 * finally the rt6_info slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before ip6_route_net_ops
 * here, whereas the error path of ip6_route_init() unregisters them in the
 * opposite order; confirm that the difference is intentional.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3226