xref: /linux/net/ipv6/ndisc.c (revision 25489a4f556414445d342951615178368ee45cde)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Neighbour Discovery for IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *	Mike Shaver		<shaver@ingenia.com>
9  */
10 
11 /*
12  *	Changes:
13  *
14  *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
15  *	Pierre Ynard			:	export userland ND options
16  *						through netlink (RDNSS support)
17  *	Lars Fenneberg			:	fixed MTU setting on receipt
18  *						of an RA.
19  *	Janos Farkas			:	kmalloc failure checks
20  *	Alexey Kuznetsov		:	state machine reworked
21  *						and moved to net/core.
22  *	Pekka Savola			:	RFC2461 validation
23  *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
24  */
25 
26 #define pr_fmt(fmt) "ICMPv6: " fmt
27 
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/sched.h>
34 #include <linux/net.h>
35 #include <linux/in6.h>
36 #include <linux/route.h>
37 #include <linux/init.h>
38 #include <linux/rcupdate.h>
39 #include <linux/slab.h>
40 #ifdef CONFIG_SYSCTL
41 #include <linux/sysctl.h>
42 #endif
43 
44 #include <linux/if_addr.h>
45 #include <linux/if_ether.h>
46 #include <linux/if_arp.h>
47 #include <linux/ipv6.h>
48 #include <linux/icmpv6.h>
49 #include <linux/jhash.h>
50 
51 #include <net/sock.h>
52 #include <net/snmp.h>
53 
54 #include <net/ipv6.h>
55 #include <net/protocol.h>
56 #include <net/ndisc.h>
57 #include <net/ip6_route.h>
58 #include <net/addrconf.h>
59 #include <net/icmp.h>
60 
61 #include <net/netlink.h>
62 #include <linux/rtnetlink.h>
63 
64 #include <net/flow.h>
65 #include <net/ip6_checksum.h>
66 #include <net/inet_common.h>
67 #include <linux/proc_fs.h>
68 
69 #include <linux/netfilter.h>
70 #include <linux/netfilter_ipv6.h>
71 
72 static u32 ndisc_hash(const void *pkey,
73 		      const struct net_device *dev,
74 		      __u32 *hash_rnd);
75 static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
76 static bool ndisc_allow_add(const struct net_device *dev,
77 			    struct netlink_ext_ack *extack);
78 static int ndisc_constructor(struct neighbour *neigh);
79 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
80 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
81 static int pndisc_constructor(struct pneigh_entry *n);
82 static void pndisc_destructor(struct pneigh_entry *n);
83 static void pndisc_redo(struct sk_buff *skb);
84 static int ndisc_is_multicast(const void *pkey);
85 
86 static const struct neigh_ops ndisc_generic_ops = {
87 	.family =		AF_INET6,
88 	.solicit =		ndisc_solicit,
89 	.error_report =		ndisc_error_report,
90 	.output =		neigh_resolve_output,
91 	.connected_output =	neigh_connected_output,
92 };
93 
94 static const struct neigh_ops ndisc_hh_ops = {
95 	.family =		AF_INET6,
96 	.solicit =		ndisc_solicit,
97 	.error_report =		ndisc_error_report,
98 	.output =		neigh_resolve_output,
99 	.connected_output =	neigh_resolve_output,
100 };
101 
102 
103 static const struct neigh_ops ndisc_direct_ops = {
104 	.family =		AF_INET6,
105 	.output =		neigh_direct_output,
106 	.connected_output =	neigh_direct_output,
107 };
108 
109 struct neigh_table nd_tbl = {
110 	.family =	AF_INET6,
111 	.key_len =	sizeof(struct in6_addr),
112 	.protocol =	cpu_to_be16(ETH_P_IPV6),
113 	.hash =		ndisc_hash,
114 	.key_eq =	ndisc_key_eq,
115 	.constructor =	ndisc_constructor,
116 	.pconstructor =	pndisc_constructor,
117 	.pdestructor =	pndisc_destructor,
118 	.proxy_redo =	pndisc_redo,
119 	.is_multicast =	ndisc_is_multicast,
120 	.allow_add  =   ndisc_allow_add,
121 	.id =		"ndisc_cache",
122 	.parms = {
123 		.tbl			= &nd_tbl,
124 		.reachable_time		= ND_REACHABLE_TIME,
125 		.data = {
126 			[NEIGH_VAR_MCAST_PROBES] = 3,
127 			[NEIGH_VAR_UCAST_PROBES] = 3,
128 			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
129 			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
130 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
131 			[NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
132 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
133 			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
134 			[NEIGH_VAR_PROXY_QLEN] = 64,
135 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
136 			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
137 		},
138 	},
139 	.gc_interval =	  30 * HZ,
140 	.gc_thresh1 =	 128,
141 	.gc_thresh2 =	 512,
142 	.gc_thresh3 =	1024,
143 };
144 EXPORT_SYMBOL_GPL(nd_tbl);
145 
146 void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
147 			      int data_len, int pad)
148 {
149 	int space = __ndisc_opt_addr_space(data_len, pad);
150 	u8 *opt = skb_put(skb, space);
151 
152 	opt[0] = type;
153 	opt[1] = space>>3;
154 
155 	memset(opt + 2, 0, pad);
156 	opt   += pad;
157 	space -= pad;
158 
159 	memcpy(opt+2, data, data_len);
160 	data_len += 2;
161 	opt += data_len;
162 	space -= data_len;
163 	if (space > 0)
164 		memset(opt, 0, space);
165 }
166 EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
167 
168 static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
169 					  const void *data, u8 icmp6_type)
170 {
171 	__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
172 				 ndisc_addr_option_pad(skb->dev->type));
173 	ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
174 }
175 
176 static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
177 						   void *ha,
178 						   const u8 *ops_data)
179 {
180 	ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
181 	ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
182 }
183 
184 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
185 					    struct nd_opt_hdr *end)
186 {
187 	int type;
188 	if (!cur || !end || cur >= end)
189 		return NULL;
190 	type = cur->nd_opt_type;
191 	do {
192 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
193 	} while (cur < end && cur->nd_opt_type != type);
194 	return cur <= end && cur->nd_opt_type == type ? cur : NULL;
195 }
196 
197 static inline int ndisc_is_useropt(const struct net_device *dev,
198 				   struct nd_opt_hdr *opt)
199 {
200 	return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
201 		opt->nd_opt_type == ND_OPT_RDNSS ||
202 		opt->nd_opt_type == ND_OPT_DNSSL ||
203 		opt->nd_opt_type == ND_OPT_6CO ||
204 		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
205 		opt->nd_opt_type == ND_OPT_PREF64;
206 }
207 
208 static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
209 					     struct nd_opt_hdr *cur,
210 					     struct nd_opt_hdr *end)
211 {
212 	if (!cur || !end || cur >= end)
213 		return NULL;
214 	do {
215 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
216 	} while (cur < end && !ndisc_is_useropt(dev, cur));
217 	return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
218 }
219 
220 struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
221 					  u8 *opt, int opt_len,
222 					  struct ndisc_options *ndopts)
223 {
224 	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
225 
226 	if (!nd_opt || opt_len < 0 || !ndopts)
227 		return NULL;
228 	memset(ndopts, 0, sizeof(*ndopts));
229 	while (opt_len) {
230 		bool unknown = false;
231 		int l;
232 		if (opt_len < sizeof(struct nd_opt_hdr))
233 			return NULL;
234 		l = nd_opt->nd_opt_len << 3;
235 		if (opt_len < l || l == 0)
236 			return NULL;
237 		if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
238 			goto next_opt;
239 		switch (nd_opt->nd_opt_type) {
240 		case ND_OPT_SOURCE_LL_ADDR:
241 		case ND_OPT_TARGET_LL_ADDR:
242 		case ND_OPT_MTU:
243 		case ND_OPT_NONCE:
244 		case ND_OPT_REDIRECT_HDR:
245 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
246 				ND_PRINTK(2, warn,
247 					  "%s: duplicated ND6 option found: type=%d\n",
248 					  __func__, nd_opt->nd_opt_type);
249 			} else {
250 				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
251 			}
252 			break;
253 		case ND_OPT_PREFIX_INFO:
254 			ndopts->nd_opts_pi_end = nd_opt;
255 			if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
256 				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
257 			break;
258 #ifdef CONFIG_IPV6_ROUTE_INFO
259 		case ND_OPT_ROUTE_INFO:
260 			ndopts->nd_opts_ri_end = nd_opt;
261 			if (!ndopts->nd_opts_ri)
262 				ndopts->nd_opts_ri = nd_opt;
263 			break;
264 #endif
265 		default:
266 			unknown = true;
267 		}
268 		if (ndisc_is_useropt(dev, nd_opt)) {
269 			ndopts->nd_useropts_end = nd_opt;
270 			if (!ndopts->nd_useropts)
271 				ndopts->nd_useropts = nd_opt;
272 		} else if (unknown) {
273 			/*
274 			 * Unknown options must be silently ignored,
275 			 * to accommodate future extension to the
276 			 * protocol.
277 			 */
278 			ND_PRINTK(2, notice,
279 				  "%s: ignored unsupported option; type=%d, len=%d\n",
280 				  __func__,
281 				  nd_opt->nd_opt_type,
282 				  nd_opt->nd_opt_len);
283 		}
284 next_opt:
285 		opt_len -= l;
286 		nd_opt = ((void *)nd_opt) + l;
287 	}
288 	return ndopts;
289 }
290 
291 int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
292 {
293 	switch (dev->type) {
294 	case ARPHRD_ETHER:
295 	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
296 	case ARPHRD_FDDI:
297 		ipv6_eth_mc_map(addr, buf);
298 		return 0;
299 	case ARPHRD_ARCNET:
300 		ipv6_arcnet_mc_map(addr, buf);
301 		return 0;
302 	case ARPHRD_INFINIBAND:
303 		ipv6_ib_mc_map(addr, dev->broadcast, buf);
304 		return 0;
305 	case ARPHRD_IPGRE:
306 		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
307 	default:
308 		if (dir) {
309 			memcpy(buf, dev->broadcast, dev->addr_len);
310 			return 0;
311 		}
312 	}
313 	return -EINVAL;
314 }
315 EXPORT_SYMBOL(ndisc_mc_map);
316 
317 static u32 ndisc_hash(const void *pkey,
318 		      const struct net_device *dev,
319 		      __u32 *hash_rnd)
320 {
321 	return ndisc_hashfn(pkey, dev, hash_rnd);
322 }
323 
324 static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
325 {
326 	return neigh_key_eq128(n, pkey);
327 }
328 
329 static int ndisc_constructor(struct neighbour *neigh)
330 {
331 	struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
332 	struct net_device *dev = neigh->dev;
333 	struct inet6_dev *in6_dev;
334 	struct neigh_parms *parms;
335 	bool is_multicast = ipv6_addr_is_multicast(addr);
336 
337 	in6_dev = in6_dev_get(dev);
338 	if (!in6_dev) {
339 		return -EINVAL;
340 	}
341 
342 	parms = in6_dev->nd_parms;
343 	__neigh_parms_put(neigh->parms);
344 	neigh->parms = neigh_parms_clone(parms);
345 
346 	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
347 	if (!dev->header_ops) {
348 		neigh->nud_state = NUD_NOARP;
349 		neigh->ops = &ndisc_direct_ops;
350 		neigh->output = neigh_direct_output;
351 	} else {
352 		if (is_multicast) {
353 			neigh->nud_state = NUD_NOARP;
354 			ndisc_mc_map(addr, neigh->ha, dev, 1);
355 		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
356 			neigh->nud_state = NUD_NOARP;
357 			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
358 			if (dev->flags&IFF_LOOPBACK)
359 				neigh->type = RTN_LOCAL;
360 		} else if (dev->flags&IFF_POINTOPOINT) {
361 			neigh->nud_state = NUD_NOARP;
362 			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
363 		}
364 		if (dev->header_ops->cache)
365 			neigh->ops = &ndisc_hh_ops;
366 		else
367 			neigh->ops = &ndisc_generic_ops;
368 		if (neigh->nud_state&NUD_VALID)
369 			neigh->output = neigh->ops->connected_output;
370 		else
371 			neigh->output = neigh->ops->output;
372 	}
373 	in6_dev_put(in6_dev);
374 	return 0;
375 }
376 
377 static int pndisc_constructor(struct pneigh_entry *n)
378 {
379 	struct in6_addr *addr = (struct in6_addr *)&n->key;
380 	struct in6_addr maddr;
381 	struct net_device *dev = n->dev;
382 
383 	if (!dev || !__in6_dev_get(dev))
384 		return -EINVAL;
385 	addrconf_addr_solict_mult(addr, &maddr);
386 	ipv6_dev_mc_inc(dev, &maddr);
387 	return 0;
388 }
389 
390 static void pndisc_destructor(struct pneigh_entry *n)
391 {
392 	struct in6_addr *addr = (struct in6_addr *)&n->key;
393 	struct in6_addr maddr;
394 	struct net_device *dev = n->dev;
395 
396 	if (!dev || !__in6_dev_get(dev))
397 		return;
398 	addrconf_addr_solict_mult(addr, &maddr);
399 	ipv6_dev_mc_dec(dev, &maddr);
400 }
401 
402 /* called with rtnl held */
403 static bool ndisc_allow_add(const struct net_device *dev,
404 			    struct netlink_ext_ack *extack)
405 {
406 	struct inet6_dev *idev = __in6_dev_get(dev);
407 
408 	if (!idev || idev->cnf.disable_ipv6) {
409 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
410 		return false;
411 	}
412 
413 	return true;
414 }
415 
416 static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
417 				       int len)
418 {
419 	int hlen = LL_RESERVED_SPACE(dev);
420 	int tlen = dev->needed_tailroom;
421 	struct sk_buff *skb;
422 
423 	skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
424 	if (!skb)
425 		return NULL;
426 
427 	skb->protocol = htons(ETH_P_IPV6);
428 	skb->dev = dev;
429 
430 	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
431 	skb_reset_transport_header(skb);
432 
433 	/* Manually assign socket ownership as we avoid calling
434 	 * sock_alloc_send_pskb() to bypass wmem buffer limits
435 	 */
436 	rcu_read_lock();
437 	skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
438 	rcu_read_unlock();
439 
440 	return skb;
441 }
442 
443 static void ip6_nd_hdr(struct sk_buff *skb,
444 		       const struct in6_addr *saddr,
445 		       const struct in6_addr *daddr,
446 		       int hop_limit, int len)
447 {
448 	struct ipv6hdr *hdr;
449 	struct inet6_dev *idev;
450 	unsigned tclass;
451 
452 	rcu_read_lock();
453 	idev = __in6_dev_get(skb->dev);
454 	tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
455 	rcu_read_unlock();
456 
457 	skb_push(skb, sizeof(*hdr));
458 	skb_reset_network_header(skb);
459 	hdr = ipv6_hdr(skb);
460 
461 	ip6_flow_hdr(hdr, tclass, 0);
462 
463 	hdr->payload_len = htons(len);
464 	hdr->nexthdr = IPPROTO_ICMPV6;
465 	hdr->hop_limit = hop_limit;
466 
467 	hdr->saddr = *saddr;
468 	hdr->daddr = *daddr;
469 }
470 
471 void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
472 		    const struct in6_addr *saddr)
473 {
474 	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
475 	struct dst_entry *dst = skb_dst(skb);
476 	struct net_device *dev;
477 	struct inet6_dev *idev;
478 	struct net *net;
479 	struct sock *sk;
480 	int err;
481 	u8 type;
482 
483 	type = icmp6h->icmp6_type;
484 
485 	rcu_read_lock();
486 
487 	net = dev_net_rcu(skb->dev);
488 	sk = net->ipv6.ndisc_sk;
489 	if (!dst) {
490 		struct flowi6 fl6;
491 		int oif = skb->dev->ifindex;
492 
493 		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
494 		dst = icmp6_dst_alloc(skb->dev, &fl6);
495 		if (IS_ERR(dst)) {
496 			rcu_read_unlock();
497 			kfree_skb(skb);
498 			return;
499 		}
500 
501 		skb_dst_set(skb, dst);
502 	}
503 
504 	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
505 					      IPPROTO_ICMPV6,
506 					      csum_partial(icmp6h,
507 							   skb->len, 0));
508 
509 	ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
510 
511 	dev = dst_dev(dst);
512 	idev = __in6_dev_get(dev);
513 	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
514 
515 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
516 		      net, sk, skb, NULL, dev,
517 		      dst_output);
518 	if (!err) {
519 		ICMP6MSGOUT_INC_STATS(net, idev, type);
520 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
521 	}
522 
523 	rcu_read_unlock();
524 }
525 EXPORT_SYMBOL(ndisc_send_skb);
526 
527 void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
528 		   const struct in6_addr *solicited_addr,
529 		   bool router, bool solicited, bool override, bool inc_opt)
530 {
531 	struct sk_buff *skb;
532 	struct in6_addr tmpaddr;
533 	struct inet6_ifaddr *ifp;
534 	const struct in6_addr *src_addr;
535 	struct nd_msg *msg;
536 	int optlen = 0;
537 
538 	/* for anycast or proxy, solicited_addr != src_addr */
539 	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
540 	if (ifp) {
541 		src_addr = solicited_addr;
542 		if (ifp->flags & IFA_F_OPTIMISTIC)
543 			override = false;
544 		inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
545 		in6_ifa_put(ifp);
546 	} else {
547 		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
548 				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
549 				       &tmpaddr))
550 			return;
551 		src_addr = &tmpaddr;
552 	}
553 
554 	if (!dev->addr_len)
555 		inc_opt = false;
556 	if (inc_opt)
557 		optlen += ndisc_opt_addr_space(dev,
558 					       NDISC_NEIGHBOUR_ADVERTISEMENT);
559 
560 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
561 	if (!skb)
562 		return;
563 
564 	msg = skb_put(skb, sizeof(*msg));
565 	*msg = (struct nd_msg) {
566 		.icmph = {
567 			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
568 			.icmp6_router = router,
569 			.icmp6_solicited = solicited,
570 			.icmp6_override = override,
571 		},
572 		.target = *solicited_addr,
573 	};
574 
575 	if (inc_opt)
576 		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
577 				       dev->dev_addr,
578 				       NDISC_NEIGHBOUR_ADVERTISEMENT);
579 
580 	ndisc_send_skb(skb, daddr, src_addr);
581 }
582 
583 static void ndisc_send_unsol_na(struct net_device *dev)
584 {
585 	struct inet6_dev *idev;
586 	struct inet6_ifaddr *ifa;
587 
588 	idev = in6_dev_get(dev);
589 	if (!idev)
590 		return;
591 
592 	read_lock_bh(&idev->lock);
593 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
594 		/* skip tentative addresses until dad completes */
595 		if (ifa->flags & IFA_F_TENTATIVE &&
596 		    !(ifa->flags & IFA_F_OPTIMISTIC))
597 			continue;
598 
599 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
600 			      /*router=*/ !!idev->cnf.forwarding,
601 			      /*solicited=*/ false, /*override=*/ true,
602 			      /*inc_opt=*/ true);
603 	}
604 	read_unlock_bh(&idev->lock);
605 
606 	in6_dev_put(idev);
607 }
608 
609 struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
610 				const struct in6_addr *saddr, u64 nonce)
611 {
612 	int inc_opt = dev->addr_len;
613 	struct sk_buff *skb;
614 	struct nd_msg *msg;
615 	int optlen = 0;
616 
617 	if (!saddr)
618 		return NULL;
619 
620 	if (ipv6_addr_any(saddr))
621 		inc_opt = false;
622 	if (inc_opt)
623 		optlen += ndisc_opt_addr_space(dev,
624 					       NDISC_NEIGHBOUR_SOLICITATION);
625 	if (nonce != 0)
626 		optlen += 8;
627 
628 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
629 	if (!skb)
630 		return NULL;
631 
632 	msg = skb_put(skb, sizeof(*msg));
633 	*msg = (struct nd_msg) {
634 		.icmph = {
635 			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
636 		},
637 		.target = *solicit,
638 	};
639 
640 	if (inc_opt)
641 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
642 				       dev->dev_addr,
643 				       NDISC_NEIGHBOUR_SOLICITATION);
644 	if (nonce != 0) {
645 		u8 *opt = skb_put(skb, 8);
646 
647 		opt[0] = ND_OPT_NONCE;
648 		opt[1] = 8 >> 3;
649 		memcpy(opt + 2, &nonce, 6);
650 	}
651 
652 	return skb;
653 }
654 EXPORT_SYMBOL(ndisc_ns_create);
655 
656 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
657 		   const struct in6_addr *daddr, const struct in6_addr *saddr,
658 		   u64 nonce)
659 {
660 	struct in6_addr addr_buf;
661 	struct sk_buff *skb;
662 
663 	if (!saddr) {
664 		if (ipv6_get_lladdr(dev, &addr_buf,
665 				    (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
666 			return;
667 		saddr = &addr_buf;
668 	}
669 
670 	skb = ndisc_ns_create(dev, solicit, saddr, nonce);
671 
672 	if (skb)
673 		ndisc_send_skb(skb, daddr, saddr);
674 }
675 
676 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
677 		   const struct in6_addr *daddr)
678 {
679 	struct sk_buff *skb;
680 	struct rs_msg *msg;
681 	int send_sllao = dev->addr_len;
682 	int optlen = 0;
683 
684 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
685 	/*
686 	 * According to section 2.2 of RFC 4429, we must not
687 	 * send router solicitations with a sllao from
688 	 * optimistic addresses, but we may send the solicitation
689 	 * if we don't include the sllao.  So here we check
690 	 * if our address is optimistic, and if so, we
691 	 * suppress the inclusion of the sllao.
692 	 */
693 	if (send_sllao) {
694 		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
695 							   dev, 1);
696 		if (ifp) {
697 			if (ifp->flags & IFA_F_OPTIMISTIC)  {
698 				send_sllao = 0;
699 			}
700 			in6_ifa_put(ifp);
701 		} else {
702 			send_sllao = 0;
703 		}
704 	}
705 #endif
706 	if (send_sllao)
707 		optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
708 
709 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
710 	if (!skb)
711 		return;
712 
713 	msg = skb_put(skb, sizeof(*msg));
714 	*msg = (struct rs_msg) {
715 		.icmph = {
716 			.icmp6_type = NDISC_ROUTER_SOLICITATION,
717 		},
718 	};
719 
720 	if (send_sllao)
721 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
722 				       dev->dev_addr,
723 				       NDISC_ROUTER_SOLICITATION);
724 
725 	ndisc_send_skb(skb, daddr, saddr);
726 }
727 
728 
/*
 * neigh_ops .error_report callback: report the link failure on the skb's
 * dst, then free the skb.  @neigh is not used.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 *	"The sender MUST return an ICMP
	 *	 destination unreachable"
	 */
	dst_link_failure(skb);

	kfree_skb(skb);
}
738 
739 /* Called with locked neigh: either read or both */
740 
741 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
742 {
743 	struct in6_addr *saddr = NULL;
744 	struct in6_addr mcaddr;
745 	struct net_device *dev = neigh->dev;
746 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
747 	int probes = atomic_read(&neigh->probes);
748 
749 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
750 					   dev, false, 1,
751 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
752 		saddr = &ipv6_hdr(skb)->saddr;
753 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
754 	if (probes < 0) {
755 		if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
756 			ND_PRINTK(1, dbg,
757 				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
758 				  __func__, target);
759 		}
760 		ndisc_send_ns(dev, target, target, saddr, 0);
761 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
762 		neigh_app_ns(neigh);
763 	} else {
764 		addrconf_addr_solict_mult(target, &mcaddr);
765 		ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
766 	}
767 }
768 
769 static int pndisc_is_router(const void *pkey,
770 			    struct net_device *dev)
771 {
772 	struct pneigh_entry *n;
773 	int ret = -1;
774 
775 	read_lock_bh(&nd_tbl.lock);
776 	n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
777 	if (n)
778 		ret = !!(n->flags & NTF_ROUTER);
779 	read_unlock_bh(&nd_tbl.lock);
780 
781 	return ret;
782 }
783 
784 void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
785 		  const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
786 		  struct ndisc_options *ndopts)
787 {
788 	neigh_update(neigh, lladdr, new, flags, 0);
789 	/* report ndisc ops about neighbour update */
790 	ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
791 }
792 
/*
 * Handle a received Neighbour Solicitation.
 *
 * The message is validated (minimum size, non-multicast target, DAD
 * destination rule, well-formed options).  The target is then looked up:
 * first as an address on the receiving device, then - for L3 slave
 * devices - on the master device, and finally as an anycast or proxied
 * address.  Depending on the outcome the function fails our own DAD,
 * queues a delayed proxy/anycast reply, or answers with a Neighbour
 * Advertisement, updating the neighbour cache entry of the sender on the
 * way.
 *
 * Returns the drop reason to report for @skb.  SKB_CONSUMED is returned
 * on the non-DAD path once an advertisement has been sent in reply; the
 * other paths return a specific reason or whatever SKB_DR() initialised
 * 'reason' to.
 */
static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
{
	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
	u8 *lladdr = NULL;
	/* Number of bytes between the start of msg->opt and the skb tail. */
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
				    offsetof(struct nd_msg, opt));
	struct ndisc_options ndopts;
	struct net_device *dev = skb->dev;
	struct inet6_ifaddr *ifp;
	struct inet6_dev *idev = NULL;
	struct neighbour *neigh;
	/* An unspecified source address marks a DAD solicitation. */
	int dad = ipv6_addr_any(saddr);
	/* -1: not yet known; set from the proxy entry or from forwarding. */
	int is_router = -1;
	SKB_DR(reason);
	u64 nonce = 0;
	bool inc;

	if (skb->len < sizeof(struct nd_msg))
		return SKB_DROP_REASON_PKT_TOO_SMALL;

	if (ipv6_addr_is_multicast(&msg->target)) {
		ND_PRINTK(2, warn, "NS: multicast target address\n");
		return reason;
	}

	/*
	 * RFC2461 7.1.1:
	 * DAD has to be destined for solicited node multicast address.
	 */
	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
		ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
		return reason;
	}

	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
		if (!lladdr) {
			ND_PRINTK(2, warn,
				  "NS: invalid link-layer address length\n");
			return reason;
		}

		/* RFC2461 7.1.1:
		 *	If the IP source address is the unspecified address,
		 *	there MUST NOT be source link-layer address option
		 *	in the message.
		 */
		if (dad) {
			ND_PRINTK(2, warn,
				  "NS: bad DAD packet (link-layer address option)\n");
			return reason;
		}
	}
	/*
	 * Only a nonce option of length 1 (8 octets) is used; its 6 payload
	 * bytes are copied into the first 6 bytes of 'nonce'.
	 */
	if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
		memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);

	/* True when the solicitation was sent to a multicast destination. */
	inc = ipv6_addr_is_multicast(daddr);

	/* From here on a non-NULL ifp is released at 'out'. */
	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
	if (ifp) {
		/* Also entered by goto when the address is found on the master device. */
have_ifp:
		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
			if (dad) {
				if (nonce != 0 && ifp->dad_nonce == nonce) {
					u8 *np = (u8 *)&nonce;
					/* Matching nonce if looped back */
					ND_PRINTK(2, notice,
						  "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
						  ifp->idev->dev->name,
						  &ifp->addr, np);
					goto out;
				}
				/*
				 * We are colliding with another node
				 * who is doing DAD
				 * so fail our DAD process
				 */
				/*
				 * NOTE(review): this path returns without
				 * in6_ifa_put(); presumably
				 * addrconf_dad_failure() consumes the ifp
				 * reference - confirm against its definition.
				 */
				addrconf_dad_failure(skb, ifp);
				return reason;
			} else {
				/*
				 * This is not a dad solicitation.
				 * If we are an optimistic node,
				 * we should respond.
				 * Otherwise, we should ignore it.
				 */
				if (!(ifp->flags & IFA_F_OPTIMISTIC))
					goto out;
			}
		}

		/* Borrowed through ifp; no separate idev reference is taken here. */
		idev = ifp->idev;
	} else {
		struct net *net = dev_net(dev);

		/* perhaps an address on the master device */
		if (netif_is_l3_slave(dev)) {
			struct net_device *mdev;

			mdev = netdev_master_upper_dev_get_rcu(dev);
			if (mdev) {
				ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
				if (ifp)
					goto have_ifp;
			}
		}

		/* This reference is dropped at 'out' (ifp is NULL on this path). */
		idev = in6_dev_get(dev);
		if (!idev) {
			/* XXX: count this drop? */
			return reason;
		}

		/*
		 * Answer for an anycast address, or for a proxied address
		 * when forwarding and proxy_ndp are enabled and a proxy
		 * entry exists; the latter also yields is_router.
		 */
		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
		    (READ_ONCE(idev->cnf.forwarding) &&
		     (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
		      READ_ONCE(idev->cnf.proxy_ndp)) &&
		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
			/*
			 * Defer the reply unless this skb was already
			 * re-queued locally, was addressed to this host,
			 * was unicast, or PROXY_DELAY is zero.
			 */
			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
			    skb->pkt_type != PACKET_HOST &&
			    inc &&
			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
				/*
				 * for anycast or proxy,
				 * sender should delay its response
				 * by a random time between 0 and
				 * MAX_ANYCAST_DELAY_TIME seconds.
				 * (RFC2461) -- yoshfuji
				 */
				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
				if (n)
					pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
				goto out;
			}
		} else {
			/* The target is not one of ours in any sense. */
			SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST);
			goto out;
		}
	}

	if (is_router < 0)
		is_router = READ_ONCE(idev->cnf.forwarding);

	/*
	 * DAD probe for an address we own or proxy: reply to all-nodes,
	 * unsolicited, with override set only for our own addresses.
	 */
	if (dad) {
		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
			      !!is_router, false, (ifp != NULL), true);
		goto out;
	}

	if (inc)
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
	else
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);

	/*
	 *	update / create cache entry
	 *	for the source address
	 */
	/*
	 * The last argument is the 'create' flag: an entry is created for
	 * unicast solicitations, when a source link-layer address was
	 * supplied, or when the device has no link-layer address.
	 */
	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
			       !inc || lladdr || !dev->addr_len);
	if (neigh)
		ndisc_update(dev, neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE,
			     NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
	/* Reply if the sender has a cache entry or no link-layer header is needed. */
	if (neigh || !dev->header_ops) {
		ndisc_send_na(dev, saddr, &msg->target, !!is_router,
			      true, (ifp != NULL && inc), inc);
		if (neigh)
			neigh_release(neigh);
		reason = SKB_CONSUMED;
	}

out:
	/*
	 * Exactly one reference is held here: on ifp when the target was
	 * found as a configured address, otherwise on idev.
	 */
	if (ifp)
		in6_ifa_put(ifp);
	else
		in6_dev_put(idev);
	return reason;
}
978 
979 static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
980 {
981 	struct inet6_dev *idev = __in6_dev_get(dev);
982 
983 	switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
984 	case 0: /* Don't accept untracked na (absent in neighbor cache) */
985 		return 0;
986 	case 1: /* Create new entries from na if currently untracked */
987 		return 1;
988 	case 2: /* Create new entries from untracked na only if saddr is in the
989 		 * same subnet as an address configured on the interface that
990 		 * received the na
991 		 */
992 		return !!ipv6_chk_prefix(saddr, dev);
993 	default:
994 		return 0;
995 	}
996 }
997 
998 static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
999 {
1000 	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
1001 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1002 	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
1003 	u8 *lladdr = NULL;
1004 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1005 				    offsetof(struct nd_msg, opt));
1006 	struct ndisc_options ndopts;
1007 	struct net_device *dev = skb->dev;
1008 	struct inet6_dev *idev = __in6_dev_get(dev);
1009 	struct inet6_ifaddr *ifp;
1010 	struct neighbour *neigh;
1011 	SKB_DR(reason);
1012 	u8 new_state;
1013 
1014 	if (skb->len < sizeof(struct nd_msg))
1015 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1016 
1017 	if (ipv6_addr_is_multicast(&msg->target)) {
1018 		ND_PRINTK(2, warn, "NA: target address is multicast\n");
1019 		return reason;
1020 	}
1021 
1022 	if (ipv6_addr_is_multicast(daddr) &&
1023 	    msg->icmph.icmp6_solicited) {
1024 		ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
1025 		return reason;
1026 	}
1027 
1028 	/* For some 802.11 wireless deployments (and possibly other networks),
	 * there will be a NA proxy and unsolicited packets are attacks
1030 	 * and thus should not be accepted.
1031 	 * drop_unsolicited_na takes precedence over accept_untracked_na
1032 	 */
1033 	if (!msg->icmph.icmp6_solicited && idev &&
1034 	    READ_ONCE(idev->cnf.drop_unsolicited_na))
1035 		return reason;
1036 
1037 	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
1038 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1039 
1040 	if (ndopts.nd_opts_tgt_lladdr) {
1041 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
1042 		if (!lladdr) {
1043 			ND_PRINTK(2, warn,
1044 				  "NA: invalid link-layer address length\n");
1045 			return reason;
1046 		}
1047 	}
1048 	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
1049 	if (ifp) {
1050 		if (skb->pkt_type != PACKET_LOOPBACK
1051 		    && (ifp->flags & IFA_F_TENTATIVE)) {
1052 				addrconf_dad_failure(skb, ifp);
1053 				return reason;
1054 		}
1055 		/* What should we make now? The advertisement
1056 		   is invalid, but ndisc specs say nothing
1057 		   about it. It could be misconfiguration, or
1058 		   an smart proxy agent tries to help us :-)
1059 
1060 		   We should not print the error if NA has been
1061 		   received from loopback - it is just our own
1062 		   unsolicited advertisement.
1063 		 */
1064 		if (skb->pkt_type != PACKET_LOOPBACK)
1065 			ND_PRINTK(1, warn,
1066 				  "NA: %pM advertised our address %pI6c on %s!\n",
1067 				  eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
1068 		in6_ifa_put(ifp);
1069 		return reason;
1070 	}
1071 
1072 	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
1073 
1074 	/* RFC 9131 updates original Neighbour Discovery RFC 4861.
1075 	 * NAs with Target LL Address option without a corresponding
1076 	 * entry in the neighbour cache can now create a STALE neighbour
1077 	 * cache entry on routers.
1078 	 *
1079 	 *   entry accept  fwding  solicited        behaviour
1080 	 * ------- ------  ------  ---------    ----------------------
1081 	 * present      X       X         0     Set state to STALE
1082 	 * present      X       X         1     Set state to REACHABLE
1083 	 *  absent      0       X         X     Do nothing
1084 	 *  absent      1       0         X     Do nothing
1085 	 *  absent      1       1         X     Add a new STALE entry
1086 	 *
1087 	 * Note that we don't do a (daddr == all-routers-mcast) check.
1088 	 */
1089 	new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
1090 	if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
1091 		if (accept_untracked_na(dev, saddr)) {
1092 			neigh = neigh_create(&nd_tbl, &msg->target, dev);
1093 			new_state = NUD_STALE;
1094 		}
1095 	}
1096 
1097 	if (neigh && !IS_ERR(neigh)) {
1098 		u8 old_flags = neigh->flags;
1099 		struct net *net = dev_net(dev);
1100 
1101 		if (READ_ONCE(neigh->nud_state) & NUD_FAILED)
1102 			goto out;
1103 
1104 		/*
1105 		 * Don't update the neighbor cache entry on a proxy NA from
1106 		 * ourselves because either the proxied node is off link or it
1107 		 * has already sent a NA to us.
1108 		 */
1109 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
1110 		    READ_ONCE(net->ipv6.devconf_all->forwarding) &&
1111 		    READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
1112 		    pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
1113 			/* XXX: idev->cnf.proxy_ndp */
1114 			goto out;
1115 		}
1116 
1117 		ndisc_update(dev, neigh, lladdr,
1118 			     new_state,
1119 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1120 			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
1121 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1122 			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
1123 			     NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
1124 
1125 		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
1126 			/*
1127 			 * Change: router to host
1128 			 */
1129 			rt6_clean_tohost(dev_net(dev),  saddr);
1130 		}
1131 		reason = SKB_CONSUMED;
1132 out:
1133 		neigh_release(neigh);
1134 	}
1135 	return reason;
1136 }
1137 
1138 static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
1139 {
1140 	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1141 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1142 	struct neighbour *neigh;
1143 	struct inet6_dev *idev;
1144 	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1145 	struct ndisc_options ndopts;
1146 	u8 *lladdr = NULL;
1147 	SKB_DR(reason);
1148 
1149 	if (skb->len < sizeof(*rs_msg))
1150 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1151 
1152 	idev = __in6_dev_get(skb->dev);
1153 	if (!idev) {
1154 		ND_PRINTK(1, err, "RS: can't find in6 device\n");
1155 		return reason;
1156 	}
1157 
1158 	/* Don't accept RS if we're not in router mode */
1159 	if (!READ_ONCE(idev->cnf.forwarding))
1160 		goto out;
1161 
1162 	/*
1163 	 * Don't update NCE if src = ::;
1164 	 * this implies that the source node has no ip address assigned yet.
1165 	 */
1166 	if (ipv6_addr_any(saddr))
1167 		goto out;
1168 
1169 	/* Parse ND options */
1170 	if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts))
1171 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1172 
1173 	if (ndopts.nd_opts_src_lladdr) {
1174 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1175 					     skb->dev);
1176 		if (!lladdr)
1177 			goto out;
1178 	}
1179 
1180 	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1181 	if (neigh) {
1182 		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1183 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1184 			     NEIGH_UPDATE_F_OVERRIDE|
1185 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
1186 			     NDISC_ROUTER_SOLICITATION, &ndopts);
1187 		neigh_release(neigh);
1188 		reason = SKB_CONSUMED;
1189 	}
1190 out:
1191 	return reason;
1192 }
1193 
1194 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1195 {
1196 	struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1197 	struct sk_buff *skb;
1198 	struct nlmsghdr *nlh;
1199 	struct nduseroptmsg *ndmsg;
1200 	struct net *net = dev_net(ra->dev);
1201 	int err;
1202 	int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1203 				    + (opt->nd_opt_len << 3));
1204 	size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1205 
1206 	skb = nlmsg_new(msg_size, GFP_ATOMIC);
1207 	if (!skb) {
1208 		err = -ENOBUFS;
1209 		goto errout;
1210 	}
1211 
1212 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1213 	if (!nlh) {
1214 		goto nla_put_failure;
1215 	}
1216 
1217 	ndmsg = nlmsg_data(nlh);
1218 	ndmsg->nduseropt_family = AF_INET6;
1219 	ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1220 	ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1221 	ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1222 	ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1223 
1224 	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1225 
1226 	if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
1227 		goto nla_put_failure;
1228 	nlmsg_end(skb, nlh);
1229 
1230 	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1231 	return;
1232 
1233 nla_put_failure:
1234 	nlmsg_free(skb);
1235 	err = -EMSGSIZE;
1236 errout:
1237 	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1238 }
1239 
1240 static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
1241 {
1242 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1243 	bool send_ifinfo_notify = false;
1244 	struct neighbour *neigh = NULL;
1245 	struct ndisc_options ndopts;
1246 	struct fib6_info *rt = NULL;
1247 	struct inet6_dev *in6_dev;
1248 	struct fib6_table *table;
1249 	u32 defrtr_usr_metric;
1250 	unsigned int pref = 0;
1251 	__u32 old_if_flags;
1252 	struct net *net;
1253 	SKB_DR(reason);
1254 	int lifetime;
1255 	int optlen;
1256 
1257 	__u8 *opt = (__u8 *)(ra_msg + 1);
1258 
1259 	optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
1260 		sizeof(struct ra_msg);
1261 
1262 	ND_PRINTK(2, info,
1263 		  "RA: %s, dev: %s\n",
1264 		  __func__, skb->dev->name);
1265 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1266 		ND_PRINTK(2, warn, "RA: source address is not link-local\n");
1267 		return reason;
1268 	}
1269 	if (optlen < 0)
1270 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1271 
1272 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1273 	if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1274 		ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
1275 		return reason;
1276 	}
1277 #endif
1278 
1279 	in6_dev = __in6_dev_get(skb->dev);
1280 	if (!in6_dev) {
1281 		ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
1282 			  skb->dev->name);
1283 		return reason;
1284 	}
1285 
1286 	if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts))
1287 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1288 
1289 	if (!ipv6_accept_ra(in6_dev)) {
1290 		ND_PRINTK(2, info,
1291 			  "RA: %s, did not accept ra for dev: %s\n",
1292 			  __func__, skb->dev->name);
1293 		goto skip_linkparms;
1294 	}
1295 
1296 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1297 	/* skip link-specific parameters from interior routers */
1298 	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
1299 		ND_PRINTK(2, info,
1300 			  "RA: %s, nodetype is NODEFAULT, dev: %s\n",
1301 			  __func__, skb->dev->name);
1302 		goto skip_linkparms;
1303 	}
1304 #endif
1305 
1306 	if (in6_dev->if_flags & IF_RS_SENT) {
1307 		/*
1308 		 *	flag that an RA was received after an RS was sent
1309 		 *	out on this interface.
1310 		 */
1311 		in6_dev->if_flags |= IF_RA_RCVD;
1312 	}
1313 
1314 	/*
1315 	 * Remember the managed/otherconf flags from most recently
1316 	 * received RA message (RFC 2462) -- yoshfuji
1317 	 */
1318 	old_if_flags = in6_dev->if_flags;
1319 	in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1320 				IF_RA_OTHERCONF)) |
1321 				(ra_msg->icmph.icmp6_addrconf_managed ?
1322 					IF_RA_MANAGED : 0) |
1323 				(ra_msg->icmph.icmp6_addrconf_other ?
1324 					IF_RA_OTHERCONF : 0);
1325 
1326 	if (old_if_flags != in6_dev->if_flags)
1327 		send_ifinfo_notify = true;
1328 
1329 	if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
1330 		ND_PRINTK(2, info,
1331 			  "RA: %s, defrtr is false for dev: %s\n",
1332 			  __func__, skb->dev->name);
1333 		goto skip_defrtr;
1334 	}
1335 
1336 	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1337 	if (lifetime != 0 &&
1338 	    lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
1339 		ND_PRINTK(2, info,
1340 			  "RA: router lifetime (%ds) is too short: %s\n",
1341 			  lifetime, skb->dev->name);
1342 		goto skip_defrtr;
1343 	}
1344 
1345 	/* Do not accept RA with source-addr found on local machine unless
1346 	 * accept_ra_from_local is set to true.
1347 	 */
1348 	net = dev_net(in6_dev->dev);
1349 	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
1350 	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
1351 		ND_PRINTK(2, info,
1352 			  "RA from local address detected on dev: %s: default router ignored\n",
1353 			  skb->dev->name);
1354 		goto skip_defrtr;
1355 	}
1356 
1357 #ifdef CONFIG_IPV6_ROUTER_PREF
1358 	pref = ra_msg->icmph.icmp6_router_pref;
1359 	/* 10b is handled as if it were 00b (medium) */
1360 	if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1361 	    !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
1362 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
1363 #endif
1364 	/* routes added from RAs do not use nexthop objects */
1365 	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
1366 	if (rt) {
1367 		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
1368 					 rt->fib6_nh->fib_nh_dev, NULL,
1369 					  &ipv6_hdr(skb)->saddr);
1370 		if (!neigh) {
1371 			ND_PRINTK(0, err,
1372 				  "RA: %s got default router without neighbour\n",
1373 				  __func__);
1374 			fib6_info_release(rt);
1375 			return reason;
1376 		}
1377 	}
1378 	/* Set default route metric as specified by user */
1379 	defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
1380 	/* delete the route if lifetime is 0 or if metric needs change */
1381 	if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
1382 		ip6_del_rt(net, rt, false);
1383 		rt = NULL;
1384 	}
1385 
1386 	ND_PRINTK(3, info, "RA: rt: %p  lifetime: %d, metric: %d, for dev: %s\n",
1387 		  rt, lifetime, defrtr_usr_metric, skb->dev->name);
1388 	if (!rt && lifetime) {
1389 		ND_PRINTK(3, info, "RA: adding default router\n");
1390 
1391 		if (neigh)
1392 			neigh_release(neigh);
1393 
1394 		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
1395 					 skb->dev, pref, defrtr_usr_metric,
1396 					 lifetime);
1397 		if (!rt) {
1398 			ND_PRINTK(0, err,
1399 				  "RA: %s failed to add default route\n",
1400 				  __func__);
1401 			return reason;
1402 		}
1403 
1404 		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
1405 					 rt->fib6_nh->fib_nh_dev, NULL,
1406 					  &ipv6_hdr(skb)->saddr);
1407 		if (!neigh) {
1408 			ND_PRINTK(0, err,
1409 				  "RA: %s got default router without neighbour\n",
1410 				  __func__);
1411 			fib6_info_release(rt);
1412 			return reason;
1413 		}
1414 		neigh->flags |= NTF_ROUTER;
1415 	} else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
1416 		struct nl_info nlinfo = {
1417 			.nl_net = net,
1418 		};
1419 		rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1420 		inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
1421 	}
1422 
1423 	if (rt) {
1424 		table = rt->fib6_table;
1425 		spin_lock_bh(&table->tb6_lock);
1426 
1427 		fib6_set_expires(rt, jiffies + (HZ * lifetime));
1428 		fib6_add_gc_list(rt);
1429 
1430 		spin_unlock_bh(&table->tb6_lock);
1431 	}
1432 	if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
1433 	    ra_msg->icmph.icmp6_hop_limit) {
1434 		if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
1435 		    ra_msg->icmph.icmp6_hop_limit) {
1436 			WRITE_ONCE(in6_dev->cnf.hop_limit,
1437 				   ra_msg->icmph.icmp6_hop_limit);
1438 			fib6_metric_set(rt, RTAX_HOPLIMIT,
1439 					ra_msg->icmph.icmp6_hop_limit);
1440 		} else {
1441 			ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
1442 		}
1443 	}
1444 
1445 skip_defrtr:
1446 
1447 	/*
1448 	 *	Update Reachable Time and Retrans Timer
1449 	 */
1450 
1451 	if (in6_dev->nd_parms) {
1452 		unsigned long rtime = ntohl(ra_msg->retrans_timer);
1453 
1454 		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1455 			rtime = (rtime*HZ)/1000;
1456 			if (rtime < HZ/100)
1457 				rtime = HZ/100;
1458 			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
1459 			in6_dev->tstamp = jiffies;
1460 			send_ifinfo_notify = true;
1461 		}
1462 
1463 		rtime = ntohl(ra_msg->reachable_time);
1464 		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1465 			rtime = (rtime*HZ)/1000;
1466 
1467 			if (rtime < HZ/10)
1468 				rtime = HZ/10;
1469 
1470 			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
1471 				NEIGH_VAR_SET(in6_dev->nd_parms,
1472 					      BASE_REACHABLE_TIME, rtime);
1473 				NEIGH_VAR_SET(in6_dev->nd_parms,
1474 					      GC_STALETIME, 3 * rtime);
1475 				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1476 				in6_dev->tstamp = jiffies;
1477 				send_ifinfo_notify = true;
1478 			}
1479 		}
1480 	}
1481 
1482 skip_linkparms:
1483 
1484 	/*
1485 	 *	Process options.
1486 	 */
1487 
1488 	if (!neigh)
1489 		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1490 				       skb->dev, 1);
1491 	if (neigh) {
1492 		u8 *lladdr = NULL;
1493 		if (ndopts.nd_opts_src_lladdr) {
1494 			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1495 						     skb->dev);
1496 			if (!lladdr) {
1497 				ND_PRINTK(2, warn,
1498 					  "RA: invalid link-layer address length\n");
1499 				goto out;
1500 			}
1501 		}
1502 		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1503 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1504 			     NEIGH_UPDATE_F_OVERRIDE|
1505 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1506 			     NEIGH_UPDATE_F_ISROUTER,
1507 			     NDISC_ROUTER_ADVERTISEMENT, &ndopts);
1508 		reason = SKB_CONSUMED;
1509 	}
1510 
1511 	if (!ipv6_accept_ra(in6_dev)) {
1512 		ND_PRINTK(2, info,
1513 			  "RA: %s, accept_ra is false for dev: %s\n",
1514 			  __func__, skb->dev->name);
1515 		goto out;
1516 	}
1517 
1518 #ifdef CONFIG_IPV6_ROUTE_INFO
1519 	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
1520 	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
1521 			  in6_dev->dev, 0)) {
1522 		ND_PRINTK(2, info,
1523 			  "RA from local address detected on dev: %s: router info ignored.\n",
1524 			  skb->dev->name);
1525 		goto skip_routeinfo;
1526 	}
1527 
1528 	if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
1529 		struct nd_opt_hdr *p;
1530 		for (p = ndopts.nd_opts_ri;
1531 		     p;
1532 		     p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1533 			struct route_info *ri = (struct route_info *)p;
1534 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1535 			if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1536 			    ri->prefix_len == 0)
1537 				continue;
1538 #endif
1539 			if (ri->prefix_len == 0 &&
1540 			    !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
1541 				continue;
1542 			if (ri->lifetime != 0 &&
1543 			    ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
1544 				continue;
1545 			if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
1546 				continue;
1547 			if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
1548 				continue;
1549 			rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
1550 				      &ipv6_hdr(skb)->saddr);
1551 		}
1552 	}
1553 
1554 skip_routeinfo:
1555 #endif
1556 
1557 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1558 	/* skip link-specific ndopts from interior routers */
1559 	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
1560 		ND_PRINTK(2, info,
1561 			  "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
1562 			  __func__, skb->dev->name);
1563 		goto out;
1564 	}
1565 #endif
1566 
1567 	if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
1568 		struct nd_opt_hdr *p;
1569 		for (p = ndopts.nd_opts_pi;
1570 		     p;
1571 		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1572 			addrconf_prefix_rcv(skb->dev, (u8 *)p,
1573 					    (p->nd_opt_len) << 3,
1574 					    ndopts.nd_opts_src_lladdr != NULL);
1575 		}
1576 	}
1577 
1578 	if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
1579 		__be32 n;
1580 		u32 mtu;
1581 
1582 		memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1583 		mtu = ntohl(n);
1584 
1585 		if (in6_dev->ra_mtu != mtu) {
1586 			in6_dev->ra_mtu = mtu;
1587 			send_ifinfo_notify = true;
1588 		}
1589 
1590 		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1591 			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
1592 		} else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
1593 			WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
1594 			fib6_metric_set(rt, RTAX_MTU, mtu);
1595 			rt6_mtu_change(skb->dev, mtu);
1596 		}
1597 	}
1598 
1599 	if (ndopts.nd_useropts) {
1600 		struct nd_opt_hdr *p;
1601 		for (p = ndopts.nd_useropts;
1602 		     p;
1603 		     p = ndisc_next_useropt(skb->dev, p,
1604 					    ndopts.nd_useropts_end)) {
1605 			ndisc_ra_useropt(skb, p);
1606 		}
1607 	}
1608 
1609 	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1610 		ND_PRINTK(2, warn, "RA: invalid RA options\n");
1611 	}
1612 out:
1613 	/* Send a notify if RA changed managed/otherconf flags or
1614 	 * timer settings or ra_mtu value
1615 	 */
1616 	if (send_ifinfo_notify)
1617 		inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1618 
1619 	fib6_info_release(rt);
1620 	if (neigh)
1621 		neigh_release(neigh);
1622 	return reason;
1623 }
1624 
1625 static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb)
1626 {
1627 	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
1628 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1629 				    offsetof(struct rd_msg, opt));
1630 	struct ndisc_options ndopts;
1631 	SKB_DR(reason);
1632 	u8 *hdr;
1633 
1634 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1635 	switch (skb->ndisc_nodetype) {
1636 	case NDISC_NODETYPE_HOST:
1637 	case NDISC_NODETYPE_NODEFAULT:
1638 		ND_PRINTK(2, warn,
1639 			  "Redirect: from host or unauthorized router\n");
1640 		return reason;
1641 	}
1642 #endif
1643 
1644 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1645 		ND_PRINTK(2, warn,
1646 			  "Redirect: source address is not link-local\n");
1647 		return reason;
1648 	}
1649 
1650 	if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
1651 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1652 
1653 	if (!ndopts.nd_opts_rh) {
1654 		ip6_redirect_no_header(skb, dev_net(skb->dev),
1655 					skb->dev->ifindex);
1656 		return reason;
1657 	}
1658 
1659 	hdr = (u8 *)ndopts.nd_opts_rh;
1660 	hdr += 8;
1661 	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
1662 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1663 
1664 	return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
1665 }
1666 
1667 static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
1668 					   struct sk_buff *orig_skb,
1669 					   int rd_len)
1670 {
1671 	u8 *opt = skb_put(skb, rd_len);
1672 
1673 	memset(opt, 0, 8);
1674 	*(opt++) = ND_OPT_REDIRECT_HDR;
1675 	*(opt++) = (rd_len >> 3);
1676 	opt += 6;
1677 
1678 	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
1679 		      rd_len - 8);
1680 }
1681 
1682 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1683 {
1684 	struct net_device *dev = skb->dev;
1685 	struct net *net = dev_net_rcu(dev);
1686 	struct sock *sk = net->ipv6.ndisc_sk;
1687 	int optlen = 0;
1688 	struct inet_peer *peer;
1689 	struct sk_buff *buff;
1690 	struct rd_msg *msg;
1691 	struct in6_addr saddr_buf;
1692 	struct rt6_info *rt;
1693 	struct dst_entry *dst;
1694 	struct flowi6 fl6;
1695 	int rd_len;
1696 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
1697 	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
1698 	bool ret;
1699 
1700 	if (netif_is_l3_master(dev)) {
1701 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
1702 		if (!dev)
1703 			return;
1704 	}
1705 
1706 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1707 		ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
1708 			  dev->name);
1709 		return;
1710 	}
1711 
1712 	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1713 	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1714 		ND_PRINTK(2, warn,
1715 			  "Redirect: target address is not link-local unicast\n");
1716 		return;
1717 	}
1718 
1719 	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1720 			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1721 
1722 	dst = ip6_route_output(net, NULL, &fl6);
1723 	if (dst->error) {
1724 		dst_release(dst);
1725 		return;
1726 	}
1727 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1728 	if (IS_ERR(dst))
1729 		return;
1730 
1731 	rt = dst_rt6_info(dst);
1732 
1733 	if (rt->rt6i_flags & RTF_GATEWAY) {
1734 		ND_PRINTK(2, warn,
1735 			  "Redirect: destination is not a neighbour\n");
1736 		goto release;
1737 	}
1738 
1739 	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
1740 	ret = inet_peer_xrlim_allow(peer, 1*HZ);
1741 
1742 	if (!ret)
1743 		goto release;
1744 
1745 	if (dev->addr_len) {
1746 		struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
1747 		if (!neigh) {
1748 			ND_PRINTK(2, warn,
1749 				  "Redirect: no neigh for target address\n");
1750 			goto release;
1751 		}
1752 
1753 		read_lock_bh(&neigh->lock);
1754 		if (neigh->nud_state & NUD_VALID) {
1755 			memcpy(ha_buf, neigh->ha, dev->addr_len);
1756 			read_unlock_bh(&neigh->lock);
1757 			ha = ha_buf;
1758 			optlen += ndisc_redirect_opt_addr_space(dev, neigh,
1759 								ops_data_buf,
1760 								&ops_data);
1761 		} else
1762 			read_unlock_bh(&neigh->lock);
1763 
1764 		neigh_release(neigh);
1765 	}
1766 
1767 	rd_len = min_t(unsigned int,
1768 		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
1769 		       skb->len + 8);
1770 	rd_len &= ~0x7;
1771 	optlen += rd_len;
1772 
1773 	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
1774 	if (!buff)
1775 		goto release;
1776 
1777 	msg = skb_put(buff, sizeof(*msg));
1778 	*msg = (struct rd_msg) {
1779 		.icmph = {
1780 			.icmp6_type = NDISC_REDIRECT,
1781 		},
1782 		.target = *target,
1783 		.dest = ipv6_hdr(skb)->daddr,
1784 	};
1785 
1786 	/*
1787 	 *	include target_address option
1788 	 */
1789 
1790 	if (ha)
1791 		ndisc_fill_redirect_addr_option(buff, ha, ops_data);
1792 
1793 	/*
1794 	 *	build redirect option and copy skb over to the new packet.
1795 	 */
1796 
1797 	if (rd_len)
1798 		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
1799 
1800 	skb_dst_set(buff, dst);
1801 	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
1802 	return;
1803 
1804 release:
1805 	dst_release(dst);
1806 }
1807 
1808 static void pndisc_redo(struct sk_buff *skb)
1809 {
1810 	enum skb_drop_reason reason = ndisc_recv_ns(skb);
1811 
1812 	kfree_skb_reason(skb, reason);
1813 }
1814 
/* Report whether the neighbour key, an IPv6 address, is a multicast address. */
static int ndisc_is_multicast(const void *pkey)
{
	struct in6_addr *addr = (struct in6_addr *)pkey;

	return ipv6_addr_is_multicast(addr);
}
1819 
1820 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
1821 {
1822 	struct inet6_dev *idev = __in6_dev_get(skb->dev);
1823 
1824 	if (!idev)
1825 		return true;
1826 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
1827 	    READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
1828 		net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
1829 		return true;
1830 	}
1831 	return false;
1832 }
1833 
1834 enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
1835 {
1836 	struct nd_msg *msg;
1837 	SKB_DR(reason);
1838 
1839 	if (ndisc_suppress_frag_ndisc(skb))
1840 		return SKB_DROP_REASON_IPV6_NDISC_FRAG;
1841 
1842 	if (skb_linearize(skb))
1843 		return SKB_DROP_REASON_NOMEM;
1844 
1845 	msg = (struct nd_msg *)skb_transport_header(skb);
1846 
1847 	__skb_push(skb, skb->data - skb_transport_header(skb));
1848 
1849 	if (ipv6_hdr(skb)->hop_limit != 255) {
1850 		ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
1851 			  ipv6_hdr(skb)->hop_limit);
1852 		return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
1853 	}
1854 
1855 	if (msg->icmph.icmp6_code != 0) {
1856 		ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
1857 			  msg->icmph.icmp6_code);
1858 		return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
1859 	}
1860 
1861 	switch (msg->icmph.icmp6_type) {
1862 	case NDISC_NEIGHBOUR_SOLICITATION:
1863 		memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1864 		reason = ndisc_recv_ns(skb);
1865 		break;
1866 
1867 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
1868 		reason = ndisc_recv_na(skb);
1869 		break;
1870 
1871 	case NDISC_ROUTER_SOLICITATION:
1872 		reason = ndisc_recv_rs(skb);
1873 		break;
1874 
1875 	case NDISC_ROUTER_ADVERTISEMENT:
1876 		reason = ndisc_router_discovery(skb);
1877 		break;
1878 
1879 	case NDISC_REDIRECT:
1880 		reason = ndisc_redirect_rcv(skb);
1881 		break;
1882 	}
1883 
1884 	return reason;
1885 }
1886 
1887 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1888 {
1889 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1890 	struct netdev_notifier_change_info *change_info;
1891 	struct net *net = dev_net(dev);
1892 	struct inet6_dev *idev;
1893 	bool evict_nocarrier;
1894 
1895 	switch (event) {
1896 	case NETDEV_CHANGEADDR:
1897 		neigh_changeaddr(&nd_tbl, dev);
1898 		fib6_run_gc(0, net, false);
1899 		fallthrough;
1900 	case NETDEV_UP:
1901 		idev = in6_dev_get(dev);
1902 		if (!idev)
1903 			break;
1904 		if (READ_ONCE(idev->cnf.ndisc_notify) ||
1905 		    READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
1906 			ndisc_send_unsol_na(dev);
1907 		in6_dev_put(idev);
1908 		break;
1909 	case NETDEV_CHANGE:
1910 		idev = in6_dev_get(dev);
1911 		if (!idev)
1912 			evict_nocarrier = true;
1913 		else {
1914 			evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
1915 					  READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
1916 			in6_dev_put(idev);
1917 		}
1918 
1919 		change_info = ptr;
1920 		if (change_info->flags_changed & IFF_NOARP)
1921 			neigh_changeaddr(&nd_tbl, dev);
1922 		if (evict_nocarrier && !netif_carrier_ok(dev))
1923 			neigh_carrier_down(&nd_tbl, dev);
1924 		break;
1925 	case NETDEV_DOWN:
1926 		neigh_ifdown(&nd_tbl, dev);
1927 		fib6_run_gc(0, net, false);
1928 		break;
1929 	case NETDEV_NOTIFY_PEERS:
1930 		ndisc_send_unsol_na(dev);
1931 		break;
1932 	default:
1933 		break;
1934 	}
1935 
1936 	return NOTIFY_DONE;
1937 }
1938 
1939 static struct notifier_block ndisc_netdev_notifier = {
1940 	.notifier_call = ndisc_netdev_event,
1941 	.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
1942 };
1943 
1944 #ifdef CONFIG_SYSCTL
1945 static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
1946 					 const char *func, const char *dev_name)
1947 {
1948 	static char warncomm[TASK_COMM_LEN];
1949 	static int warned;
1950 	if (strcmp(warncomm, current->comm) && warned < 5) {
1951 		strscpy(warncomm, current->comm);
1952 		pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
1953 			warncomm, func,
1954 			dev_name, ctl->procname,
1955 			dev_name, ctl->procname);
1956 		warned++;
1957 	}
1958 }
1959 
1960 int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer,
1961 		size_t *lenp, loff_t *ppos)
1962 {
1963 	struct net_device *dev = ctl->extra1;
1964 	struct inet6_dev *idev;
1965 	int ret;
1966 
1967 	if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1968 	    (strcmp(ctl->procname, "base_reachable_time") == 0))
1969 		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1970 
1971 	if (strcmp(ctl->procname, "retrans_time") == 0)
1972 		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
1973 
1974 	else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1975 		ret = neigh_proc_dointvec_jiffies(ctl, write,
1976 						  buffer, lenp, ppos);
1977 
1978 	else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1979 		 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1980 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
1981 						     buffer, lenp, ppos);
1982 	else
1983 		ret = -1;
1984 
1985 	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1986 		if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
1987 			idev->nd_parms->reachable_time =
1988 					neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
1989 		WRITE_ONCE(idev->tstamp, jiffies);
1990 		inet6_ifinfo_notify(RTM_NEWLINK, idev);
1991 		in6_dev_put(idev);
1992 	}
1993 	return ret;
1994 }
1995 
1996 
1997 #endif
1998 
1999 static int __net_init ndisc_net_init(struct net *net)
2000 {
2001 	struct ipv6_pinfo *np;
2002 	struct sock *sk;
2003 	int err;
2004 
2005 	err = inet_ctl_sock_create(&sk, PF_INET6,
2006 				   SOCK_RAW, IPPROTO_ICMPV6, net);
2007 	if (err < 0) {
2008 		ND_PRINTK(0, err,
2009 			  "NDISC: Failed to initialize the control socket (err %d)\n",
2010 			  err);
2011 		return err;
2012 	}
2013 
2014 	net->ipv6.ndisc_sk = sk;
2015 
2016 	np = inet6_sk(sk);
2017 	np->hop_limit = 255;
2018 	/* Do not loopback ndisc messages */
2019 	inet6_clear_bit(MC6_LOOP, sk);
2020 
2021 	return 0;
2022 }
2023 
2024 static void __net_exit ndisc_net_exit(struct net *net)
2025 {
2026 	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
2027 }
2028 
2029 static struct pernet_operations ndisc_net_ops = {
2030 	.init = ndisc_net_init,
2031 	.exit = ndisc_net_exit,
2032 };
2033 
2034 int __init ndisc_init(void)
2035 {
2036 	int err;
2037 
2038 	err = register_pernet_subsys(&ndisc_net_ops);
2039 	if (err)
2040 		return err;
2041 	/*
2042 	 * Initialize the neighbour table
2043 	 */
2044 	neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);
2045 
2046 #ifdef CONFIG_SYSCTL
2047 	err = neigh_sysctl_register(NULL, &nd_tbl.parms,
2048 				    ndisc_ifinfo_sysctl_change);
2049 	if (err)
2050 		goto out_unregister_pernet;
2051 out:
2052 #endif
2053 	return err;
2054 
2055 #ifdef CONFIG_SYSCTL
2056 out_unregister_pernet:
2057 	unregister_pernet_subsys(&ndisc_net_ops);
2058 	goto out;
2059 #endif
2060 }
2061 
2062 int __init ndisc_late_init(void)
2063 {
2064 	return register_netdevice_notifier(&ndisc_netdev_notifier);
2065 }
2066 
2067 void ndisc_late_cleanup(void)
2068 {
2069 	unregister_netdevice_notifier(&ndisc_netdev_notifier);
2070 }
2071 
2072 void ndisc_cleanup(void)
2073 {
2074 #ifdef CONFIG_SYSCTL
2075 	neigh_sysctl_unregister(&nd_tbl.parms);
2076 #endif
2077 	neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
2078 	unregister_pernet_subsys(&ndisc_net_ops);
2079 }
2080