xref: /linux/net/ipv6/ndisc.c (revision dfecb0c5af3b07ebfa84be63a7a21bfc9e29a872)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Neighbour Discovery for IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *	Mike Shaver		<shaver@ingenia.com>
9  */
10 
11 /*
12  *	Changes:
13  *
14  *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
15  *	Pierre Ynard			:	export userland ND options
16  *						through netlink (RDNSS support)
17  *	Lars Fenneberg			:	fixed MTU setting on receipt
18  *						of an RA.
19  *	Janos Farkas			:	kmalloc failure checks
20  *	Alexey Kuznetsov		:	state machine reworked
21  *						and moved to net/core.
22  *	Pekka Savola			:	RFC2461 validation
23  *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
24  */
25 
26 #define pr_fmt(fmt) "ICMPv6: " fmt
27 
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/sched.h>
34 #include <linux/net.h>
35 #include <linux/in6.h>
36 #include <linux/route.h>
37 #include <linux/init.h>
38 #include <linux/rcupdate.h>
39 #include <linux/slab.h>
40 #ifdef CONFIG_SYSCTL
41 #include <linux/sysctl.h>
42 #endif
43 
44 #include <linux/if_addr.h>
45 #include <linux/if_ether.h>
46 #include <linux/if_arp.h>
47 #include <linux/ipv6.h>
48 #include <linux/icmpv6.h>
49 #include <linux/jhash.h>
50 
51 #include <net/sock.h>
52 #include <net/snmp.h>
53 
54 #include <net/ipv6.h>
55 #include <net/protocol.h>
56 #include <net/ndisc.h>
57 #include <net/ip6_route.h>
58 #include <net/addrconf.h>
59 #include <net/icmp.h>
60 
61 #include <net/netlink.h>
62 #include <linux/rtnetlink.h>
63 
64 #include <net/flow.h>
65 #include <net/ip6_checksum.h>
66 #include <net/inet_common.h>
67 #include <linux/proc_fs.h>
68 
69 #include <linux/netfilter.h>
70 #include <linux/netfilter_ipv6.h>
71 
/*
 * Forward declarations of the file-local callbacks that are referenced by
 * the neigh_ops tables and by the nd_tbl initialiser further down.
 */
static u32 ndisc_hash(const void *pkey,
		      const struct net_device *dev,
		      __u32 *hash_rnd);
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
static bool ndisc_allow_add(const struct net_device *dev,
			    struct netlink_ext_ack *extack);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
static int ndisc_is_multicast(const void *pkey);
85 
86 static const struct neigh_ops ndisc_generic_ops = {
87 	.family =		AF_INET6,
88 	.solicit =		ndisc_solicit,
89 	.error_report =		ndisc_error_report,
90 	.output =		neigh_resolve_output,
91 	.connected_output =	neigh_connected_output,
92 };
93 
94 static const struct neigh_ops ndisc_hh_ops = {
95 	.family =		AF_INET6,
96 	.solicit =		ndisc_solicit,
97 	.error_report =		ndisc_error_report,
98 	.output =		neigh_resolve_output,
99 	.connected_output =	neigh_resolve_output,
100 };
101 
102 
103 static const struct neigh_ops ndisc_direct_ops = {
104 	.family =		AF_INET6,
105 	.output =		neigh_direct_output,
106 	.connected_output =	neigh_direct_output,
107 };
108 
109 struct neigh_table nd_tbl = {
110 	.family =	AF_INET6,
111 	.key_len =	sizeof(struct in6_addr),
112 	.protocol =	cpu_to_be16(ETH_P_IPV6),
113 	.hash =		ndisc_hash,
114 	.key_eq =	ndisc_key_eq,
115 	.constructor =	ndisc_constructor,
116 	.pconstructor =	pndisc_constructor,
117 	.pdestructor =	pndisc_destructor,
118 	.proxy_redo =	pndisc_redo,
119 	.is_multicast =	ndisc_is_multicast,
120 	.allow_add  =   ndisc_allow_add,
121 	.id =		"ndisc_cache",
122 	.parms = {
123 		.tbl			= &nd_tbl,
124 		.reachable_time		= ND_REACHABLE_TIME,
125 		.data = {
126 			[NEIGH_VAR_MCAST_PROBES] = 3,
127 			[NEIGH_VAR_UCAST_PROBES] = 3,
128 			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
129 			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
130 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
131 			[NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
132 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
133 			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_DEFAULT,
134 			[NEIGH_VAR_PROXY_QLEN] = 64,
135 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
136 			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
137 		},
138 	},
139 	.gc_interval =	  30 * HZ,
140 	.gc_thresh1 =	 128,
141 	.gc_thresh2 =	 512,
142 	.gc_thresh3 =	1024,
143 };
144 EXPORT_SYMBOL_GPL(nd_tbl);
145 
146 void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
147 			      int data_len, int pad)
148 {
149 	int space = __ndisc_opt_addr_space(data_len, pad);
150 	u8 *opt = skb_put(skb, space);
151 
152 	opt[0] = type;
153 	opt[1] = space>>3;
154 
155 	memset(opt + 2, 0, pad);
156 	opt   += pad;
157 	space -= pad;
158 
159 	memcpy(opt+2, data, data_len);
160 	data_len += 2;
161 	opt += data_len;
162 	space -= data_len;
163 	if (space > 0)
164 		memset(opt, 0, space);
165 }
166 EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
167 
168 static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
169 					  const void *data, u8 icmp6_type)
170 {
171 	__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
172 				 ndisc_addr_option_pad(skb->dev->type));
173 	ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
174 }
175 
176 static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
177 						   void *ha,
178 						   const u8 *ops_data)
179 {
180 	ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
181 	ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
182 }
183 
184 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
185 					    struct nd_opt_hdr *end)
186 {
187 	int type;
188 	if (!cur || !end || cur >= end)
189 		return NULL;
190 	type = cur->nd_opt_type;
191 	do {
192 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
193 	} while (cur < end && cur->nd_opt_type != type);
194 	return cur <= end && cur->nd_opt_type == type ? cur : NULL;
195 }
196 
197 static inline int ndisc_is_useropt(const struct net_device *dev,
198 				   struct nd_opt_hdr *opt)
199 {
200 	return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
201 		opt->nd_opt_type == ND_OPT_RDNSS ||
202 		opt->nd_opt_type == ND_OPT_DNSSL ||
203 		opt->nd_opt_type == ND_OPT_6CO ||
204 		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
205 		opt->nd_opt_type == ND_OPT_PREF64;
206 }
207 
208 static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
209 					     struct nd_opt_hdr *cur,
210 					     struct nd_opt_hdr *end)
211 {
212 	if (!cur || !end || cur >= end)
213 		return NULL;
214 	do {
215 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
216 	} while (cur < end && !ndisc_is_useropt(dev, cur));
217 	return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
218 }
219 
/*
 * Walk the ND options in [@opt, @opt + @opt_len) and record pointers to
 * the interesting ones in @ndopts.
 *
 * @dev:     device passed through to ndisc_ops_parse_options() and
 *           ndisc_is_useropt()
 * @opt:     start of the option area
 * @opt_len: number of bytes in the option area
 * @ndopts:  result structure; it is zeroed before parsing starts
 *
 * Returns @ndopts on success.  Returns NULL when @opt or @ndopts is NULL,
 * when @opt_len is negative, or when the option area is malformed: fewer
 * bytes left than an option header, an option with length zero, or an
 * option that claims to extend past the remaining bytes.
 */
struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
					  u8 *opt, int opt_len,
					  struct ndisc_options *ndopts)
{
	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;

	if (!nd_opt || opt_len < 0 || !ndopts)
		return NULL;
	memset(ndopts, 0, sizeof(*ndopts));
	while (opt_len) {
		bool unknown = false;
		int l;
		if (opt_len < sizeof(struct nd_opt_hdr))
			return NULL;
		/* nd_opt_len counts units of 8 octets; l is the size in bytes */
		l = nd_opt->nd_opt_len << 3;
		if (opt_len < l || l == 0)
			return NULL;
		/* a non-zero return from the ndisc ops hook skips the generic handling */
		if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
			goto next_opt;
		switch (nd_opt->nd_opt_type) {
		case ND_OPT_SOURCE_LL_ADDR:
		case ND_OPT_TARGET_LL_ADDR:
		case ND_OPT_MTU:
		case ND_OPT_NONCE:
		case ND_OPT_REDIRECT_HDR:
			/* only the first occurrence is recorded; later ones are just logged */
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
				net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n",
						    __func__, nd_opt->nd_opt_type);
			} else {
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			}
			break;
		case ND_OPT_PREFIX_INFO:
			/* remember both the first and the most recent prefix option */
			ndopts->nd_opts_pi_end = nd_opt;
			if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
			break;
#ifdef CONFIG_IPV6_ROUTE_INFO
		case ND_OPT_ROUTE_INFO:
			/* remember both the first and the most recent route info option */
			ndopts->nd_opts_ri_end = nd_opt;
			if (!ndopts->nd_opts_ri)
				ndopts->nd_opts_ri = nd_opt;
			break;
#endif
		default:
			unknown = true;
		}
		/*
		 * User options are tracked as a first/last pair, independently
		 * of the switch above (ND_OPT_PREFIX_INFO is handled by both).
		 */
		if (ndisc_is_useropt(dev, nd_opt)) {
			ndopts->nd_useropts_end = nd_opt;
			if (!ndopts->nd_useropts)
				ndopts->nd_useropts = nd_opt;
		} else if (unknown) {
			/*
			 * Unknown options must be silently ignored,
			 * to accommodate future extension to the
			 * protocol.
			 */
			net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n",
					    __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len);
		}
next_opt:
		/* advance to the next option */
		opt_len -= l;
		nd_opt = ((void *)nd_opt) + l;
	}
	return ndopts;
}
286 
287 int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
288 {
289 	switch (dev->type) {
290 	case ARPHRD_ETHER:
291 	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
292 	case ARPHRD_FDDI:
293 		ipv6_eth_mc_map(addr, buf);
294 		return 0;
295 	case ARPHRD_ARCNET:
296 		ipv6_arcnet_mc_map(addr, buf);
297 		return 0;
298 	case ARPHRD_INFINIBAND:
299 		ipv6_ib_mc_map(addr, dev->broadcast, buf);
300 		return 0;
301 	case ARPHRD_IPGRE:
302 		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
303 	default:
304 		if (dir) {
305 			memcpy(buf, dev->broadcast, dev->addr_len);
306 			return 0;
307 		}
308 	}
309 	return -EINVAL;
310 }
311 EXPORT_SYMBOL(ndisc_mc_map);
312 
313 static u32 ndisc_hash(const void *pkey,
314 		      const struct net_device *dev,
315 		      __u32 *hash_rnd)
316 {
317 	return ndisc_hashfn(pkey, dev, hash_rnd);
318 }
319 
320 static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
321 {
322 	return neigh_key_eq128(n, pkey);
323 }
324 
325 static int ndisc_constructor(struct neighbour *neigh)
326 {
327 	struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
328 	struct net_device *dev = neigh->dev;
329 	struct inet6_dev *in6_dev;
330 	struct neigh_parms *parms;
331 	bool is_multicast = ipv6_addr_is_multicast(addr);
332 
333 	in6_dev = in6_dev_get(dev);
334 	if (!in6_dev) {
335 		return -EINVAL;
336 	}
337 
338 	parms = in6_dev->nd_parms;
339 	__neigh_parms_put(neigh->parms);
340 	neigh->parms = neigh_parms_clone(parms);
341 
342 	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
343 	if (!dev->header_ops) {
344 		neigh->nud_state = NUD_NOARP;
345 		neigh->ops = &ndisc_direct_ops;
346 		neigh->output = neigh_direct_output;
347 	} else {
348 		if (is_multicast) {
349 			neigh->nud_state = NUD_NOARP;
350 			ndisc_mc_map(addr, neigh->ha, dev, 1);
351 		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
352 			neigh->nud_state = NUD_NOARP;
353 			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
354 			if (dev->flags&IFF_LOOPBACK)
355 				neigh->type = RTN_LOCAL;
356 		} else if (dev->flags&IFF_POINTOPOINT) {
357 			neigh->nud_state = NUD_NOARP;
358 			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
359 		}
360 		if (dev->header_ops->cache)
361 			neigh->ops = &ndisc_hh_ops;
362 		else
363 			neigh->ops = &ndisc_generic_ops;
364 		if (neigh->nud_state&NUD_VALID)
365 			neigh->output = neigh->ops->connected_output;
366 		else
367 			neigh->output = neigh->ops->output;
368 	}
369 	in6_dev_put(in6_dev);
370 	return 0;
371 }
372 
373 static int pndisc_constructor(struct pneigh_entry *n)
374 {
375 	struct in6_addr *addr = (struct in6_addr *)&n->key;
376 	struct net_device *dev = n->dev;
377 	struct in6_addr maddr;
378 
379 	if (!dev)
380 		return -EINVAL;
381 
382 	addrconf_addr_solict_mult(addr, &maddr);
383 	return ipv6_dev_mc_inc(dev, &maddr);
384 }
385 
386 static void pndisc_destructor(struct pneigh_entry *n)
387 {
388 	struct in6_addr *addr = (struct in6_addr *)&n->key;
389 	struct net_device *dev = n->dev;
390 	struct in6_addr maddr;
391 
392 	if (!dev)
393 		return;
394 
395 	addrconf_addr_solict_mult(addr, &maddr);
396 	ipv6_dev_mc_dec(dev, &maddr);
397 }
398 
399 /* called with rtnl held */
400 static bool ndisc_allow_add(const struct net_device *dev,
401 			    struct netlink_ext_ack *extack)
402 {
403 	struct inet6_dev *idev = __in6_dev_get(dev);
404 
405 	if (!idev || idev->cnf.disable_ipv6) {
406 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
407 		return false;
408 	}
409 
410 	return true;
411 }
412 
413 static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
414 				       int len)
415 {
416 	int hlen = LL_RESERVED_SPACE(dev);
417 	int tlen = dev->needed_tailroom;
418 	struct sk_buff *skb;
419 
420 	skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
421 	if (!skb)
422 		return NULL;
423 
424 	skb->protocol = htons(ETH_P_IPV6);
425 	skb->dev = dev;
426 
427 	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
428 	skb_reset_transport_header(skb);
429 
430 	/* Manually assign socket ownership as we avoid calling
431 	 * sock_alloc_send_pskb() to bypass wmem buffer limits
432 	 */
433 	rcu_read_lock();
434 	skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
435 	rcu_read_unlock();
436 
437 	return skb;
438 }
439 
440 static void ip6_nd_hdr(struct sk_buff *skb,
441 		       const struct in6_addr *saddr,
442 		       const struct in6_addr *daddr,
443 		       int hop_limit, int len)
444 {
445 	struct ipv6hdr *hdr;
446 	struct inet6_dev *idev;
447 	unsigned tclass;
448 
449 	rcu_read_lock();
450 	idev = __in6_dev_get(skb->dev);
451 	tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
452 	rcu_read_unlock();
453 
454 	skb_push(skb, sizeof(*hdr));
455 	skb_reset_network_header(skb);
456 	hdr = ipv6_hdr(skb);
457 
458 	ip6_flow_hdr(hdr, tclass, 0);
459 
460 	hdr->payload_len = htons(len);
461 	hdr->nexthdr = IPPROTO_ICMPV6;
462 	hdr->hop_limit = hop_limit;
463 
464 	hdr->saddr = *saddr;
465 	hdr->daddr = *daddr;
466 }
467 
468 void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
469 		    const struct in6_addr *saddr)
470 {
471 	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
472 	struct dst_entry *dst = skb_dst(skb);
473 	struct net_device *dev;
474 	struct inet6_dev *idev;
475 	struct net *net;
476 	struct sock *sk;
477 	int err;
478 	u8 type;
479 
480 	type = icmp6h->icmp6_type;
481 
482 	rcu_read_lock();
483 
484 	net = dev_net_rcu(skb->dev);
485 	sk = net->ipv6.ndisc_sk;
486 	if (!dst) {
487 		struct flowi6 fl6;
488 		int oif = skb->dev->ifindex;
489 
490 		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
491 		dst = icmp6_dst_alloc(skb->dev, &fl6);
492 		if (IS_ERR(dst)) {
493 			rcu_read_unlock();
494 			kfree_skb(skb);
495 			return;
496 		}
497 
498 		skb_dst_set(skb, dst);
499 	}
500 
501 	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
502 					      IPPROTO_ICMPV6,
503 					      csum_partial(icmp6h,
504 							   skb->len, 0));
505 
506 	ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
507 
508 	dev = dst_dev_rcu(dst);
509 	idev = __in6_dev_get(dev);
510 	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
511 
512 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
513 		      net, sk, skb, NULL, dev,
514 		      dst_output);
515 	if (!err) {
516 		ICMP6MSGOUT_INC_STATS(net, idev, type);
517 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
518 	}
519 
520 	rcu_read_unlock();
521 }
522 EXPORT_SYMBOL(ndisc_send_skb);
523 
524 void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
525 		   const struct in6_addr *solicited_addr,
526 		   bool router, bool solicited, bool override, bool inc_opt)
527 {
528 	struct sk_buff *skb;
529 	struct in6_addr tmpaddr;
530 	struct inet6_ifaddr *ifp;
531 	const struct in6_addr *src_addr;
532 	struct nd_msg *msg;
533 	int optlen = 0;
534 
535 	/* for anycast or proxy, solicited_addr != src_addr */
536 	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
537 	if (ifp) {
538 		src_addr = solicited_addr;
539 		if (ifp->flags & IFA_F_OPTIMISTIC)
540 			override = false;
541 		inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
542 		in6_ifa_put(ifp);
543 	} else {
544 		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
545 				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
546 				       &tmpaddr))
547 			return;
548 		src_addr = &tmpaddr;
549 	}
550 
551 	if (!dev->addr_len)
552 		inc_opt = false;
553 	if (inc_opt)
554 		optlen += ndisc_opt_addr_space(dev,
555 					       NDISC_NEIGHBOUR_ADVERTISEMENT);
556 
557 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
558 	if (!skb)
559 		return;
560 
561 	msg = skb_put(skb, sizeof(*msg));
562 	*msg = (struct nd_msg) {
563 		.icmph = {
564 			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
565 			.icmp6_router = router,
566 			.icmp6_solicited = solicited,
567 			.icmp6_override = override,
568 		},
569 		.target = *solicited_addr,
570 	};
571 
572 	if (inc_opt)
573 		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
574 				       dev->dev_addr,
575 				       NDISC_NEIGHBOUR_ADVERTISEMENT);
576 
577 	ndisc_send_skb(skb, daddr, src_addr);
578 }
579 EXPORT_SYMBOL_GPL(ndisc_send_na);
580 
581 static void ndisc_send_unsol_na(struct net_device *dev)
582 {
583 	struct inet6_dev *idev;
584 	struct inet6_ifaddr *ifa;
585 
586 	idev = in6_dev_get(dev);
587 	if (!idev)
588 		return;
589 
590 	read_lock_bh(&idev->lock);
591 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
592 		/* skip tentative addresses until dad completes */
593 		if (ifa->flags & IFA_F_TENTATIVE &&
594 		    !(ifa->flags & IFA_F_OPTIMISTIC))
595 			continue;
596 
597 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
598 			      /*router=*/ !!idev->cnf.forwarding,
599 			      /*solicited=*/ false, /*override=*/ true,
600 			      /*inc_opt=*/ true);
601 	}
602 	read_unlock_bh(&idev->lock);
603 
604 	in6_dev_put(idev);
605 }
606 
607 struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
608 				const struct in6_addr *saddr, u64 nonce)
609 {
610 	int inc_opt = dev->addr_len;
611 	struct sk_buff *skb;
612 	struct nd_msg *msg;
613 	int optlen = 0;
614 
615 	if (!saddr)
616 		return NULL;
617 
618 	if (ipv6_addr_any(saddr))
619 		inc_opt = false;
620 	if (inc_opt)
621 		optlen += ndisc_opt_addr_space(dev,
622 					       NDISC_NEIGHBOUR_SOLICITATION);
623 	if (nonce != 0)
624 		optlen += 8;
625 
626 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
627 	if (!skb)
628 		return NULL;
629 
630 	msg = skb_put(skb, sizeof(*msg));
631 	*msg = (struct nd_msg) {
632 		.icmph = {
633 			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
634 		},
635 		.target = *solicit,
636 	};
637 
638 	if (inc_opt)
639 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
640 				       dev->dev_addr,
641 				       NDISC_NEIGHBOUR_SOLICITATION);
642 	if (nonce != 0) {
643 		u8 *opt = skb_put(skb, 8);
644 
645 		opt[0] = ND_OPT_NONCE;
646 		opt[1] = 8 >> 3;
647 		memcpy(opt + 2, &nonce, 6);
648 	}
649 
650 	return skb;
651 }
652 EXPORT_SYMBOL(ndisc_ns_create);
653 
654 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
655 		   const struct in6_addr *daddr, const struct in6_addr *saddr,
656 		   u64 nonce)
657 {
658 	struct in6_addr addr_buf;
659 	struct sk_buff *skb;
660 
661 	if (!saddr) {
662 		if (ipv6_get_lladdr(dev, &addr_buf,
663 				    (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
664 			return;
665 		saddr = &addr_buf;
666 	}
667 
668 	skb = ndisc_ns_create(dev, solicit, saddr, nonce);
669 
670 	if (skb)
671 		ndisc_send_skb(skb, daddr, saddr);
672 }
673 
674 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
675 		   const struct in6_addr *daddr)
676 {
677 	struct sk_buff *skb;
678 	struct rs_msg *msg;
679 	int send_sllao = dev->addr_len;
680 	int optlen = 0;
681 
682 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
683 	/*
684 	 * According to section 2.2 of RFC 4429, we must not
685 	 * send router solicitations with a sllao from
686 	 * optimistic addresses, but we may send the solicitation
687 	 * if we don't include the sllao.  So here we check
688 	 * if our address is optimistic, and if so, we
689 	 * suppress the inclusion of the sllao.
690 	 */
691 	if (send_sllao) {
692 		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
693 							   dev, 1);
694 		if (ifp) {
695 			if (ifp->flags & IFA_F_OPTIMISTIC)  {
696 				send_sllao = 0;
697 			}
698 			in6_ifa_put(ifp);
699 		} else {
700 			send_sllao = 0;
701 		}
702 	}
703 #endif
704 	if (send_sllao)
705 		optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
706 
707 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
708 	if (!skb)
709 		return;
710 
711 	msg = skb_put(skb, sizeof(*msg));
712 	*msg = (struct rs_msg) {
713 		.icmph = {
714 			.icmp6_type = NDISC_ROUTER_SOLICITATION,
715 		},
716 	};
717 
718 	if (send_sllao)
719 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
720 				       dev->dev_addr,
721 				       NDISC_ROUTER_SOLICITATION);
722 
723 	ndisc_send_skb(skb, daddr, saddr);
724 }
725 
726 
/*
 * neigh_ops ->error_report callback: signal a link failure for the
 * packet's dst and then free the packet.  @neigh is not used.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 * "The sender MUST return an ICMP destination unreachable"
	 */
	dst_link_failure(skb);

	kfree_skb(skb);
}
736 
737 /* Called with locked neigh: either read or both */
738 
739 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
740 {
741 	struct in6_addr *saddr = NULL;
742 	struct in6_addr mcaddr;
743 	struct net_device *dev = neigh->dev;
744 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
745 	int probes = atomic_read(&neigh->probes);
746 
747 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
748 					   dev, false, 1,
749 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
750 		saddr = &ipv6_hdr(skb)->saddr;
751 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
752 	if (probes < 0) {
753 		if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
754 			net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n",
755 					    __func__, target);
756 		}
757 		ndisc_send_ns(dev, target, target, saddr, 0);
758 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
759 		neigh_app_ns(neigh);
760 	} else {
761 		addrconf_addr_solict_mult(target, &mcaddr);
762 		ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
763 	}
764 }
765 
766 static int pndisc_is_router(const void *pkey,
767 			    struct net_device *dev)
768 {
769 	struct pneigh_entry *n;
770 	int ret = -1;
771 
772 	n = pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
773 	if (n)
774 		ret = !!(READ_ONCE(n->flags) & NTF_ROUTER);
775 
776 	return ret;
777 }
778 
779 void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
780 		  const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
781 		  struct ndisc_options *ndopts)
782 {
783 	neigh_update(neigh, lladdr, new, flags, 0);
784 	/* report ndisc ops about neighbour update */
785 	ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
786 }
787 
/*
 * Process a received Neighbour Solicitation.
 *
 * The message is validated (length, target, DAD destination, options) and
 * the target is then looked up as a local address on skb->dev (or, for an
 * L3 slave, on its master), as an anycast address, or as a proxied
 * address.  Where appropriate a Neighbour Advertisement is sent with
 * ndisc_send_na() and the neighbour cache entry for the sender is updated.
 *
 * Returns the drop reason for @skb: SKB_CONSUMED when the non-DAD reply
 * path at the end of the function has sent an NA, one of the specific
 * reasons assigned below, or otherwise the value "reason" was given by
 * SKB_DR().
 */
static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
{
	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
	u8 *lladdr = NULL;
	/* number of bytes between the start of the option area and the skb tail */
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
				    offsetof(struct nd_msg, opt));
	struct ndisc_options ndopts;
	struct net_device *dev = skb->dev;
	struct inet6_ifaddr *ifp;
	struct inet6_dev *idev = NULL;
	struct neighbour *neigh;
	/* an unspecified source address marks the NS as a DAD probe */
	int dad = ipv6_addr_any(saddr);
	/* -1: not (yet) determined from a proxy entry */
	int is_router = -1;
	SKB_DR(reason);
	u64 nonce = 0;
	bool inc;

	if (skb->len < sizeof(struct nd_msg))
		return SKB_DROP_REASON_PKT_TOO_SMALL;

	if (ipv6_addr_is_multicast(&msg->target)) {
		net_dbg_ratelimited("NS: multicast target address\n");
		return reason;
	}

	/*
	 * RFC2461 7.1.1:
	 * DAD has to be destined for solicited node multicast address.
	 */
	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
		net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n");
		return reason;
	}

	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
		if (!lladdr) {
			net_dbg_ratelimited("NS: invalid link-layer address length\n");
			return reason;
		}

		/* RFC2461 7.1.1:
		 *	If the IP source address is the unspecified address,
		 *	there MUST NOT be source link-layer address option
		 *	in the message.
		 */
		if (dad) {
			net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n");
			return reason;
		}
	}
	/*
	 * A nonce option of length 1 (8 octets) carries six bytes after its
	 * two-byte header; copy them into the low-addressed bytes of nonce.
	 */
	if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
		memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);

	/* true when the NS was sent to a multicast destination */
	inc = ipv6_addr_is_multicast(daddr);

	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
	if (ifp) {
		/* also entered via goto when the address is found on the L3 master */
have_ifp:
		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
			if (dad) {
				if (nonce != 0 && ifp->dad_nonce == nonce) {
					u8 *np = (u8 *)&nonce;
					/* Matching nonce if looped back */
					net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
							    ifp->idev->dev->name, &ifp->addr, np);
					goto out;
				}
				/*
				 * We are colliding with another node
				 * who is doing DAD
				 * so fail our DAD process
				 */
				/*
				 * NOTE(review): this path returns without
				 * going through "out", so ifp is not put
				 * here - presumably addrconf_dad_failure()
				 * consumes the reference; confirm.
				 */
				addrconf_dad_failure(skb, ifp);
				return reason;
			} else {
				/*
				 * This is not a dad solicitation.
				 * If we are an optimistic node,
				 * we should respond.
				 * Otherwise, we should ignore it.
				 */
				if (!(ifp->flags & IFA_F_OPTIMISTIC))
					goto out;
			}
		}

		idev = ifp->idev;
	} else {
		struct net *net = dev_net(dev);

		/* perhaps an address on the master device */
		if (netif_is_l3_slave(dev)) {
			struct net_device *mdev;

			mdev = netdev_master_upper_dev_get_rcu(dev);
			if (mdev) {
				ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
				if (ifp)
					goto have_ifp;
			}
		}

		/* no local address matched: this reference is dropped at "out" */
		idev = in6_dev_get(dev);
		if (!idev) {
			/* XXX: count this drop? */
			return reason;
		}

		/*
		 * Accept the NS if the target is an anycast address, or if
		 * forwarding and proxy_ndp are enabled and a proxy entry
		 * exists for the target (is_router is then taken from it).
		 */
		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
		    (READ_ONCE(idev->cnf.forwarding) &&
		     (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
		      READ_ONCE(idev->cnf.proxy_ndp)) &&
		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
			    skb->pkt_type != PACKET_HOST &&
			    inc &&
			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
				/*
				 * for anycast or proxy,
				 * sender should delay its response
				 * by a random time between 0 and
				 * MAX_ANYCAST_DELAY_TIME seconds.
				 * (RFC2461) -- yoshfuji
				 */
				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
				if (n)
					pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
				goto out;
			}
		} else {
			SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST);
			goto out;
		}
	}

	/* no proxy entry decided the router flag: use the forwarding setting */
	if (is_router < 0)
		is_router = READ_ONCE(idev->cnf.forwarding);

	/* answer a DAD probe with an NA to all-nodes; no cache update */
	if (dad) {
		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
			      !!is_router, false, (ifp != NULL), true);
		goto out;
	}

	if (inc)
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
	else
		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);

	/*
	 *	update / create cache entry
	 *	for the source address
	 */
	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
			       !inc || lladdr || !dev->addr_len);
	if (neigh)
		ndisc_update(dev, neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE,
			     NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
	/* reply with a solicited NA to the sender */
	if (neigh || !dev->header_ops) {
		ndisc_send_na(dev, saddr, &msg->target, !!is_router,
			      true, (ifp != NULL && inc), inc);
		if (neigh)
			neigh_release(neigh);
		reason = SKB_CONSUMED;
	}

out:
	/*
	 * Exactly one reference is held here: on ifp when a local address
	 * matched, otherwise on the idev obtained with in6_dev_get().
	 */
	if (ifp)
		in6_ifa_put(ifp);
	else
		in6_dev_put(idev);
	return reason;
}
969 
970 static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
971 {
972 	struct inet6_dev *idev = __in6_dev_get(dev);
973 
974 	switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
975 	case 0: /* Don't accept untracked na (absent in neighbor cache) */
976 		return 0;
977 	case 1: /* Create new entries from na if currently untracked */
978 		return 1;
979 	case 2: /* Create new entries from untracked na only if saddr is in the
980 		 * same subnet as an address configured on the interface that
981 		 * received the na
982 		 */
983 		return !!ipv6_chk_prefix(saddr, dev);
984 	default:
985 		return 0;
986 	}
987 }
988 
989 static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
990 {
991 	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
992 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
993 	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
994 	u8 *lladdr = NULL;
995 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
996 				    offsetof(struct nd_msg, opt));
997 	struct ndisc_options ndopts;
998 	struct net_device *dev = skb->dev;
999 	struct inet6_dev *idev = __in6_dev_get(dev);
1000 	struct inet6_ifaddr *ifp;
1001 	struct neighbour *neigh;
1002 	SKB_DR(reason);
1003 	u8 new_state;
1004 
1005 	if (skb->len < sizeof(struct nd_msg))
1006 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1007 
1008 	if (ipv6_addr_is_multicast(&msg->target)) {
1009 		net_dbg_ratelimited("NA: target address is multicast\n");
1010 		return reason;
1011 	}
1012 
1013 	if (ipv6_addr_is_multicast(daddr) &&
1014 	    msg->icmph.icmp6_solicited) {
1015 		net_dbg_ratelimited("NA: solicited NA is multicasted\n");
1016 		return reason;
1017 	}
1018 
1019 	/* For some 802.11 wireless deployments (and possibly other networks),
1020 	 * there will be a NA proxy and unsolicitd packets are attacks
1021 	 * and thus should not be accepted.
1022 	 * drop_unsolicited_na takes precedence over accept_untracked_na
1023 	 */
1024 	if (!msg->icmph.icmp6_solicited && idev &&
1025 	    READ_ONCE(idev->cnf.drop_unsolicited_na))
1026 		return reason;
1027 
1028 	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
1029 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1030 
1031 	if (ndopts.nd_opts_tgt_lladdr) {
1032 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
1033 		if (!lladdr) {
1034 			net_dbg_ratelimited("NA: invalid link-layer address length\n");
1035 			return reason;
1036 		}
1037 	}
1038 	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
1039 	if (ifp) {
1040 		if (skb->pkt_type != PACKET_LOOPBACK
1041 		    && (ifp->flags & IFA_F_TENTATIVE)) {
1042 				addrconf_dad_failure(skb, ifp);
1043 				return reason;
1044 		}
1045 		/* What should we make now? The advertisement
1046 		   is invalid, but ndisc specs say nothing
1047 		   about it. It could be misconfiguration, or
1048 		   an smart proxy agent tries to help us :-)
1049 
1050 		   We should not print the error if NA has been
1051 		   received from loopback - it is just our own
1052 		   unsolicited advertisement.
1053 		 */
1054 		if (skb->pkt_type != PACKET_LOOPBACK)
1055 			net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n",
1056 					     eth_hdr(skb)->h_source, &ifp->addr,
1057 					     ifp->idev->dev->name);
1058 		in6_ifa_put(ifp);
1059 		return reason;
1060 	}
1061 
1062 	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
1063 
1064 	/* RFC 9131 updates original Neighbour Discovery RFC 4861.
1065 	 * NAs with Target LL Address option without a corresponding
1066 	 * entry in the neighbour cache can now create a STALE neighbour
1067 	 * cache entry on routers.
1068 	 *
1069 	 *   entry accept  fwding  solicited        behaviour
1070 	 * ------- ------  ------  ---------    ----------------------
1071 	 * present      X       X         0     Set state to STALE
1072 	 * present      X       X         1     Set state to REACHABLE
1073 	 *  absent      0       X         X     Do nothing
1074 	 *  absent      1       0         X     Do nothing
1075 	 *  absent      1       1         X     Add a new STALE entry
1076 	 *
1077 	 * Note that we don't do a (daddr == all-routers-mcast) check.
1078 	 */
1079 	new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
1080 	if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
1081 		if (accept_untracked_na(dev, saddr)) {
1082 			neigh = neigh_create(&nd_tbl, &msg->target, dev);
1083 			new_state = NUD_STALE;
1084 		}
1085 	}
1086 
1087 	if (neigh && !IS_ERR(neigh)) {
1088 		u8 old_flags = neigh->flags;
1089 		struct net *net = dev_net(dev);
1090 
1091 		if (READ_ONCE(neigh->nud_state) & NUD_FAILED)
1092 			goto out;
1093 
1094 		/*
1095 		 * Don't update the neighbor cache entry on a proxy NA from
1096 		 * ourselves because either the proxied node is off link or it
1097 		 * has already sent a NA to us.
1098 		 */
1099 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
1100 		    READ_ONCE(net->ipv6.devconf_all->forwarding) &&
1101 		    READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
1102 		    pneigh_lookup(&nd_tbl, net, &msg->target, dev)) {
1103 			/* XXX: idev->cnf.proxy_ndp */
1104 			goto out;
1105 		}
1106 
1107 		ndisc_update(dev, neigh, lladdr,
1108 			     new_state,
1109 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1110 			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
1111 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1112 			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
1113 			     NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
1114 
1115 		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
1116 			/*
1117 			 * Change: router to host
1118 			 */
1119 			rt6_clean_tohost(dev_net(dev),  saddr);
1120 		}
1121 		reason = SKB_CONSUMED;
1122 out:
1123 		neigh_release(neigh);
1124 	}
1125 	return reason;
1126 }
1127 
1128 static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
1129 {
1130 	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1131 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1132 	struct neighbour *neigh;
1133 	struct inet6_dev *idev;
1134 	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1135 	struct ndisc_options ndopts;
1136 	u8 *lladdr = NULL;
1137 	SKB_DR(reason);
1138 
1139 	if (skb->len < sizeof(*rs_msg))
1140 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1141 
1142 	idev = __in6_dev_get(skb->dev);
1143 	if (!idev) {
1144 		net_err_ratelimited("RS: can't find in6 device\n");
1145 		return reason;
1146 	}
1147 
1148 	/* Don't accept RS if we're not in router mode */
1149 	if (!READ_ONCE(idev->cnf.forwarding))
1150 		goto out;
1151 
1152 	/*
1153 	 * Don't update NCE if src = ::;
1154 	 * this implies that the source node has no ip address assigned yet.
1155 	 */
1156 	if (ipv6_addr_any(saddr))
1157 		goto out;
1158 
1159 	/* Parse ND options */
1160 	if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts))
1161 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1162 
1163 	if (ndopts.nd_opts_src_lladdr) {
1164 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1165 					     skb->dev);
1166 		if (!lladdr)
1167 			goto out;
1168 	}
1169 
1170 	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1171 	if (neigh) {
1172 		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1173 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1174 			     NEIGH_UPDATE_F_OVERRIDE|
1175 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
1176 			     NDISC_ROUTER_SOLICITATION, &ndopts);
1177 		neigh_release(neigh);
1178 		reason = SKB_CONSUMED;
1179 	}
1180 out:
1181 	return reason;
1182 }
1183 
1184 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1185 {
1186 	struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1187 	struct sk_buff *skb;
1188 	struct nlmsghdr *nlh;
1189 	struct nduseroptmsg *ndmsg;
1190 	struct net *net = dev_net(ra->dev);
1191 	int err;
1192 	int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1193 				    + (opt->nd_opt_len << 3));
1194 	size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1195 
1196 	skb = nlmsg_new(msg_size, GFP_ATOMIC);
1197 	if (!skb) {
1198 		err = -ENOBUFS;
1199 		goto errout;
1200 	}
1201 
1202 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1203 	if (!nlh) {
1204 		goto nla_put_failure;
1205 	}
1206 
1207 	ndmsg = nlmsg_data(nlh);
1208 	ndmsg->nduseropt_family = AF_INET6;
1209 	ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1210 	ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1211 	ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1212 	ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1213 	ndmsg->nduseropt_pad1 = 0;
1214 	ndmsg->nduseropt_pad2 = 0;
1215 	ndmsg->nduseropt_pad3 = 0;
1216 
1217 	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1218 
1219 	if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
1220 		goto nla_put_failure;
1221 	nlmsg_end(skb, nlh);
1222 
1223 	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1224 	return;
1225 
1226 nla_put_failure:
1227 	nlmsg_free(skb);
1228 	err = -EMSGSIZE;
1229 errout:
1230 	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1231 }
1232 
1233 static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
1234 {
1235 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1236 	bool send_ifinfo_notify = false;
1237 	struct neighbour *neigh = NULL;
1238 	struct ndisc_options ndopts;
1239 	struct fib6_info *rt = NULL;
1240 	struct inet6_dev *in6_dev;
1241 	struct fib6_table *table;
1242 	u32 defrtr_usr_metric;
1243 	unsigned int pref = 0;
1244 	__u32 old_if_flags;
1245 	struct net *net;
1246 	SKB_DR(reason);
1247 	int lifetime;
1248 	int optlen;
1249 
1250 	__u8 *opt = (__u8 *)(ra_msg + 1);
1251 
1252 	optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
1253 		sizeof(struct ra_msg);
1254 
1255 	net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name);
1256 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1257 		net_dbg_ratelimited("RA: source address is not link-local\n");
1258 		return reason;
1259 	}
1260 	if (optlen < 0)
1261 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1262 
1263 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1264 	if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1265 		net_dbg_ratelimited("RA: from host or unauthorized router\n");
1266 		return reason;
1267 	}
1268 #endif
1269 
1270 	in6_dev = __in6_dev_get(skb->dev);
1271 	if (!in6_dev) {
1272 		net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name);
1273 		return reason;
1274 	}
1275 
1276 	if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts))
1277 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1278 
1279 	if (!ipv6_accept_ra(in6_dev)) {
1280 		net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__,
1281 				    skb->dev->name);
1282 		goto skip_linkparms;
1283 	}
1284 
1285 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1286 	/* skip link-specific parameters from interior routers */
1287 	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
1288 		net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__,
1289 				    skb->dev->name);
1290 		goto skip_linkparms;
1291 	}
1292 #endif
1293 
1294 	if (in6_dev->if_flags & IF_RS_SENT) {
1295 		/*
1296 		 *	flag that an RA was received after an RS was sent
1297 		 *	out on this interface.
1298 		 */
1299 		in6_dev->if_flags |= IF_RA_RCVD;
1300 	}
1301 
1302 	/*
1303 	 * Remember the managed/otherconf flags from most recently
1304 	 * received RA message (RFC 2462) -- yoshfuji
1305 	 */
1306 	old_if_flags = in6_dev->if_flags;
1307 	in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1308 				IF_RA_OTHERCONF)) |
1309 				(ra_msg->icmph.icmp6_addrconf_managed ?
1310 					IF_RA_MANAGED : 0) |
1311 				(ra_msg->icmph.icmp6_addrconf_other ?
1312 					IF_RA_OTHERCONF : 0);
1313 
1314 	if (old_if_flags != in6_dev->if_flags)
1315 		send_ifinfo_notify = true;
1316 
1317 	if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
1318 		net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__,
1319 				    skb->dev->name);
1320 		goto skip_defrtr;
1321 	}
1322 
1323 	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1324 	if (lifetime != 0 &&
1325 	    lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
1326 		net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime,
1327 				    skb->dev->name);
1328 		goto skip_defrtr;
1329 	}
1330 
1331 	/* Do not accept RA with source-addr found on local machine unless
1332 	 * accept_ra_from_local is set to true.
1333 	 */
1334 	net = dev_net(in6_dev->dev);
1335 	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
1336 	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
1337 		net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n",
1338 				    skb->dev->name);
1339 		goto skip_defrtr;
1340 	}
1341 
1342 #ifdef CONFIG_IPV6_ROUTER_PREF
1343 	pref = ra_msg->icmph.icmp6_router_pref;
1344 	/* 10b is handled as if it were 00b (medium) */
1345 	if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1346 	    !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
1347 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
1348 #endif
1349 	/* routes added from RAs do not use nexthop objects */
1350 	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
1351 	if (rt) {
1352 		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
1353 					 rt->fib6_nh->fib_nh_dev, NULL,
1354 					  &ipv6_hdr(skb)->saddr);
1355 		if (!neigh) {
1356 			net_err_ratelimited("RA: %s got default router without neighbour\n",
1357 					    __func__);
1358 			fib6_info_release(rt);
1359 			return reason;
1360 		}
1361 	}
1362 	/* Set default route metric as specified by user */
1363 	defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
1364 	/* delete the route if lifetime is 0 or if metric needs change */
1365 	if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
1366 		ip6_del_rt(net, rt, false);
1367 		rt = NULL;
1368 	}
1369 
1370 	net_dbg_ratelimited("RA: rt: %p  lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime,
1371 			    defrtr_usr_metric, skb->dev->name);
1372 	if (!rt && lifetime) {
1373 		net_dbg_ratelimited("RA: adding default router\n");
1374 
1375 		if (neigh)
1376 			neigh_release(neigh);
1377 
1378 		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
1379 					 skb->dev, pref, defrtr_usr_metric,
1380 					 lifetime);
1381 		if (!rt) {
1382 			net_err_ratelimited("RA: %s failed to add default route\n", __func__);
1383 			return reason;
1384 		}
1385 
1386 		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
1387 					 rt->fib6_nh->fib_nh_dev, NULL,
1388 					  &ipv6_hdr(skb)->saddr);
1389 		if (!neigh) {
1390 			net_err_ratelimited("RA: %s got default router without neighbour\n",
1391 					    __func__);
1392 			fib6_info_release(rt);
1393 			return reason;
1394 		}
1395 		neigh->flags |= NTF_ROUTER;
1396 	} else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
1397 		struct nl_info nlinfo = {
1398 			.nl_net = net,
1399 		};
1400 		rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1401 		inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
1402 	}
1403 
1404 	if (rt) {
1405 		table = rt->fib6_table;
1406 		spin_lock_bh(&table->tb6_lock);
1407 
1408 		fib6_set_expires(rt, jiffies + (HZ * lifetime));
1409 		fib6_add_gc_list(rt);
1410 
1411 		spin_unlock_bh(&table->tb6_lock);
1412 	}
1413 	if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
1414 	    ra_msg->icmph.icmp6_hop_limit) {
1415 		if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
1416 		    ra_msg->icmph.icmp6_hop_limit) {
1417 			WRITE_ONCE(in6_dev->cnf.hop_limit,
1418 				   ra_msg->icmph.icmp6_hop_limit);
1419 			fib6_metric_set(rt, RTAX_HOPLIMIT,
1420 					ra_msg->icmph.icmp6_hop_limit);
1421 		} else {
1422 			net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n");
1423 		}
1424 	}
1425 
1426 skip_defrtr:
1427 
1428 	/*
1429 	 *	Update Reachable Time and Retrans Timer
1430 	 */
1431 
1432 	if (in6_dev->nd_parms) {
1433 		unsigned long rtime = ntohl(ra_msg->retrans_timer);
1434 
1435 		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1436 			rtime = (rtime*HZ)/1000;
1437 			if (rtime < HZ/100)
1438 				rtime = HZ/100;
1439 			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
1440 			in6_dev->tstamp = jiffies;
1441 			send_ifinfo_notify = true;
1442 		}
1443 
1444 		rtime = ntohl(ra_msg->reachable_time);
1445 		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1446 			rtime = (rtime*HZ)/1000;
1447 
1448 			if (rtime < HZ/10)
1449 				rtime = HZ/10;
1450 
1451 			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
1452 				NEIGH_VAR_SET(in6_dev->nd_parms,
1453 					      BASE_REACHABLE_TIME, rtime);
1454 				NEIGH_VAR_SET(in6_dev->nd_parms,
1455 					      GC_STALETIME, 3 * rtime);
1456 				neigh_set_reach_time(in6_dev->nd_parms);
1457 				in6_dev->tstamp = jiffies;
1458 				send_ifinfo_notify = true;
1459 			}
1460 		}
1461 	}
1462 
1463 skip_linkparms:
1464 
1465 	/*
1466 	 *	Process options.
1467 	 */
1468 
1469 	if (!neigh)
1470 		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1471 				       skb->dev, 1);
1472 	if (neigh) {
1473 		u8 *lladdr = NULL;
1474 		if (ndopts.nd_opts_src_lladdr) {
1475 			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1476 						     skb->dev);
1477 			if (!lladdr) {
1478 				net_dbg_ratelimited("RA: invalid link-layer address length\n");
1479 				goto out;
1480 			}
1481 		}
1482 		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1483 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1484 			     NEIGH_UPDATE_F_OVERRIDE|
1485 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1486 			     NEIGH_UPDATE_F_ISROUTER,
1487 			     NDISC_ROUTER_ADVERTISEMENT, &ndopts);
1488 		reason = SKB_CONSUMED;
1489 	}
1490 
1491 	if (!ipv6_accept_ra(in6_dev)) {
1492 		net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__,
1493 				    skb->dev->name);
1494 		goto out;
1495 	}
1496 
1497 #ifdef CONFIG_IPV6_ROUTE_INFO
1498 	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
1499 	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
1500 			  in6_dev->dev, 0)) {
1501 		net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n",
1502 				    skb->dev->name);
1503 		goto skip_routeinfo;
1504 	}
1505 
1506 	if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
1507 		struct nd_opt_hdr *p;
1508 		for (p = ndopts.nd_opts_ri;
1509 		     p;
1510 		     p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1511 			struct route_info *ri = (struct route_info *)p;
1512 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1513 			if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1514 			    ri->prefix_len == 0)
1515 				continue;
1516 #endif
1517 			if (ri->prefix_len == 0 &&
1518 			    !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
1519 				continue;
1520 			if (ri->lifetime != 0 &&
1521 			    ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
1522 				continue;
1523 			if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
1524 				continue;
1525 			if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
1526 				continue;
1527 			rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
1528 				      &ipv6_hdr(skb)->saddr);
1529 		}
1530 	}
1531 
1532 skip_routeinfo:
1533 #endif
1534 
1535 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1536 	/* skip link-specific ndopts from interior routers */
1537 	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
1538 		net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
1539 				    __func__, skb->dev->name);
1540 		goto out;
1541 	}
1542 #endif
1543 
1544 	if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
1545 		struct nd_opt_hdr *p;
1546 		for (p = ndopts.nd_opts_pi;
1547 		     p;
1548 		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1549 			addrconf_prefix_rcv(skb->dev, (u8 *)p,
1550 					    (p->nd_opt_len) << 3,
1551 					    ndopts.nd_opts_src_lladdr != NULL);
1552 		}
1553 	}
1554 
1555 	if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
1556 		__be32 n;
1557 		u32 mtu;
1558 
1559 		memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1560 		mtu = ntohl(n);
1561 
1562 		if (READ_ONCE(in6_dev->ra_mtu) != mtu) {
1563 			WRITE_ONCE(in6_dev->ra_mtu, mtu);
1564 			send_ifinfo_notify = true;
1565 		}
1566 
1567 		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1568 			net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu);
1569 		} else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
1570 			WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
1571 			fib6_metric_set(rt, RTAX_MTU, mtu);
1572 			rt6_mtu_change(skb->dev, mtu);
1573 		}
1574 	}
1575 
1576 	if (ndopts.nd_useropts) {
1577 		struct nd_opt_hdr *p;
1578 		for (p = ndopts.nd_useropts;
1579 		     p;
1580 		     p = ndisc_next_useropt(skb->dev, p,
1581 					    ndopts.nd_useropts_end)) {
1582 			ndisc_ra_useropt(skb, p);
1583 		}
1584 	}
1585 
1586 	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1587 		net_dbg_ratelimited("RA: invalid RA options\n");
1588 	}
1589 out:
1590 	/* Send a notify if RA changed managed/otherconf flags or
1591 	 * timer settings or ra_mtu value
1592 	 */
1593 	if (send_ifinfo_notify)
1594 		inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1595 
1596 	fib6_info_release(rt);
1597 	if (neigh)
1598 		neigh_release(neigh);
1599 	return reason;
1600 }
1601 
1602 static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb)
1603 {
1604 	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
1605 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1606 				    offsetof(struct rd_msg, opt));
1607 	struct ndisc_options ndopts;
1608 	SKB_DR(reason);
1609 	u8 *hdr;
1610 
1611 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1612 	switch (skb->ndisc_nodetype) {
1613 	case NDISC_NODETYPE_HOST:
1614 	case NDISC_NODETYPE_NODEFAULT:
1615 		net_dbg_ratelimited("Redirect: from host or unauthorized router\n");
1616 		return reason;
1617 	}
1618 #endif
1619 
1620 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1621 		net_dbg_ratelimited("Redirect: source address is not link-local\n");
1622 		return reason;
1623 	}
1624 
1625 	if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
1626 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1627 
1628 	if (!ndopts.nd_opts_rh) {
1629 		ip6_redirect_no_header(skb, dev_net(skb->dev),
1630 					skb->dev->ifindex);
1631 		return reason;
1632 	}
1633 
1634 	hdr = (u8 *)ndopts.nd_opts_rh;
1635 	hdr += 8;
1636 	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
1637 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1638 
1639 	return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
1640 }
1641 
1642 static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
1643 					   struct sk_buff *orig_skb,
1644 					   int rd_len)
1645 {
1646 	u8 *opt = skb_put(skb, rd_len);
1647 
1648 	memset(opt, 0, 8);
1649 	*(opt++) = ND_OPT_REDIRECT_HDR;
1650 	*(opt++) = (rd_len >> 3);
1651 	opt += 6;
1652 
1653 	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
1654 		      rd_len - 8);
1655 }
1656 
1657 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1658 {
1659 	struct net_device *dev = skb->dev;
1660 	struct net *net = dev_net_rcu(dev);
1661 	struct sock *sk = net->ipv6.ndisc_sk;
1662 	int optlen = 0;
1663 	struct inet_peer *peer;
1664 	struct sk_buff *buff;
1665 	struct rd_msg *msg;
1666 	struct in6_addr saddr_buf;
1667 	struct rt6_info *rt;
1668 	struct dst_entry *dst;
1669 	struct flowi6 fl6;
1670 	int rd_len;
1671 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
1672 	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
1673 	bool ret;
1674 
1675 	if (netif_is_l3_master(dev)) {
1676 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
1677 		if (!dev)
1678 			return;
1679 	}
1680 
1681 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1682 		net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name);
1683 		return;
1684 	}
1685 
1686 	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1687 	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1688 		net_dbg_ratelimited("Redirect: target address is not link-local unicast\n");
1689 		return;
1690 	}
1691 
1692 	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1693 			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1694 
1695 	dst = ip6_route_output(net, NULL, &fl6);
1696 	if (dst->error) {
1697 		dst_release(dst);
1698 		return;
1699 	}
1700 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1701 	if (IS_ERR(dst))
1702 		return;
1703 
1704 	rt = dst_rt6_info(dst);
1705 
1706 	if (rt->rt6i_flags & RTF_GATEWAY) {
1707 		net_dbg_ratelimited("Redirect: destination is not a neighbour\n");
1708 		goto release;
1709 	}
1710 
1711 	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
1712 	ret = inet_peer_xrlim_allow(peer, 1*HZ);
1713 
1714 	if (!ret)
1715 		goto release;
1716 
1717 	if (dev->addr_len) {
1718 		struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
1719 		if (!neigh) {
1720 			net_dbg_ratelimited("Redirect: no neigh for target address\n");
1721 			goto release;
1722 		}
1723 
1724 		read_lock_bh(&neigh->lock);
1725 		if (neigh->nud_state & NUD_VALID) {
1726 			memcpy(ha_buf, neigh->ha, dev->addr_len);
1727 			read_unlock_bh(&neigh->lock);
1728 			ha = ha_buf;
1729 			optlen += ndisc_redirect_opt_addr_space(dev, neigh,
1730 								ops_data_buf,
1731 								&ops_data);
1732 		} else
1733 			read_unlock_bh(&neigh->lock);
1734 
1735 		neigh_release(neigh);
1736 	}
1737 
1738 	rd_len = min_t(unsigned int,
1739 		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
1740 		       skb->len + 8);
1741 	rd_len &= ~0x7;
1742 	optlen += rd_len;
1743 
1744 	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
1745 	if (!buff)
1746 		goto release;
1747 
1748 	msg = skb_put(buff, sizeof(*msg));
1749 	*msg = (struct rd_msg) {
1750 		.icmph = {
1751 			.icmp6_type = NDISC_REDIRECT,
1752 		},
1753 		.target = *target,
1754 		.dest = ipv6_hdr(skb)->daddr,
1755 	};
1756 
1757 	/*
1758 	 *	include target_address option
1759 	 */
1760 
1761 	if (ha)
1762 		ndisc_fill_redirect_addr_option(buff, ha, ops_data);
1763 
1764 	/*
1765 	 *	build redirect option and copy skb over to the new packet.
1766 	 */
1767 
1768 	if (rd_len)
1769 		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
1770 
1771 	skb_dst_set(buff, dst);
1772 	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
1773 	return;
1774 
1775 release:
1776 	dst_release(dst);
1777 }
1778 
/*
 * Run @skb through ndisc_recv_ns() and then free it, passing on the
 * drop reason that the handler returned.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	kfree_skb_reason(skb, ndisc_recv_ns(skb));
}
1785 
/* Return non-zero when @pkey, read as an IPv6 address, is multicast. */
static int ndisc_is_multicast(const void *pkey)
{
	const struct in6_addr *addr = pkey;

	return ipv6_addr_is_multicast(addr);
}
1790 
1791 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
1792 {
1793 	struct inet6_dev *idev = __in6_dev_get(skb->dev);
1794 
1795 	if (!idev)
1796 		return true;
1797 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
1798 	    READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
1799 		net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
1800 		return true;
1801 	}
1802 	return false;
1803 }
1804 
1805 enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
1806 {
1807 	struct nd_msg *msg;
1808 	SKB_DR(reason);
1809 
1810 	if (ndisc_suppress_frag_ndisc(skb))
1811 		return SKB_DROP_REASON_IPV6_NDISC_FRAG;
1812 
1813 	if (skb_linearize(skb))
1814 		return SKB_DROP_REASON_NOMEM;
1815 
1816 	msg = (struct nd_msg *)skb_transport_header(skb);
1817 
1818 	__skb_push(skb, skb->data - skb_transport_header(skb));
1819 
1820 	if (ipv6_hdr(skb)->hop_limit != 255) {
1821 		net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit);
1822 		return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
1823 	}
1824 
1825 	if (msg->icmph.icmp6_code != 0) {
1826 		net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code);
1827 		return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
1828 	}
1829 
1830 	switch (msg->icmph.icmp6_type) {
1831 	case NDISC_NEIGHBOUR_SOLICITATION:
1832 		memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1833 		reason = ndisc_recv_ns(skb);
1834 		break;
1835 
1836 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
1837 		reason = ndisc_recv_na(skb);
1838 		break;
1839 
1840 	case NDISC_ROUTER_SOLICITATION:
1841 		reason = ndisc_recv_rs(skb);
1842 		break;
1843 
1844 	case NDISC_ROUTER_ADVERTISEMENT:
1845 		reason = ndisc_router_discovery(skb);
1846 		break;
1847 
1848 	case NDISC_REDIRECT:
1849 		reason = ndisc_redirect_rcv(skb);
1850 		break;
1851 	}
1852 
1853 	return reason;
1854 }
1855 
1856 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1857 {
1858 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1859 	struct netdev_notifier_change_info *change_info;
1860 	struct net *net = dev_net(dev);
1861 	struct inet6_dev *idev;
1862 	bool evict_nocarrier;
1863 
1864 	switch (event) {
1865 	case NETDEV_CHANGEADDR:
1866 		neigh_changeaddr(&nd_tbl, dev);
1867 		fib6_run_gc(0, net, false);
1868 		fallthrough;
1869 	case NETDEV_UP:
1870 		idev = in6_dev_get(dev);
1871 		if (!idev)
1872 			break;
1873 		if (READ_ONCE(idev->cnf.ndisc_notify) ||
1874 		    READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
1875 			ndisc_send_unsol_na(dev);
1876 		in6_dev_put(idev);
1877 		break;
1878 	case NETDEV_CHANGE:
1879 		idev = in6_dev_get(dev);
1880 		if (!idev)
1881 			evict_nocarrier = true;
1882 		else {
1883 			evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
1884 					  READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
1885 			in6_dev_put(idev);
1886 		}
1887 
1888 		change_info = ptr;
1889 		if (change_info->flags_changed & IFF_NOARP)
1890 			neigh_changeaddr(&nd_tbl, dev);
1891 		if (evict_nocarrier && !netif_carrier_ok(dev))
1892 			neigh_carrier_down(&nd_tbl, dev);
1893 		break;
1894 	case NETDEV_DOWN:
1895 		neigh_ifdown(&nd_tbl, dev);
1896 		fib6_run_gc(0, net, false);
1897 		break;
1898 	case NETDEV_NOTIFY_PEERS:
1899 		ndisc_send_unsol_na(dev);
1900 		break;
1901 	default:
1902 		break;
1903 	}
1904 
1905 	return NOTIFY_DONE;
1906 }
1907 
1908 static struct notifier_block ndisc_netdev_notifier = {
1909 	.notifier_call = ndisc_netdev_event,
1910 	.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
1911 };
1912 
1913 #ifdef CONFIG_SYSCTL
1914 static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
1915 					 const char *func, const char *dev_name)
1916 {
1917 	static char warncomm[TASK_COMM_LEN];
1918 	static int warned;
1919 	if (strcmp(warncomm, current->comm) && warned < 5) {
1920 		strscpy(warncomm, current->comm);
1921 		pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
1922 			warncomm, func,
1923 			dev_name, ctl->procname,
1924 			dev_name, ctl->procname);
1925 		warned++;
1926 	}
1927 }
1928 
1929 int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer,
1930 		size_t *lenp, loff_t *ppos)
1931 {
1932 	struct net_device *dev = ctl->extra1;
1933 	struct inet6_dev *idev;
1934 	int ret;
1935 
1936 	if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1937 	    (strcmp(ctl->procname, "base_reachable_time") == 0))
1938 		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1939 
1940 	if (strcmp(ctl->procname, "retrans_time") == 0)
1941 		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
1942 
1943 	else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1944 		ret = neigh_proc_dointvec_jiffies(ctl, write,
1945 						  buffer, lenp, ppos);
1946 
1947 	else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1948 		 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1949 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
1950 						     buffer, lenp, ppos);
1951 	else
1952 		ret = -1;
1953 
1954 	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1955 		if (ctl->data == NEIGH_VAR_PTR(idev->nd_parms, BASE_REACHABLE_TIME))
1956 			neigh_set_reach_time(idev->nd_parms);
1957 
1958 		WRITE_ONCE(idev->tstamp, jiffies);
1959 		inet6_ifinfo_notify(RTM_NEWLINK, idev);
1960 		in6_dev_put(idev);
1961 	}
1962 	return ret;
1963 }
1964 
1965 
1966 #endif
1967 
1968 static int __net_init ndisc_net_init(struct net *net)
1969 {
1970 	struct ipv6_pinfo *np;
1971 	struct sock *sk;
1972 	int err;
1973 
1974 	err = inet_ctl_sock_create(&sk, PF_INET6,
1975 				   SOCK_RAW, IPPROTO_ICMPV6, net);
1976 	if (err < 0) {
1977 		net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n",
1978 				    err);
1979 		return err;
1980 	}
1981 
1982 	net->ipv6.ndisc_sk = sk;
1983 
1984 	np = inet6_sk(sk);
1985 	np->hop_limit = 255;
1986 	/* Do not loopback ndisc messages */
1987 	inet6_clear_bit(MC6_LOOP, sk);
1988 
1989 	return 0;
1990 }
1991 
1992 static void __net_exit ndisc_net_exit(struct net *net)
1993 {
1994 	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1995 }
1996 
1997 static struct pernet_operations ndisc_net_ops = {
1998 	.init = ndisc_net_init,
1999 	.exit = ndisc_net_exit,
2000 };
2001 
2002 int __init ndisc_init(void)
2003 {
2004 	int err;
2005 
2006 	err = register_pernet_subsys(&ndisc_net_ops);
2007 	if (err)
2008 		return err;
2009 	/*
2010 	 * Initialize the neighbour table
2011 	 */
2012 	neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);
2013 
2014 #ifdef CONFIG_SYSCTL
2015 	err = neigh_sysctl_register(NULL, &nd_tbl.parms,
2016 				    ndisc_ifinfo_sysctl_change);
2017 	if (err)
2018 		goto out_unregister_pernet;
2019 out:
2020 #endif
2021 	return err;
2022 
2023 #ifdef CONFIG_SYSCTL
2024 out_unregister_pernet:
2025 	unregister_pernet_subsys(&ndisc_net_ops);
2026 	goto out;
2027 #endif
2028 }
2029 
2030 int __init ndisc_late_init(void)
2031 {
2032 	return register_netdevice_notifier(&ndisc_netdev_notifier);
2033 }
2034 
2035 void ndisc_late_cleanup(void)
2036 {
2037 	unregister_netdevice_notifier(&ndisc_netdev_notifier);
2038 }
2039 
2040 void ndisc_cleanup(void)
2041 {
2042 #ifdef CONFIG_SYSCTL
2043 	neigh_sysctl_unregister(&nd_tbl.parms);
2044 #endif
2045 	neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
2046 	unregister_pernet_subsys(&ndisc_net_ops);
2047 }
2048