xref: /linux/net/ipv6/icmp.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/* Per-CPU kernel socket used to transmit locally generated ICMPv6 packets. */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
/* Error handler for errors elicited by ICMPv6 packets we sent ourselves
 * (e.g. an echo request that triggered a Packet Too Big or a redirect).
 * Always returns 0.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only error messages are relayed to ping sockets, and only when
	 * the embedded (offending) ICMPv6 packet was an echo request.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}
93 
static int icmpv6_rcv(struct sk_buff *skb);

/* Protocol registration entry for IPPROTO_ICMPV6: input handler, error
 * handler, no xfrm policy check on input (NOPOLICY), terminal protocol
 * (FINAL).
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
101 
/* Called with BH disabled.
 * Grab this CPU's ICMPv6 transmit socket via trylock; returns NULL
 * instead of spinning when the socket is already in use on this CPU.
 */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	/* Bind the shared socket to the caller's netns for this transmit;
	 * icmpv6_xmit_unlock() parks it back in init_net.
	 */
	sock_net_set(sk, net);
	return sk;
}
118 
/* Release the per-CPU ICMPv6 socket taken by icmpv6_xmit_lock(),
 * re-homing it to init_net before dropping the lock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first byte past the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	/* Truncated before any payload: cannot even tell the protocol. */
	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type,
179 				bool *apply_ratelimit)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow(net)) {
185 		*apply_ratelimit = true;
186 		return true;
187 	}
188 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
189 	return false;
190 }
191 
/*
 * Check the ICMP output rate limit (per-host token bucket).
 * Returns true when the message may be sent; on success a token is
 * consumed from the global allowance reserved by icmpv6_global_allow().
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	/* Caller already decided no per-host limit applies to this type. */
	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		/* Traffic that stays on the local machine is never limited. */
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, tmo);
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);
	else
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}
239 
240 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
241 				  struct flowi6 *fl6)
242 {
243 	struct net *net = sock_net(sk);
244 	struct dst_entry *dst;
245 	bool res = false;
246 
247 	dst = ip6_route_output(net, sk, fl6);
248 	if (!dst->error) {
249 		struct rt6_info *rt = dst_rt6_info(dst);
250 		struct in6_addr prefsrc;
251 
252 		rt6_get_prefsrc(rt, &prefsrc);
253 		res = !ipv6_addr_any(&prefsrc);
254 	}
255 	dst_release(dst);
256 	return res;
257 }
258 
259 /*
260  *	an inline helper for the "simple" if statement below
261  *	checks if parameter problem report is caused by an
262  *	unrecognized IPv6 option that has the Option Type
263  *	highest-order two bits set to 10
264  */
265 
266 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
267 {
268 	u8 _optval, *op;
269 
270 	offset += skb_network_offset(skb);
271 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
272 	if (!op)
273 		return true;
274 	return (*op & 0xC0) == 0x80;
275 }
276 
/* Finish an ICMPv6 message previously queued with ip6_append_data():
 * copy the prepared header @thdr into the first queued skb, compute the
 * ICMPv6 checksum over all queued data (@len bytes including the
 * header), and push the frames out.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single skb: fold the header into its existing csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: sum the per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
314 
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet whose payload is being quoted */
	int		offset;	/* copying starts at this offset in skb */
	uint8_t		type;	/* outgoing icmp6_type */
};
320 
/* ip6_append_data() getfrag callback: copy @len bytes of the original
 * packet into the outgoing skb while accumulating the checksum.  For
 * error messages (non-info types), also attach the original packet's
 * conntrack entry to the reply.  Always returns 0.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
334 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carried a Home Address Option,
 * swap the IPv6 source address with the address stored in the HAO so
 * the error is built against the address in the option (RFC 6275 HAO
 * processing — TODO confirm exact rule against the MIPv6 code).
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
354 
/* Resolve the output route (including xfrm transformation) for an
 * outgoing ICMPv6 error.  If the forward xfrm lookup is denied with
 * -EPERM, retry with a reverse-decoded flow and XFRM_LOOKUP_ICMP.
 * Returns a held dst on success or an ERR_PTR().
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
		/* dst unchanged: fall through to the reverse-flow retry. */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* On retry failure, fall back to the original dst if we kept one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
419 
420 static struct net_device *icmp6_dev(const struct sk_buff *skb)
421 {
422 	struct net_device *dev = skb->dev;
423 
424 	/* for local traffic to local address, skb dev is the loopback
425 	 * device. Check if there is a dst attached to the skb and if so
426 	 * get the real device index. Same is needed for replies to a link
427 	 * local address on a device enslaved to an L3 master device
428 	 */
429 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
430 		const struct rt6_info *rt6 = skb_rt6_info(skb);
431 
432 		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
433 		 * and ip6_null_entry could be set to skb if no route is found.
434 		 */
435 		if (rt6 && rt6->rt6i_idev)
436 			dev = rt6->rt6i_idev->dev;
437 	}
438 
439 	return dev;
440 }
441 
442 static int icmp6_iif(const struct sk_buff *skb)
443 {
444 	return icmp6_dev(skb)->ifindex;
445 }
446 
/* IPv6 Interface Address Sub-Object as carried in the RFC 5837
 * Interface Information Object: AFI, reserved pad, then the address.
 */
struct icmp6_ext_iio_addr6_subobj {
	__be16 afi;		/* address family identifier (ICMP_AFI_IP6) */
	__be16 reserved;
	struct in6_addr addr6;
};
452 
453 static unsigned int icmp6_ext_iio_len(void)
454 {
455 	return sizeof(struct icmp_extobj_hdr) +
456 		/* ifIndex */
457 		sizeof(__be32) +
458 		/* Interface Address Sub-Object */
459 		sizeof(struct icmp6_ext_iio_addr6_subobj) +
460 		/* Interface Name Sub-Object. Length must be a multiple of 4
461 		 * bytes.
462 		 */
463 		ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) +
464 		/* MTU */
465 		sizeof(__be32);
466 }
467 
468 static unsigned int icmp6_ext_max_len(u8 ext_objs)
469 {
470 	unsigned int ext_max_len;
471 
472 	ext_max_len = sizeof(struct icmp_ext_hdr);
473 
474 	if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF))
475 		ext_max_len += icmp6_ext_iio_len();
476 
477 	return ext_max_len;
478 }
479 
/* Pick an IPv6 address of @dev to advertise in the Interface
 * Information Object.  Runs under RCU (caller holds rcu_read_lock()).
 * Returns NULL when the device has no suitable address.
 */
static struct in6_addr *icmp6_ext_iio_addr6_find(const struct net_device *dev)
{
	struct inet6_dev *in6_dev;
	struct inet6_ifaddr *ifa;

	in6_dev = __in6_dev_get(dev);
	if (!in6_dev)
		return NULL;

	/* It is unclear from RFC 5837 which IP address should be chosen, but
	 * it makes sense to choose a global unicast address.
	 */
	list_for_each_entry_rcu(ifa, &in6_dev->addr_list, if_list) {
		/* Skip addresses still undergoing (or failed) DAD. */
		if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DADFAILED))
			continue;
		if (ipv6_addr_type(&ifa->addr) != IPV6_ADDR_UNICAST ||
		    ipv6_addr_src_scope(&ifa->addr) != IPV6_ADDR_SCOPE_GLOBAL)
			continue;
		return &ifa->addr;
	}

	return NULL;
}
503 
/* Append an RFC 5837 Interface Information Object describing the
 * incoming interface @iif: ifIndex, optionally an IPv6 address, the
 * interface name and the MTU.  A class_type bit is set as each optional
 * field is actually emitted.
 */
static void icmp6_ext_iio_iif_append(struct net *net, struct sk_buff *skb,
				     int iif)
{
	struct icmp_ext_iio_name_subobj *name_subobj;
	struct icmp_extobj_hdr *objh;
	struct net_device *dev;
	struct in6_addr *addr6;
	__be32 data;

	if (!iif)
		return;

	/* Add the fields in the order specified by RFC 5837. */
	objh = skb_put(skb, sizeof(*objh));
	objh->class_num = ICMP_EXT_OBJ_CLASS_IIO;
	objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF);

	data = htonl(iif);
	skb_put_data(skb, &data, sizeof(__be32));
	objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX;

	rcu_read_lock();

	dev = dev_get_by_index_rcu(net, iif);
	if (!dev)
		goto out;

	addr6 = icmp6_ext_iio_addr6_find(dev);
	if (addr6) {
		struct icmp6_ext_iio_addr6_subobj *addr6_subobj;

		addr6_subobj = skb_put_zero(skb, sizeof(*addr6_subobj));
		addr6_subobj->afi = htons(ICMP_AFI_IP6);
		addr6_subobj->addr6 = *addr6;
		objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR;
	}

	/* Name sub-object length must be a multiple of 4 bytes. */
	name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4));
	name_subobj->len = ALIGN(sizeof(*name_subobj), 4);
	netdev_copy_name(dev, name_subobj->name);
	objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME;

	data = htonl(READ_ONCE(dev->mtu));
	skb_put_data(skb, &data, sizeof(__be32));
	objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU;

out:
	rcu_read_unlock();
	/* Object length covers the header plus everything appended above. */
	objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh);
}
554 
555 static void icmp6_ext_objs_append(struct net *net, struct sk_buff *skb,
556 				  u8 ext_objs, int iif)
557 {
558 	if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF))
559 		icmp6_ext_iio_iif_append(net, skb, iif);
560 }
561 
/* Build an RFC 4884/5837 extension structure for an outgoing ICMPv6
 * error quoting @skb_in.  Returns a clone of @skb_in, trimmed/padded to
 * the minimum original-datagram length with the extension appended, or
 * NULL when no extension should (or can) be added.  On success,
 * icmp6_datagram_len in @icmp6h is set (quoted length in 64-bit words).
 */
static struct sk_buff *
icmp6_ext_append(struct net *net, struct sk_buff *skb_in,
		 struct icmp6hdr *icmp6h, unsigned int room, int iif)
{
	unsigned int payload_len, ext_max_len, ext_len;
	struct icmp_ext_hdr *ext_hdr;
	struct sk_buff *skb;
	u8 ext_objs;
	int nhoff;

	/* Extensions are only emitted for these error types. */
	switch (icmp6h->icmp6_type) {
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
		break;
	default:
		return NULL;
	}

	/* Do not overwrite existing extensions. This can happen when we
	 * receive an ICMPv4 message with extensions from a tunnel and
	 * translate it to an ICMPv6 message towards an IPv6 host in the
	 * overlay network.
	 */
	if (icmp6h->icmp6_datagram_len)
		return NULL;

	ext_objs = READ_ONCE(net->ipv6.sysctl.icmpv6_errors_extension_mask);
	if (!ext_objs)
		return NULL;

	/* Bail out if quoted datagram plus extension cannot fit in @room. */
	ext_max_len = icmp6_ext_max_len(ext_objs);
	if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room)
		return NULL;

	skb = skb_clone(skb_in, GFP_ATOMIC);
	if (!skb)
		return NULL;

	nhoff = skb_network_offset(skb);
	payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN);

	if (!pskb_network_may_pull(skb, payload_len))
		goto free_skb;

	/* Quote exactly ICMP_EXT_ORIG_DGRAM_MIN_LEN bytes: trim longer
	 * packets, zero-pad shorter ones.
	 */
	if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) ||
	    __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false))
		goto free_skb;

	if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC))
		goto free_skb;

	ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr));
	ext_hdr->version = ICMP_EXT_VERSION_2;

	icmp6_ext_objs_append(net, skb, ext_objs, iif);

	/* Do not send an empty extension structure. */
	ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr;
	if (ext_len == sizeof(*ext_hdr))
		goto free_skb;

	ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len);
	/* The length of the original datagram in 64-bit words (RFC 4884). */
	icmp6h->icmp6_datagram_len = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u64);

	return skb;

free_skb:
	consume_skb(skb);
	return NULL;
}
633 
/*
 *	Send an ICMP message in response to a packet in error
 *	@skb: the offending packet
 *	@type, @code, @info: ICMPv6 type, code and 32-bit type-specific word
 *	@force_saddr: optional source address override (used by tunnels)
 *	@parm: IP6CB of the offending packet (iif, MIPv6 HAO state)
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct sk_buff *ext_skb;
	struct dst_entry *dst;
	unsigned int room;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity check: the network header must lie fully inside the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote at most a minimum-MTU's worth of the offending packet so
	 * the error message itself never needs fragmentation.
	 */
	room = IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr);
	ext_skb = icmp6_ext_append(net, skb, &tmp_hdr, room, parm->iif);
	if (ext_skb)
		msg.skb = ext_skb;

	len = msg.skb->len - msg.offset;
	len = min_t(unsigned int, len, room);
	/* NOTE(review): min_t() above compares as unsigned int, so a
	 * negative len would already have been clamped to room; this
	 * check looks unreachable — confirm before relying on it.
	 */
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	if (ext_skb)
		consume_skb(ext_skb);
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
843 
/* Slightly more convenient version of icmp6_send with drop reasons:
 * emit a Parameter Problem (pointer field @pos) for @skb and then free
 * it with the given drop @reason.  Consumes @skb.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
852 
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 * @type is the triggering ICMPv4 type; @data_len is the RFC 4884
 * "original datagram" length (invalid values disable extension support).
 * Returns 0 on success, 1 when the message could not be built.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* A full copy is needed when we must shuffle bytes for padding;
	 * otherwise a cheap clone suffices.
	 */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Source the error from the v4-mapped form of the outer IPv4 src. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
918 
/* Answer an (extended) echo request with the matching reply type.
 * Returns a drop reason: SKB_CONSUMED when a reply was queued, else the
 * reason the request was not answered.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	/* Honour the sysctls suppressing replies to multicast/anycast. */
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* Reply from the request's destination address only when it was
	 * unicast (or anycast with anycast_src_echo_reply enabled).
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* Extended (PROBE) requests need their reply body built first. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}
1031 
/* Deliver a received ICMPv6 error to the upper-layer protocol that sent
 * the offending (embedded) packet, and to interested raw sockets.
 * Returns SKB_CONSUMED on delivery, otherwise a drop reason.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	/* skb->data points at the embedded IPv6 header here. */
	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}
1085 
/*
 *	Handle incoming ICMPv6 messages: the receive routine registered
 *	for IPPROTO_ICMPV6.
 *
 *	Validates XFRM policy and the ICMPv6 checksum, updates the MIB
 *	counters, then dispatches on the message type.  Always returns 0;
 *	the skb is freed/consumed here, except when it returns immediately
 *	after handing the skb to ping_rcv()/igmp6_event_query()/
 *	igmp6_event_report().
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Inbound policy check failed.  The packet can still be
		 * acceptable if the last transform carries the
		 * XFRM_STATE_ICMP flag: re-check policy against the
		 * embedded (offending) packet's headers below.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		/* Need the ICMPv6 header plus the embedded IPv6 header. */
		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header just past the
		 * ICMPv6 header (at the embedded packet) for the reverse
		 * policy check, then restore the saved offset.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* Extended echo (PROBE) additionally requires the
		 * icmp_echo_enable_probe sysctl, shared with IPv4.
		 */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		/* pskb_may_pull() may reallocate the head, so refetch
		 * the header pointer afterwards.
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		/* Known types that are deliberately ignored here. */
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	/* falls through into discard_it: count both CsumErrors and InErrors */
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	/* policy/length drops before the packet was accounted as received */
	kfree_skb_reason(skb, reason);
	return 0;
}
1242 
1243 void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
1244 		      const struct in6_addr *saddr,
1245 		      const struct in6_addr *daddr, int oif)
1246 {
1247 	memset(fl6, 0, sizeof(*fl6));
1248 	fl6->saddr = *saddr;
1249 	fl6->daddr = *daddr;
1250 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1251 	fl6->fl6_icmp_type	= type;
1252 	fl6->fl6_icmp_code	= 0;
1253 	fl6->flowi6_oif		= oif;
1254 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1255 }
1256 
/* icmpv6_init - boot-time setup for ICMPv6.
 *
 * Creates one raw control socket per possible CPU (used for sending
 * ICMPv6 messages), then registers the ICMPv6 protocol handler and the
 * icmp6_send error sender.
 *
 * Returns 0 on success or a negative errno.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			/* NOTE(review): sockets created for earlier CPUs are
			 * not released on this path — presumably acceptable
			 * for a boot-time failure; confirm.
			 */
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	/* Unwind the protocol registration before reporting failure. */
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1294 
/* Undo icmpv6_init(): unregister the error sender and the protocol
 * handler, in reverse order of registration.
 * NOTE(review): the per-CPU control sockets are not torn down here —
 * presumably intentional; confirm against module-unload expectations.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1300 
1301 
/* Mapping of ICMPv6 destination-unreachable codes to errno values,
 * indexed by the ICMPv6 code field (see icmpv6_err_convert()).
 * ->fatal marks codes that should be treated as hard errors.
 */
static const struct icmp6_err {
	int err;	/* errno reported to the socket */
	int fatal;	/* non-zero: hard error for the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1335 
1336 int icmpv6_err_convert(u8 type, u8 code, int *err)
1337 {
1338 	int fatal = 0;
1339 
1340 	*err = EPROTO;
1341 
1342 	switch (type) {
1343 	case ICMPV6_DEST_UNREACH:
1344 		fatal = 1;
1345 		if (code < ARRAY_SIZE(tab_unreach)) {
1346 			*err  = tab_unreach[code].err;
1347 			fatal = tab_unreach[code].fatal;
1348 		}
1349 		break;
1350 
1351 	case ICMPV6_PKT_TOOBIG:
1352 		*err = EMSGSIZE;
1353 		break;
1354 
1355 	case ICMPV6_PARAMPROB:
1356 		*err = EPROTO;
1357 		fatal = 1;
1358 		break;
1359 
1360 	case ICMPV6_TIME_EXCEED:
1361 		*err = EHOSTUNREACH;
1362 		break;
1363 	}
1364 
1365 	return fatal;
1366 }
1367 EXPORT_SYMBOL(icmpv6_err_convert);
1368 
1369 #ifdef CONFIG_SYSCTL
1370 
/* Upper bound for the errors_extension_mask sysctl: a mask with every
 * defined ICMP error-extension bit set (used as .extra2 below).
 */
static u32 icmpv6_errors_extension_mask_all =
	GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0);
1373 
/* Template for the per-netns ICMPv6 sysctls.  The .data pointers target
 * init_net and are rewritten for each netns in ipv6_icmp_sysctl_init();
 * keep the entry order in sync with the indices used there.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "errors_extension_mask",
		.data		= &init_net.ipv6.sysctl.icmpv6_errors_extension_mask,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &icmpv6_errors_extension_mask_all,
	},
};
1429 
1430 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1431 {
1432 	struct ctl_table *table;
1433 
1434 	table = kmemdup(ipv6_icmp_table_template,
1435 			sizeof(ipv6_icmp_table_template),
1436 			GFP_KERNEL);
1437 
1438 	if (table) {
1439 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1440 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1441 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1442 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1443 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1444 		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
1445 		table[6].data = &net->ipv6.sysctl.icmpv6_errors_extension_mask;
1446 	}
1447 	return table;
1448 }
1449 
/* Number of entries in the per-netns ICMPv6 sysctl table. */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
1454 #endif
1455