xref: /linux/net/ipv6/icmp.c (revision 3f0a50f345f78183f6e9b39c2f45ca5dcaa511ca)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to an icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure parameter problem is sent for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/*
 * Per-CPU ICMPv6 control socket.  icmpv6_init() stores one socket for every
 * possible CPU; icmpv6_xmit_lock() reads the current CPU's entry before
 * sending a message.
 */
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	return false;
187 }
188 
189 /*
190  * Check the ICMP output rate limit
191  */
192 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
193 			       struct flowi6 *fl6)
194 {
195 	struct net *net = sock_net(sk);
196 	struct dst_entry *dst;
197 	bool res = false;
198 
199 	if (icmpv6_mask_allow(net, type))
200 		return true;
201 
202 	/*
203 	 * Look up the output route.
204 	 * XXX: perhaps the expire for routing entries cloned by
205 	 * this lookup should be more aggressive (not longer than timeout).
206 	 */
207 	dst = ip6_route_output(net, sk, fl6);
208 	if (dst->error) {
209 		IP6_INC_STATS(net, ip6_dst_idev(dst),
210 			      IPSTATS_MIB_OUTNOROUTES);
211 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
212 		res = true;
213 	} else {
214 		struct rt6_info *rt = (struct rt6_info *)dst;
215 		int tmo = net->ipv6.sysctl.icmpv6_time;
216 		struct inet_peer *peer;
217 
218 		/* Give more bandwidth to wider prefixes. */
219 		if (rt->rt6i_dst.plen < 128)
220 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
221 
222 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
223 		res = inet_peer_xrlim_allow(peer, tmo);
224 		if (peer)
225 			inet_putpeer(peer);
226 	}
227 	dst_release(dst);
228 	return res;
229 }
230 
231 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
232 				  struct flowi6 *fl6)
233 {
234 	struct net *net = sock_net(sk);
235 	struct dst_entry *dst;
236 	bool res = false;
237 
238 	dst = ip6_route_output(net, sk, fl6);
239 	if (!dst->error) {
240 		struct rt6_info *rt = (struct rt6_info *)dst;
241 		struct in6_addr prefsrc;
242 
243 		rt6_get_prefsrc(rt, &prefsrc);
244 		res = !ipv6_addr_any(&prefsrc);
245 	}
246 	dst_release(dst);
247 	return res;
248 }
249 
250 /*
251  *	an inline helper for the "simple" if statement below
252  *	checks if parameter problem report is caused by an
253  *	unrecognized IPv6 option that has the Option Type
254  *	highest-order two bits set to 10
255  */
256 
257 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
258 {
259 	u8 _optval, *op;
260 
261 	offset += skb_network_offset(skb);
262 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
263 	if (!op)
264 		return true;
265 	return (*op & 0xC0) == 0x80;
266 }
267 
268 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
269 				struct icmp6hdr *thdr, int len)
270 {
271 	struct sk_buff *skb;
272 	struct icmp6hdr *icmp6h;
273 
274 	skb = skb_peek(&sk->sk_write_queue);
275 	if (!skb)
276 		return;
277 
278 	icmp6h = icmp6_hdr(skb);
279 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
280 	icmp6h->icmp6_cksum = 0;
281 
282 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
283 		skb->csum = csum_partial(icmp6h,
284 					sizeof(struct icmp6hdr), skb->csum);
285 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
286 						      &fl6->daddr,
287 						      len, fl6->flowi6_proto,
288 						      skb->csum);
289 	} else {
290 		__wsum tmp_csum = 0;
291 
292 		skb_queue_walk(&sk->sk_write_queue, skb) {
293 			tmp_csum = csum_add(tmp_csum, skb->csum);
294 		}
295 
296 		tmp_csum = csum_partial(icmp6h,
297 					sizeof(struct icmp6hdr), tmp_csum);
298 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
299 						      &fl6->daddr,
300 						      len, fl6->flowi6_proto,
301 						      tmp_csum);
302 	}
303 	ip6_push_pending_frames(sk);
304 }
305 
306 struct icmpv6_msg {
307 	struct sk_buff	*skb;
308 	int		offset;
309 	uint8_t		type;
310 };
311 
312 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
313 {
314 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
315 	struct sk_buff *org_skb = msg->skb;
316 	__wsum csum;
317 
318 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
319 				      to, len);
320 	skb->csum = csum_block_add(skb->csum, csum, odd);
321 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
322 		nf_ct_attach(skb, org_skb);
323 	return 0;
324 }
325 
326 #if IS_ENABLED(CONFIG_IPV6_MIP6)
327 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
328 {
329 	struct ipv6hdr *iph = ipv6_hdr(skb);
330 	struct ipv6_destopt_hao *hao;
331 	struct in6_addr tmp;
332 	int off;
333 
334 	if (opt->dsthao) {
335 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
336 		if (likely(off >= 0)) {
337 			hao = (struct ipv6_destopt_hao *)
338 					(skb_network_header(skb) + off);
339 			tmp = iph->saddr;
340 			iph->saddr = hao->addr;
341 			hao->addr = tmp;
342 		}
343 	}
344 }
345 #else
346 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
347 #endif
348 
349 static struct dst_entry *icmpv6_route_lookup(struct net *net,
350 					     struct sk_buff *skb,
351 					     struct sock *sk,
352 					     struct flowi6 *fl6)
353 {
354 	struct dst_entry *dst, *dst2;
355 	struct flowi6 fl2;
356 	int err;
357 
358 	err = ip6_dst_lookup(net, sk, &dst, fl6);
359 	if (err)
360 		return ERR_PTR(err);
361 
362 	/*
363 	 * We won't send icmp if the destination is known
364 	 * anycast.
365 	 */
366 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
367 		net_dbg_ratelimited("icmp6_send: acast source\n");
368 		dst_release(dst);
369 		return ERR_PTR(-EINVAL);
370 	}
371 
372 	/* No need to clone since we're just using its address. */
373 	dst2 = dst;
374 
375 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
376 	if (!IS_ERR(dst)) {
377 		if (dst != dst2)
378 			return dst;
379 	} else {
380 		if (PTR_ERR(dst) == -EPERM)
381 			dst = NULL;
382 		else
383 			return dst;
384 	}
385 
386 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
387 	if (err)
388 		goto relookup_failed;
389 
390 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
391 	if (err)
392 		goto relookup_failed;
393 
394 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
395 	if (!IS_ERR(dst2)) {
396 		dst_release(dst);
397 		dst = dst2;
398 	} else {
399 		err = PTR_ERR(dst2);
400 		if (err == -EPERM) {
401 			dst_release(dst);
402 			return dst2;
403 		} else
404 			goto relookup_failed;
405 	}
406 
407 relookup_failed:
408 	if (dst)
409 		return dst;
410 	return ERR_PTR(err);
411 }
412 
413 static struct net_device *icmp6_dev(const struct sk_buff *skb)
414 {
415 	struct net_device *dev = skb->dev;
416 
417 	/* for local traffic to local address, skb dev is the loopback
418 	 * device. Check if there is a dst attached to the skb and if so
419 	 * get the real device index. Same is needed for replies to a link
420 	 * local address on a device enslaved to an L3 master device
421 	 */
422 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
423 		const struct rt6_info *rt6 = skb_rt6_info(skb);
424 
425 		if (rt6)
426 			dev = rt6->rt6i_idev->dev;
427 	}
428 
429 	return dev;
430 }
431 
432 static int icmp6_iif(const struct sk_buff *skb)
433 {
434 	return icmp6_dev(skb)->ifindex;
435 }
436 
437 /*
438  *	Send an ICMP message in response to a packet in error
439  */
440 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
441 		const struct in6_addr *force_saddr,
442 		const struct inet6_skb_parm *parm)
443 {
444 	struct inet6_dev *idev = NULL;
445 	struct ipv6hdr *hdr = ipv6_hdr(skb);
446 	struct sock *sk;
447 	struct net *net;
448 	struct ipv6_pinfo *np;
449 	const struct in6_addr *saddr = NULL;
450 	struct dst_entry *dst;
451 	struct icmp6hdr tmp_hdr;
452 	struct flowi6 fl6;
453 	struct icmpv6_msg msg;
454 	struct ipcm6_cookie ipc6;
455 	int iif = 0;
456 	int addr_type = 0;
457 	int len;
458 	u32 mark;
459 
460 	if ((u8 *)hdr < skb->head ||
461 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
462 		return;
463 
464 	if (!skb->dev)
465 		return;
466 	net = dev_net(skb->dev);
467 	mark = IP6_REPLY_MARK(net, skb->mark);
468 	/*
469 	 *	Make sure we respect the rules
470 	 *	i.e. RFC 1885 2.4(e)
471 	 *	Rule (e.1) is enforced by not using icmp6_send
472 	 *	in any code that processes icmp errors.
473 	 */
474 	addr_type = ipv6_addr_type(&hdr->daddr);
475 
476 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
477 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
478 		saddr = &hdr->daddr;
479 
480 	/*
481 	 *	Dest addr check
482 	 */
483 
484 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
485 		if (type != ICMPV6_PKT_TOOBIG &&
486 		    !(type == ICMPV6_PARAMPROB &&
487 		      code == ICMPV6_UNK_OPTION &&
488 		      (opt_unrec(skb, info))))
489 			return;
490 
491 		saddr = NULL;
492 	}
493 
494 	addr_type = ipv6_addr_type(&hdr->saddr);
495 
496 	/*
497 	 *	Source addr check
498 	 */
499 
500 	if (__ipv6_addr_needs_scope_id(addr_type)) {
501 		iif = icmp6_iif(skb);
502 	} else {
503 		/*
504 		 * The source device is used for looking up which routing table
505 		 * to use for sending an ICMP error.
506 		 */
507 		iif = l3mdev_master_ifindex(skb->dev);
508 	}
509 
510 	/*
511 	 *	Must not send error if the source does not uniquely
512 	 *	identify a single node (RFC2463 Section 2.4).
513 	 *	We check unspecified / multicast addresses here,
514 	 *	and anycast addresses will be checked later.
515 	 */
516 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
517 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
518 				    &hdr->saddr, &hdr->daddr);
519 		return;
520 	}
521 
522 	/*
523 	 *	Never answer to a ICMP packet.
524 	 */
525 	if (is_ineligible(skb)) {
526 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
527 				    &hdr->saddr, &hdr->daddr);
528 		return;
529 	}
530 
531 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
532 	local_bh_disable();
533 
534 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
535 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
536 		goto out_bh_enable;
537 
538 	mip6_addr_swap(skb, parm);
539 
540 	sk = icmpv6_xmit_lock(net);
541 	if (!sk)
542 		goto out_bh_enable;
543 
544 	memset(&fl6, 0, sizeof(fl6));
545 	fl6.flowi6_proto = IPPROTO_ICMPV6;
546 	fl6.daddr = hdr->saddr;
547 	if (force_saddr)
548 		saddr = force_saddr;
549 	if (saddr) {
550 		fl6.saddr = *saddr;
551 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
552 		/* select a more meaningful saddr from input if */
553 		struct net_device *in_netdev;
554 
555 		in_netdev = dev_get_by_index(net, parm->iif);
556 		if (in_netdev) {
557 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
558 					   inet6_sk(sk)->srcprefs,
559 					   &fl6.saddr);
560 			dev_put(in_netdev);
561 		}
562 	}
563 	fl6.flowi6_mark = mark;
564 	fl6.flowi6_oif = iif;
565 	fl6.fl6_icmp_type = type;
566 	fl6.fl6_icmp_code = code;
567 	fl6.flowi6_uid = sock_net_uid(net, NULL);
568 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
569 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
570 
571 	np = inet6_sk(sk);
572 
573 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
574 		goto out;
575 
576 	tmp_hdr.icmp6_type = type;
577 	tmp_hdr.icmp6_code = code;
578 	tmp_hdr.icmp6_cksum = 0;
579 	tmp_hdr.icmp6_pointer = htonl(info);
580 
581 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
582 		fl6.flowi6_oif = np->mcast_oif;
583 	else if (!fl6.flowi6_oif)
584 		fl6.flowi6_oif = np->ucast_oif;
585 
586 	ipcm6_init_sk(&ipc6, np);
587 	ipc6.sockc.mark = mark;
588 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
589 
590 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
591 	if (IS_ERR(dst))
592 		goto out;
593 
594 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
595 
596 	msg.skb = skb;
597 	msg.offset = skb_network_offset(skb);
598 	msg.type = type;
599 
600 	len = skb->len - msg.offset;
601 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
602 	if (len < 0) {
603 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
604 				    &hdr->saddr, &hdr->daddr);
605 		goto out_dst_release;
606 	}
607 
608 	rcu_read_lock();
609 	idev = __in6_dev_get(skb->dev);
610 
611 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
612 			    len + sizeof(struct icmp6hdr),
613 			    sizeof(struct icmp6hdr),
614 			    &ipc6, &fl6, (struct rt6_info *)dst,
615 			    MSG_DONTWAIT)) {
616 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
617 		ip6_flush_pending_frames(sk);
618 	} else {
619 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
620 					   len + sizeof(struct icmp6hdr));
621 	}
622 	rcu_read_unlock();
623 out_dst_release:
624 	dst_release(dst);
625 out:
626 	icmpv6_xmit_unlock(sk);
627 out_bh_enable:
628 	local_bh_enable();
629 }
630 EXPORT_SYMBOL(icmp6_send);
631 
632 /* Slightly more convenient version of icmp6_send.
633  */
634 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
635 {
636 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
637 	kfree_skb(skb);
638 }
639 
640 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
641  * if sufficient data bytes are available
642  * @nhs is the size of the tunnel header(s) :
643  *  Either an IPv4 header for SIT encap
644  *         an IPv4 header + GRE header for GRE encap
645  */
646 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
647 			       unsigned int data_len)
648 {
649 	struct in6_addr temp_saddr;
650 	struct rt6_info *rt;
651 	struct sk_buff *skb2;
652 	u32 info = 0;
653 
654 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
655 		return 1;
656 
657 	/* RFC 4884 (partial) support for ICMP extensions */
658 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
659 		data_len = 0;
660 
661 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
662 
663 	if (!skb2)
664 		return 1;
665 
666 	skb_dst_drop(skb2);
667 	skb_pull(skb2, nhs);
668 	skb_reset_network_header(skb2);
669 
670 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
671 			skb, 0);
672 
673 	if (rt && rt->dst.dev)
674 		skb2->dev = rt->dst.dev;
675 
676 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
677 
678 	if (data_len) {
679 		/* RFC 4884 (partial) support :
680 		 * insert 0 padding at the end, before the extensions
681 		 */
682 		__skb_push(skb2, nhs);
683 		skb_reset_network_header(skb2);
684 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
685 		memset(skb2->data + data_len - nhs, 0, nhs);
686 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
687 		 * and stored in reserved[0]
688 		 */
689 		info = (data_len/8) << 24;
690 	}
691 	if (type == ICMP_TIME_EXCEEDED)
692 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
693 			   info, &temp_saddr, IP6CB(skb2));
694 	else
695 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
696 			   info, &temp_saddr, IP6CB(skb2));
697 	if (rt)
698 		ip6_rt_put(rt);
699 
700 	kfree_skb(skb2);
701 
702 	return 0;
703 }
704 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
705 
706 static void icmpv6_echo_reply(struct sk_buff *skb)
707 {
708 	struct net *net = dev_net(skb->dev);
709 	struct sock *sk;
710 	struct inet6_dev *idev;
711 	struct ipv6_pinfo *np;
712 	const struct in6_addr *saddr = NULL;
713 	struct icmp6hdr *icmph = icmp6_hdr(skb);
714 	struct icmp6hdr tmp_hdr;
715 	struct flowi6 fl6;
716 	struct icmpv6_msg msg;
717 	struct dst_entry *dst;
718 	struct ipcm6_cookie ipc6;
719 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
720 	bool acast;
721 	u8 type;
722 
723 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
724 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
725 		return;
726 
727 	saddr = &ipv6_hdr(skb)->daddr;
728 
729 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
730 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
731 		return;
732 
733 	if (!ipv6_unicast_destination(skb) &&
734 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
735 		saddr = NULL;
736 
737 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
738 		type = ICMPV6_EXT_ECHO_REPLY;
739 	else
740 		type = ICMPV6_ECHO_REPLY;
741 
742 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
743 	tmp_hdr.icmp6_type = type;
744 
745 	memset(&fl6, 0, sizeof(fl6));
746 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
747 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
748 
749 	fl6.flowi6_proto = IPPROTO_ICMPV6;
750 	fl6.daddr = ipv6_hdr(skb)->saddr;
751 	if (saddr)
752 		fl6.saddr = *saddr;
753 	fl6.flowi6_oif = icmp6_iif(skb);
754 	fl6.fl6_icmp_type = type;
755 	fl6.flowi6_mark = mark;
756 	fl6.flowi6_uid = sock_net_uid(net, NULL);
757 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
758 
759 	local_bh_disable();
760 	sk = icmpv6_xmit_lock(net);
761 	if (!sk)
762 		goto out_bh_enable;
763 	np = inet6_sk(sk);
764 
765 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
766 		fl6.flowi6_oif = np->mcast_oif;
767 	else if (!fl6.flowi6_oif)
768 		fl6.flowi6_oif = np->ucast_oif;
769 
770 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
771 		goto out;
772 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
773 	if (IS_ERR(dst))
774 		goto out;
775 
776 	/* Check the ratelimit */
777 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
778 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
779 		goto out_dst_release;
780 
781 	idev = __in6_dev_get(skb->dev);
782 
783 	msg.skb = skb;
784 	msg.offset = 0;
785 	msg.type = type;
786 
787 	ipcm6_init_sk(&ipc6, np);
788 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
789 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
790 	ipc6.sockc.mark = mark;
791 
792 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
793 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
794 			goto out_dst_release;
795 
796 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
797 			    skb->len + sizeof(struct icmp6hdr),
798 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
799 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
800 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
801 		ip6_flush_pending_frames(sk);
802 	} else {
803 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
804 					   skb->len + sizeof(struct icmp6hdr));
805 	}
806 out_dst_release:
807 	dst_release(dst);
808 out:
809 	icmpv6_xmit_unlock(sk);
810 out_bh_enable:
811 	local_bh_enable();
812 }
813 
814 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
815 {
816 	struct inet6_skb_parm *opt = IP6CB(skb);
817 	const struct inet6_protocol *ipprot;
818 	int inner_offset;
819 	__be16 frag_off;
820 	u8 nexthdr;
821 	struct net *net = dev_net(skb->dev);
822 
823 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
824 		goto out;
825 
826 	seg6_icmp_srh(skb, opt);
827 
828 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
829 	if (ipv6_ext_hdr(nexthdr)) {
830 		/* now skip over extension headers */
831 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
832 						&nexthdr, &frag_off);
833 		if (inner_offset < 0)
834 			goto out;
835 	} else {
836 		inner_offset = sizeof(struct ipv6hdr);
837 	}
838 
839 	/* Checkin header including 8 bytes of inner protocol header. */
840 	if (!pskb_may_pull(skb, inner_offset+8))
841 		goto out;
842 
843 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
844 	   Without this we will not able f.e. to make source routed
845 	   pmtu discovery.
846 	   Corresponding argument (opt) to notifiers is already added.
847 	   --ANK (980726)
848 	 */
849 
850 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
851 	if (ipprot && ipprot->err_handler)
852 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
853 
854 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
855 	return;
856 
857 out:
858 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
859 }
860 
861 /*
862  *	Handle icmp messages
863  */
864 
865 static int icmpv6_rcv(struct sk_buff *skb)
866 {
867 	struct net *net = dev_net(skb->dev);
868 	struct net_device *dev = icmp6_dev(skb);
869 	struct inet6_dev *idev = __in6_dev_get(dev);
870 	const struct in6_addr *saddr, *daddr;
871 	struct icmp6hdr *hdr;
872 	u8 type;
873 	bool success = false;
874 
875 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
876 		struct sec_path *sp = skb_sec_path(skb);
877 		int nh;
878 
879 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
880 				 XFRM_STATE_ICMP))
881 			goto drop_no_count;
882 
883 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
884 			goto drop_no_count;
885 
886 		nh = skb_network_offset(skb);
887 		skb_set_network_header(skb, sizeof(*hdr));
888 
889 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
890 			goto drop_no_count;
891 
892 		skb_set_network_header(skb, nh);
893 	}
894 
895 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
896 
897 	saddr = &ipv6_hdr(skb)->saddr;
898 	daddr = &ipv6_hdr(skb)->daddr;
899 
900 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
901 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
902 				    saddr, daddr);
903 		goto csum_error;
904 	}
905 
906 	if (!pskb_pull(skb, sizeof(*hdr)))
907 		goto discard_it;
908 
909 	hdr = icmp6_hdr(skb);
910 
911 	type = hdr->icmp6_type;
912 
913 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
914 
915 	switch (type) {
916 	case ICMPV6_ECHO_REQUEST:
917 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
918 			icmpv6_echo_reply(skb);
919 		break;
920 	case ICMPV6_EXT_ECHO_REQUEST:
921 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
922 		    net->ipv4.sysctl_icmp_echo_enable_probe)
923 			icmpv6_echo_reply(skb);
924 		break;
925 
926 	case ICMPV6_ECHO_REPLY:
927 		success = ping_rcv(skb);
928 		break;
929 
930 	case ICMPV6_EXT_ECHO_REPLY:
931 		success = ping_rcv(skb);
932 		break;
933 
934 	case ICMPV6_PKT_TOOBIG:
935 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
936 		   standard destination cache. Seems, only "advanced"
937 		   destination cache will allow to solve this problem
938 		   --ANK (980726)
939 		 */
940 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
941 			goto discard_it;
942 		hdr = icmp6_hdr(skb);
943 
944 		/* to notify */
945 		fallthrough;
946 	case ICMPV6_DEST_UNREACH:
947 	case ICMPV6_TIME_EXCEED:
948 	case ICMPV6_PARAMPROB:
949 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
950 		break;
951 
952 	case NDISC_ROUTER_SOLICITATION:
953 	case NDISC_ROUTER_ADVERTISEMENT:
954 	case NDISC_NEIGHBOUR_SOLICITATION:
955 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
956 	case NDISC_REDIRECT:
957 		ndisc_rcv(skb);
958 		break;
959 
960 	case ICMPV6_MGM_QUERY:
961 		igmp6_event_query(skb);
962 		return 0;
963 
964 	case ICMPV6_MGM_REPORT:
965 		igmp6_event_report(skb);
966 		return 0;
967 
968 	case ICMPV6_MGM_REDUCTION:
969 	case ICMPV6_NI_QUERY:
970 	case ICMPV6_NI_REPLY:
971 	case ICMPV6_MLD2_REPORT:
972 	case ICMPV6_DHAAD_REQUEST:
973 	case ICMPV6_DHAAD_REPLY:
974 	case ICMPV6_MOBILE_PREFIX_SOL:
975 	case ICMPV6_MOBILE_PREFIX_ADV:
976 		break;
977 
978 	default:
979 		/* informational */
980 		if (type & ICMPV6_INFOMSG_MASK)
981 			break;
982 
983 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
984 				    saddr, daddr);
985 
986 		/*
987 		 * error of unknown type.
988 		 * must pass to upper level
989 		 */
990 
991 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
992 	}
993 
994 	/* until the v6 path can be better sorted assume failure and
995 	 * preserve the status quo behaviour for the rest of the paths to here
996 	 */
997 	if (success)
998 		consume_skb(skb);
999 	else
1000 		kfree_skb(skb);
1001 
1002 	return 0;
1003 
1004 csum_error:
1005 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1006 discard_it:
1007 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1008 drop_no_count:
1009 	kfree_skb(skb);
1010 	return 0;
1011 }
1012 
1013 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1014 		      u8 type,
1015 		      const struct in6_addr *saddr,
1016 		      const struct in6_addr *daddr,
1017 		      int oif)
1018 {
1019 	memset(fl6, 0, sizeof(*fl6));
1020 	fl6->saddr = *saddr;
1021 	fl6->daddr = *daddr;
1022 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1023 	fl6->fl6_icmp_type	= type;
1024 	fl6->fl6_icmp_code	= 0;
1025 	fl6->flowi6_oif		= oif;
1026 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1027 }
1028 
1029 int __init icmpv6_init(void)
1030 {
1031 	struct sock *sk;
1032 	int err, i;
1033 
1034 	for_each_possible_cpu(i) {
1035 		err = inet_ctl_sock_create(&sk, PF_INET6,
1036 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1037 		if (err < 0) {
1038 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1039 			       err);
1040 			return err;
1041 		}
1042 
1043 		per_cpu(ipv6_icmp_sk, i) = sk;
1044 
1045 		/* Enough space for 2 64K ICMP packets, including
1046 		 * sk_buff struct overhead.
1047 		 */
1048 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1049 	}
1050 
1051 	err = -EAGAIN;
1052 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1053 		goto fail;
1054 
1055 	err = inet6_register_icmp_sender(icmp6_send);
1056 	if (err)
1057 		goto sender_reg_err;
1058 	return 0;
1059 
1060 sender_reg_err:
1061 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1062 fail:
1063 	pr_err("Failed to register ICMP6 protocol\n");
1064 	return err;
1065 }
1066 
/*
 * Undo the registrations made by icmpv6_init(): unregister icmp6_send() as
 * the ICMPv6 sender, then remove the ICMPv6 protocol handlers.
 */
1067 void icmpv6_cleanup(void)
1068 {
1069 	inet6_unregister_icmp_sender(icmp6_send);
1070 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1071 }
1072 
1073 
/*
 * errno value and "fatal" flag for each ICMPV6_DEST_UNREACH code; the array
 * is indexed by the code (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1107 
1108 int icmpv6_err_convert(u8 type, u8 code, int *err)
1109 {
1110 	int fatal = 0;
1111 
1112 	*err = EPROTO;
1113 
1114 	switch (type) {
1115 	case ICMPV6_DEST_UNREACH:
1116 		fatal = 1;
1117 		if (code < ARRAY_SIZE(tab_unreach)) {
1118 			*err  = tab_unreach[code].err;
1119 			fatal = tab_unreach[code].fatal;
1120 		}
1121 		break;
1122 
1123 	case ICMPV6_PKT_TOOBIG:
1124 		*err = EMSGSIZE;
1125 		break;
1126 
1127 	case ICMPV6_PARAMPROB:
1128 		*err = EPROTO;
1129 		fatal = 1;
1130 		break;
1131 
1132 	case ICMPV6_TIME_EXCEED:
1133 		*err = EHOSTUNREACH;
1134 		break;
1135 	}
1136 
1137 	return fatal;
1138 }
1139 EXPORT_SYMBOL(icmpv6_err_convert);
1140 
1141 #ifdef CONFIG_SYSCTL
1142 static struct ctl_table ipv6_icmp_table_template[] = {
1143 	{
1144 		.procname	= "ratelimit",
1145 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1146 		.maxlen		= sizeof(int),
1147 		.mode		= 0644,
1148 		.proc_handler	= proc_dointvec_ms_jiffies,
1149 	},
1150 	{
1151 		.procname	= "echo_ignore_all",
1152 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1153 		.maxlen		= sizeof(u8),
1154 		.mode		= 0644,
1155 		.proc_handler = proc_dou8vec_minmax,
1156 	},
1157 	{
1158 		.procname	= "echo_ignore_multicast",
1159 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1160 		.maxlen		= sizeof(u8),
1161 		.mode		= 0644,
1162 		.proc_handler = proc_dou8vec_minmax,
1163 	},
1164 	{
1165 		.procname	= "echo_ignore_anycast",
1166 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1167 		.maxlen		= sizeof(u8),
1168 		.mode		= 0644,
1169 		.proc_handler = proc_dou8vec_minmax,
1170 	},
1171 	{
1172 		.procname	= "ratemask",
1173 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1174 		.maxlen		= ICMPV6_MSG_MAX + 1,
1175 		.mode		= 0644,
1176 		.proc_handler = proc_do_large_bitmap,
1177 	},
1178 	{ },
1179 };
1180 
1181 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1182 {
1183 	struct ctl_table *table;
1184 
1185 	table = kmemdup(ipv6_icmp_table_template,
1186 			sizeof(ipv6_icmp_table_template),
1187 			GFP_KERNEL);
1188 
1189 	if (table) {
1190 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1191 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1192 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1193 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1194 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1195 	}
1196 	return table;
1197 }
1198 #endif
1199