xref: /linux/net/ipv6/icmp.c (revision 48dea9a700c8728cc31a1dd44588b97578de86ee)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68 
69 #include <linux/uaccess.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static struct sock *icmpv6_sk(struct net *net)
79 {
80 	return this_cpu_read(*net->ipv6.icmp_sk);
81 }
82 
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 		       u8 type, u8 code, int offset, __be32 info)
85 {
86 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 	struct net *net = dev_net(skb->dev);
89 
90 	if (type == ICMPV6_PKT_TOOBIG)
91 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 	else if (type == NDISC_REDIRECT)
93 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 			     sock_net_uid(net, NULL));
95 
96 	if (!(type & ICMPV6_INFOMSG_MASK))
97 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 			ping_err(skb, offset, ntohl(info));
99 
100 	return 0;
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (f.e. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		return NULL;
123 	}
124 	return sk;
125 }
126 
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 	spin_unlock(&sk->sk_lock.slock);
130 }
131 
132 /*
133  * Figure out, may we reply to this packet with icmp error.
134  *
135  * We do not reply, if:
136  *	- it was icmp error message.
137  *	- it is truncated, so that it is known, that protocol is ICMPV6
138  *	  (i.e. in the middle of some exthdr)
139  *
140  *	--ANK (980726)
141  */
142 
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 	int len = skb->len - ptr;
147 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 	__be16 frag_off;
149 
150 	if (len < 0)
151 		return true;
152 
153 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 	if (ptr < 0)
155 		return false;
156 	if (nexthdr == IPPROTO_ICMPV6) {
157 		u8 _type, *tp;
158 		tp = skb_header_pointer(skb,
159 			ptr+offsetof(struct icmp6hdr, icmp6_type),
160 			sizeof(_type), &_type);
161 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
162 			return true;
163 	}
164 	return false;
165 }
166 
167 static bool icmpv6_mask_allow(struct net *net, int type)
168 {
169 	if (type > ICMPV6_MSG_MAX)
170 		return true;
171 
172 	/* Limit if icmp type is set in ratemask. */
173 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
174 		return true;
175 
176 	return false;
177 }
178 
179 static bool icmpv6_global_allow(struct net *net, int type)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow())
185 		return true;
186 
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	dst_release(dst);
229 	return res;
230 }
231 
232 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
233 				  struct flowi6 *fl6)
234 {
235 	struct net *net = sock_net(sk);
236 	struct dst_entry *dst;
237 	bool res = false;
238 
239 	dst = ip6_route_output(net, sk, fl6);
240 	if (!dst->error) {
241 		struct rt6_info *rt = (struct rt6_info *)dst;
242 		struct in6_addr prefsrc;
243 
244 		rt6_get_prefsrc(rt, &prefsrc);
245 		res = !ipv6_addr_any(&prefsrc);
246 	}
247 	dst_release(dst);
248 	return res;
249 }
250 
251 /*
252  *	an inline helper for the "simple" if statement below
253  *	checks if parameter problem report is caused by an
254  *	unrecognized IPv6 option that has the Option Type
255  *	highest-order two bits set to 10
256  */
257 
258 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
259 {
260 	u8 _optval, *op;
261 
262 	offset += skb_network_offset(skb);
263 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
264 	if (!op)
265 		return true;
266 	return (*op & 0xC0) == 0x80;
267 }
268 
269 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
270 				struct icmp6hdr *thdr, int len)
271 {
272 	struct sk_buff *skb;
273 	struct icmp6hdr *icmp6h;
274 
275 	skb = skb_peek(&sk->sk_write_queue);
276 	if (!skb)
277 		return;
278 
279 	icmp6h = icmp6_hdr(skb);
280 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
281 	icmp6h->icmp6_cksum = 0;
282 
283 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
284 		skb->csum = csum_partial(icmp6h,
285 					sizeof(struct icmp6hdr), skb->csum);
286 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
287 						      &fl6->daddr,
288 						      len, fl6->flowi6_proto,
289 						      skb->csum);
290 	} else {
291 		__wsum tmp_csum = 0;
292 
293 		skb_queue_walk(&sk->sk_write_queue, skb) {
294 			tmp_csum = csum_add(tmp_csum, skb->csum);
295 		}
296 
297 		tmp_csum = csum_partial(icmp6h,
298 					sizeof(struct icmp6hdr), tmp_csum);
299 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
300 						      &fl6->daddr,
301 						      len, fl6->flowi6_proto,
302 						      tmp_csum);
303 	}
304 	ip6_push_pending_frames(sk);
305 }
306 
307 struct icmpv6_msg {
308 	struct sk_buff	*skb;
309 	int		offset;
310 	uint8_t		type;
311 };
312 
313 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
314 {
315 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
316 	struct sk_buff *org_skb = msg->skb;
317 	__wsum csum = 0;
318 
319 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
320 				      to, len, csum);
321 	skb->csum = csum_block_add(skb->csum, csum, odd);
322 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
323 		nf_ct_attach(skb, org_skb);
324 	return 0;
325 }
326 
327 #if IS_ENABLED(CONFIG_IPV6_MIP6)
328 static void mip6_addr_swap(struct sk_buff *skb)
329 {
330 	struct ipv6hdr *iph = ipv6_hdr(skb);
331 	struct inet6_skb_parm *opt = IP6CB(skb);
332 	struct ipv6_destopt_hao *hao;
333 	struct in6_addr tmp;
334 	int off;
335 
336 	if (opt->dsthao) {
337 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
338 		if (likely(off >= 0)) {
339 			hao = (struct ipv6_destopt_hao *)
340 					(skb_network_header(skb) + off);
341 			tmp = iph->saddr;
342 			iph->saddr = hao->addr;
343 			hao->addr = tmp;
344 		}
345 	}
346 }
347 #else
348 static inline void mip6_addr_swap(struct sk_buff *skb) {}
349 #endif
350 
351 static struct dst_entry *icmpv6_route_lookup(struct net *net,
352 					     struct sk_buff *skb,
353 					     struct sock *sk,
354 					     struct flowi6 *fl6)
355 {
356 	struct dst_entry *dst, *dst2;
357 	struct flowi6 fl2;
358 	int err;
359 
360 	err = ip6_dst_lookup(net, sk, &dst, fl6);
361 	if (err)
362 		return ERR_PTR(err);
363 
364 	/*
365 	 * We won't send icmp if the destination is known
366 	 * anycast.
367 	 */
368 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
369 		net_dbg_ratelimited("icmp6_send: acast source\n");
370 		dst_release(dst);
371 		return ERR_PTR(-EINVAL);
372 	}
373 
374 	/* No need to clone since we're just using its address. */
375 	dst2 = dst;
376 
377 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
378 	if (!IS_ERR(dst)) {
379 		if (dst != dst2)
380 			return dst;
381 	} else {
382 		if (PTR_ERR(dst) == -EPERM)
383 			dst = NULL;
384 		else
385 			return dst;
386 	}
387 
388 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
389 	if (err)
390 		goto relookup_failed;
391 
392 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
393 	if (err)
394 		goto relookup_failed;
395 
396 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
397 	if (!IS_ERR(dst2)) {
398 		dst_release(dst);
399 		dst = dst2;
400 	} else {
401 		err = PTR_ERR(dst2);
402 		if (err == -EPERM) {
403 			dst_release(dst);
404 			return dst2;
405 		} else
406 			goto relookup_failed;
407 	}
408 
409 relookup_failed:
410 	if (dst)
411 		return dst;
412 	return ERR_PTR(err);
413 }
414 
415 static struct net_device *icmp6_dev(const struct sk_buff *skb)
416 {
417 	struct net_device *dev = skb->dev;
418 
419 	/* for local traffic to local address, skb dev is the loopback
420 	 * device. Check if there is a dst attached to the skb and if so
421 	 * get the real device index. Same is needed for replies to a link
422 	 * local address on a device enslaved to an L3 master device
423 	 */
424 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
425 		const struct rt6_info *rt6 = skb_rt6_info(skb);
426 
427 		if (rt6)
428 			dev = rt6->rt6i_idev->dev;
429 	}
430 
431 	return dev;
432 }
433 
434 static int icmp6_iif(const struct sk_buff *skb)
435 {
436 	return icmp6_dev(skb)->ifindex;
437 }
438 
439 /*
440  *	Send an ICMP message in response to a packet in error
441  */
442 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
443 		const struct in6_addr *force_saddr)
444 {
445 	struct inet6_dev *idev = NULL;
446 	struct ipv6hdr *hdr = ipv6_hdr(skb);
447 	struct sock *sk;
448 	struct net *net;
449 	struct ipv6_pinfo *np;
450 	const struct in6_addr *saddr = NULL;
451 	struct dst_entry *dst;
452 	struct icmp6hdr tmp_hdr;
453 	struct flowi6 fl6;
454 	struct icmpv6_msg msg;
455 	struct ipcm6_cookie ipc6;
456 	int iif = 0;
457 	int addr_type = 0;
458 	int len;
459 	u32 mark;
460 
461 	if ((u8 *)hdr < skb->head ||
462 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
463 		return;
464 
465 	if (!skb->dev)
466 		return;
467 	net = dev_net(skb->dev);
468 	mark = IP6_REPLY_MARK(net, skb->mark);
469 	/*
470 	 *	Make sure we respect the rules
471 	 *	i.e. RFC 1885 2.4(e)
472 	 *	Rule (e.1) is enforced by not using icmp6_send
473 	 *	in any code that processes icmp errors.
474 	 */
475 	addr_type = ipv6_addr_type(&hdr->daddr);
476 
477 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
478 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
479 		saddr = &hdr->daddr;
480 
481 	/*
482 	 *	Dest addr check
483 	 */
484 
485 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
486 		if (type != ICMPV6_PKT_TOOBIG &&
487 		    !(type == ICMPV6_PARAMPROB &&
488 		      code == ICMPV6_UNK_OPTION &&
489 		      (opt_unrec(skb, info))))
490 			return;
491 
492 		saddr = NULL;
493 	}
494 
495 	addr_type = ipv6_addr_type(&hdr->saddr);
496 
497 	/*
498 	 *	Source addr check
499 	 */
500 
501 	if (__ipv6_addr_needs_scope_id(addr_type)) {
502 		iif = icmp6_iif(skb);
503 	} else {
504 		dst = skb_dst(skb);
505 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
506 	}
507 
508 	/*
509 	 *	Must not send error if the source does not uniquely
510 	 *	identify a single node (RFC2463 Section 2.4).
511 	 *	We check unspecified / multicast addresses here,
512 	 *	and anycast addresses will be checked later.
513 	 */
514 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
515 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
516 				    &hdr->saddr, &hdr->daddr);
517 		return;
518 	}
519 
520 	/*
521 	 *	Never answer to a ICMP packet.
522 	 */
523 	if (is_ineligible(skb)) {
524 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
525 				    &hdr->saddr, &hdr->daddr);
526 		return;
527 	}
528 
529 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
530 	local_bh_disable();
531 
532 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
533 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
534 		goto out_bh_enable;
535 
536 	mip6_addr_swap(skb);
537 
538 	sk = icmpv6_xmit_lock(net);
539 	if (!sk)
540 		goto out_bh_enable;
541 
542 	memset(&fl6, 0, sizeof(fl6));
543 	fl6.flowi6_proto = IPPROTO_ICMPV6;
544 	fl6.daddr = hdr->saddr;
545 	if (force_saddr)
546 		saddr = force_saddr;
547 	if (saddr) {
548 		fl6.saddr = *saddr;
549 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
550 		/* select a more meaningful saddr from input if */
551 		struct net_device *in_netdev;
552 
553 		in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
554 		if (in_netdev) {
555 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
556 					   inet6_sk(sk)->srcprefs,
557 					   &fl6.saddr);
558 			dev_put(in_netdev);
559 		}
560 	}
561 	fl6.flowi6_mark = mark;
562 	fl6.flowi6_oif = iif;
563 	fl6.fl6_icmp_type = type;
564 	fl6.fl6_icmp_code = code;
565 	fl6.flowi6_uid = sock_net_uid(net, NULL);
566 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
567 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
568 
569 	np = inet6_sk(sk);
570 
571 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
572 		goto out;
573 
574 	tmp_hdr.icmp6_type = type;
575 	tmp_hdr.icmp6_code = code;
576 	tmp_hdr.icmp6_cksum = 0;
577 	tmp_hdr.icmp6_pointer = htonl(info);
578 
579 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
580 		fl6.flowi6_oif = np->mcast_oif;
581 	else if (!fl6.flowi6_oif)
582 		fl6.flowi6_oif = np->ucast_oif;
583 
584 	ipcm6_init_sk(&ipc6, np);
585 	ipc6.sockc.mark = mark;
586 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
587 
588 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
589 	if (IS_ERR(dst))
590 		goto out;
591 
592 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
593 
594 	msg.skb = skb;
595 	msg.offset = skb_network_offset(skb);
596 	msg.type = type;
597 
598 	len = skb->len - msg.offset;
599 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
600 	if (len < 0) {
601 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
602 				    &hdr->saddr, &hdr->daddr);
603 		goto out_dst_release;
604 	}
605 
606 	rcu_read_lock();
607 	idev = __in6_dev_get(skb->dev);
608 
609 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
610 			    len + sizeof(struct icmp6hdr),
611 			    sizeof(struct icmp6hdr),
612 			    &ipc6, &fl6, (struct rt6_info *)dst,
613 			    MSG_DONTWAIT)) {
614 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
615 		ip6_flush_pending_frames(sk);
616 	} else {
617 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
618 					   len + sizeof(struct icmp6hdr));
619 	}
620 	rcu_read_unlock();
621 out_dst_release:
622 	dst_release(dst);
623 out:
624 	icmpv6_xmit_unlock(sk);
625 out_bh_enable:
626 	local_bh_enable();
627 }
628 EXPORT_SYMBOL(icmp6_send);
629 
630 /* Slightly more convenient version of icmp6_send.
631  */
632 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
633 {
634 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
635 	kfree_skb(skb);
636 }
637 
638 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
639  * if sufficient data bytes are available
640  * @nhs is the size of the tunnel header(s) :
641  *  Either an IPv4 header for SIT encap
642  *         an IPv4 header + GRE header for GRE encap
643  */
644 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
645 			       unsigned int data_len)
646 {
647 	struct in6_addr temp_saddr;
648 	struct rt6_info *rt;
649 	struct sk_buff *skb2;
650 	u32 info = 0;
651 
652 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
653 		return 1;
654 
655 	/* RFC 4884 (partial) support for ICMP extensions */
656 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
657 		data_len = 0;
658 
659 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
660 
661 	if (!skb2)
662 		return 1;
663 
664 	skb_dst_drop(skb2);
665 	skb_pull(skb2, nhs);
666 	skb_reset_network_header(skb2);
667 
668 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
669 			skb, 0);
670 
671 	if (rt && rt->dst.dev)
672 		skb2->dev = rt->dst.dev;
673 
674 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
675 
676 	if (data_len) {
677 		/* RFC 4884 (partial) support :
678 		 * insert 0 padding at the end, before the extensions
679 		 */
680 		__skb_push(skb2, nhs);
681 		skb_reset_network_header(skb2);
682 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
683 		memset(skb2->data + data_len - nhs, 0, nhs);
684 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
685 		 * and stored in reserved[0]
686 		 */
687 		info = (data_len/8) << 24;
688 	}
689 	if (type == ICMP_TIME_EXCEEDED)
690 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
691 			   info, &temp_saddr);
692 	else
693 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
694 			   info, &temp_saddr);
695 	if (rt)
696 		ip6_rt_put(rt);
697 
698 	kfree_skb(skb2);
699 
700 	return 0;
701 }
702 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
703 
704 static void icmpv6_echo_reply(struct sk_buff *skb)
705 {
706 	struct net *net = dev_net(skb->dev);
707 	struct sock *sk;
708 	struct inet6_dev *idev;
709 	struct ipv6_pinfo *np;
710 	const struct in6_addr *saddr = NULL;
711 	struct icmp6hdr *icmph = icmp6_hdr(skb);
712 	struct icmp6hdr tmp_hdr;
713 	struct flowi6 fl6;
714 	struct icmpv6_msg msg;
715 	struct dst_entry *dst;
716 	struct ipcm6_cookie ipc6;
717 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
718 	bool acast;
719 
720 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
721 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
722 		return;
723 
724 	saddr = &ipv6_hdr(skb)->daddr;
725 
726 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
727 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
728 		return;
729 
730 	if (!ipv6_unicast_destination(skb) &&
731 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
732 		saddr = NULL;
733 
734 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
735 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
736 
737 	memset(&fl6, 0, sizeof(fl6));
738 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
739 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
740 
741 	fl6.flowi6_proto = IPPROTO_ICMPV6;
742 	fl6.daddr = ipv6_hdr(skb)->saddr;
743 	if (saddr)
744 		fl6.saddr = *saddr;
745 	fl6.flowi6_oif = icmp6_iif(skb);
746 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
747 	fl6.flowi6_mark = mark;
748 	fl6.flowi6_uid = sock_net_uid(net, NULL);
749 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
750 
751 	local_bh_disable();
752 	sk = icmpv6_xmit_lock(net);
753 	if (!sk)
754 		goto out_bh_enable;
755 	np = inet6_sk(sk);
756 
757 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
758 		fl6.flowi6_oif = np->mcast_oif;
759 	else if (!fl6.flowi6_oif)
760 		fl6.flowi6_oif = np->ucast_oif;
761 
762 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
763 		goto out;
764 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
765 	if (IS_ERR(dst))
766 		goto out;
767 
768 	/* Check the ratelimit */
769 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
770 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
771 		goto out_dst_release;
772 
773 	idev = __in6_dev_get(skb->dev);
774 
775 	msg.skb = skb;
776 	msg.offset = 0;
777 	msg.type = ICMPV6_ECHO_REPLY;
778 
779 	ipcm6_init_sk(&ipc6, np);
780 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
781 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
782 	ipc6.sockc.mark = mark;
783 
784 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
785 			    skb->len + sizeof(struct icmp6hdr),
786 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
787 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
788 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
789 		ip6_flush_pending_frames(sk);
790 	} else {
791 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
792 					   skb->len + sizeof(struct icmp6hdr));
793 	}
794 out_dst_release:
795 	dst_release(dst);
796 out:
797 	icmpv6_xmit_unlock(sk);
798 out_bh_enable:
799 	local_bh_enable();
800 }
801 
802 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
803 {
804 	const struct inet6_protocol *ipprot;
805 	int inner_offset;
806 	__be16 frag_off;
807 	u8 nexthdr;
808 	struct net *net = dev_net(skb->dev);
809 
810 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
811 		goto out;
812 
813 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
814 	if (ipv6_ext_hdr(nexthdr)) {
815 		/* now skip over extension headers */
816 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
817 						&nexthdr, &frag_off);
818 		if (inner_offset < 0)
819 			goto out;
820 	} else {
821 		inner_offset = sizeof(struct ipv6hdr);
822 	}
823 
824 	/* Checkin header including 8 bytes of inner protocol header. */
825 	if (!pskb_may_pull(skb, inner_offset+8))
826 		goto out;
827 
828 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
829 	   Without this we will not able f.e. to make source routed
830 	   pmtu discovery.
831 	   Corresponding argument (opt) to notifiers is already added.
832 	   --ANK (980726)
833 	 */
834 
835 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
836 	if (ipprot && ipprot->err_handler)
837 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
838 
839 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
840 	return;
841 
842 out:
843 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
844 }
845 
846 /*
847  *	Handle icmp messages
848  */
849 
850 static int icmpv6_rcv(struct sk_buff *skb)
851 {
852 	struct net *net = dev_net(skb->dev);
853 	struct net_device *dev = icmp6_dev(skb);
854 	struct inet6_dev *idev = __in6_dev_get(dev);
855 	const struct in6_addr *saddr, *daddr;
856 	struct icmp6hdr *hdr;
857 	u8 type;
858 	bool success = false;
859 
860 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
861 		struct sec_path *sp = skb_sec_path(skb);
862 		int nh;
863 
864 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
865 				 XFRM_STATE_ICMP))
866 			goto drop_no_count;
867 
868 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
869 			goto drop_no_count;
870 
871 		nh = skb_network_offset(skb);
872 		skb_set_network_header(skb, sizeof(*hdr));
873 
874 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
875 			goto drop_no_count;
876 
877 		skb_set_network_header(skb, nh);
878 	}
879 
880 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
881 
882 	saddr = &ipv6_hdr(skb)->saddr;
883 	daddr = &ipv6_hdr(skb)->daddr;
884 
885 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
886 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
887 				    saddr, daddr);
888 		goto csum_error;
889 	}
890 
891 	if (!pskb_pull(skb, sizeof(*hdr)))
892 		goto discard_it;
893 
894 	hdr = icmp6_hdr(skb);
895 
896 	type = hdr->icmp6_type;
897 
898 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
899 
900 	switch (type) {
901 	case ICMPV6_ECHO_REQUEST:
902 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
903 			icmpv6_echo_reply(skb);
904 		break;
905 
906 	case ICMPV6_ECHO_REPLY:
907 		success = ping_rcv(skb);
908 		break;
909 
910 	case ICMPV6_PKT_TOOBIG:
911 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
912 		   standard destination cache. Seems, only "advanced"
913 		   destination cache will allow to solve this problem
914 		   --ANK (980726)
915 		 */
916 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
917 			goto discard_it;
918 		hdr = icmp6_hdr(skb);
919 
920 		/* to notify */
921 		fallthrough;
922 	case ICMPV6_DEST_UNREACH:
923 	case ICMPV6_TIME_EXCEED:
924 	case ICMPV6_PARAMPROB:
925 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
926 		break;
927 
928 	case NDISC_ROUTER_SOLICITATION:
929 	case NDISC_ROUTER_ADVERTISEMENT:
930 	case NDISC_NEIGHBOUR_SOLICITATION:
931 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
932 	case NDISC_REDIRECT:
933 		ndisc_rcv(skb);
934 		break;
935 
936 	case ICMPV6_MGM_QUERY:
937 		igmp6_event_query(skb);
938 		break;
939 
940 	case ICMPV6_MGM_REPORT:
941 		igmp6_event_report(skb);
942 		break;
943 
944 	case ICMPV6_MGM_REDUCTION:
945 	case ICMPV6_NI_QUERY:
946 	case ICMPV6_NI_REPLY:
947 	case ICMPV6_MLD2_REPORT:
948 	case ICMPV6_DHAAD_REQUEST:
949 	case ICMPV6_DHAAD_REPLY:
950 	case ICMPV6_MOBILE_PREFIX_SOL:
951 	case ICMPV6_MOBILE_PREFIX_ADV:
952 		break;
953 
954 	default:
955 		/* informational */
956 		if (type & ICMPV6_INFOMSG_MASK)
957 			break;
958 
959 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
960 				    saddr, daddr);
961 
962 		/*
963 		 * error of unknown type.
964 		 * must pass to upper level
965 		 */
966 
967 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
968 	}
969 
970 	/* until the v6 path can be better sorted assume failure and
971 	 * preserve the status quo behaviour for the rest of the paths to here
972 	 */
973 	if (success)
974 		consume_skb(skb);
975 	else
976 		kfree_skb(skb);
977 
978 	return 0;
979 
980 csum_error:
981 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
982 discard_it:
983 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
984 drop_no_count:
985 	kfree_skb(skb);
986 	return 0;
987 }
988 
989 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
990 		      u8 type,
991 		      const struct in6_addr *saddr,
992 		      const struct in6_addr *daddr,
993 		      int oif)
994 {
995 	memset(fl6, 0, sizeof(*fl6));
996 	fl6->saddr = *saddr;
997 	fl6->daddr = *daddr;
998 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
999 	fl6->fl6_icmp_type	= type;
1000 	fl6->fl6_icmp_code	= 0;
1001 	fl6->flowi6_oif		= oif;
1002 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
1003 }
1004 
1005 static void __net_exit icmpv6_sk_exit(struct net *net)
1006 {
1007 	int i;
1008 
1009 	for_each_possible_cpu(i)
1010 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1011 	free_percpu(net->ipv6.icmp_sk);
1012 }
1013 
1014 static int __net_init icmpv6_sk_init(struct net *net)
1015 {
1016 	struct sock *sk;
1017 	int err, i;
1018 
1019 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1020 	if (!net->ipv6.icmp_sk)
1021 		return -ENOMEM;
1022 
1023 	for_each_possible_cpu(i) {
1024 		err = inet_ctl_sock_create(&sk, PF_INET6,
1025 					   SOCK_RAW, IPPROTO_ICMPV6, net);
1026 		if (err < 0) {
1027 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1028 			       err);
1029 			goto fail;
1030 		}
1031 
1032 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1033 
1034 		/* Enough space for 2 64K ICMP packets, including
1035 		 * sk_buff struct overhead.
1036 		 */
1037 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1038 	}
1039 	return 0;
1040 
1041  fail:
1042 	icmpv6_sk_exit(net);
1043 	return err;
1044 }
1045 
1046 static struct pernet_operations icmpv6_sk_ops = {
1047 	.init = icmpv6_sk_init,
1048 	.exit = icmpv6_sk_exit,
1049 };
1050 
1051 int __init icmpv6_init(void)
1052 {
1053 	int err;
1054 
1055 	err = register_pernet_subsys(&icmpv6_sk_ops);
1056 	if (err < 0)
1057 		return err;
1058 
1059 	err = -EAGAIN;
1060 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1061 		goto fail;
1062 
1063 	err = inet6_register_icmp_sender(icmp6_send);
1064 	if (err)
1065 		goto sender_reg_err;
1066 	return 0;
1067 
1068 sender_reg_err:
1069 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1070 fail:
1071 	pr_err("Failed to register ICMP6 protocol\n");
1072 	unregister_pernet_subsys(&icmpv6_sk_ops);
1073 	return err;
1074 }
1075 
1076 void icmpv6_cleanup(void)
1077 {
1078 	inet6_unregister_icmp_sender(icmp6_send);
1079 	unregister_pernet_subsys(&icmpv6_sk_ops);
1080 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1081 }
1082 
1083 
/*
 * Errno and fatality for each Destination Unreachable code, indexed by
 * the ICMPv6 code value (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ ENETUNREACH,	0 },	/* NOROUTE */
	{ EACCES,	1 },	/* ADM_PROHIBITED */
	{ EHOSTUNREACH,	0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ EHOSTUNREACH,	0 },	/* ADDR_UNREACH */
	{ ECONNREFUSED,	1 },	/* PORT_UNREACH */
	{ EACCES,	1 },	/* POLICY_FAIL */
	{ EACCES,	1 },	/* REJECT_ROUTE */
};
1117 
1118 int icmpv6_err_convert(u8 type, u8 code, int *err)
1119 {
1120 	int fatal = 0;
1121 
1122 	*err = EPROTO;
1123 
1124 	switch (type) {
1125 	case ICMPV6_DEST_UNREACH:
1126 		fatal = 1;
1127 		if (code < ARRAY_SIZE(tab_unreach)) {
1128 			*err  = tab_unreach[code].err;
1129 			fatal = tab_unreach[code].fatal;
1130 		}
1131 		break;
1132 
1133 	case ICMPV6_PKT_TOOBIG:
1134 		*err = EMSGSIZE;
1135 		break;
1136 
1137 	case ICMPV6_PARAMPROB:
1138 		*err = EPROTO;
1139 		fatal = 1;
1140 		break;
1141 
1142 	case ICMPV6_TIME_EXCEED:
1143 		*err = EHOSTUNREACH;
1144 		break;
1145 	}
1146 
1147 	return fatal;
1148 }
1149 EXPORT_SYMBOL(icmpv6_err_convert);
1150 
1151 #ifdef CONFIG_SYSCTL
1152 static struct ctl_table ipv6_icmp_table_template[] = {
1153 	{
1154 		.procname	= "ratelimit",
1155 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1156 		.maxlen		= sizeof(int),
1157 		.mode		= 0644,
1158 		.proc_handler	= proc_dointvec_ms_jiffies,
1159 	},
1160 	{
1161 		.procname	= "echo_ignore_all",
1162 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1163 		.maxlen		= sizeof(int),
1164 		.mode		= 0644,
1165 		.proc_handler = proc_dointvec,
1166 	},
1167 	{
1168 		.procname	= "echo_ignore_multicast",
1169 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1170 		.maxlen		= sizeof(int),
1171 		.mode		= 0644,
1172 		.proc_handler = proc_dointvec,
1173 	},
1174 	{
1175 		.procname	= "echo_ignore_anycast",
1176 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1177 		.maxlen		= sizeof(int),
1178 		.mode		= 0644,
1179 		.proc_handler = proc_dointvec,
1180 	},
1181 	{
1182 		.procname	= "ratemask",
1183 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1184 		.maxlen		= ICMPV6_MSG_MAX + 1,
1185 		.mode		= 0644,
1186 		.proc_handler = proc_do_large_bitmap,
1187 	},
1188 	{ },
1189 };
1190 
1191 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1192 {
1193 	struct ctl_table *table;
1194 
1195 	table = kmemdup(ipv6_icmp_table_template,
1196 			sizeof(ipv6_icmp_table_template),
1197 			GFP_KERNEL);
1198 
1199 	if (table) {
1200 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1201 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1202 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1203 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1204 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1205 	}
1206 	return table;
1207 }
1208 #endif
1209