xref: /linux/net/ipv6/icmp.c (revision 1fc31357ad194fb98691f3d122bcd47e59239e83)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <asm/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
static int icmpv6_rcv(struct sk_buff *skb);

/* ICMPv6 transport registration: skip xfrm policy checks on input
 * (NOPOLICY) and mark it as a terminal protocol (FINAL).
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
112 
/*
 * Grab this CPU's ICMPv6 control socket with BHs disabled.  Returns
 * NULL (with BHs re-enabled) if the socket is already locked on this
 * CPU, i.e. we are being re-entered from the transmit path.
 */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	local_bh_disable();

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		local_bh_enable();
		return NULL;
	}
	return sk;
}
130 
/* Release the socket lock taken by icmpv6_xmit_lock() and re-enable BHs. */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock_bh(&sk->sk_lock.slock);
}
135 
136 /*
137  * Figure out, may we reply to this packet with icmp error.
138  *
139  * We do not reply, if:
140  *	- it was icmp error message.
141  *	- it is truncated, so that it is known, that protocol is ICMPV6
142  *	  (i.e. in the middle of some exthdr)
143  *
144  *	--ANK (980726)
145  */
146 
147 static bool is_ineligible(const struct sk_buff *skb)
148 {
149 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
150 	int len = skb->len - ptr;
151 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
152 	__be16 frag_off;
153 
154 	if (len < 0)
155 		return true;
156 
157 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
158 	if (ptr < 0)
159 		return false;
160 	if (nexthdr == IPPROTO_ICMPV6) {
161 		u8 _type, *tp;
162 		tp = skb_header_pointer(skb,
163 			ptr+offsetof(struct icmp6hdr, icmp6_type),
164 			sizeof(_type), &_type);
165 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
166 			return true;
167 	}
168 	return false;
169 }
170 
171 /*
172  * Check the ICMP output rate limit
173  */
/*
 * Decide whether an outgoing ICMPv6 message of @type may be sent now,
 * applying the global and per-destination rate limits.
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Informational messages are not limited. */
	if (type & ICMPV6_INFOMSG_MASK)
		return true;

	/* Do not limit pmtu discovery, it would break it. */
	if (type == ICMPV6_PKT_TOOBIG)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Global limit first, then the per-peer token bucket
		 * keyed on the destination address.
		 */
		if (icmp_global_allow()) {
			struct inet_peer *peer;

			peer = inet_getpeer_v6(net->ipv6.peers,
					       &fl6->daddr, 1);
			res = inet_peer_xrlim_allow(peer, tmo);
			if (peer)
				inet_putpeer(peer);
		}
	}
	dst_release(dst);
	return res;
}
221 
222 /*
223  *	an inline helper for the "simple" if statement below
224  *	checks if parameter problem report is caused by an
225  *	unrecognized IPv6 option that has the Option Type
226  *	highest-order two bits set to 10
227  */
228 
229 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
230 {
231 	u8 _optval, *op;
232 
233 	offset += skb_network_offset(skb);
234 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
235 	if (!op)
236 		return true;
237 	return (*op & 0xC0) == 0x80;
238 }
239 
/*
 * Fill in the ICMPv6 header on the queued frame(s), compute the
 * checksum over the pseudo-header and payload of length @len, and hand
 * the pending frames to the IP layer for transmission.
 */
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		goto out;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single skb: fold the header into its partial csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: accumulate each skb's csum first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
out:
	return err;
}
280 
/* State handed to icmpv6_getfrag() while quoting the original packet. */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet being quoted in the ICMP body */
	int		offset;	/* start of the region to copy from skb */
	uint8_t		type;	/* ICMPv6 type being generated */
};
286 
/*
 * ip6_append_data() getfrag callback: copy @len bytes of the original
 * packet into the new ICMPv6 message and update the running checksum.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum = 0;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	/* Error messages inherit the conntrack entry of the original skb. */
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
300 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * Mobile IPv6: if the packet carried a Home Address destination option,
 * swap the IPv6 source address with the home address so the ICMP error
 * is generated against the mobile node's home address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
324 
/*
 * Find a route for an outgoing ICMPv6 error.  The normal route+xfrm
 * lookup is tried first; if IPsec policy rejects it with -EPERM, retry
 * with the flow decoded in reverse from the offending packet using
 * XFRM_LOOKUP_ICMP.  Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;	/* transformed route: done */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* policy refused: try reverse flow */
		else
			return dst;
	}

	/* Relookup with the flow decoded from the errored packet. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Also reached on success: return whatever dst we still hold. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
388 
389 /*
390  *	Send an ICMP message in response to a packet in error
391  */
392 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
393 		       const struct in6_addr *force_saddr)
394 {
395 	struct net *net = dev_net(skb->dev);
396 	struct inet6_dev *idev = NULL;
397 	struct ipv6hdr *hdr = ipv6_hdr(skb);
398 	struct sock *sk;
399 	struct ipv6_pinfo *np;
400 	const struct in6_addr *saddr = NULL;
401 	struct dst_entry *dst;
402 	struct icmp6hdr tmp_hdr;
403 	struct flowi6 fl6;
404 	struct icmpv6_msg msg;
405 	struct sockcm_cookie sockc_unused = {0};
406 	struct ipcm6_cookie ipc6;
407 	int iif = 0;
408 	int addr_type = 0;
409 	int len;
410 	int err = 0;
411 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
412 
413 	if ((u8 *)hdr < skb->head ||
414 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
415 		return;
416 
417 	/*
418 	 *	Make sure we respect the rules
419 	 *	i.e. RFC 1885 2.4(e)
420 	 *	Rule (e.1) is enforced by not using icmp6_send
421 	 *	in any code that processes icmp errors.
422 	 */
423 	addr_type = ipv6_addr_type(&hdr->daddr);
424 
425 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
426 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
427 		saddr = &hdr->daddr;
428 
429 	/*
430 	 *	Dest addr check
431 	 */
432 
433 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
434 		if (type != ICMPV6_PKT_TOOBIG &&
435 		    !(type == ICMPV6_PARAMPROB &&
436 		      code == ICMPV6_UNK_OPTION &&
437 		      (opt_unrec(skb, info))))
438 			return;
439 
440 		saddr = NULL;
441 	}
442 
443 	addr_type = ipv6_addr_type(&hdr->saddr);
444 
445 	/*
446 	 *	Source addr check
447 	 */
448 
449 	if (__ipv6_addr_needs_scope_id(addr_type))
450 		iif = skb->dev->ifindex;
451 	else
452 		iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
453 
454 	/*
455 	 *	Must not send error if the source does not uniquely
456 	 *	identify a single node (RFC2463 Section 2.4).
457 	 *	We check unspecified / multicast addresses here,
458 	 *	and anycast addresses will be checked later.
459 	 */
460 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
461 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
462 				    &hdr->saddr, &hdr->daddr);
463 		return;
464 	}
465 
466 	/*
467 	 *	Never answer to a ICMP packet.
468 	 */
469 	if (is_ineligible(skb)) {
470 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
471 				    &hdr->saddr, &hdr->daddr);
472 		return;
473 	}
474 
475 	mip6_addr_swap(skb);
476 
477 	memset(&fl6, 0, sizeof(fl6));
478 	fl6.flowi6_proto = IPPROTO_ICMPV6;
479 	fl6.daddr = hdr->saddr;
480 	if (force_saddr)
481 		saddr = force_saddr;
482 	if (saddr)
483 		fl6.saddr = *saddr;
484 	fl6.flowi6_mark = mark;
485 	fl6.flowi6_oif = iif;
486 	fl6.fl6_icmp_type = type;
487 	fl6.fl6_icmp_code = code;
488 	fl6.flowi6_uid = sock_net_uid(net, NULL);
489 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
490 
491 	sk = icmpv6_xmit_lock(net);
492 	if (!sk)
493 		return;
494 	sk->sk_mark = mark;
495 	np = inet6_sk(sk);
496 
497 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
498 		goto out;
499 
500 	tmp_hdr.icmp6_type = type;
501 	tmp_hdr.icmp6_code = code;
502 	tmp_hdr.icmp6_cksum = 0;
503 	tmp_hdr.icmp6_pointer = htonl(info);
504 
505 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
506 		fl6.flowi6_oif = np->mcast_oif;
507 	else if (!fl6.flowi6_oif)
508 		fl6.flowi6_oif = np->ucast_oif;
509 
510 	ipc6.tclass = np->tclass;
511 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
512 
513 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
514 	if (IS_ERR(dst))
515 		goto out;
516 
517 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
518 	ipc6.dontfrag = np->dontfrag;
519 	ipc6.opt = NULL;
520 
521 	msg.skb = skb;
522 	msg.offset = skb_network_offset(skb);
523 	msg.type = type;
524 
525 	len = skb->len - msg.offset;
526 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
527 	if (len < 0) {
528 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
529 				    &hdr->saddr, &hdr->daddr);
530 		goto out_dst_release;
531 	}
532 
533 	rcu_read_lock();
534 	idev = __in6_dev_get(skb->dev);
535 
536 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
537 			      len + sizeof(struct icmp6hdr),
538 			      sizeof(struct icmp6hdr),
539 			      &ipc6, &fl6, (struct rt6_info *)dst,
540 			      MSG_DONTWAIT, &sockc_unused);
541 	if (err) {
542 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
543 		ip6_flush_pending_frames(sk);
544 	} else {
545 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
546 						 len + sizeof(struct icmp6hdr));
547 	}
548 	rcu_read_unlock();
549 out_dst_release:
550 	dst_release(dst);
551 out:
552 	icmpv6_xmit_unlock(sk);
553 }
554 
555 /* Slightly more convenient version of icmp6_send.
556  */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	/* Report a Parameter Problem pointing at offset @pos, then consume
	 * the offending skb: callers hand over ownership.
	 */
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
562 
563 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
564  * if sufficient data bytes are available
565  * @nhs is the size of the tunnel header(s) :
566  *  Either an IPv4 header for SIT encap
567  *         an IPv4 header + GRE header for GRE encap
568  */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the full tunnel header plus an inner IPv6 header + 8 bytes. */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* skb_copy() when we will rewrite the data; otherwise a clone. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	/* Strip the tunnel header so skb2 starts at the inner IPv6 packet. */
	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Use the v4-mapped form of the outer IPv4 source as our source. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
627 
/* Build and send an Echo Reply for the Echo Request in @skb. */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int err = 0;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	struct sockcm_cookie sockc_unused = {0};

	saddr = &ipv6_hdr(skb)->daddr;

	/* Reply from the request's destination address only when it was
	 * unicast, or anycast with anycast_src_echo_reply enabled;
	 * otherwise let source address selection pick one.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
		saddr = NULL;

	/* Reuse the request header, flipping the type to Echo Reply. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = skb->dev->ifindex;
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		return;
	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	err = ip6_dst_lookup(net, sk, &dst, &fl6);
	if (err)
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
				sizeof(struct icmp6hdr), &ipc6, &fl6,
				(struct rt6_info *)dst, MSG_DONTWAIT,
				&sockc_unused);

	if (err) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
	}
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
}
711 
/*
 * Deliver a received ICMPv6 error to the upper-layer protocol the
 * quoted (inner) packet belongs to, and to any matching raw sockets.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
755 
756 /*
757  *	Handle icmp messages
758  */
759 
/* Main receive handler for the ICMPv6 protocol; always returns 0. */
static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only accept policy-failing packets if the last xfrm
		 * state explicitly allows ICMP (XFRM_STATE_ICMP).
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted inner
		 * packet to run the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* pskb_may_pull() may have reallocated: reload hdr. */
		hdr = icmp6_hdr(skb);

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		/* Handled elsewhere or intentionally ignored. */
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
898 
899 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
900 		      u8 type,
901 		      const struct in6_addr *saddr,
902 		      const struct in6_addr *daddr,
903 		      int oif)
904 {
905 	memset(fl6, 0, sizeof(*fl6));
906 	fl6->saddr = *saddr;
907 	fl6->daddr = *daddr;
908 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
909 	fl6->fl6_icmp_type	= type;
910 	fl6->fl6_icmp_code	= 0;
911 	fl6->flowi6_oif		= oif;
912 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
913 }
914 
915 static int __net_init icmpv6_sk_init(struct net *net)
916 {
917 	struct sock *sk;
918 	int err, i, j;
919 
920 	net->ipv6.icmp_sk =
921 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
922 	if (!net->ipv6.icmp_sk)
923 		return -ENOMEM;
924 
925 	for_each_possible_cpu(i) {
926 		err = inet_ctl_sock_create(&sk, PF_INET6,
927 					   SOCK_RAW, IPPROTO_ICMPV6, net);
928 		if (err < 0) {
929 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
930 			       err);
931 			goto fail;
932 		}
933 
934 		net->ipv6.icmp_sk[i] = sk;
935 
936 		/* Enough space for 2 64K ICMP packets, including
937 		 * sk_buff struct overhead.
938 		 */
939 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
940 	}
941 	return 0;
942 
943  fail:
944 	for (j = 0; j < i; j++)
945 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
946 	kfree(net->ipv6.icmp_sk);
947 	return err;
948 }
949 
950 static void __net_exit icmpv6_sk_exit(struct net *net)
951 {
952 	int i;
953 
954 	for_each_possible_cpu(i) {
955 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
956 	}
957 	kfree(net->ipv6.icmp_sk);
958 }
959 
/* Per-network-namespace setup/teardown of the ICMPv6 control sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
964 
/*
 * Register the ICMPv6 subsystem: per-netns sockets, the IPPROTO_ICMPV6
 * input handler, and the error sender used by icmpv6_send().
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	/* Unwind in reverse order of registration. */
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
989 
/* Undo icmpv6_init(), in reverse order of registration. */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
996 
997 
/* Map ICMPV6_DEST_UNREACH codes (used as index) to an errno and a
 * fatal flag for icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;	/* errno reported to the socket */
	int fatal;	/* non-zero: error is fatal for the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1031 
1032 int icmpv6_err_convert(u8 type, u8 code, int *err)
1033 {
1034 	int fatal = 0;
1035 
1036 	*err = EPROTO;
1037 
1038 	switch (type) {
1039 	case ICMPV6_DEST_UNREACH:
1040 		fatal = 1;
1041 		if (code < ARRAY_SIZE(tab_unreach)) {
1042 			*err  = tab_unreach[code].err;
1043 			fatal = tab_unreach[code].fatal;
1044 		}
1045 		break;
1046 
1047 	case ICMPV6_PKT_TOOBIG:
1048 		*err = EMSGSIZE;
1049 		break;
1050 
1051 	case ICMPV6_PARAMPROB:
1052 		*err = EPROTO;
1053 		fatal = 1;
1054 		break;
1055 
1056 	case ICMPV6_TIME_EXCEED:
1057 		*err = EHOSTUNREACH;
1058 		break;
1059 	}
1060 
1061 	return fatal;
1062 }
1063 EXPORT_SYMBOL(icmpv6_err_convert);
1064 
1065 #ifdef CONFIG_SYSCTL
/* Template for the per-netns ICMPv6 sysctl table; "ratelimit" is
 * icmpv6_time, converted between milliseconds and jiffies by the
 * proc handler.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};
1076 
1077 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1078 {
1079 	struct ctl_table *table;
1080 
1081 	table = kmemdup(ipv6_icmp_table_template,
1082 			sizeof(ipv6_icmp_table_template),
1083 			GFP_KERNEL);
1084 
1085 	if (table)
1086 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1087 
1088 	return table;
1089 }
1090 #endif
1091