xref: /linux/net/ipv6/icmp.c (revision f3539c12d8196ce0a1993364d30b3a18908470d1)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
 *	yoshfuji		:	ensure a parameter problem is sent for
 *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <asm/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0);
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
98 
99 	if (!(type & ICMPV6_INFOMSG_MASK))
100 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
101 			ping_err(skb, offset, ntohl(info));
102 }
103 
104 static int icmpv6_rcv(struct sk_buff *skb);
105 
106 static const struct inet6_protocol icmpv6_protocol = {
107 	.handler	=	icmpv6_rcv,
108 	.err_handler	=	icmpv6_err,
109 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
110 };
111 
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	local_bh_disable();
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		local_bh_enable();
125 		return NULL;
126 	}
127 	return sk;
128 }
129 
130 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
131 {
132 	spin_unlock_bh(&sk->sk_lock.slock);
133 }
134 
135 /*
136  * Figure out, may we reply to this packet with icmp error.
137  *
138  * We do not reply, if:
139  *	- it was icmp error message.
140  *	- it is truncated, so that it is known, that protocol is ICMPV6
141  *	  (i.e. in the middle of some exthdr)
142  *
143  *	--ANK (980726)
144  */
145 
146 static bool is_ineligible(const struct sk_buff *skb)
147 {
148 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
149 	int len = skb->len - ptr;
150 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
151 	__be16 frag_off;
152 
153 	if (len < 0)
154 		return true;
155 
156 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
157 	if (ptr < 0)
158 		return false;
159 	if (nexthdr == IPPROTO_ICMPV6) {
160 		u8 _type, *tp;
161 		tp = skb_header_pointer(skb,
162 			ptr+offsetof(struct icmp6hdr, icmp6_type),
163 			sizeof(_type), &_type);
164 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
165 			return true;
166 	}
167 	return false;
168 }
169 
170 /*
171  * Check the ICMP output rate limit
172  */
173 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174 			       struct flowi6 *fl6)
175 {
176 	struct net *net = sock_net(sk);
177 	struct dst_entry *dst;
178 	bool res = false;
179 
180 	/* Informational messages are not limited. */
181 	if (type & ICMPV6_INFOMSG_MASK)
182 		return true;
183 
184 	/* Do not limit pmtu discovery, it would break it. */
185 	if (type == ICMPV6_PKT_TOOBIG)
186 		return true;
187 
188 	/*
189 	 * Look up the output route.
190 	 * XXX: perhaps the expire for routing entries cloned by
191 	 * this lookup should be more aggressive (not longer than timeout).
192 	 */
193 	dst = ip6_route_output(net, sk, fl6);
194 	if (dst->error) {
195 		IP6_INC_STATS(net, ip6_dst_idev(dst),
196 			      IPSTATS_MIB_OUTNOROUTES);
197 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198 		res = true;
199 	} else {
200 		struct rt6_info *rt = (struct rt6_info *)dst;
201 		int tmo = net->ipv6.sysctl.icmpv6_time;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
206 
207 		if (icmp_global_allow()) {
208 			struct inet_peer *peer;
209 
210 			peer = inet_getpeer_v6(net->ipv6.peers,
211 					       &fl6->daddr, 1);
212 			res = inet_peer_xrlim_allow(peer, tmo);
213 			if (peer)
214 				inet_putpeer(peer);
215 		}
216 	}
217 	dst_release(dst);
218 	return res;
219 }
220 
221 /*
222  *	an inline helper for the "simple" if statement below
223  *	checks if parameter problem report is caused by an
224  *	unrecognized IPv6 option that has the Option Type
225  *	highest-order two bits set to 10
226  */
227 
228 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
229 {
230 	u8 _optval, *op;
231 
232 	offset += skb_network_offset(skb);
233 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
234 	if (!op)
235 		return true;
236 	return (*op & 0xC0) == 0x80;
237 }
238 
239 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
240 			       struct icmp6hdr *thdr, int len)
241 {
242 	struct sk_buff *skb;
243 	struct icmp6hdr *icmp6h;
244 	int err = 0;
245 
246 	skb = skb_peek(&sk->sk_write_queue);
247 	if (!skb)
248 		goto out;
249 
250 	icmp6h = icmp6_hdr(skb);
251 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
252 	icmp6h->icmp6_cksum = 0;
253 
254 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
255 		skb->csum = csum_partial(icmp6h,
256 					sizeof(struct icmp6hdr), skb->csum);
257 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
258 						      &fl6->daddr,
259 						      len, fl6->flowi6_proto,
260 						      skb->csum);
261 	} else {
262 		__wsum tmp_csum = 0;
263 
264 		skb_queue_walk(&sk->sk_write_queue, skb) {
265 			tmp_csum = csum_add(tmp_csum, skb->csum);
266 		}
267 
268 		tmp_csum = csum_partial(icmp6h,
269 					sizeof(struct icmp6hdr), tmp_csum);
270 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
271 						      &fl6->daddr,
272 						      len, fl6->flowi6_proto,
273 						      tmp_csum);
274 	}
275 	ip6_push_pending_frames(sk);
276 out:
277 	return err;
278 }
279 
280 struct icmpv6_msg {
281 	struct sk_buff	*skb;
282 	int		offset;
283 	uint8_t		type;
284 };
285 
286 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
287 {
288 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
289 	struct sk_buff *org_skb = msg->skb;
290 	__wsum csum = 0;
291 
292 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
293 				      to, len, csum);
294 	skb->csum = csum_block_add(skb->csum, csum, odd);
295 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
296 		nf_ct_attach(skb, org_skb);
297 	return 0;
298 }
299 
300 #if IS_ENABLED(CONFIG_IPV6_MIP6)
301 static void mip6_addr_swap(struct sk_buff *skb)
302 {
303 	struct ipv6hdr *iph = ipv6_hdr(skb);
304 	struct inet6_skb_parm *opt = IP6CB(skb);
305 	struct ipv6_destopt_hao *hao;
306 	struct in6_addr tmp;
307 	int off;
308 
309 	if (opt->dsthao) {
310 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
311 		if (likely(off >= 0)) {
312 			hao = (struct ipv6_destopt_hao *)
313 					(skb_network_header(skb) + off);
314 			tmp = iph->saddr;
315 			iph->saddr = hao->addr;
316 			hao->addr = tmp;
317 		}
318 	}
319 }
320 #else
321 static inline void mip6_addr_swap(struct sk_buff *skb) {}
322 #endif
323 
324 static struct dst_entry *icmpv6_route_lookup(struct net *net,
325 					     struct sk_buff *skb,
326 					     struct sock *sk,
327 					     struct flowi6 *fl6)
328 {
329 	struct dst_entry *dst, *dst2;
330 	struct flowi6 fl2;
331 	int err;
332 
333 	err = ip6_dst_lookup(net, sk, &dst, fl6);
334 	if (err)
335 		return ERR_PTR(err);
336 
337 	/*
338 	 * We won't send icmp if the destination is known
339 	 * anycast.
340 	 */
341 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
342 		net_dbg_ratelimited("icmp6_send: acast source\n");
343 		dst_release(dst);
344 		return ERR_PTR(-EINVAL);
345 	}
346 
347 	/* No need to clone since we're just using its address. */
348 	dst2 = dst;
349 
350 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
351 	if (!IS_ERR(dst)) {
352 		if (dst != dst2)
353 			return dst;
354 	} else {
355 		if (PTR_ERR(dst) == -EPERM)
356 			dst = NULL;
357 		else
358 			return dst;
359 	}
360 
361 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
362 	if (err)
363 		goto relookup_failed;
364 
365 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
366 	if (err)
367 		goto relookup_failed;
368 
369 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
370 	if (!IS_ERR(dst2)) {
371 		dst_release(dst);
372 		dst = dst2;
373 	} else {
374 		err = PTR_ERR(dst2);
375 		if (err == -EPERM) {
376 			dst_release(dst);
377 			return dst2;
378 		} else
379 			goto relookup_failed;
380 	}
381 
382 relookup_failed:
383 	if (dst)
384 		return dst;
385 	return ERR_PTR(err);
386 }
387 
388 /*
389  *	Send an ICMP message in response to a packet in error
390  */
391 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
392 		       const struct in6_addr *force_saddr)
393 {
394 	struct net *net = dev_net(skb->dev);
395 	struct inet6_dev *idev = NULL;
396 	struct ipv6hdr *hdr = ipv6_hdr(skb);
397 	struct sock *sk;
398 	struct ipv6_pinfo *np;
399 	const struct in6_addr *saddr = NULL;
400 	struct dst_entry *dst;
401 	struct icmp6hdr tmp_hdr;
402 	struct flowi6 fl6;
403 	struct icmpv6_msg msg;
404 	struct sockcm_cookie sockc_unused = {0};
405 	struct ipcm6_cookie ipc6;
406 	int iif = 0;
407 	int addr_type = 0;
408 	int len;
409 	int err = 0;
410 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
411 
412 	if ((u8 *)hdr < skb->head ||
413 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
414 		return;
415 
416 	/*
417 	 *	Make sure we respect the rules
418 	 *	i.e. RFC 1885 2.4(e)
419 	 *	Rule (e.1) is enforced by not using icmp6_send
420 	 *	in any code that processes icmp errors.
421 	 */
422 	addr_type = ipv6_addr_type(&hdr->daddr);
423 
424 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
425 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
426 		saddr = &hdr->daddr;
427 
428 	/*
429 	 *	Dest addr check
430 	 */
431 
432 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
433 		if (type != ICMPV6_PKT_TOOBIG &&
434 		    !(type == ICMPV6_PARAMPROB &&
435 		      code == ICMPV6_UNK_OPTION &&
436 		      (opt_unrec(skb, info))))
437 			return;
438 
439 		saddr = NULL;
440 	}
441 
442 	addr_type = ipv6_addr_type(&hdr->saddr);
443 
444 	/*
445 	 *	Source addr check
446 	 */
447 
448 	if (__ipv6_addr_needs_scope_id(addr_type))
449 		iif = skb->dev->ifindex;
450 	else
451 		iif = l3mdev_master_ifindex(skb->dev);
452 
453 	/*
454 	 *	Must not send error if the source does not uniquely
455 	 *	identify a single node (RFC2463 Section 2.4).
456 	 *	We check unspecified / multicast addresses here,
457 	 *	and anycast addresses will be checked later.
458 	 */
459 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
460 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
461 				    &hdr->saddr, &hdr->daddr);
462 		return;
463 	}
464 
465 	/*
466 	 *	Never answer to a ICMP packet.
467 	 */
468 	if (is_ineligible(skb)) {
469 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
470 				    &hdr->saddr, &hdr->daddr);
471 		return;
472 	}
473 
474 	mip6_addr_swap(skb);
475 
476 	memset(&fl6, 0, sizeof(fl6));
477 	fl6.flowi6_proto = IPPROTO_ICMPV6;
478 	fl6.daddr = hdr->saddr;
479 	if (force_saddr)
480 		saddr = force_saddr;
481 	if (saddr)
482 		fl6.saddr = *saddr;
483 	fl6.flowi6_mark = mark;
484 	fl6.flowi6_oif = iif;
485 	fl6.fl6_icmp_type = type;
486 	fl6.fl6_icmp_code = code;
487 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
488 
489 	sk = icmpv6_xmit_lock(net);
490 	if (!sk)
491 		return;
492 	sk->sk_mark = mark;
493 	np = inet6_sk(sk);
494 
495 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
496 		goto out;
497 
498 	tmp_hdr.icmp6_type = type;
499 	tmp_hdr.icmp6_code = code;
500 	tmp_hdr.icmp6_cksum = 0;
501 	tmp_hdr.icmp6_pointer = htonl(info);
502 
503 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
504 		fl6.flowi6_oif = np->mcast_oif;
505 	else if (!fl6.flowi6_oif)
506 		fl6.flowi6_oif = np->ucast_oif;
507 
508 	ipc6.tclass = np->tclass;
509 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
510 
511 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
512 	if (IS_ERR(dst))
513 		goto out;
514 
515 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
516 	ipc6.dontfrag = np->dontfrag;
517 	ipc6.opt = NULL;
518 
519 	msg.skb = skb;
520 	msg.offset = skb_network_offset(skb);
521 	msg.type = type;
522 
523 	len = skb->len - msg.offset;
524 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
525 	if (len < 0) {
526 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
527 				    &hdr->saddr, &hdr->daddr);
528 		goto out_dst_release;
529 	}
530 
531 	rcu_read_lock();
532 	idev = __in6_dev_get(skb->dev);
533 
534 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
535 			      len + sizeof(struct icmp6hdr),
536 			      sizeof(struct icmp6hdr),
537 			      &ipc6, &fl6, (struct rt6_info *)dst,
538 			      MSG_DONTWAIT, &sockc_unused);
539 	if (err) {
540 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
541 		ip6_flush_pending_frames(sk);
542 	} else {
543 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
544 						 len + sizeof(struct icmp6hdr));
545 	}
546 	rcu_read_unlock();
547 out_dst_release:
548 	dst_release(dst);
549 out:
550 	icmpv6_xmit_unlock(sk);
551 }
552 
553 /* Slightly more convenient version of icmp6_send.
554  */
555 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
556 {
557 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
558 	kfree_skb(skb);
559 }
560 
561 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
562  * if sufficient data bytes are available
563  * @nhs is the size of the tunnel header(s) :
564  *  Either an IPv4 header for SIT encap
565  *         an IPv4 header + GRE header for GRE encap
566  */
567 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
568 			       unsigned int data_len)
569 {
570 	struct in6_addr temp_saddr;
571 	struct rt6_info *rt;
572 	struct sk_buff *skb2;
573 	u32 info = 0;
574 
575 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
576 		return 1;
577 
578 	/* RFC 4884 (partial) support for ICMP extensions */
579 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
580 		data_len = 0;
581 
582 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
583 
584 	if (!skb2)
585 		return 1;
586 
587 	skb_dst_drop(skb2);
588 	skb_pull(skb2, nhs);
589 	skb_reset_network_header(skb2);
590 
591 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
592 
593 	if (rt && rt->dst.dev)
594 		skb2->dev = rt->dst.dev;
595 
596 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
597 
598 	if (data_len) {
599 		/* RFC 4884 (partial) support :
600 		 * insert 0 padding at the end, before the extensions
601 		 */
602 		__skb_push(skb2, nhs);
603 		skb_reset_network_header(skb2);
604 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
605 		memset(skb2->data + data_len - nhs, 0, nhs);
606 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
607 		 * and stored in reserved[0]
608 		 */
609 		info = (data_len/8) << 24;
610 	}
611 	if (type == ICMP_TIME_EXCEEDED)
612 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
613 			   info, &temp_saddr);
614 	else
615 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
616 			   info, &temp_saddr);
617 	if (rt)
618 		ip6_rt_put(rt);
619 
620 	kfree_skb(skb2);
621 
622 	return 0;
623 }
624 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
625 
626 static void icmpv6_echo_reply(struct sk_buff *skb)
627 {
628 	struct net *net = dev_net(skb->dev);
629 	struct sock *sk;
630 	struct inet6_dev *idev;
631 	struct ipv6_pinfo *np;
632 	const struct in6_addr *saddr = NULL;
633 	struct icmp6hdr *icmph = icmp6_hdr(skb);
634 	struct icmp6hdr tmp_hdr;
635 	struct flowi6 fl6;
636 	struct icmpv6_msg msg;
637 	struct dst_entry *dst;
638 	struct ipcm6_cookie ipc6;
639 	int err = 0;
640 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
641 	struct sockcm_cookie sockc_unused = {0};
642 
643 	saddr = &ipv6_hdr(skb)->daddr;
644 
645 	if (!ipv6_unicast_destination(skb) &&
646 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
647 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
648 		saddr = NULL;
649 
650 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
651 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
652 
653 	memset(&fl6, 0, sizeof(fl6));
654 	fl6.flowi6_proto = IPPROTO_ICMPV6;
655 	fl6.daddr = ipv6_hdr(skb)->saddr;
656 	if (saddr)
657 		fl6.saddr = *saddr;
658 	fl6.flowi6_oif = skb->dev->ifindex;
659 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
660 	fl6.flowi6_mark = mark;
661 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
662 
663 	sk = icmpv6_xmit_lock(net);
664 	if (!sk)
665 		return;
666 	sk->sk_mark = mark;
667 	np = inet6_sk(sk);
668 
669 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
670 		fl6.flowi6_oif = np->mcast_oif;
671 	else if (!fl6.flowi6_oif)
672 		fl6.flowi6_oif = np->ucast_oif;
673 
674 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
675 	if (err)
676 		goto out;
677 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
678 	if (IS_ERR(dst))
679 		goto out;
680 
681 	idev = __in6_dev_get(skb->dev);
682 
683 	msg.skb = skb;
684 	msg.offset = 0;
685 	msg.type = ICMPV6_ECHO_REPLY;
686 
687 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
688 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
689 	ipc6.dontfrag = np->dontfrag;
690 	ipc6.opt = NULL;
691 
692 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
693 				sizeof(struct icmp6hdr), &ipc6, &fl6,
694 				(struct rt6_info *)dst, MSG_DONTWAIT,
695 				&sockc_unused);
696 
697 	if (err) {
698 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
699 		ip6_flush_pending_frames(sk);
700 	} else {
701 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
702 						 skb->len + sizeof(struct icmp6hdr));
703 	}
704 	dst_release(dst);
705 out:
706 	icmpv6_xmit_unlock(sk);
707 }
708 
709 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
710 {
711 	const struct inet6_protocol *ipprot;
712 	int inner_offset;
713 	__be16 frag_off;
714 	u8 nexthdr;
715 	struct net *net = dev_net(skb->dev);
716 
717 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
718 		goto out;
719 
720 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
721 	if (ipv6_ext_hdr(nexthdr)) {
722 		/* now skip over extension headers */
723 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
724 						&nexthdr, &frag_off);
725 		if (inner_offset < 0)
726 			goto out;
727 	} else {
728 		inner_offset = sizeof(struct ipv6hdr);
729 	}
730 
731 	/* Checkin header including 8 bytes of inner protocol header. */
732 	if (!pskb_may_pull(skb, inner_offset+8))
733 		goto out;
734 
735 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
736 	   Without this we will not able f.e. to make source routed
737 	   pmtu discovery.
738 	   Corresponding argument (opt) to notifiers is already added.
739 	   --ANK (980726)
740 	 */
741 
742 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
743 	if (ipprot && ipprot->err_handler)
744 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
745 
746 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
747 	return;
748 
749 out:
750 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
751 }
752 
753 /*
754  *	Handle icmp messages
755  */
756 
757 static int icmpv6_rcv(struct sk_buff *skb)
758 {
759 	struct net_device *dev = skb->dev;
760 	struct inet6_dev *idev = __in6_dev_get(dev);
761 	const struct in6_addr *saddr, *daddr;
762 	struct icmp6hdr *hdr;
763 	u8 type;
764 	bool success = false;
765 
766 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
767 		struct sec_path *sp = skb_sec_path(skb);
768 		int nh;
769 
770 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
771 				 XFRM_STATE_ICMP))
772 			goto drop_no_count;
773 
774 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
775 			goto drop_no_count;
776 
777 		nh = skb_network_offset(skb);
778 		skb_set_network_header(skb, sizeof(*hdr));
779 
780 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
781 			goto drop_no_count;
782 
783 		skb_set_network_header(skb, nh);
784 	}
785 
786 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
787 
788 	saddr = &ipv6_hdr(skb)->saddr;
789 	daddr = &ipv6_hdr(skb)->daddr;
790 
791 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
792 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
793 				    saddr, daddr);
794 		goto csum_error;
795 	}
796 
797 	if (!pskb_pull(skb, sizeof(*hdr)))
798 		goto discard_it;
799 
800 	hdr = icmp6_hdr(skb);
801 
802 	type = hdr->icmp6_type;
803 
804 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
805 
806 	switch (type) {
807 	case ICMPV6_ECHO_REQUEST:
808 		icmpv6_echo_reply(skb);
809 		break;
810 
811 	case ICMPV6_ECHO_REPLY:
812 		success = ping_rcv(skb);
813 		break;
814 
815 	case ICMPV6_PKT_TOOBIG:
816 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
817 		   standard destination cache. Seems, only "advanced"
818 		   destination cache will allow to solve this problem
819 		   --ANK (980726)
820 		 */
821 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
822 			goto discard_it;
823 		hdr = icmp6_hdr(skb);
824 
825 		/*
826 		 *	Drop through to notify
827 		 */
828 
829 	case ICMPV6_DEST_UNREACH:
830 	case ICMPV6_TIME_EXCEED:
831 	case ICMPV6_PARAMPROB:
832 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
833 		break;
834 
835 	case NDISC_ROUTER_SOLICITATION:
836 	case NDISC_ROUTER_ADVERTISEMENT:
837 	case NDISC_NEIGHBOUR_SOLICITATION:
838 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
839 	case NDISC_REDIRECT:
840 		ndisc_rcv(skb);
841 		break;
842 
843 	case ICMPV6_MGM_QUERY:
844 		igmp6_event_query(skb);
845 		break;
846 
847 	case ICMPV6_MGM_REPORT:
848 		igmp6_event_report(skb);
849 		break;
850 
851 	case ICMPV6_MGM_REDUCTION:
852 	case ICMPV6_NI_QUERY:
853 	case ICMPV6_NI_REPLY:
854 	case ICMPV6_MLD2_REPORT:
855 	case ICMPV6_DHAAD_REQUEST:
856 	case ICMPV6_DHAAD_REPLY:
857 	case ICMPV6_MOBILE_PREFIX_SOL:
858 	case ICMPV6_MOBILE_PREFIX_ADV:
859 		break;
860 
861 	default:
862 		/* informational */
863 		if (type & ICMPV6_INFOMSG_MASK)
864 			break;
865 
866 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
867 				    saddr, daddr);
868 
869 		/*
870 		 * error of unknown type.
871 		 * must pass to upper level
872 		 */
873 
874 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
875 	}
876 
877 	/* until the v6 path can be better sorted assume failure and
878 	 * preserve the status quo behaviour for the rest of the paths to here
879 	 */
880 	if (success)
881 		consume_skb(skb);
882 	else
883 		kfree_skb(skb);
884 
885 	return 0;
886 
887 csum_error:
888 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
889 discard_it:
890 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
891 drop_no_count:
892 	kfree_skb(skb);
893 	return 0;
894 }
895 
896 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
897 		      u8 type,
898 		      const struct in6_addr *saddr,
899 		      const struct in6_addr *daddr,
900 		      int oif)
901 {
902 	memset(fl6, 0, sizeof(*fl6));
903 	fl6->saddr = *saddr;
904 	fl6->daddr = *daddr;
905 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
906 	fl6->fl6_icmp_type	= type;
907 	fl6->fl6_icmp_code	= 0;
908 	fl6->flowi6_oif		= oif;
909 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
910 }
911 
912 static int __net_init icmpv6_sk_init(struct net *net)
913 {
914 	struct sock *sk;
915 	int err, i, j;
916 
917 	net->ipv6.icmp_sk =
918 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
919 	if (!net->ipv6.icmp_sk)
920 		return -ENOMEM;
921 
922 	for_each_possible_cpu(i) {
923 		err = inet_ctl_sock_create(&sk, PF_INET6,
924 					   SOCK_RAW, IPPROTO_ICMPV6, net);
925 		if (err < 0) {
926 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
927 			       err);
928 			goto fail;
929 		}
930 
931 		net->ipv6.icmp_sk[i] = sk;
932 
933 		/* Enough space for 2 64K ICMP packets, including
934 		 * sk_buff struct overhead.
935 		 */
936 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
937 	}
938 	return 0;
939 
940  fail:
941 	for (j = 0; j < i; j++)
942 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
943 	kfree(net->ipv6.icmp_sk);
944 	return err;
945 }
946 
947 static void __net_exit icmpv6_sk_exit(struct net *net)
948 {
949 	int i;
950 
951 	for_each_possible_cpu(i) {
952 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
953 	}
954 	kfree(net->ipv6.icmp_sk);
955 }
956 
957 static struct pernet_operations icmpv6_sk_ops = {
958 	.init = icmpv6_sk_init,
959 	.exit = icmpv6_sk_exit,
960 };
961 
962 int __init icmpv6_init(void)
963 {
964 	int err;
965 
966 	err = register_pernet_subsys(&icmpv6_sk_ops);
967 	if (err < 0)
968 		return err;
969 
970 	err = -EAGAIN;
971 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
972 		goto fail;
973 
974 	err = inet6_register_icmp_sender(icmp6_send);
975 	if (err)
976 		goto sender_reg_err;
977 	return 0;
978 
979 sender_reg_err:
980 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
981 fail:
982 	pr_err("Failed to register ICMP6 protocol\n");
983 	unregister_pernet_subsys(&icmpv6_sk_ops);
984 	return err;
985 }
986 
987 void icmpv6_cleanup(void)
988 {
989 	inet6_unregister_icmp_sender(icmp6_send);
990 	unregister_pernet_subsys(&icmpv6_sk_ops);
991 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
992 }
993 
994 
995 static const struct icmp6_err {
996 	int err;
997 	int fatal;
998 } tab_unreach[] = {
999 	{	/* NOROUTE */
1000 		.err	= ENETUNREACH,
1001 		.fatal	= 0,
1002 	},
1003 	{	/* ADM_PROHIBITED */
1004 		.err	= EACCES,
1005 		.fatal	= 1,
1006 	},
1007 	{	/* Was NOT_NEIGHBOUR, now reserved */
1008 		.err	= EHOSTUNREACH,
1009 		.fatal	= 0,
1010 	},
1011 	{	/* ADDR_UNREACH	*/
1012 		.err	= EHOSTUNREACH,
1013 		.fatal	= 0,
1014 	},
1015 	{	/* PORT_UNREACH	*/
1016 		.err	= ECONNREFUSED,
1017 		.fatal	= 1,
1018 	},
1019 	{	/* POLICY_FAIL */
1020 		.err	= EACCES,
1021 		.fatal	= 1,
1022 	},
1023 	{	/* REJECT_ROUTE	*/
1024 		.err	= EACCES,
1025 		.fatal	= 1,
1026 	},
1027 };
1028 
1029 int icmpv6_err_convert(u8 type, u8 code, int *err)
1030 {
1031 	int fatal = 0;
1032 
1033 	*err = EPROTO;
1034 
1035 	switch (type) {
1036 	case ICMPV6_DEST_UNREACH:
1037 		fatal = 1;
1038 		if (code < ARRAY_SIZE(tab_unreach)) {
1039 			*err  = tab_unreach[code].err;
1040 			fatal = tab_unreach[code].fatal;
1041 		}
1042 		break;
1043 
1044 	case ICMPV6_PKT_TOOBIG:
1045 		*err = EMSGSIZE;
1046 		break;
1047 
1048 	case ICMPV6_PARAMPROB:
1049 		*err = EPROTO;
1050 		fatal = 1;
1051 		break;
1052 
1053 	case ICMPV6_TIME_EXCEED:
1054 		*err = EHOSTUNREACH;
1055 		break;
1056 	}
1057 
1058 	return fatal;
1059 }
1060 EXPORT_SYMBOL(icmpv6_err_convert);
1061 
1062 #ifdef CONFIG_SYSCTL
1063 static struct ctl_table ipv6_icmp_table_template[] = {
1064 	{
1065 		.procname	= "ratelimit",
1066 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1067 		.maxlen		= sizeof(int),
1068 		.mode		= 0644,
1069 		.proc_handler	= proc_dointvec_ms_jiffies,
1070 	},
1071 	{ },
1072 };
1073 
1074 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1075 {
1076 	struct ctl_table *table;
1077 
1078 	table = kmemdup(ipv6_icmp_table_template,
1079 			sizeof(ipv6_icmp_table_template),
1080 			GFP_KERNEL);
1081 
1082 	if (table)
1083 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1084 
1085 	return table;
1086 }
1087 #endif
1088