xref: /linux/net/ipv6/icmp.c (revision ebf68996de0ab250c5d520eb2291ab65643e9a1e)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen		:	add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
static inline struct sock *icmpv6_sk(struct net *net)
{
	return *this_cpu_ptr(net->ipv6.icmp_sk);
}

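/* err_handler for ICMPv6 itself: absorb PMTU and redirect notifications
 * carried in an ICMPv6 error, and hand errors on echo requests over to
 * the ping socket code.
 */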
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	return sk;
}

static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}

/*
 * Figure out whether we may reply to this packet with an icmp error.
 *
 * We do not reply if:
 *	- it was an icmp error message.
 *	- it is truncated, so that it is known that the protocol is
 *	  ICMPV6 (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;

		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

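/* Is this ICMP type exempt from rate limiting?  Types beyond the valid
 * range, and types whose bit is clear in the icmpv6_ratemask sysctl
 * bitmap, are always allowed through.
 */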
static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

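/* Combined check: a type exempted by the ratemask is always allowed;
 * everything else must also pass the global sysctl_icmp_msgs_per_sec
 * limit (icmp_global_allow(), shared with IPv4).
 */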
static bool icmpv6_global_allow(struct net *net, int type)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow())
		return true;

	return false;
}

/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
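		/* The shift buckets prefixes in /32 steps, e.g. for a /64
		 * destination (128 - 64) >> 5 == 2, so the per-peer token
		 * interval tmo is quartered.
		 */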

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if a parameter problem report is caused by an
 *	unrecognized IPv6 option that has the Option Type
 *	highest-order two bits set to 10
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
	return (*op & 0xC0) == 0x80;
}

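/* Fill in the ICMPv6 header and checksum for the frames queued on @sk's
 * write queue and transmit them.  @len is the ICMPv6 payload length used
 * for the pseudo-header checksum.
 */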
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};

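/* getfrag callback for ip6_append_data(): copy @len bytes of the
 * offending packet into the new skb while accumulating the checksum,
 * and attach conntrack info for error messages.
 */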
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum = 0;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

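/* Mobile IPv6: if the offending packet carried a Home Address destination
 * option, swap the HAO address with the IPv6 source address before the
 * error is built, per the Mobile IPv6 rules for ICMP errors.
 */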
#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

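/* Route lookup for an outgoing ICMPv6 error: try a normal xfrm lookup on
 * the flow first; if that is denied (-EPERM), retry with the flow decoded
 * in the reverse direction and XFRM_LOOKUP_ICMP.  Also refuse to send to
 * a known anycast destination.
 */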
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static int icmp6_iif(const struct sk_buff *skb)
{
	int iif = skb->dev->ifindex;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		if (rt6)
			iif = rt6->rt6i_idev->dev->ifindex;
	}

	return iif;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		       const struct in6_addr *force_saddr)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		dst = skb_dst(skb);
		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}

/* Slightly more convenient version of icmp6_send().
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s):
 *  either an IPv4 header for SIT encap
 *      or an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5: Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

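/* Build and send an Echo Reply for a received Echo Request, honouring the
 * echo_ignore_* sysctls and both the global and per-destination rate
 * limits.
 */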
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}

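/* Deliver a received ICMPv6 error to the upper-layer protocol it refers
 * to: skip any extension headers, then call the protocol's err_handler
 * and notify matching raw sockets.
 */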
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check header, including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not be able e.g. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   the standard destination cache. It seems only an "advanced"
		   destination cache would allow us to solve this problem.
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		/* fall through */
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted, assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}

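/* Initialize a flowi6 for an outgoing ICMPv6 message of @type from
 * @saddr to @daddr on interface @oif.
 */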
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}

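/* Per-netns setup/teardown of the per-CPU ICMPv6 control sockets used to
 * transmit replies and errors.
 */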
static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int i;

	for_each_possible_cpu(i)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
	free_percpu(net->ipv6.icmp_sk);
}

static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i;

	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

 fail:
	icmpv6_sk_exit(net);
	return err;
}

static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};

int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

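/* Map of ICMPV6_DEST_UNREACH codes (the array index) to the errno
 * reported to the socket, and whether the error is fatal.
 */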
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

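/* Convert an ICMPv6 error type/code into an errno stored in *err.
 * Returns nonzero if the error is fatal to the connection.
 */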
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
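/* Template for the per-netns ICMPv6 sysctl table; ipv6_icmp_sysctl_init()
 * duplicates it and repoints each entry's .data at the given netns.
 */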
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{ },
};

struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
	}
	return table;
}
#endif
1169