xref: /linux/net/ipv6/icmp.c (revision a2cce7a9f1b8cc3d4edce106fb971529f1d4d9ce)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 
72 #include <asm/uaccess.h>
73 
74 /*
75  *	The ICMP socket(s). This is the most convenient way to flow control
76  *	our ICMP output as well as maintain a clean interface throughout
77  *	all layers. All Socketless IP sends will soon be gone.
78  *
79  *	On SMP we have one ICMP socket per-cpu.
80  */
81 static inline struct sock *icmpv6_sk(struct net *net)
82 {
83 	return net->ipv6.icmp_sk[smp_processor_id()];
84 }
85 
86 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
87 		       u8 type, u8 code, int offset, __be32 info)
88 {
89 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
90 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
91 	struct net *net = dev_net(skb->dev);
92 
93 	if (type == ICMPV6_PKT_TOOBIG)
94 		ip6_update_pmtu(skb, net, info, 0, 0);
95 	else if (type == NDISC_REDIRECT)
96 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
97 
98 	if (!(type & ICMPV6_INFOMSG_MASK))
99 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
100 			ping_err(skb, offset, info);
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
112 {
113 	struct sock *sk;
114 
115 	local_bh_disable();
116 
117 	sk = icmpv6_sk(net);
118 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
119 		/* This can happen if the output path (f.e. SIT or
120 		 * ip6ip6 tunnel) signals dst_link_failure() for an
121 		 * outgoing ICMP6 packet.
122 		 */
123 		local_bh_enable();
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock_bh(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 /*
170  * Check the ICMP output rate limit
171  */
172 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
173 			       struct flowi6 *fl6)
174 {
175 	struct net *net = sock_net(sk);
176 	struct dst_entry *dst;
177 	bool res = false;
178 
179 	/* Informational messages are not limited. */
180 	if (type & ICMPV6_INFOMSG_MASK)
181 		return true;
182 
183 	/* Do not limit pmtu discovery, it would break it. */
184 	if (type == ICMPV6_PKT_TOOBIG)
185 		return true;
186 
187 	/*
188 	 * Look up the output route.
189 	 * XXX: perhaps the expire for routing entries cloned by
190 	 * this lookup should be more aggressive (not longer than timeout).
191 	 */
192 	dst = ip6_route_output(net, sk, fl6);
193 	if (dst->error) {
194 		IP6_INC_STATS(net, ip6_dst_idev(dst),
195 			      IPSTATS_MIB_OUTNOROUTES);
196 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
197 		res = true;
198 	} else {
199 		struct rt6_info *rt = (struct rt6_info *)dst;
200 		int tmo = net->ipv6.sysctl.icmpv6_time;
201 
202 		/* Give more bandwidth to wider prefixes. */
203 		if (rt->rt6i_dst.plen < 128)
204 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
205 
206 		if (icmp_global_allow()) {
207 			struct inet_peer *peer;
208 
209 			peer = inet_getpeer_v6(net->ipv6.peers,
210 					       &fl6->daddr, 1);
211 			res = inet_peer_xrlim_allow(peer, tmo);
212 			if (peer)
213 				inet_putpeer(peer);
214 		}
215 	}
216 	dst_release(dst);
217 	return res;
218 }
219 
220 /*
221  *	an inline helper for the "simple" if statement below
222  *	checks if parameter problem report is caused by an
223  *	unrecognized IPv6 option that has the Option Type
224  *	highest-order two bits set to 10
225  */
226 
227 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
228 {
229 	u8 _optval, *op;
230 
231 	offset += skb_network_offset(skb);
232 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
233 	if (!op)
234 		return true;
235 	return (*op & 0xC0) == 0x80;
236 }
237 
238 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
239 			       struct icmp6hdr *thdr, int len)
240 {
241 	struct sk_buff *skb;
242 	struct icmp6hdr *icmp6h;
243 	int err = 0;
244 
245 	skb = skb_peek(&sk->sk_write_queue);
246 	if (!skb)
247 		goto out;
248 
249 	icmp6h = icmp6_hdr(skb);
250 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
251 	icmp6h->icmp6_cksum = 0;
252 
253 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
254 		skb->csum = csum_partial(icmp6h,
255 					sizeof(struct icmp6hdr), skb->csum);
256 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
257 						      &fl6->daddr,
258 						      len, fl6->flowi6_proto,
259 						      skb->csum);
260 	} else {
261 		__wsum tmp_csum = 0;
262 
263 		skb_queue_walk(&sk->sk_write_queue, skb) {
264 			tmp_csum = csum_add(tmp_csum, skb->csum);
265 		}
266 
267 		tmp_csum = csum_partial(icmp6h,
268 					sizeof(struct icmp6hdr), tmp_csum);
269 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
270 						      &fl6->daddr,
271 						      len, fl6->flowi6_proto,
272 						      tmp_csum);
273 	}
274 	ip6_push_pending_frames(sk);
275 out:
276 	return err;
277 }
278 
279 struct icmpv6_msg {
280 	struct sk_buff	*skb;
281 	int		offset;
282 	uint8_t		type;
283 };
284 
285 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
286 {
287 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
288 	struct sk_buff *org_skb = msg->skb;
289 	__wsum csum = 0;
290 
291 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
292 				      to, len, csum);
293 	skb->csum = csum_block_add(skb->csum, csum, odd);
294 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
295 		nf_ct_attach(skb, org_skb);
296 	return 0;
297 }
298 
299 #if IS_ENABLED(CONFIG_IPV6_MIP6)
300 static void mip6_addr_swap(struct sk_buff *skb)
301 {
302 	struct ipv6hdr *iph = ipv6_hdr(skb);
303 	struct inet6_skb_parm *opt = IP6CB(skb);
304 	struct ipv6_destopt_hao *hao;
305 	struct in6_addr tmp;
306 	int off;
307 
308 	if (opt->dsthao) {
309 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
310 		if (likely(off >= 0)) {
311 			hao = (struct ipv6_destopt_hao *)
312 					(skb_network_header(skb) + off);
313 			tmp = iph->saddr;
314 			iph->saddr = hao->addr;
315 			hao->addr = tmp;
316 		}
317 	}
318 }
319 #else
320 static inline void mip6_addr_swap(struct sk_buff *skb) {}
321 #endif
322 
323 static struct dst_entry *icmpv6_route_lookup(struct net *net,
324 					     struct sk_buff *skb,
325 					     struct sock *sk,
326 					     struct flowi6 *fl6)
327 {
328 	struct dst_entry *dst, *dst2;
329 	struct flowi6 fl2;
330 	int err;
331 
332 	err = ip6_dst_lookup(net, sk, &dst, fl6);
333 	if (err)
334 		return ERR_PTR(err);
335 
336 	/*
337 	 * We won't send icmp if the destination is known
338 	 * anycast.
339 	 */
340 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
341 		net_dbg_ratelimited("icmp6_send: acast source\n");
342 		dst_release(dst);
343 		return ERR_PTR(-EINVAL);
344 	}
345 
346 	/* No need to clone since we're just using its address. */
347 	dst2 = dst;
348 
349 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
350 	if (!IS_ERR(dst)) {
351 		if (dst != dst2)
352 			return dst;
353 	} else {
354 		if (PTR_ERR(dst) == -EPERM)
355 			dst = NULL;
356 		else
357 			return dst;
358 	}
359 
360 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
361 	if (err)
362 		goto relookup_failed;
363 
364 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
365 	if (err)
366 		goto relookup_failed;
367 
368 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
369 	if (!IS_ERR(dst2)) {
370 		dst_release(dst);
371 		dst = dst2;
372 	} else {
373 		err = PTR_ERR(dst2);
374 		if (err == -EPERM) {
375 			dst_release(dst);
376 			return dst2;
377 		} else
378 			goto relookup_failed;
379 	}
380 
381 relookup_failed:
382 	if (dst)
383 		return dst;
384 	return ERR_PTR(err);
385 }
386 
387 /*
388  *	Send an ICMP message in response to a packet in error
389  */
390 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
391 {
392 	struct net *net = dev_net(skb->dev);
393 	struct inet6_dev *idev = NULL;
394 	struct ipv6hdr *hdr = ipv6_hdr(skb);
395 	struct sock *sk;
396 	struct ipv6_pinfo *np;
397 	const struct in6_addr *saddr = NULL;
398 	struct dst_entry *dst;
399 	struct icmp6hdr tmp_hdr;
400 	struct flowi6 fl6;
401 	struct icmpv6_msg msg;
402 	int iif = 0;
403 	int addr_type = 0;
404 	int len;
405 	int hlimit;
406 	int err = 0;
407 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
408 
409 	if ((u8 *)hdr < skb->head ||
410 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
411 		return;
412 
413 	/*
414 	 *	Make sure we respect the rules
415 	 *	i.e. RFC 1885 2.4(e)
416 	 *	Rule (e.1) is enforced by not using icmp6_send
417 	 *	in any code that processes icmp errors.
418 	 */
419 	addr_type = ipv6_addr_type(&hdr->daddr);
420 
421 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
422 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
423 		saddr = &hdr->daddr;
424 
425 	/*
426 	 *	Dest addr check
427 	 */
428 
429 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
430 		if (type != ICMPV6_PKT_TOOBIG &&
431 		    !(type == ICMPV6_PARAMPROB &&
432 		      code == ICMPV6_UNK_OPTION &&
433 		      (opt_unrec(skb, info))))
434 			return;
435 
436 		saddr = NULL;
437 	}
438 
439 	addr_type = ipv6_addr_type(&hdr->saddr);
440 
441 	/*
442 	 *	Source addr check
443 	 */
444 
445 	if (__ipv6_addr_needs_scope_id(addr_type))
446 		iif = skb->dev->ifindex;
447 
448 	/*
449 	 *	Must not send error if the source does not uniquely
450 	 *	identify a single node (RFC2463 Section 2.4).
451 	 *	We check unspecified / multicast addresses here,
452 	 *	and anycast addresses will be checked later.
453 	 */
454 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
455 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
456 		return;
457 	}
458 
459 	/*
460 	 *	Never answer to a ICMP packet.
461 	 */
462 	if (is_ineligible(skb)) {
463 		net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
464 		return;
465 	}
466 
467 	mip6_addr_swap(skb);
468 
469 	memset(&fl6, 0, sizeof(fl6));
470 	fl6.flowi6_proto = IPPROTO_ICMPV6;
471 	fl6.daddr = hdr->saddr;
472 	if (saddr)
473 		fl6.saddr = *saddr;
474 	fl6.flowi6_mark = mark;
475 	fl6.flowi6_oif = iif;
476 	fl6.fl6_icmp_type = type;
477 	fl6.fl6_icmp_code = code;
478 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
479 
480 	sk = icmpv6_xmit_lock(net);
481 	if (!sk)
482 		return;
483 	sk->sk_mark = mark;
484 	np = inet6_sk(sk);
485 
486 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
487 		goto out;
488 
489 	tmp_hdr.icmp6_type = type;
490 	tmp_hdr.icmp6_code = code;
491 	tmp_hdr.icmp6_cksum = 0;
492 	tmp_hdr.icmp6_pointer = htonl(info);
493 
494 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
495 		fl6.flowi6_oif = np->mcast_oif;
496 	else if (!fl6.flowi6_oif)
497 		fl6.flowi6_oif = np->ucast_oif;
498 
499 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
500 	if (IS_ERR(dst))
501 		goto out;
502 
503 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
504 
505 	msg.skb = skb;
506 	msg.offset = skb_network_offset(skb);
507 	msg.type = type;
508 
509 	len = skb->len - msg.offset;
510 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
511 	if (len < 0) {
512 		net_dbg_ratelimited("icmp: len problem\n");
513 		goto out_dst_release;
514 	}
515 
516 	rcu_read_lock();
517 	idev = __in6_dev_get(skb->dev);
518 
519 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
520 			      len + sizeof(struct icmp6hdr),
521 			      sizeof(struct icmp6hdr), hlimit,
522 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
523 			      MSG_DONTWAIT, np->dontfrag);
524 	if (err) {
525 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
526 		ip6_flush_pending_frames(sk);
527 	} else {
528 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
529 						 len + sizeof(struct icmp6hdr));
530 	}
531 	rcu_read_unlock();
532 out_dst_release:
533 	dst_release(dst);
534 out:
535 	icmpv6_xmit_unlock(sk);
536 }
537 
538 /* Slightly more convenient version of icmp6_send.
539  */
540 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
541 {
542 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
543 	kfree_skb(skb);
544 }
545 
546 static void icmpv6_echo_reply(struct sk_buff *skb)
547 {
548 	struct net *net = dev_net(skb->dev);
549 	struct sock *sk;
550 	struct inet6_dev *idev;
551 	struct ipv6_pinfo *np;
552 	const struct in6_addr *saddr = NULL;
553 	struct icmp6hdr *icmph = icmp6_hdr(skb);
554 	struct icmp6hdr tmp_hdr;
555 	struct flowi6 fl6;
556 	struct icmpv6_msg msg;
557 	struct dst_entry *dst;
558 	int err = 0;
559 	int hlimit;
560 	u8 tclass;
561 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
562 
563 	saddr = &ipv6_hdr(skb)->daddr;
564 
565 	if (!ipv6_unicast_destination(skb) &&
566 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
567 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
568 		saddr = NULL;
569 
570 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
571 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
572 
573 	memset(&fl6, 0, sizeof(fl6));
574 	fl6.flowi6_proto = IPPROTO_ICMPV6;
575 	fl6.daddr = ipv6_hdr(skb)->saddr;
576 	if (saddr)
577 		fl6.saddr = *saddr;
578 	fl6.flowi6_oif = skb->dev->ifindex;
579 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
580 	fl6.flowi6_mark = mark;
581 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
582 
583 	sk = icmpv6_xmit_lock(net);
584 	if (!sk)
585 		return;
586 	sk->sk_mark = mark;
587 	np = inet6_sk(sk);
588 
589 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
590 		fl6.flowi6_oif = np->mcast_oif;
591 	else if (!fl6.flowi6_oif)
592 		fl6.flowi6_oif = np->ucast_oif;
593 
594 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
595 	if (err)
596 		goto out;
597 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
598 	if (IS_ERR(dst))
599 		goto out;
600 
601 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
602 
603 	idev = __in6_dev_get(skb->dev);
604 
605 	msg.skb = skb;
606 	msg.offset = 0;
607 	msg.type = ICMPV6_ECHO_REPLY;
608 
609 	tclass = ipv6_get_dsfield(ipv6_hdr(skb));
610 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
611 				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
612 				(struct rt6_info *)dst, MSG_DONTWAIT,
613 				np->dontfrag);
614 
615 	if (err) {
616 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
617 		ip6_flush_pending_frames(sk);
618 	} else {
619 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
620 						 skb->len + sizeof(struct icmp6hdr));
621 	}
622 	dst_release(dst);
623 out:
624 	icmpv6_xmit_unlock(sk);
625 }
626 
627 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
628 {
629 	const struct inet6_protocol *ipprot;
630 	int inner_offset;
631 	__be16 frag_off;
632 	u8 nexthdr;
633 	struct net *net = dev_net(skb->dev);
634 
635 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
636 		goto out;
637 
638 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
639 	if (ipv6_ext_hdr(nexthdr)) {
640 		/* now skip over extension headers */
641 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
642 						&nexthdr, &frag_off);
643 		if (inner_offset < 0)
644 			goto out;
645 	} else {
646 		inner_offset = sizeof(struct ipv6hdr);
647 	}
648 
649 	/* Checkin header including 8 bytes of inner protocol header. */
650 	if (!pskb_may_pull(skb, inner_offset+8))
651 		goto out;
652 
653 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
654 	   Without this we will not able f.e. to make source routed
655 	   pmtu discovery.
656 	   Corresponding argument (opt) to notifiers is already added.
657 	   --ANK (980726)
658 	 */
659 
660 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
661 	if (ipprot && ipprot->err_handler)
662 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
663 
664 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
665 	return;
666 
667 out:
668 	ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
669 }
670 
671 /*
672  *	Handle icmp messages
673  */
674 
675 static int icmpv6_rcv(struct sk_buff *skb)
676 {
677 	struct net_device *dev = skb->dev;
678 	struct inet6_dev *idev = __in6_dev_get(dev);
679 	const struct in6_addr *saddr, *daddr;
680 	struct icmp6hdr *hdr;
681 	u8 type;
682 	bool success = false;
683 
684 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
685 		struct sec_path *sp = skb_sec_path(skb);
686 		int nh;
687 
688 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
689 				 XFRM_STATE_ICMP))
690 			goto drop_no_count;
691 
692 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
693 			goto drop_no_count;
694 
695 		nh = skb_network_offset(skb);
696 		skb_set_network_header(skb, sizeof(*hdr));
697 
698 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
699 			goto drop_no_count;
700 
701 		skb_set_network_header(skb, nh);
702 	}
703 
704 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
705 
706 	saddr = &ipv6_hdr(skb)->saddr;
707 	daddr = &ipv6_hdr(skb)->daddr;
708 
709 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
710 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
711 				    saddr, daddr);
712 		goto csum_error;
713 	}
714 
715 	if (!pskb_pull(skb, sizeof(*hdr)))
716 		goto discard_it;
717 
718 	hdr = icmp6_hdr(skb);
719 
720 	type = hdr->icmp6_type;
721 
722 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
723 
724 	switch (type) {
725 	case ICMPV6_ECHO_REQUEST:
726 		icmpv6_echo_reply(skb);
727 		break;
728 
729 	case ICMPV6_ECHO_REPLY:
730 		success = ping_rcv(skb);
731 		break;
732 
733 	case ICMPV6_PKT_TOOBIG:
734 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
735 		   standard destination cache. Seems, only "advanced"
736 		   destination cache will allow to solve this problem
737 		   --ANK (980726)
738 		 */
739 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
740 			goto discard_it;
741 		hdr = icmp6_hdr(skb);
742 
743 		/*
744 		 *	Drop through to notify
745 		 */
746 
747 	case ICMPV6_DEST_UNREACH:
748 	case ICMPV6_TIME_EXCEED:
749 	case ICMPV6_PARAMPROB:
750 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
751 		break;
752 
753 	case NDISC_ROUTER_SOLICITATION:
754 	case NDISC_ROUTER_ADVERTISEMENT:
755 	case NDISC_NEIGHBOUR_SOLICITATION:
756 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
757 	case NDISC_REDIRECT:
758 		ndisc_rcv(skb);
759 		break;
760 
761 	case ICMPV6_MGM_QUERY:
762 		igmp6_event_query(skb);
763 		break;
764 
765 	case ICMPV6_MGM_REPORT:
766 		igmp6_event_report(skb);
767 		break;
768 
769 	case ICMPV6_MGM_REDUCTION:
770 	case ICMPV6_NI_QUERY:
771 	case ICMPV6_NI_REPLY:
772 	case ICMPV6_MLD2_REPORT:
773 	case ICMPV6_DHAAD_REQUEST:
774 	case ICMPV6_DHAAD_REPLY:
775 	case ICMPV6_MOBILE_PREFIX_SOL:
776 	case ICMPV6_MOBILE_PREFIX_ADV:
777 		break;
778 
779 	default:
780 		/* informational */
781 		if (type & ICMPV6_INFOMSG_MASK)
782 			break;
783 
784 		net_dbg_ratelimited("icmpv6: msg of unknown type\n");
785 
786 		/*
787 		 * error of unknown type.
788 		 * must pass to upper level
789 		 */
790 
791 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
792 	}
793 
794 	/* until the v6 path can be better sorted assume failure and
795 	 * preserve the status quo behaviour for the rest of the paths to here
796 	 */
797 	if (success)
798 		consume_skb(skb);
799 	else
800 		kfree_skb(skb);
801 
802 	return 0;
803 
804 csum_error:
805 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
806 discard_it:
807 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
808 drop_no_count:
809 	kfree_skb(skb);
810 	return 0;
811 }
812 
813 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
814 		      u8 type,
815 		      const struct in6_addr *saddr,
816 		      const struct in6_addr *daddr,
817 		      int oif)
818 {
819 	memset(fl6, 0, sizeof(*fl6));
820 	fl6->saddr = *saddr;
821 	fl6->daddr = *daddr;
822 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
823 	fl6->fl6_icmp_type	= type;
824 	fl6->fl6_icmp_code	= 0;
825 	fl6->flowi6_oif		= oif;
826 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
827 }
828 
829 /*
830  * Special lock-class for __icmpv6_sk:
831  */
832 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
833 
834 static int __net_init icmpv6_sk_init(struct net *net)
835 {
836 	struct sock *sk;
837 	int err, i, j;
838 
839 	net->ipv6.icmp_sk =
840 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
841 	if (!net->ipv6.icmp_sk)
842 		return -ENOMEM;
843 
844 	for_each_possible_cpu(i) {
845 		err = inet_ctl_sock_create(&sk, PF_INET6,
846 					   SOCK_RAW, IPPROTO_ICMPV6, net);
847 		if (err < 0) {
848 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
849 			       err);
850 			goto fail;
851 		}
852 
853 		net->ipv6.icmp_sk[i] = sk;
854 
855 		/*
856 		 * Split off their lock-class, because sk->sk_dst_lock
857 		 * gets used from softirqs, which is safe for
858 		 * __icmpv6_sk (because those never get directly used
859 		 * via userspace syscalls), but unsafe for normal sockets.
860 		 */
861 		lockdep_set_class(&sk->sk_dst_lock,
862 				  &icmpv6_socket_sk_dst_lock_key);
863 
864 		/* Enough space for 2 64K ICMP packets, including
865 		 * sk_buff struct overhead.
866 		 */
867 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
868 	}
869 	return 0;
870 
871  fail:
872 	for (j = 0; j < i; j++)
873 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
874 	kfree(net->ipv6.icmp_sk);
875 	return err;
876 }
877 
878 static void __net_exit icmpv6_sk_exit(struct net *net)
879 {
880 	int i;
881 
882 	for_each_possible_cpu(i) {
883 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
884 	}
885 	kfree(net->ipv6.icmp_sk);
886 }
887 
888 static struct pernet_operations icmpv6_sk_ops = {
889 	.init = icmpv6_sk_init,
890 	.exit = icmpv6_sk_exit,
891 };
892 
893 int __init icmpv6_init(void)
894 {
895 	int err;
896 
897 	err = register_pernet_subsys(&icmpv6_sk_ops);
898 	if (err < 0)
899 		return err;
900 
901 	err = -EAGAIN;
902 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
903 		goto fail;
904 
905 	err = inet6_register_icmp_sender(icmp6_send);
906 	if (err)
907 		goto sender_reg_err;
908 	return 0;
909 
910 sender_reg_err:
911 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
912 fail:
913 	pr_err("Failed to register ICMP6 protocol\n");
914 	unregister_pernet_subsys(&icmpv6_sk_ops);
915 	return err;
916 }
917 
918 void icmpv6_cleanup(void)
919 {
920 	inet6_unregister_icmp_sender(icmp6_send);
921 	unregister_pernet_subsys(&icmpv6_sk_ops);
922 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
923 }
924 
925 
/*
 * Errno value and "fatal" flag for each destination unreachable code;
 * the table is indexed by the ICMPv6 code.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
959 
960 int icmpv6_err_convert(u8 type, u8 code, int *err)
961 {
962 	int fatal = 0;
963 
964 	*err = EPROTO;
965 
966 	switch (type) {
967 	case ICMPV6_DEST_UNREACH:
968 		fatal = 1;
969 		if (code < ARRAY_SIZE(tab_unreach)) {
970 			*err  = tab_unreach[code].err;
971 			fatal = tab_unreach[code].fatal;
972 		}
973 		break;
974 
975 	case ICMPV6_PKT_TOOBIG:
976 		*err = EMSGSIZE;
977 		break;
978 
979 	case ICMPV6_PARAMPROB:
980 		*err = EPROTO;
981 		fatal = 1;
982 		break;
983 
984 	case ICMPV6_TIME_EXCEED:
985 		*err = EHOSTUNREACH;
986 		break;
987 	}
988 
989 	return fatal;
990 }
991 EXPORT_SYMBOL(icmpv6_err_convert);
992 
993 #ifdef CONFIG_SYSCTL
994 static struct ctl_table ipv6_icmp_table_template[] = {
995 	{
996 		.procname	= "ratelimit",
997 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
998 		.maxlen		= sizeof(int),
999 		.mode		= 0644,
1000 		.proc_handler	= proc_dointvec_ms_jiffies,
1001 	},
1002 	{ },
1003 };
1004 
1005 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1006 {
1007 	struct ctl_table *table;
1008 
1009 	table = kmemdup(ipv6_icmp_table_template,
1010 			sizeof(ipv6_icmp_table_template),
1011 			GFP_KERNEL);
1012 
1013 	if (table)
1014 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1015 
1016 	return table;
1017 }
1018 #endif
1019