xref: /linux/net/ipv6/icmp.c (revision 26b0d14106954ae46d2f4f7eec3481828a210f7d)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
61 #include <net/raw.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
66 #include <net/icmp.h>
67 #include <net/xfrm.h>
68 #include <net/inet_common.h>
69 
70 #include <asm/uaccess.h>
71 
72 /*
73  *	The ICMP socket(s). This is the most convenient way to flow control
74  *	our ICMP output as well as maintain a clean interface throughout
75  *	all layers. All Socketless IP sends will soon be gone.
76  *
77  *	On SMP we have one ICMP socket per-cpu.
78  */
79 static inline struct sock *icmpv6_sk(struct net *net)
80 {
81 	return net->ipv6.icmp_sk[smp_processor_id()];
82 }
83 
84 static int icmpv6_rcv(struct sk_buff *skb);
85 
86 static const struct inet6_protocol icmpv6_protocol = {
87 	.handler	=	icmpv6_rcv,
88 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
89 };
90 
91 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
92 {
93 	struct sock *sk;
94 
95 	local_bh_disable();
96 
97 	sk = icmpv6_sk(net);
98 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
99 		/* This can happen if the output path (f.e. SIT or
100 		 * ip6ip6 tunnel) signals dst_link_failure() for an
101 		 * outgoing ICMP6 packet.
102 		 */
103 		local_bh_enable();
104 		return NULL;
105 	}
106 	return sk;
107 }
108 
109 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
110 {
111 	spin_unlock_bh(&sk->sk_lock.slock);
112 }
113 
114 /*
115  * Slightly more convenient version of icmpv6_send.
116  */
117 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
118 {
119 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
120 	kfree_skb(skb);
121 }
122 
123 /*
124  * Figure out, may we reply to this packet with icmp error.
125  *
126  * We do not reply, if:
127  *	- it was icmp error message.
128  *	- it is truncated, so that it is known, that protocol is ICMPV6
129  *	  (i.e. in the middle of some exthdr)
130  *
131  *	--ANK (980726)
132  */
133 
134 static bool is_ineligible(const struct sk_buff *skb)
135 {
136 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
137 	int len = skb->len - ptr;
138 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
139 	__be16 frag_off;
140 
141 	if (len < 0)
142 		return true;
143 
144 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
145 	if (ptr < 0)
146 		return false;
147 	if (nexthdr == IPPROTO_ICMPV6) {
148 		u8 _type, *tp;
149 		tp = skb_header_pointer(skb,
150 			ptr+offsetof(struct icmp6hdr, icmp6_type),
151 			sizeof(_type), &_type);
152 		if (tp == NULL ||
153 		    !(*tp & ICMPV6_INFOMSG_MASK))
154 			return true;
155 	}
156 	return false;
157 }
158 
159 /*
160  * Check the ICMP output rate limit
161  */
162 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
163 				      struct flowi6 *fl6)
164 {
165 	struct dst_entry *dst;
166 	struct net *net = sock_net(sk);
167 	bool res = false;
168 
169 	/* Informational messages are not limited. */
170 	if (type & ICMPV6_INFOMSG_MASK)
171 		return true;
172 
173 	/* Do not limit pmtu discovery, it would break it. */
174 	if (type == ICMPV6_PKT_TOOBIG)
175 		return true;
176 
177 	/*
178 	 * Look up the output route.
179 	 * XXX: perhaps the expire for routing entries cloned by
180 	 * this lookup should be more aggressive (not longer than timeout).
181 	 */
182 	dst = ip6_route_output(net, sk, fl6);
183 	if (dst->error) {
184 		IP6_INC_STATS(net, ip6_dst_idev(dst),
185 			      IPSTATS_MIB_OUTNOROUTES);
186 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
187 		res = true;
188 	} else {
189 		struct rt6_info *rt = (struct rt6_info *)dst;
190 		int tmo = net->ipv6.sysctl.icmpv6_time;
191 
192 		/* Give more bandwidth to wider prefixes. */
193 		if (rt->rt6i_dst.plen < 128)
194 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
195 
196 		if (!rt->rt6i_peer)
197 			rt6_bind_peer(rt, 1);
198 		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
199 	}
200 	dst_release(dst);
201 	return res;
202 }
203 
204 /*
205  *	an inline helper for the "simple" if statement below
206  *	checks if parameter problem report is caused by an
207  *	unrecognized IPv6 option that has the Option Type
208  *	highest-order two bits set to 10
209  */
210 
211 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
212 {
213 	u8 _optval, *op;
214 
215 	offset += skb_network_offset(skb);
216 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
217 	if (op == NULL)
218 		return true;
219 	return (*op & 0xC0) == 0x80;
220 }
221 
222 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
223 {
224 	struct sk_buff *skb;
225 	struct icmp6hdr *icmp6h;
226 	int err = 0;
227 
228 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
229 		goto out;
230 
231 	icmp6h = icmp6_hdr(skb);
232 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
233 	icmp6h->icmp6_cksum = 0;
234 
235 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
236 		skb->csum = csum_partial(icmp6h,
237 					sizeof(struct icmp6hdr), skb->csum);
238 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
239 						      &fl6->daddr,
240 						      len, fl6->flowi6_proto,
241 						      skb->csum);
242 	} else {
243 		__wsum tmp_csum = 0;
244 
245 		skb_queue_walk(&sk->sk_write_queue, skb) {
246 			tmp_csum = csum_add(tmp_csum, skb->csum);
247 		}
248 
249 		tmp_csum = csum_partial(icmp6h,
250 					sizeof(struct icmp6hdr), tmp_csum);
251 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
252 						      &fl6->daddr,
253 						      len, fl6->flowi6_proto,
254 						      tmp_csum);
255 	}
256 	ip6_push_pending_frames(sk);
257 out:
258 	return err;
259 }
260 
/* Cookie passed through ip6_append_data() to icmpv6_getfrag(). */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet the payload is copied from */
	int		offset;		/* base offset of the payload in skb */
	uint8_t		type;		/* ICMPv6 type of the outgoing message */
};
266 
267 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
268 {
269 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
270 	struct sk_buff *org_skb = msg->skb;
271 	__wsum csum = 0;
272 
273 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
274 				      to, len, csum);
275 	skb->csum = csum_block_add(skb->csum, csum, odd);
276 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
277 		nf_ct_attach(skb, org_skb);
278 	return 0;
279 }
280 
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * If the packet carries a Home Address destination option, exchange
 * the address stored in that option with the IPv6 source address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *cb = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct ipv6hdr *ip6h;
	struct in6_addr saved;
	int hao_off;

	if (!cb->dsthao)
		return;

	hao_off = ipv6_find_tlv(skb, cb->dsthao, IPV6_TLV_HAO);
	if (unlikely(hao_off < 0))
		return;

	ip6h = ipv6_hdr(skb);
	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + hao_off);

	saved = ip6h->saddr;
	ip6h->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Without Mobile IPv6 support there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
304 
305 static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
306 					     struct sock *sk, struct flowi6 *fl6)
307 {
308 	struct dst_entry *dst, *dst2;
309 	struct flowi6 fl2;
310 	int err;
311 
312 	err = ip6_dst_lookup(sk, &dst, fl6);
313 	if (err)
314 		return ERR_PTR(err);
315 
316 	/*
317 	 * We won't send icmp if the destination is known
318 	 * anycast.
319 	 */
320 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
321 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
322 		dst_release(dst);
323 		return ERR_PTR(-EINVAL);
324 	}
325 
326 	/* No need to clone since we're just using its address. */
327 	dst2 = dst;
328 
329 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
330 	if (!IS_ERR(dst)) {
331 		if (dst != dst2)
332 			return dst;
333 	} else {
334 		if (PTR_ERR(dst) == -EPERM)
335 			dst = NULL;
336 		else
337 			return dst;
338 	}
339 
340 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
341 	if (err)
342 		goto relookup_failed;
343 
344 	err = ip6_dst_lookup(sk, &dst2, &fl2);
345 	if (err)
346 		goto relookup_failed;
347 
348 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
349 	if (!IS_ERR(dst2)) {
350 		dst_release(dst);
351 		dst = dst2;
352 	} else {
353 		err = PTR_ERR(dst2);
354 		if (err == -EPERM) {
355 			dst_release(dst);
356 			return dst2;
357 		} else
358 			goto relookup_failed;
359 	}
360 
361 relookup_failed:
362 	if (dst)
363 		return dst;
364 	return ERR_PTR(err);
365 }
366 
367 /*
368  *	Send an ICMP message in response to a packet in error
369  */
370 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
371 {
372 	struct net *net = dev_net(skb->dev);
373 	struct inet6_dev *idev = NULL;
374 	struct ipv6hdr *hdr = ipv6_hdr(skb);
375 	struct sock *sk;
376 	struct ipv6_pinfo *np;
377 	const struct in6_addr *saddr = NULL;
378 	struct dst_entry *dst;
379 	struct icmp6hdr tmp_hdr;
380 	struct flowi6 fl6;
381 	struct icmpv6_msg msg;
382 	int iif = 0;
383 	int addr_type = 0;
384 	int len;
385 	int hlimit;
386 	int err = 0;
387 
388 	if ((u8 *)hdr < skb->head ||
389 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
390 		return;
391 
392 	/*
393 	 *	Make sure we respect the rules
394 	 *	i.e. RFC 1885 2.4(e)
395 	 *	Rule (e.1) is enforced by not using icmpv6_send
396 	 *	in any code that processes icmp errors.
397 	 */
398 	addr_type = ipv6_addr_type(&hdr->daddr);
399 
400 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
401 		saddr = &hdr->daddr;
402 
403 	/*
404 	 *	Dest addr check
405 	 */
406 
407 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
408 		if (type != ICMPV6_PKT_TOOBIG &&
409 		    !(type == ICMPV6_PARAMPROB &&
410 		      code == ICMPV6_UNK_OPTION &&
411 		      (opt_unrec(skb, info))))
412 			return;
413 
414 		saddr = NULL;
415 	}
416 
417 	addr_type = ipv6_addr_type(&hdr->saddr);
418 
419 	/*
420 	 *	Source addr check
421 	 */
422 
423 	if (addr_type & IPV6_ADDR_LINKLOCAL)
424 		iif = skb->dev->ifindex;
425 
426 	/*
427 	 *	Must not send error if the source does not uniquely
428 	 *	identify a single node (RFC2463 Section 2.4).
429 	 *	We check unspecified / multicast addresses here,
430 	 *	and anycast addresses will be checked later.
431 	 */
432 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
433 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
434 		return;
435 	}
436 
437 	/*
438 	 *	Never answer to a ICMP packet.
439 	 */
440 	if (is_ineligible(skb)) {
441 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
442 		return;
443 	}
444 
445 	mip6_addr_swap(skb);
446 
447 	memset(&fl6, 0, sizeof(fl6));
448 	fl6.flowi6_proto = IPPROTO_ICMPV6;
449 	fl6.daddr = hdr->saddr;
450 	if (saddr)
451 		fl6.saddr = *saddr;
452 	fl6.flowi6_oif = iif;
453 	fl6.fl6_icmp_type = type;
454 	fl6.fl6_icmp_code = code;
455 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
456 
457 	sk = icmpv6_xmit_lock(net);
458 	if (sk == NULL)
459 		return;
460 	np = inet6_sk(sk);
461 
462 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
463 		goto out;
464 
465 	tmp_hdr.icmp6_type = type;
466 	tmp_hdr.icmp6_code = code;
467 	tmp_hdr.icmp6_cksum = 0;
468 	tmp_hdr.icmp6_pointer = htonl(info);
469 
470 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
471 		fl6.flowi6_oif = np->mcast_oif;
472 	else if (!fl6.flowi6_oif)
473 		fl6.flowi6_oif = np->ucast_oif;
474 
475 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
476 	if (IS_ERR(dst))
477 		goto out;
478 
479 	if (ipv6_addr_is_multicast(&fl6.daddr))
480 		hlimit = np->mcast_hops;
481 	else
482 		hlimit = np->hop_limit;
483 	if (hlimit < 0)
484 		hlimit = ip6_dst_hoplimit(dst);
485 
486 	msg.skb = skb;
487 	msg.offset = skb_network_offset(skb);
488 	msg.type = type;
489 
490 	len = skb->len - msg.offset;
491 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
492 	if (len < 0) {
493 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
494 		goto out_dst_release;
495 	}
496 
497 	rcu_read_lock();
498 	idev = __in6_dev_get(skb->dev);
499 
500 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
501 			      len + sizeof(struct icmp6hdr),
502 			      sizeof(struct icmp6hdr), hlimit,
503 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
504 			      MSG_DONTWAIT, np->dontfrag);
505 	if (err) {
506 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
507 		ip6_flush_pending_frames(sk);
508 	} else {
509 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
510 						 len + sizeof(struct icmp6hdr));
511 	}
512 	rcu_read_unlock();
513 out_dst_release:
514 	dst_release(dst);
515 out:
516 	icmpv6_xmit_unlock(sk);
517 }
518 EXPORT_SYMBOL(icmpv6_send);
519 
520 static void icmpv6_echo_reply(struct sk_buff *skb)
521 {
522 	struct net *net = dev_net(skb->dev);
523 	struct sock *sk;
524 	struct inet6_dev *idev;
525 	struct ipv6_pinfo *np;
526 	const struct in6_addr *saddr = NULL;
527 	struct icmp6hdr *icmph = icmp6_hdr(skb);
528 	struct icmp6hdr tmp_hdr;
529 	struct flowi6 fl6;
530 	struct icmpv6_msg msg;
531 	struct dst_entry *dst;
532 	int err = 0;
533 	int hlimit;
534 
535 	saddr = &ipv6_hdr(skb)->daddr;
536 
537 	if (!ipv6_unicast_destination(skb))
538 		saddr = NULL;
539 
540 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
541 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
542 
543 	memset(&fl6, 0, sizeof(fl6));
544 	fl6.flowi6_proto = IPPROTO_ICMPV6;
545 	fl6.daddr = ipv6_hdr(skb)->saddr;
546 	if (saddr)
547 		fl6.saddr = *saddr;
548 	fl6.flowi6_oif = skb->dev->ifindex;
549 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
550 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
551 
552 	sk = icmpv6_xmit_lock(net);
553 	if (sk == NULL)
554 		return;
555 	np = inet6_sk(sk);
556 
557 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
558 		fl6.flowi6_oif = np->mcast_oif;
559 	else if (!fl6.flowi6_oif)
560 		fl6.flowi6_oif = np->ucast_oif;
561 
562 	err = ip6_dst_lookup(sk, &dst, &fl6);
563 	if (err)
564 		goto out;
565 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
566 	if (IS_ERR(dst))
567 		goto out;
568 
569 	if (ipv6_addr_is_multicast(&fl6.daddr))
570 		hlimit = np->mcast_hops;
571 	else
572 		hlimit = np->hop_limit;
573 	if (hlimit < 0)
574 		hlimit = ip6_dst_hoplimit(dst);
575 
576 	idev = __in6_dev_get(skb->dev);
577 
578 	msg.skb = skb;
579 	msg.offset = 0;
580 	msg.type = ICMPV6_ECHO_REPLY;
581 
582 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
583 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
584 				(struct rt6_info *)dst, MSG_DONTWAIT,
585 				np->dontfrag);
586 
587 	if (err) {
588 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
589 		ip6_flush_pending_frames(sk);
590 	} else {
591 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
592 						 skb->len + sizeof(struct icmp6hdr));
593 	}
594 	dst_release(dst);
595 out:
596 	icmpv6_xmit_unlock(sk);
597 }
598 
599 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
600 {
601 	const struct inet6_protocol *ipprot;
602 	int inner_offset;
603 	int hash;
604 	u8 nexthdr;
605 	__be16 frag_off;
606 
607 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
608 		return;
609 
610 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
611 	if (ipv6_ext_hdr(nexthdr)) {
612 		/* now skip over extension headers */
613 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
614 						&nexthdr, &frag_off);
615 		if (inner_offset<0)
616 			return;
617 	} else {
618 		inner_offset = sizeof(struct ipv6hdr);
619 	}
620 
621 	/* Checkin header including 8 bytes of inner protocol header. */
622 	if (!pskb_may_pull(skb, inner_offset+8))
623 		return;
624 
625 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
626 	   Without this we will not able f.e. to make source routed
627 	   pmtu discovery.
628 	   Corresponding argument (opt) to notifiers is already added.
629 	   --ANK (980726)
630 	 */
631 
632 	hash = nexthdr & (MAX_INET_PROTOS - 1);
633 
634 	rcu_read_lock();
635 	ipprot = rcu_dereference(inet6_protos[hash]);
636 	if (ipprot && ipprot->err_handler)
637 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
638 	rcu_read_unlock();
639 
640 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
641 }
642 
643 /*
644  *	Handle icmp messages
645  */
646 
647 static int icmpv6_rcv(struct sk_buff *skb)
648 {
649 	struct net_device *dev = skb->dev;
650 	struct inet6_dev *idev = __in6_dev_get(dev);
651 	const struct in6_addr *saddr, *daddr;
652 	const struct ipv6hdr *orig_hdr;
653 	struct icmp6hdr *hdr;
654 	u8 type;
655 
656 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
657 		struct sec_path *sp = skb_sec_path(skb);
658 		int nh;
659 
660 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
661 				 XFRM_STATE_ICMP))
662 			goto drop_no_count;
663 
664 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
665 			goto drop_no_count;
666 
667 		nh = skb_network_offset(skb);
668 		skb_set_network_header(skb, sizeof(*hdr));
669 
670 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
671 			goto drop_no_count;
672 
673 		skb_set_network_header(skb, nh);
674 	}
675 
676 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
677 
678 	saddr = &ipv6_hdr(skb)->saddr;
679 	daddr = &ipv6_hdr(skb)->daddr;
680 
681 	/* Perform checksum. */
682 	switch (skb->ip_summed) {
683 	case CHECKSUM_COMPLETE:
684 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
685 				     skb->csum))
686 			break;
687 		/* fall through */
688 	case CHECKSUM_NONE:
689 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
690 					     IPPROTO_ICMPV6, 0));
691 		if (__skb_checksum_complete(skb)) {
692 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
693 				       saddr, daddr);
694 			goto discard_it;
695 		}
696 	}
697 
698 	if (!pskb_pull(skb, sizeof(*hdr)))
699 		goto discard_it;
700 
701 	hdr = icmp6_hdr(skb);
702 
703 	type = hdr->icmp6_type;
704 
705 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
706 
707 	switch (type) {
708 	case ICMPV6_ECHO_REQUEST:
709 		icmpv6_echo_reply(skb);
710 		break;
711 
712 	case ICMPV6_ECHO_REPLY:
713 		/* we couldn't care less */
714 		break;
715 
716 	case ICMPV6_PKT_TOOBIG:
717 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
718 		   standard destination cache. Seems, only "advanced"
719 		   destination cache will allow to solve this problem
720 		   --ANK (980726)
721 		 */
722 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
723 			goto discard_it;
724 		hdr = icmp6_hdr(skb);
725 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
726 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
727 				   ntohl(hdr->icmp6_mtu));
728 
729 		/*
730 		 *	Drop through to notify
731 		 */
732 
733 	case ICMPV6_DEST_UNREACH:
734 	case ICMPV6_TIME_EXCEED:
735 	case ICMPV6_PARAMPROB:
736 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
737 		break;
738 
739 	case NDISC_ROUTER_SOLICITATION:
740 	case NDISC_ROUTER_ADVERTISEMENT:
741 	case NDISC_NEIGHBOUR_SOLICITATION:
742 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
743 	case NDISC_REDIRECT:
744 		ndisc_rcv(skb);
745 		break;
746 
747 	case ICMPV6_MGM_QUERY:
748 		igmp6_event_query(skb);
749 		break;
750 
751 	case ICMPV6_MGM_REPORT:
752 		igmp6_event_report(skb);
753 		break;
754 
755 	case ICMPV6_MGM_REDUCTION:
756 	case ICMPV6_NI_QUERY:
757 	case ICMPV6_NI_REPLY:
758 	case ICMPV6_MLD2_REPORT:
759 	case ICMPV6_DHAAD_REQUEST:
760 	case ICMPV6_DHAAD_REPLY:
761 	case ICMPV6_MOBILE_PREFIX_SOL:
762 	case ICMPV6_MOBILE_PREFIX_ADV:
763 		break;
764 
765 	default:
766 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
767 
768 		/* informational */
769 		if (type & ICMPV6_INFOMSG_MASK)
770 			break;
771 
772 		/*
773 		 * error of unknown type.
774 		 * must pass to upper level
775 		 */
776 
777 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
778 	}
779 
780 	kfree_skb(skb);
781 	return 0;
782 
783 discard_it:
784 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
785 drop_no_count:
786 	kfree_skb(skb);
787 	return 0;
788 }
789 
790 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
791 		      u8 type,
792 		      const struct in6_addr *saddr,
793 		      const struct in6_addr *daddr,
794 		      int oif)
795 {
796 	memset(fl6, 0, sizeof(*fl6));
797 	fl6->saddr = *saddr;
798 	fl6->daddr = *daddr;
799 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
800 	fl6->fl6_icmp_type	= type;
801 	fl6->fl6_icmp_code	= 0;
802 	fl6->flowi6_oif		= oif;
803 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
804 }
805 
806 /*
807  * Special lock-class for __icmpv6_sk:
808  */
809 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
810 
811 static int __net_init icmpv6_sk_init(struct net *net)
812 {
813 	struct sock *sk;
814 	int err, i, j;
815 
816 	net->ipv6.icmp_sk =
817 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
818 	if (net->ipv6.icmp_sk == NULL)
819 		return -ENOMEM;
820 
821 	for_each_possible_cpu(i) {
822 		err = inet_ctl_sock_create(&sk, PF_INET6,
823 					   SOCK_RAW, IPPROTO_ICMPV6, net);
824 		if (err < 0) {
825 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
826 			       err);
827 			goto fail;
828 		}
829 
830 		net->ipv6.icmp_sk[i] = sk;
831 
832 		/*
833 		 * Split off their lock-class, because sk->sk_dst_lock
834 		 * gets used from softirqs, which is safe for
835 		 * __icmpv6_sk (because those never get directly used
836 		 * via userspace syscalls), but unsafe for normal sockets.
837 		 */
838 		lockdep_set_class(&sk->sk_dst_lock,
839 				  &icmpv6_socket_sk_dst_lock_key);
840 
841 		/* Enough space for 2 64K ICMP packets, including
842 		 * sk_buff struct overhead.
843 		 */
844 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
845 	}
846 	return 0;
847 
848  fail:
849 	for (j = 0; j < i; j++)
850 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
851 	kfree(net->ipv6.icmp_sk);
852 	return err;
853 }
854 
855 static void __net_exit icmpv6_sk_exit(struct net *net)
856 {
857 	int i;
858 
859 	for_each_possible_cpu(i) {
860 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
861 	}
862 	kfree(net->ipv6.icmp_sk);
863 }
864 
865 static struct pernet_operations icmpv6_sk_ops = {
866        .init = icmpv6_sk_init,
867        .exit = icmpv6_sk_exit,
868 };
869 
870 int __init icmpv6_init(void)
871 {
872 	int err;
873 
874 	err = register_pernet_subsys(&icmpv6_sk_ops);
875 	if (err < 0)
876 		return err;
877 
878 	err = -EAGAIN;
879 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
880 		goto fail;
881 	return 0;
882 
883 fail:
884 	pr_err("Failed to register ICMP6 protocol\n");
885 	unregister_pernet_subsys(&icmpv6_sk_ops);
886 	return err;
887 }
888 
889 void icmpv6_cleanup(void)
890 {
891 	unregister_pernet_subsys(&icmpv6_sk_ops);
892 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
893 }
894 
895 
/*
 * errno value and "fatal" flag for each Destination Unreachable code,
 * indexed by the ICMPv6 code.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
};
921 
922 int icmpv6_err_convert(u8 type, u8 code, int *err)
923 {
924 	int fatal = 0;
925 
926 	*err = EPROTO;
927 
928 	switch (type) {
929 	case ICMPV6_DEST_UNREACH:
930 		fatal = 1;
931 		if (code <= ICMPV6_PORT_UNREACH) {
932 			*err  = tab_unreach[code].err;
933 			fatal = tab_unreach[code].fatal;
934 		}
935 		break;
936 
937 	case ICMPV6_PKT_TOOBIG:
938 		*err = EMSGSIZE;
939 		break;
940 
941 	case ICMPV6_PARAMPROB:
942 		*err = EPROTO;
943 		fatal = 1;
944 		break;
945 
946 	case ICMPV6_TIME_EXCEED:
947 		*err = EHOSTUNREACH;
948 		break;
949 	}
950 
951 	return fatal;
952 }
953 EXPORT_SYMBOL(icmpv6_err_convert);
954 
955 #ifdef CONFIG_SYSCTL
956 ctl_table ipv6_icmp_table_template[] = {
957 	{
958 		.procname	= "ratelimit",
959 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
960 		.maxlen		= sizeof(int),
961 		.mode		= 0644,
962 		.proc_handler	= proc_dointvec_ms_jiffies,
963 	},
964 	{ },
965 };
966 
967 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
968 {
969 	struct ctl_table *table;
970 
971 	table = kmemdup(ipv6_icmp_table_template,
972 			sizeof(ipv6_icmp_table_template),
973 			GFP_KERNEL);
974 
975 	if (table)
976 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
977 
978 	return table;
979 }
980 #endif
981 
982