1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Internet Control Message Protocol (ICMPv6)
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on net/ipv4/icmp.c
10 *
11 * RFC 1885
12 */
13
14 /*
15 * Changes:
16 *
17 * Andi Kleen : exception handling
18 * Andi Kleen add rate limits. never reply to a icmp.
19 * add more length checks and other fixes.
20 * yoshfuji : ensure to sent parameter problem for
21 * fragments.
22 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23 * Randy Dunlap and
24 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
26 */
27
28 #define pr_fmt(fmt) "IPv6: " fmt
29
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50
51 #include <net/ip.h>
52 #include <net/sock.h>
53
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69
70 #include <linux/uaccess.h>
71
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73
/* Error handler hook for the ICMPv6 protocol itself: reacts to ICMPv6
 * errors that quote one of our own ICMPv6 packets (PMTU updates, redirects,
 * and errors against outstanding echo requests).
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *)(skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);

	switch (type) {
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0,
				sock_net_uid(net, NULL));
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		break;
	}

	/* Only error messages (high bit clear) that quote an echo request
	 * are propagated to the ping socket layer.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK) &&
	    icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
		ping_err(skb, offset, ntohl(info));

	return 0;
}
93
static int icmpv6_rcv(struct sk_buff *skb);

/* Protocol registration for IPPROTO_ICMPV6: input skips the xfrm policy
 * check (INET6_PROTO_NOPOLICY — icmpv6_rcv performs its own) and ICMPv6
 * is a terminal protocol (INET6_PROTO_FINAL).
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
101
/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	/* Re-home the shared per-cpu socket into the caller's netns for
	 * the duration of this transmission; undone by icmpv6_xmit_unlock().
	 */
	sock_net_set(sk, net);
	return sk;
}
118
/* Counterpart of icmpv6_xmit_lock(): park the per-cpu socket back on
 * init_net before releasing the lock so no stale netns reference is
 * visible to the next user.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
124
125 /*
126 * Figure out, may we reply to this packet with icmp error.
127 *
128 * We do not reply, if:
129 * - it was icmp error message.
130 * - it is truncated, so that it is known, that protocol is ICMPV6
131 * (i.e. in the middle of some exthdr)
132 *
133 * --ANK (980726)
134 */
135
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset just past the fixed IPv6 header, relative to skb->data. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	/* Truncated inside the IPv6 header itself: never reply. */
	if (len < 0)
		return true;

	/* Walk extension headers to find the transport protocol. */
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;

		tp = skb_header_pointer(skb,
					ptr + offsetof(struct icmp6hdr, icmp6_type),
					sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
165
icmpv6_mask_allow(struct net * net,int type)166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 if (type > ICMPV6_MSG_MAX)
169 return true;
170
171 /* Limit if icmp type is set in ratemask. */
172 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 return true;
174
175 return false;
176 }
177
/* Global (per-netns) token-bucket check. Returns true when transmission
 * may proceed. *apply_ratelimit is set only when a global token was
 * actually taken, so the per-peer limiter knows to run as well.
 */
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	/* Types exempt from limiting pass through untouched. */
	if (!icmpv6_mask_allow(net, type)) {
		if (!icmp_global_allow(net)) {
			/* Bucket empty: account the drop and refuse. */
			__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
			return false;
		}
		*apply_ratelimit = true;
	}
	return true;
}
191
192 /*
193 * Check the ICMP output rate limit
194 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	/* Caller's global check decided no per-peer limiting is needed. */
	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	dev = dst_dev(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen) >> 5);

		/* Per-destination token bucket, keyed by inet_peer. */
		rcu_read_lock();
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, tmo);
		rcu_read_unlock();
	}
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	else
		/* We will transmit: consume the global token taken earlier. */
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}
240
icmpv6_rt_has_prefsrc(struct sock * sk,u8 type,struct flowi6 * fl6)241 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
242 struct flowi6 *fl6)
243 {
244 struct net *net = sock_net(sk);
245 struct dst_entry *dst;
246 bool res = false;
247
248 dst = ip6_route_output(net, sk, fl6);
249 if (!dst->error) {
250 struct rt6_info *rt = dst_rt6_info(dst);
251 struct in6_addr prefsrc;
252
253 rt6_get_prefsrc(rt, &prefsrc);
254 res = !ipv6_addr_any(&prefsrc);
255 }
256 dst_release(dst);
257 return res;
258 }
259
260 /*
261 * an inline helper for the "simple" if statement below
262 * checks if parameter problem report is caused by an
263 * unrecognized IPv6 option that has the Option Type
264 * highest-order two bits set to 10
265 */
266
opt_unrec(struct sk_buff * skb,__u32 offset)267 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
268 {
269 u8 _optval, *op;
270
271 offset += skb_network_offset(skb);
272 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
273 if (!op)
274 return true;
275 return (*op & 0xC0) == 0x80;
276 }
277
/* Fill in the ICMPv6 header of the queued message, compute the checksum
 * over header + pseudo-header + payload, and hand the frame(s) to the
 * output path. @len is the total ICMPv6 length used in the pseudo-header.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	/* Checksum field must be zero while the sum is computed. */
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single fragment: fold the header into its partial csum. */
		skb->csum = csum_partial(icmp6h,
					 sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: accumulate every fragment's csum
		 * before folding in the header and pseudo-header.
		 */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
315
/* Copy context handed to icmpv6_getfrag(): the skb being quoted in the
 * ICMPv6 payload, the offset within it where copying starts, and the
 * outgoing ICMPv6 message type.
 */
struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};
321
/* ip6_append_data() getfrag callback: copy @len bytes of the offending
 * packet into the outgoing frame while accumulating the checksum.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *)from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	/* For error messages, tie the reply to the original conntrack
	 * entry so NAT can translate it back.
	 */
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
335
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carries a Home Address option
 * (destination-options TLV), swap the IPv6 source address with the
 * address stored in that option before quoting the packet, so the error
 * is built against the address the correspondent actually used.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
			      (skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
/* No-op when Mobile IPv6 support is compiled out. */
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
355
/* Resolve the output route for an outgoing ICMPv6 error, applying xfrm
 * (IPsec) policy. If the forward policy lookup denies transmission with
 * -EPERM, retry with the flow decoded from the offending packet in
 * reverse (XFRM_LOOKUP_ICMP), as ICMP errors about an IPsec flow may be
 * allowed even when a plaintext reply is not.
 * Returns a held dst_entry or an ERR_PTR().
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		/* xfrm_lookup() returned a transformed dst: use it.
		 * If it returned the same dst, fall through to the
		 * reverse-decode retry below.
		 */
		if (dst != dst2)
			return dst;
	} else {
		/* -EPERM: policy denied plaintext; try the ICMP relookup.
		 * Any other error is final.
		 */
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Build fl2 from the quoted (reversed) inner flow. */
	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Fall back to the original (possibly NULL) route on failure. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
420
icmp6_dev(const struct sk_buff * skb)421 static struct net_device *icmp6_dev(const struct sk_buff *skb)
422 {
423 struct net_device *dev = skb->dev;
424
425 /* for local traffic to local address, skb dev is the loopback
426 * device. Check if there is a dst attached to the skb and if so
427 * get the real device index. Same is needed for replies to a link
428 * local address on a device enslaved to an L3 master device
429 */
430 if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
431 const struct rt6_info *rt6 = skb_rt6_info(skb);
432
433 /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
434 * and ip6_null_entry could be set to skb if no route is found.
435 */
436 if (rt6 && rt6->rt6i_idev)
437 dev = rt6->rt6i_idev->dev;
438 }
439
440 return dev;
441 }
442
icmp6_iif(const struct sk_buff * skb)443 static int icmp6_iif(const struct sk_buff *skb)
444 {
445 return icmp6_dev(skb)->ifindex;
446 }
447
448 /*
449 * Send an ICMP message in response to a packet in error
450 */
/* Send an ICMPv6 error of @type/@code about the packet in @skb.
 *
 * @info:        type-specific field (MTU for PKT_TOOBIG, pointer for
 *               PARAMPROB), stored in the header in network order.
 * @force_saddr: when non-NULL, overrides source address selection.
 * @parm:        inet6 control block of the offending packet.
 *
 * Enforces RFC 4443 eligibility rules (no errors about errors, no
 * multicast/unspecified sources, rate limiting) before building and
 * transmitting the reply on the per-cpu ICMPv6 socket.
 *
 * FIX vs. previous revision: the truncation guard `if (len < 0)` used to
 * run *after* `len = min_t(unsigned int, len, ...)`. min_t() compares in
 * unsigned arithmetic, so a negative len was converted to a huge unsigned
 * value and silently clamped to the MTU bound, making the check dead
 * code. The check now runs before the clamp, as intended.
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity: the IPv6 header must lie fully within the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* If the packet was addressed to us (incl. anycast), reply from
	 * that same address.
	 */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		/* RFC 4443: errors about multicast/broadcast only for
		 * packet-too-big and unrecognized options with the
		 * "always report" bit pattern.
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	/* Per-destination rate limit (only if a global token was taken). */
	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	len = skb->len - msg.offset;
	/* Reject before clamping: min_t() below compares as unsigned and
	 * would turn a negative length into a large positive one.
	 */
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}
	/* Quote at most what fits into a minimum-MTU IPv6 packet. */
	len = min_t(unsigned int, len,
		    IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));

	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
648
649 /* Slightly more convenient version of icmp6_send with drop reasons.
650 */
/* Send a parameter-problem error about @skb, then free it with the given
 * drop @reason. @pos is the offset of the offending byte, carried in the
 * ICMPv6 pointer field.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
657
658 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
659 * if sufficient data bytes are available
660 * @nhs is the size of the tunnel header(s) :
661 * Either an IPv4 header for SIT encap
662 * an IPv4 header + GRE header for GRE encap
663 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header, inner IPv6 header and 8 payload bytes. */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions: the original
	 * datagram portion must be >= 128 bytes and 8-byte aligned.
	 */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* skb_copy() (not clone) when we must rewrite payload bytes below. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	/* Strip the outer encapsulation so skb2 starts at the inner IPv6. */
	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Source the error from the v4-mapped form of the outer IPv4 src. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len / 8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
723
/* Build and send an echo (or RFC 8335 extended echo) reply for the
 * request in @skb. Returns SKB_CONSUMED on success, otherwise the drop
 * reason for the caller to free the skb with.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	/* Honor the sysctls suppressing multicast/anycast echo replies. */
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* For non-unicast destinations let routing pick the source,
	 * unless anycast_src_echo_reply allows replying from anycast.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	/* Reply header is the request header with the type flipped. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	/* xfrm_lookup() consumes dst on error, so no release needed here. */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	     !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* RFC 8335 PROBE: validate and fill in the probe reply fields. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}
836
/* Dispatch a received ICMPv6 error to the upper-layer protocol of the
 * quoted packet: skip the inner extension headers, then invoke the inner
 * protocol's err_handler and notify matching raw sockets.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	/* The quoted packet must contain at least a full IPv6 header. */
	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}
890
891 /*
892 * Handle icmp messages
893 */
894
/* Input handler for IPPROTO_ICMPV6: validate (xfrm policy, checksum),
 * account statistics, then dispatch by message type.
 */
static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only states flagged XFRM_STATE_ICMP may carry ICMP
		 * through the policy check.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
		      XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted inner
		 * packet to run the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* RFC 8335 PROBE is opt-in via the shared v4 sysctl. */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		/* ping_rcv() always consumes the skb. */
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* pskb_may_pull() may have reallocated the header. */
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1047
/* Initialize @fl6 to describe an ICMPv6 flow of @type from @saddr to
 * @daddr on interface @oif, then let the LSM classify it.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	/* Start from an all-zero flow description. */
	memset(fl6, 0, sizeof(*fl6));

	fl6->flowi6_proto = IPPROTO_ICMPV6;
	fl6->flowi6_oif = oif;
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->fl6_icmp_type = type;
	fl6->fl6_icmp_code = 0;

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
1061
/* Boot-time initialization: create one kernel ICMPv6 raw socket per
 * possible CPU, register the protocol handler and the error sender.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			/* NOTE(review): sockets created on earlier CPUs are
			 * not released here; presumably acceptable since a
			 * failure at this point is treated as fatal — confirm.
			 */
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1099
/* Undo icmpv6_init() registrations (sender first, then the protocol). */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1105
1106
/* Mapping from ICMPV6_DEST_UNREACH codes (used as the array index) to
 * the socket errno to report and whether the error is fatal to the
 * connection. Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;	/* errno delivered to the socket */
	int fatal;	/* non-zero: abort the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1140
/* Translate an ICMPv6 error (@type/@code) into a socket errno stored in
 * *@err. Returns non-zero when the error is fatal to the connection.
 * Unknown types map to EPROTO, non-fatal.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	switch (type) {
	case ICMPV6_DEST_UNREACH:
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err = tab_unreach[code].err;
			return tab_unreach[code].fatal;
		}
		/* Unknown unreach code: treat as fatal protocol error. */
		*err = EPROTO;
		return 1;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		return 0;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		return 1;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		return 0;
	}

	*err = EPROTO;
	return 0;
}
EXPORT_SYMBOL(icmpv6_err_convert);
1173
1174 #ifdef CONFIG_SYSCTL
/* Template for the per-netns net.ipv6.icmp sysctl table. The .data
 * pointers reference init_net and are rewritten per namespace in
 * ipv6_icmp_sysctl_init() — that function indexes this array by
 * position, so entry order must not change without updating it.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};
1221
/* Duplicate the sysctl template for namespace @net and retarget each
 * entry's .data at the namespace's own fields. Returns NULL on OOM.
 * The numeric indices below are positionally coupled to
 * ipv6_icmp_table_template — keep both in sync.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}
1240
/* Number of entries in the per-netns ICMPv6 sysctl table. */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
1245 #endif
1246