// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to a icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to sent parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/seg6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/* Per-CPU raw ICMPv6 socket used to transmit locally generated messages. */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

/*
 * Error handler invoked when an ICMPv6 error arrives in response to a
 * previously sent ICMPv6 packet (e.g. an echo request).  Updates PMTU /
 * redirect state and forwards echo-related errors to the ping socket layer.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only error messages carry an embedded original packet worth
	 * reporting back to a ping socket.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	/* Re-home the shared per-CPU socket into the caller's netns for
	 * the duration of the transmission.
	 */
	sock_net_set(sk, net);
	return sk;
}

static void icmpv6_xmit_unlock(struct sock *sk)
{
	/* Park the socket back in init_net before releasing it. */
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

/*
 * Returns true when @type is exempt from rate limiting, either because it
 * is out of the ratemask's range or not set in the per-netns ratemask.
 */
static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

/*
 * Global (per-netns) token-bucket check.  On success sets *apply_ratelimit
 * so the caller knows the per-peer limit must also be consulted and a
 * token eventually consumed via icmp_global_consume().
 */
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow(net)) {
		*apply_ratelimit = true;
		return true;
	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}

/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, tmo);
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);
	else
		/* Sending is allowed: consume the global token now. */
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}

/*
 * Returns true when the route selected for @fl6 carries an explicit
 * preferred source address (in which case no saddr fallback is needed).
 */
static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = dst_rt6_info(dst);
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
 *	unrecognized IPv6 option that has the Option Type
 *	highest-order two bits set to 10
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
	return (*op & 0xC0) == 0x80;
}

/*
 * Finalize a queued ICMPv6 message: fill in the header from @thdr,
 * compute the checksum over all queued fragments, and push the
 * pending frames out.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single skb: fold the header into its partial csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Accumulate the per-fragment checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

/* Source descriptor handed to ip6_append_data() via icmpv6_getfrag(). */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet whose payload is being quoted */
	int		offset;	/* offset into skb where copying starts */
	uint8_t		type;	/* outgoing ICMPv6 type */
};

/*
 * getfrag callback for ip6_append_data(): copies (and checksums) payload
 * from the original skb into the outgoing message, and attaches conntrack
 * for error messages.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/*
 * Mobile IPv6: if the packet carries a Home Address Option, swap the
 * care-of address in the IPv6 source with the home address so the error
 * is addressed to the mobile node's home address.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif

/*
 * Route lookup for an outgoing ICMPv6 error, including the xfrm (IPsec)
 * policy check.  If the forward policy lookup is rejected with -EPERM,
 * retry with a reverse-decoded flow (fl2 is filled in by
 * xfrm_decode_session_reverse) so errors about tunneled traffic can
 * still be routed.  Returns a dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Note: the success path above also falls through here with a
	 * valid (non-NULL) dst, which is then returned.
	 */
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity check: the IPv6 header must lie fully within the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum MTU. */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send with drop reasons.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Report the IPv4 tunnel source as a v4-mapped IPv6 address. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

/*
 * Reply to an ICMPv6 (extended) echo request, honouring the
 * echo_ignore_* and anycast sysctls and both rate limits.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}

/*
 * Deliver an incoming ICMPv6 error to the protocol that sent the
 * offending packet (via its err_handler) and to matching raw sockets.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily shift the network header past the ICMPv6
		 * header so the reverse policy check sees the quoted
		 * inner packet; restored below.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}

/*
 * Initialize a flowi6 for an ICMPv6 transmission on behalf of @sk.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}

int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


/* Mapping from ICMPV6_DEST_UNREACH codes to errno / fatality, indexed
 * by the ICMPv6 code value.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

/*
 * Translate an ICMPv6 type/code into an errno value in *err; returns
 * non-zero when the error is fatal for the connection.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

/*
 * Clone the sysctl template for a new netns, repointing each entry's
 * .data at the per-netns storage.  NOTE: the table[0..5] indices must
 * stay in sync with ipv6_icmp_table_template above.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}

size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
#endif