// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	INET		An implementation of the TCP/IP protocol suite for the LINUX
 *			operating system.  INET is implemented using the  BSD Socket
 *			interface as the means of communication with the user level.
 *
 *		"Ping" sockets
 *
 *	Based on ipv4/udp.c code.
 *
 *	Authors:	Vasiliy Kulikov / Openwall (for Linux 2.6),
 *			Pavel Kankovsky (for Linux 2.4.32)
 *
 *	Pavel gave all rights to bugs to Vasiliy,
 *	none of the bugs are Pavel's now.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/export.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
#include <net/ping.h>
#include <net/udp.h>
#include <net/route.h>
#include <net/inet_common.h>
#include <net/checksum.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#endif

/*
 * Hash table of all ping sockets, keyed by ICMP echo ident (which plays
 * the role a local port plays for UDP).  A single global spinlock guards
 * all buckets; lookups on the receive path walk the chains under RCU.
 */
struct ping_table {
	struct hlist_head	hash[PING_HTABLE_SIZE];
	spinlock_t		lock;
};

static struct ping_table ping_table;
/* IPv6 helper ops; presumably populated by the ipv6 ping code when it
 * registers itself — the registration site is not visible in this file.
 */
struct pingv6_ops pingv6_ops;

/* Map (net, ident) to a hash bucket index; mask selects the table size. */
static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
{
	u32 res = (num + net_hash_mix(net)) & mask;

	pr_debug("hash(%u) = %u\n", num, res);
	return res;
}

/* Return the bucket head for a given ident within a given netns. */
static inline struct hlist_head *ping_hashslot(struct ping_table *table,
					       struct net *net, unsigned int num)
{
	return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}

/*
 * Bind the socket to an ICMP echo ident ("port").  ident == 0 means
 * auto-assign: scan up to 2^16 candidates starting just past the per-netns
 * rover, skipping 0 and any ident already in use in this netns.  A nonzero
 * ident is granted unless another socket in the same netns holds it
 * (subject to the SO_REUSE check below).  Returns 0 or -EADDRINUSE.
 */
int ping_get_port(struct sock *sk, unsigned short ident)
{
	struct net *net = sock_net(sk);
	struct inet_sock *isk, *isk2;
	struct hlist_head *hlist;
	struct sock *sk2 = NULL;

	isk = inet_sk(sk);
	spin_lock(&ping_table.lock);
	if (ident == 0) {
		u16 result = net->ipv4.ping_port_rover + 1;
		u32 i;

		for (i = 0; i < (1L << 16); i++, result++) {
			if (!result)
				continue; /* avoid zero */
			hlist = ping_hashslot(&ping_table, net, result);
			sk_for_each(sk2, hlist) {
				if (!net_eq(sock_net(sk2), net))
					continue;
				isk2 = inet_sk(sk2);

				if (isk2->inet_num == result)
					goto next_port;
			}

			/* found */
			net->ipv4.ping_port_rover = ident = result;
			break;
next_port:
			;
		}
		if (i >= (1L << 16))
			goto fail;
	} else {
		hlist = ping_hashslot(&ping_table, net, ident);
		sk_for_each(sk2, hlist) {
			if (!net_eq(sock_net(sk2), net))
				continue;
			isk2 = inet_sk(sk2);

			/* BUG? Why is this reuse and not reuseaddr? ping.c
			 * doesn't turn off SO_REUSEADDR, and it doesn't expect
			 * that other ping processes can steal its packets.
			 */
			if ((isk2->inet_num == ident) &&
			    (sk2 != sk) &&
			    (!sk2->sk_reuse || !sk->sk_reuse))
				goto fail;
		}
	}

	pr_debug("found port/ident = %d\n", ident);
	isk->inet_num = ident;
	if (sk_unhashed(sk)) {
		pr_debug("was not hashed\n");
		/* RCU insertion pairs with the RCU lookup in ping_lookup() */
		sk_add_node_rcu(sk, hlist);
		sock_set_flag(sk, SOCK_RCU_FREE);
		sock_prot_inuse_add(net, sk->sk_prot, 1);
	}
	spin_unlock(&ping_table.lock);
	return 0;

fail:
	spin_unlock(&ping_table.lock);
	return -EADDRINUSE;
}

/* Remove the socket from the ident hash and clear its bound ident. */
void ping_unhash(struct sock *sk)
{
	struct inet_sock *isk = inet_sk(sk);

	pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
	spin_lock(&ping_table.lock);
	if (sk_del_node_init_rcu(sk)) {
		/* WRITE_ONCE pairs with the lockless READ_ONCE in ping_lookup() */
		WRITE_ONCE(isk->inet_num, 0);
		isk->inet_sport = 0;
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	}
	spin_unlock(&ping_table.lock);
}

/* Called under rcu_read_lock() */
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
	struct hlist_head *hslot = ping_hashslot(&ping_table, net, ident);
	struct sock *sk = NULL;
	struct inet_sock *isk;
	int dif, sdif;

	if (skb->protocol == htons(ETH_P_IP)) {
		dif = inet_iif(skb);
		sdif = inet_sdif(skb);
		pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
			 (int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		dif = inet6_iif(skb);
		sdif = inet6_sdif(skb);
		pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
			 (int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
	} else {
		return NULL;
	}

	sk_for_each_rcu(sk, hslot) {
		int bound_dev_if;

		if (!net_eq(sock_net(sk), net))
			continue;
		isk = inet_sk(sk);

		pr_debug("iterate\n");
		if (READ_ONCE(isk->inet_num) != ident)
			continue;

		bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
		/* Family must match the packet's protocol, and a socket bound
		 * to a specific local address only matches that destination.
		 */
		if (skb->protocol == htons(ETH_P_IP) &&
		    sk->sk_family == AF_INET) {
			__be32 rcv_saddr = READ_ONCE(isk->inet_rcv_saddr);

			pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
				 ident, &rcv_saddr,
				 bound_dev_if);

			if (rcv_saddr && rcv_saddr != ip_hdr(skb)->daddr)
				continue;
#if IS_ENABLED(CONFIG_IPV6)
		} else if (skb->protocol == htons(ETH_P_IPV6) &&
			   sk->sk_family == AF_INET6) {

			pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
				 ident,
				 &sk->sk_v6_rcv_saddr,
				 bound_dev_if);

			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
			    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
					     &ipv6_hdr(skb)->daddr))
				continue;
#endif
		} else {
			continue;
		}

		/* Honour SO_BINDTODEVICE: sdif allows l3mdev (VRF) slaves. */
		if (bound_dev_if && bound_dev_if != dif &&
		    bound_dev_if != sdif)
			continue;

		goto exit;
	}

	sk = NULL;
exit:

	return sk;
}

/* Read the sysctl'd GID range allowed to create ping sockets, seqlock-safe. */
static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
					  kgid_t *high)
{
	kgid_t *data = net->ipv4.ping_group_range.range;
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}


/*
 * Socket init: permit creation only if the caller's effective GID, or any
 * of its supplementary groups, falls inside the netns ping_group_range.
 * Returns 0 on success, -EACCES otherwise.
 */
int ping_init_sock(struct sock *sk)
{
	struct net *net = sock_net(sk);
	kgid_t group = current_egid();
	struct group_info *group_info;
	int i;
	kgid_t low, high;
	int ret = 0;

	if (sk->sk_family == AF_INET6)
		sk->sk_ipv6only = 1;

	inet_get_ping_group_range_net(net, &low, &high);
	if (gid_lte(low, group) && gid_lte(group, high))
		return 0;

	group_info = get_current_groups();
	for (i = 0; i < group_info->ngroups; i++) {
		kgid_t gid = group_info->gid[i];

		if (gid_lte(low, gid) && gid_lte(gid, high))
			goto out_release_group;
	}

	ret = -EACCES;

out_release_group:
	put_group_info(group_info);
	return ret;
}

void ping_close(struct sock *sk, long timeout)
{
	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
		 inet_sk(sk), inet_sk(sk)->inet_num);
	pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt));

	sk_common_release(sk);
}

static int ping_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			    int addr_len)
{
	/* This check is replicated from __ip4_datagram_connect() and
	 * intended to prevent BPF program called below from accessing bytes
	 * that are out of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}

/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
				struct sockaddr_unsized *uaddr, int addr_len)
{
	struct net *net = sock_net(sk);
	if (sk->sk_family == AF_INET) {
		struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
		u32 tb_id = RT_TABLE_LOCAL;
		int chk_addr_ret;

		if (addr_len < sizeof(*addr))
			return -EINVAL;

		/* AF_UNSPEC is tolerated only for the wildcard address */
		if (addr->sin_family != AF_INET &&
		    !(addr->sin_family == AF_UNSPEC &&
		      addr->sin_addr.s_addr == htonl(INADDR_ANY)))
			return -EAFNOSUPPORT;

		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
			 sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));

		if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
			return 0;

		/* If bound to an l3mdev (VRF), validate against its table */
		tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
		chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);

		if (chk_addr_ret == RTN_MULTICAST ||
		    chk_addr_ret == RTN_BROADCAST ||
		    (chk_addr_ret != RTN_LOCAL &&
		     !inet_can_nonlocal_bind(net, isk)))
			return -EADDRNOTAVAIL;

#if IS_ENABLED(CONFIG_IPV6)
	} else if (sk->sk_family == AF_INET6) {
		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
		int addr_type, scoped, has_addr;
		struct net_device *dev = NULL;

		if (addr_len < sizeof(*addr))
			return -EINVAL;

		if (addr->sin6_family != AF_INET6)
			return -EAFNOSUPPORT;

		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
			 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));

		addr_type = ipv6_addr_type(&addr->sin6_addr);
		scoped = __ipv6_addr_needs_scope_id(addr_type);
		if ((addr_type != IPV6_ADDR_ANY &&
		     !(addr_type & IPV6_ADDR_UNICAST)) ||
		    (scoped && !addr->sin6_scope_id))
			return -EINVAL;

		rcu_read_lock();
		if (addr->sin6_scope_id) {
			dev = dev_get_by_index_rcu(net, addr->sin6_scope_id);
			if (!dev) {
				rcu_read_unlock();
				return -ENODEV;
			}
		}

		if (!dev && sk->sk_bound_dev_if) {
			dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
			if (!dev) {
				rcu_read_unlock();
				return -ENODEV;
			}
		}
		has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
						    scoped);
		rcu_read_unlock();

		if (!(ipv6_can_nonlocal_bind(net, isk) || has_addr ||
		      addr_type == IPV6_ADDR_ANY))
			return -EADDRNOTAVAIL;

		if (scoped)
			sk->sk_bound_dev_if = addr->sin6_scope_id;
#endif
	} else {
		return -EAFNOSUPPORT;
	}
	return 0;
}

/* Record the bound local address on the socket, per address family. */
static void ping_set_saddr(struct sock *sk, struct sockaddr_unsized *saddr)
{
	if (saddr->sa_family == AF_INET) {
		struct inet_sock *isk = inet_sk(sk);
		struct sockaddr_in *addr = (struct sockaddr_in *) saddr;

		isk->inet_saddr = addr->sin_addr.s_addr;
		/* WRITE_ONCE pairs with lockless reads in ping_lookup() */
		WRITE_ONCE(isk->inet_rcv_saddr, addr->sin_addr.s_addr);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (saddr->sa_family == AF_INET6) {
		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
		struct ipv6_pinfo *np = inet6_sk(sk);
		sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr;
#endif
	}
}

/*
 * We need our own bind because there are no privileged id's == local ports.
 * Moreover, we don't allow binding to multi- and broadcast addresses.
 */

int ping_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len)
{
	struct inet_sock *isk = inet_sk(sk);
	unsigned short snum;
	int err;
	int dif = sk->sk_bound_dev_if;

	err = ping_check_bind_addr(sk, isk, uaddr, addr_len);
	if (err)
		return err;

	lock_sock(sk);

	err = -EINVAL;
	if (isk->inet_num != 0)
		goto out;	/* already bound */

	err = -EADDRINUSE;
	/* sin_port/sin6_port share the same offset, so this cast is safe
	 * for both families.
	 */
	snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
	if (ping_get_port(sk, snum) != 0) {
		/* Restore possibly modified sk->sk_bound_dev_if by ping_check_bind_addr().
		 */
		sk->sk_bound_dev_if = dif;
		goto out;
	}
	ping_set_saddr(sk, uaddr);

	pr_debug("after bind(): num = %hu, dif = %d\n",
		 isk->inet_num,
		 sk->sk_bound_dev_if);

	err = 0;
	if (sk->sk_family == AF_INET && isk->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6 && !ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
#endif

	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	isk->inet_sport = htons(isk->inet_num);
	isk->inet_daddr = 0;
	isk->inet_dport = 0;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
#endif

	sk_dst_reset(sk);
out:
	release_sock(sk);
	pr_debug("ping_v4_bind -> %d\n", err);
	return err;
}

/*
 * Is this a supported type of ICMP message?
 */

static inline int ping_supported(int family, int type, int code)
{
	return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
	       (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) ||
	       (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) ||
	       (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.
 */

void ping_err(struct sk_buff *skb, int offset, u32 info)
{
	int family;
	struct icmphdr *icmph;
	struct inet_sock *inet_sock;
	int type;
	int code;
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	int harderr;
	int err;

	if (skb->protocol == htons(ETH_P_IP)) {
		family = AF_INET;
		type = icmp_hdr(skb)->type;
		code = icmp_hdr(skb)->code;
		icmph = (struct icmphdr *)(skb->data + offset);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		family = AF_INET6;
		type = icmp6_hdr(skb)->icmp6_type;
		code = icmp6_hdr(skb)->icmp6_code;
		icmph = (struct icmphdr *) (skb->data + offset);
	} else {
		BUG();
	}

	/* We assume the packet has already been checked by icmp_unreach */

	if (!ping_supported(family, icmph->type, icmph->code))
		return;

	pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n",
		 skb->protocol, type, code, ntohs(icmph->un.echo.id),
		 ntohs(icmph->un.echo.sequence));

	/* icmph here is the embedded original echo header, so its id
	 * identifies the sending ping socket.
	 */
	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
	if (!sk) {
		pr_debug("no socket, dropping\n");
		return;	/* No socket for error */
	}
	pr_debug("err on socket %p\n", sk);

	err = 0;
	harderr = 0;
	inet_sock = inet_sk(sk);

	if (skb->protocol == htons(ETH_P_IP)) {
		switch (type) {
		default:
		case ICMP_TIME_EXCEEDED:
			err = EHOSTUNREACH;
			break;
		case ICMP_SOURCE_QUENCH:
			/* This is not a real error but ping wants to see it.
			 * Report it with some fake errno.
			 */
			err = EREMOTEIO;
			break;
		case ICMP_PARAMETERPROB:
			err = EPROTO;
			harderr = 1;
			break;
		case ICMP_DEST_UNREACH:
			if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
				ipv4_sk_update_pmtu(skb, sk, info);
				if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) {
					err = EMSGSIZE;
					harderr = 1;
					break;
				}
				goto out;
			}
			err = EHOSTUNREACH;
			if (code <= NR_ICMP_UNREACH) {
				harderr = icmp_err_convert[code].fatal;
				err = icmp_err_convert[code].errno;
			}
			break;
		case ICMP_REDIRECT:
			/* See ICMP_SOURCE_QUENCH */
			ipv4_sk_redirect(skb, sk);
			err = EREMOTEIO;
			break;
		}
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		harderr = pingv6_ops.icmpv6_err_convert(type, code, &err);
#endif
	}

	/*
	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3.
	 */
	if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
	    (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else {
		if (family == AF_INET) {
			ip_icmp_error(sk, skb, err, 0 /* no remote port */,
				      info, (u8 *)icmph);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			pingv6_ops.ipv6_icmp_error(sk, skb, err, 0,
						   info, (u8 *)icmph);
#endif
		}
	}
	sk->sk_err = err;
	sk_error_report(sk);
out:
	return;
}

/*
 * Copy and checksum an ICMP Echo packet from user space into a buffer
 * starting from the payload.
 */

int ping_getfrag(void *from, char *to,
		 int offset, int fraglen, int odd, struct sk_buff *skb)
{
	struct pingfakehdr *pfh = from;

	if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck,
					  &pfh->msg->msg_iter))
		return -EFAULT;

#if IS_ENABLED(CONFIG_IPV6)
	/* For IPv6, checksum each skb as we go along, as expected by
	 * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in
	 * wcheck, it will be finalized in ping_v4_push_pending_frames.
	 */
	if (pfh->family == AF_INET6) {
		skb->csum = csum_block_add(skb->csum, pfh->wcheck, odd);
		skb->ip_summed = CHECKSUM_NONE;
		pfh->wcheck = 0;
	}
#endif

	return 0;
}

/* Finalize the ICMP checksum over header + accumulated payload checksum,
 * copy the completed header into the queued skb, and push the frame(s) out.
 */
static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
				       struct flowi4 *fl4)
{
	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);

	if (!skb)
		return 0;
	pfh->wcheck = csum_partial((char *)&pfh->icmph,
				   sizeof(struct icmphdr), pfh->wcheck);
	pfh->icmph.checksum = csum_fold(pfh->wcheck);
	memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
	skb->ip_summed = CHECKSUM_NONE;
	return ip_push_pending_frames(sk, fl4);
}

/* Family-independent sendmsg validation: length/flag checks, then pull
 * the user-supplied ICMP header out of the iovec and verify its type/code.
 */
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
			void *user_icmph, size_t icmph_len)
{
	u8 type, code;

	if (len > 0xFFFF)
		return -EMSGSIZE;

	/* Must have at least a full ICMP header. */
	if (len < icmph_len)
		return -EINVAL;

	/*
	 *	Check the flags.
	 */

	/* Mirror BSD error message compatibility */
	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	/*
	 *	Fetch the ICMP header provided by the userland.
	 *	iovec is modified! The ICMP header is consumed.
	 */
	if (memcpy_from_msg(user_icmph, msg, icmph_len))
		return -EFAULT;

	if (family == AF_INET) {
		type = ((struct icmphdr *) user_icmph)->type;
		code = ((struct icmphdr *) user_icmph)->code;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (family == AF_INET6) {
		type = ((struct icmp6hdr *) user_icmph)->icmp6_type;
		code = ((struct icmp6hdr *) user_icmph)->icmp6_code;
#endif
	} else {
		BUG();
	}

	if (!ping_supported(family, type, code))
		return -EINVAL;

	return 0;
}

static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	DEFINE_RAW_FLEX(struct ip_options_rcu, opt_copy, opt.__data,
			IP_OPTIONS_DATA_FIXED_SIZE);
	struct net *net = sock_net(sk);
	struct flowi4 fl4;
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct icmphdr user_icmph;
	struct pingfakehdr pfh;
	struct rtable *rt = NULL;
	int free = 0;	/* set when ipc.opt came from ip_cmsg_send() and must be freed */
	__be32 saddr, daddr, faddr;
	u8 scope;
	int err;

	pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);

	err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph,
				  sizeof(user_icmph));
	if (err)
		return err;

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET)
			return -EAFNOSUPPORT;
		daddr = usin->sin_addr.s_addr;
		/* no remote port */
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = inet->inet_daddr;
		/* no remote port */
	}

	ipcm_init_sk(&ipc, inet);

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sk, msg, &ipc, false);
		if (unlikely(err)) {
			kfree(ipc.opt);
			return err;
		}
		if (ipc.opt)
			free = 1;
	}
	if (!ipc.opt) {
		/* No options via cmsg: snapshot the socket's IP options
		 * under RCU into the on-stack copy.
		 */
		struct ip_options_rcu *inet_opt;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet->inet_opt);
		if (inet_opt) {
			memcpy(opt_copy, inet_opt,
			       sizeof(*inet_opt) + inet_opt->opt.optlen);
			ipc.opt = opt_copy;
		}
		rcu_read_unlock();
	}

	saddr = ipc.addr;
	ipc.addr = faddr = daddr;

	/* With strict source routing, route towards the first hop */
	if (ipc.opt && ipc.opt->opt.srr) {
		if (!daddr) {
			err = -EINVAL;
			goto out_free;
		}
		faddr = ipc.opt->opt.faddr;
	}
	scope = ip_sendmsg_scope(inet, &ipc, msg);

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
			ipc.oif = READ_ONCE(inet->mc_index);
		if (!saddr)
			saddr = READ_ONCE(inet->mc_addr);
	} else if (!ipc.oif)
		ipc.oif = READ_ONCE(inet->uc_index);

	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark,
			   ipc.tos & INET_DSCP_MASK, scope,
			   sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
			   saddr, 0, 0, sk_uid(sk));

	fl4.fl4_icmp_type = user_icmph.type;
	fl4.fl4_icmp_code = user_icmph.code;

	security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4));
	rt = ip_route_output_flow(net, &fl4, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		if (err == -ENETUNREACH)
			IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
		goto out;
	}

	err = -EACCES;
	if ((rt->rt_flags & RTCF_BROADCAST) &&
	    !sock_flag(sk, SOCK_BROADCAST))
		goto out;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	if (!ipc.addr)
		ipc.addr = fl4.daddr;

	lock_sock(sk);

	pfh.icmph.type = user_icmph.type; /* already checked */
	pfh.icmph.code = user_icmph.code; /* ditto */
	pfh.icmph.checksum = 0;
	pfh.icmph.un.echo.id = inet->inet_sport;
	pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
	pfh.msg = msg;
	pfh.wcheck = 0;
	pfh.family = AF_INET;

	err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
			     sizeof(struct icmphdr), &ipc, &rt,
			     msg->msg_flags);
	if (err)
		ip_flush_pending_frames(sk);
	else
		err = ping_v4_push_pending_frames(sk, &pfh, &fl4);
	release_sock(sk);

out:
	ip_rt_put(rt);
out_free:
	if (free)
		kfree(ipc.opt);
	if (!err)
		return len;
	return err;

do_confirm:
	if (msg->msg_flags & MSG_PROBE)
		dst_confirm_neigh(&rt->dst, &fl4.daddr);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}

int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags)
{
	struct inet_sock *isk = inet_sk(sk);
	int family = sk->sk_family;
	struct sk_buff *skb;
	int copied, err;

	pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk,
		 READ_ONCE(isk->inet_num));

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	if (flags & MSG_ERRQUEUE)
		return inet_recv_error(sk, msg, len);

	skb = skb_recv_datagram(sk, flags, &err);
	if (!skb)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	/* Don't bother checking the checksum */
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto done;

	sock_recv_timestamp(msg, sk, skb);

	/* Copy the address and add cmsg data. */
	if (family == AF_INET) {
		DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);

		if (sin) {
			sin->sin_family = AF_INET;
			sin->sin_port = 0 /* skb->h.uh->source */;
			sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
			msg->msg_namelen = sizeof(*sin);
		}

		if (inet_cmsg_flags(isk))
			ip_cmsg_recv(msg, skb);

#if IS_ENABLED(CONFIG_IPV6)
	} else if (family == AF_INET6) {
		struct ipv6hdr *ip6 = ipv6_hdr(skb);
		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);

		if (sin6) {
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = 0;
			sin6->sin6_addr = ip6->saddr;
			sin6->sin6_flowinfo = 0;
			if (inet6_test_bit(SNDFLOW, sk))
				sin6->sin6_flowinfo = ip6_flowinfo(ip6);
			sin6->sin6_scope_id =
				ipv6_iface_scope_id(&sin6->sin6_addr,
						    inet6_iif(skb));
			msg->msg_namelen = sizeof(*sin6);
		}

		if (inet6_sk(sk)->rxopt.all)
			pingv6_ops.ip6_datagram_recv_common_ctl(sk, msg, skb);
		/* An AF_INET6 ping socket can also have queued IPv4
		 * (ICMPv4-over-IPv4-mapped) datagrams; pick the right
		 * ancillary-data path per packet.
		 */
		if (skb->protocol == htons(ETH_P_IPV6) &&
		    inet6_sk(sk)->rxopt.all)
			pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb);
		else if (skb->protocol == htons(ETH_P_IP) &&
			 inet_cmsg_flags(isk))
			ip_cmsg_recv(msg, skb);
#endif
	} else {
		BUG();
	}

	err = copied;

done:
	skb_free_datagram(sk, skb);
out:
	pr_debug("ping_recvmsg -> %d\n", err);
	return err;
}

/* Queue an incoming skb on the socket, returning a drop reason (or
 * SKB_NOT_DROPPED_YET on success).  Consumes the skb on failure.
 */
static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
						 struct sk_buff *skb)
{
	enum skb_drop_reason reason;

	pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
		 inet_sk(sk), inet_sk(sk)->inet_num, skb);
	reason = sock_queue_rcv_skb_reason(sk, skb);
	if (reason) {
		sk_skb_reason_drop(sk, skb, reason);
		pr_debug("ping_queue_rcv_skb -> failed\n");
		return reason;
	}
	return SKB_NOT_DROPPED_YET;
}

/* backlog_rcv variant: 0 on success, -1 on drop. */
int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}


/*
 *	All we need to do is get the socket.
 */

enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct icmphdr *icmph = icmp_hdr(skb);
	struct sock *sk;

	/* We assume the packet has already been checked by icmp_rcv */

	pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
		 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));

	/* Push ICMP header back */
	skb_push(skb, skb->data - (u8 *)icmph);

	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
	if (sk)
		return __ping_queue_rcv_skb(sk, skb);

	kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET);
	return SKB_DROP_REASON_NO_SOCKET;
}

struct proto ping_prot = {
	.name =		"PING",
	.owner =	THIS_MODULE,
	.init =		ping_init_sock,
	.close =	ping_close,
	.pre_connect =	ping_pre_connect,
	.connect =	ip4_datagram_connect,
	.disconnect =	__udp_disconnect,
	.setsockopt =	ip_setsockopt,
	.getsockopt =	ip_getsockopt,
	.sendmsg =	ping_v4_sendmsg,
	.recvmsg =	ping_recvmsg,
	.bind =		ping_bind,
	.backlog_rcv =	ping_queue_rcv_skb,
	.release_cb =	ip4_datagram_release_cb,
	.unhash =	ping_unhash,
	.get_port =	ping_get_port,
	.put_port =	ping_unhash,
	.obj_size =	sizeof(struct inet_sock),
};

#ifdef CONFIG_PROC_FS

/* seq_file iteration over the ping hash table (shared by v4/v6 /proc
 * output); walks buckets starting at 'start' for the iterator's family.
 */
static struct sock *ping_get_first(struct seq_file *seq, int start)
{
	struct sock *sk;
	struct ping_iter_state *state = seq->private;
	struct net *net = seq_file_net(seq);

	for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
	     ++state->bucket) {
		struct hlist_head *hslot;

		hslot = &ping_table.hash[state->bucket];

		if (hlist_empty(hslot))
			continue;

		sk_for_each(sk, hslot) {
			if (net_eq(sock_net(sk), net) &&
			    sk->sk_family == state->family)
				goto found;
		}
	}
	sk = NULL;
found:
	return sk;
}

static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
{
	struct ping_iter_state *state = seq->private;
	struct net *net = seq_file_net(seq);

	do {
		sk = sk_next(sk);
	} while (sk && (!net_eq(sock_net(sk), net)));

	if (!sk)
		return ping_get_first(seq, state->bucket + 1);
	return sk;
}

static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
{
	struct sock *sk = ping_get_first(seq, 0);

	if (sk)
		while (pos && (sk = ping_get_next(seq, sk)) != NULL)
			--pos;
	return pos ? NULL : sk;
}

void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
	__acquires(ping_table.lock)
{
	struct ping_iter_state *state = seq->private;
	state->bucket = 0;
	state->family = family;

	/* Held across the whole seq iteration; released in ping_seq_stop() */
	spin_lock(&ping_table.lock);

	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}

static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos)
{
	return ping_seq_start(seq, pos, AF_INET);
}

void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *sk;

	if (v == SEQ_START_TOKEN)
		sk = ping_get_idx(seq, 0);
	else
		sk = ping_get_next(seq, v);

	++*pos;
	return sk;
}

void ping_seq_stop(struct seq_file *seq, void *v)
	__releases(ping_table.lock)
{
	spin_unlock(&ping_table.lock);
}

/* Emit one /proc/net/icmp row in the traditional UDP-like format. */
static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
				int bucket)
{
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);

	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %u",
		bucket, src, srcp, dest, destp, sp->sk_state,
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),
		0, 0L, 0,
		from_kuid_munged(seq_user_ns(f), sk_uid(sp)),
		0, sock_i_ino(sp),
		refcount_read(&sp->sk_refcnt), sp,
		sk_drops_read(sp));
}

static int ping_v4_seq_show(struct seq_file *seq, void *v)
{
	seq_setwidth(seq, 127);
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode ref pointer drops");
	else {
		struct ping_iter_state *state = seq->private;

		ping_v4_format_sock(v, seq, state->bucket);
	}
	seq_pad(seq, '\n');
	return 0;
}

static const struct seq_operations ping_v4_seq_ops = {
	.start		= ping_v4_seq_start,
	.show		= ping_v4_seq_show,
	.next		= ping_seq_next,
	.stop		= ping_seq_stop,
};

static int __net_init ping_v4_proc_init_net(struct net *net)
{
	if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops,
			sizeof(struct ping_iter_state)))
		return -ENOMEM;

	/* Randomize the auto-ident starting point per netns */
	net->ipv4.ping_port_rover = get_random_u16();
	return 0;
}

static void __net_exit ping_v4_proc_exit_net(struct net *net)
{
	remove_proc_entry("icmp", net->proc_net);
}

static struct pernet_operations ping_v4_net_ops = {
	.init = ping_v4_proc_init_net,
	.exit = ping_v4_proc_exit_net,
};

int __init ping_proc_init(void)
{
	return register_pernet_subsys(&ping_v4_net_ops);
}

void ping_proc_exit(void)
{
	unregister_pernet_subsys(&ping_v4_net_ops);
}

#endif

void __init ping_init(void)
{
	int i;

	for (i = 0; i < PING_HTABLE_SIZE; i++)
		INIT_HLIST_HEAD(&ping_table.hash[i]);
	spin_lock_init(&ping_table.lock);
}