// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

#include <linux/tcp.h>

#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/netlink.h>
#include <net/tcp.h>

static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			      void *_info)
{
	struct tcp_info *info = _info;

	if (inet_sk_state_load(sk) == TCP_LISTEN) {
		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
	} else if (sk->sk_type == SOCK_STREAM) {
		const struct tcp_sock *tp = tcp_sk(sk);

		r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
					     READ_ONCE(tp->copied_seq), 0);
		r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
	}
	if (info)
		tcp_get_info(sk, info);
}

#ifdef CONFIG_TCP_MD5SIG
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
				 const struct tcp_md5sig_key *key)
{
	info->tcpm_family = key->family;
	info->tcpm_prefixlen = key->prefixlen;
	info->tcpm_keylen = key->keylen;
	memcpy(info->tcpm_key, key->key, key->keylen);

	if (key->family == AF_INET)
		info->tcpm_addr[0] = key->addr.a4.s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	else if (key->family == AF_INET6)
		memcpy(&info->tcpm_addr, &key->addr.a6,
		       sizeof(info->tcpm_addr));
#endif
}

static int tcp_diag_put_md5sig(struct sk_buff *skb,
			       const struct tcp_md5sig_info *md5sig)
{
	const struct tcp_md5sig_key *key;
	struct tcp_diag_md5sig *info;
	struct nlattr *attr;
	int md5sig_count = 0;

	hlist_for_each_entry_rcu(key, &md5sig->head, node)
		md5sig_count++;
	if (md5sig_count == 0)
		return 0;

	attr = nla_reserve(skb, INET_DIAG_MD5SIG,
			   md5sig_count * sizeof(struct tcp_diag_md5sig));
	if (!attr)
		return -EMSGSIZE;

	info = nla_data(attr);
	memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		tcp_diag_md5sig_fill(info++, key);
		if (--md5sig_count == 0)
			break;
	}

	return 0;
}
#endif

static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
			    const struct tcp_ulp_ops *ulp_ops, bool net_admin)
{
	struct nlattr *nest;
	int err;

	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
	if (!nest)
		return -EMSGSIZE;

	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
	if (err)
		goto nla_failure;

	if (ulp_ops->get_info)
		err = ulp_ops->get_info(sk, skb, net_admin);
	if (err)
		goto nla_failure;

	nla_nest_end(skb, nest);
	return 0;

nla_failure:
	nla_nest_cancel(skb, nest);
	return err;
}

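/* Append the optional attributes to an inet_diag response: the configured
 * MD5 keys (for CAP_NET_ADMIN requesters) and the ULP info nest.
 */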
static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
			    struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_ulp_ops *ulp_ops;
	int err = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin) {
		struct tcp_md5sig_info *md5sig;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig)
			err = tcp_diag_put_md5sig(skb, md5sig);
		rcu_read_unlock();
		if (err < 0)
			return err;
	}
#endif

	ulp_ops = icsk->icsk_ulp_ops;
	if (ulp_ops) {
		err = tcp_diag_put_ulp(skb, sk, ulp_ops, net_admin);
		if (err < 0)
			return err;
	}

	return 0;
}

static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	size_t size = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin && sk_fullsock(sk)) {
		const struct tcp_md5sig_info *md5sig;
		const struct tcp_md5sig_key *key;
		size_t md5sig_count = 0;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig) {
			hlist_for_each_entry_rcu(key, &md5sig->head, node)
				md5sig_count++;
		}
		rcu_read_unlock();
		size += nla_total_size(md5sig_count *
				       sizeof(struct tcp_diag_md5sig));
	}
#endif

	if (sk_fullsock(sk)) {
		const struct tcp_ulp_ops *ulp_ops;

		ulp_ops = icsk->icsk_ulp_ops;
		if (ulp_ops) {
			size += nla_total_size(0) +
				nla_total_size(TCP_ULP_NAME_MAX);
			if (ulp_ops->get_info_size)
				size += ulp_ops->get_info_size(sk, net_admin);
		}
	}

	return size
		+ nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ 64;
}

static int tcp_twsk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = READ_ONCE(tw->tw_substate);
	r->idiag_timer = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     tw->tw_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

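/* A request socket (TCP_NEW_SYN_RECV) is reported to userspace as
 * TCP_SYN_RECV, carrying the SYN-ACK retransmit timer and count.
 */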
static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = READ_ONCE(reqsk->num_retrans);

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}

static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	struct inet_hashinfo *hashinfo;
	int i, num, s_i, s_num;
	struct sock *sk;

	hashinfo = net->ipv4.tcp_death_row.hashinfo;
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->lhash2[i];

			if (hlist_nulls_empty(&ilb->nulls_head)) {
				s_num = 0;
				continue;
			}
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(cb_data, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

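/* Matching sockets are batched into sk_arr under the bucket lock with a
 * reference held, then reported after the lock is released; dump progress
 * is saved in cb->args so an interrupted walk can resume.
 */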
	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
	if (cb->args[0] == 1) {
		if (!(idiag_states & TCPF_BOUND_INACTIVE))
			goto skip_bind_ht;

		for (i = s_i; i < hashinfo->bhash_size; i++) {
			struct inet_bind_hashbucket *ibb;
			struct inet_bind2_bucket *tb2;
			struct sock *sk_arr[SKARR_SZ];
			int num_arr[SKARR_SZ];
			int idx, accum, res;

resume_bind_walk:
			num = 0;
			accum = 0;
			ibb = &hashinfo->bhash2[i];

			if (hlist_empty(&ibb->chain)) {
				s_num = 0;
				continue;
			}
			spin_lock_bh(&ibb->lock);
			inet_bind_bucket_for_each(tb2, &ibb->chain) {
				if (!net_eq(ib2_net(tb2), net))
					continue;

				sk_for_each_bound(sk, &tb2->owners) {
					struct inet_sock *inet = inet_sk(sk);

					if (num < s_num)
						goto next_bind;

					if (sk->sk_state != TCP_CLOSE ||
					    !inet->inet_num)
						goto next_bind;

					if (r->sdiag_family != AF_UNSPEC &&
					    r->sdiag_family != sk->sk_family)
						goto next_bind;

					if (!inet_diag_bc_sk(cb_data, sk))
						goto next_bind;

					sock_hold(sk);
					num_arr[accum] = num;
					sk_arr[accum] = sk;
					if (++accum == SKARR_SZ)
						goto pause_bind_walk;
next_bind:
					num++;
				}
			}
pause_bind_walk:
			spin_unlock_bh(&ibb->lock);

			res = 0;
			for (idx = 0; idx < accum; idx++) {
				if (res >= 0) {
					res = inet_sk_diag_fill(sk_arr[idx],
								NULL, skb, cb,
								r, NLM_F_MULTI,
								net_admin);
					if (res < 0)
						num = num_arr[idx];
				}
				sock_put(sk_arr[idx]);
			}
			if (res < 0)
				goto done;

			cond_resched();

			if (accum == SKARR_SZ) {
				s_num = num + 1;
				goto resume_bind_walk;
			}

			s_num = 0;
		}
skip_bind_ht:
		cb->args[0] = 2;
		s_i = num = s_num = 0;
	}

	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

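	/* Walk the established hash; full, timewait and request sockets
	 * all live in ehash.
	 */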
	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(cb_data, sk))
				goto next_normal;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);

		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;

		cond_resched();

		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}

static struct sock *tcp_diag_find_one_icsk(struct net *net,
					   const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET) {
		sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
#endif
	} else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}

static int tcp_diag_dump_one(struct netlink_callback *cb,
			     const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	struct sk_buff *rep;
	struct sock *sk;
	struct net *net;
	bool net_admin;
	int err;

	net = sock_net(in_skb->sk);
	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}

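/* SOCK_DESTROY support: look up the target socket and abort it with
 * ECONNABORTED.
 */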
#ifdef CONFIG_INET_DIAG_DESTROY
static int tcp_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct sock *sk;
	int err;

	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	err = sock_diag_destroy(sk, ECONNABORTED);

	sock_gen_put(sk);

	return err;
}
#endif

static const struct inet_diag_handler tcp_diag_handler = {
	.owner			= THIS_MODULE,
	.dump			= tcp_diag_dump,
	.dump_one		= tcp_diag_dump_one,
	.idiag_get_info		= tcp_diag_get_info,
	.idiag_get_aux		= tcp_diag_get_aux,
	.idiag_type		= IPPROTO_TCP,
	.idiag_info_size	= sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy		= tcp_diag_destroy,
#endif
};

static int __init tcp_diag_init(void)
{
	return inet_diag_register(&tcp_diag_handler);
}

static void __exit tcp_diag_exit(void)
{
	inet_diag_unregister(&tcp_diag_handler);
}

module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);