// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

#include <linux/tcp.h>

#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/netlink.h>
#include <net/tcp.h>

static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			      void *_info)
{
	struct tcp_info *info = _info;

	if (inet_sk_state_load(sk) == TCP_LISTEN) {
		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
	} else if (sk->sk_type == SOCK_STREAM) {
		const struct tcp_sock *tp = tcp_sk(sk);

		r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
					     READ_ONCE(tp->copied_seq), 0);
		r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
	}
	if (info)
		tcp_get_info(sk, info);
}

#ifdef CONFIG_TCP_MD5SIG
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
				 const struct tcp_md5sig_key *key)
{
	info->tcpm_family = key->family;
	info->tcpm_prefixlen = key->prefixlen;
	info->tcpm_keylen = key->keylen;
	memcpy(info->tcpm_key, key->key, key->keylen);

	if (key->family == AF_INET)
		info->tcpm_addr[0] = key->addr.a4.s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	else if (key->family == AF_INET6)
		memcpy(&info->tcpm_addr, &key->addr.a6,
		       sizeof(info->tcpm_addr));
#endif
}

static int tcp_diag_put_md5sig(struct sk_buff *skb,
			       const struct tcp_md5sig_info *md5sig)
{
	const struct tcp_md5sig_key *key;
	struct tcp_diag_md5sig *info;
	struct nlattr *attr;
	int md5sig_count = 0;

	hlist_for_each_entry_rcu(key, &md5sig->head, node)
		md5sig_count++;
	if (md5sig_count == 0)
		return 0;

	attr = nla_reserve(skb, INET_DIAG_MD5SIG,
			   md5sig_count * sizeof(struct tcp_diag_md5sig));
	if (!attr)
		return -EMSGSIZE;

	info = nla_data(attr);
	memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		tcp_diag_md5sig_fill(info++, key);
		if (--md5sig_count == 0)
			break;
	}

	return 0;
}
#endif

static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
			    const struct tcp_ulp_ops *ulp_ops, bool net_admin)
{
	struct nlattr *nest;
	int err;

	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
	if (!nest)
		return -EMSGSIZE;

	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
	if (err)
		goto nla_failure;

	if (ulp_ops->get_info)
		err = ulp_ops->get_info(sk, skb, net_admin);
	if (err)
		goto nla_failure;

	nla_nest_end(skb, nest);
	return 0;

nla_failure:
	nla_nest_cancel(skb, nest);
	return err;
}

static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
			    struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_ulp_ops *ulp_ops;
	int err = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin) {
		struct tcp_md5sig_info *md5sig;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig)
			err = tcp_diag_put_md5sig(skb, md5sig);
		rcu_read_unlock();
		if (err < 0)
			return err;
	}
#endif

	ulp_ops = icsk->icsk_ulp_ops;
	if (ulp_ops) {
		err = tcp_diag_put_ulp(skb, sk, ulp_ops, net_admin);
		if (err < 0)
			return err;
	}

	return 0;
}

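/* Rough upper bound on the attribute space sk_diag_fill() may emit for
 * one socket, used to size the reply skb; the trailing 64 bytes leave
 * headroom for small attributes not itemized here.
 */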
static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	size_t size = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin && sk_fullsock(sk)) {
		const struct tcp_md5sig_info *md5sig;
		const struct tcp_md5sig_key *key;
		size_t md5sig_count = 0;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig) {
			hlist_for_each_entry_rcu(key, &md5sig->head, node)
				md5sig_count++;
		}
		rcu_read_unlock();
		size += nla_total_size(md5sig_count *
				       sizeof(struct tcp_diag_md5sig));
	}
#endif

	if (sk_fullsock(sk)) {
		const struct tcp_ulp_ops *ulp_ops;

		ulp_ops = icsk->icsk_ulp_ops;
		if (ulp_ops) {
			size += nla_total_size(0) +
				nla_total_size(TCP_ULP_NAME_MAX);
			if (ulp_ops->get_info_size)
				size += ulp_ops->get_info_size(sk, net_admin);
		}
	}

	return size
		+ nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ 64;
}

static int tcp_twsk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = READ_ONCE(tw->tw_substate);
	r->idiag_timer = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     tw->tw_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

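/* Dispatch on the socket flavor: timewait and request sockets are
 * "mini" sockets with their own fill helpers; everything else is a
 * full socket handled by the generic inet_diag path.
 */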
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}

static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	struct inet_hashinfo *hashinfo;
	int i, num, s_i, s_num;
	struct nlattr *bc;
	struct sock *sk;

	hashinfo = net->ipv4.tcp_death_row.hashinfo;
	bc = cb_data->inet_diag_nla_bc;
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->lhash2[i];

			if (hlist_nulls_empty(&ilb->nulls_head)) {
				s_num = 0;
				continue;
			}
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(bc, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

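	/* Multi-part dumps resume via cb->args[]: args[0] is the walk phase
	 * (0 = listen hash, 1 = bound-but-inactive, 2 = established hash),
	 * args[1] the bucket index and args[2] the position inside it.
	 */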
	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
	if (cb->args[0] == 1) {
		if (!(idiag_states & TCPF_BOUND_INACTIVE))
			goto skip_bind_ht;

		for (i = s_i; i < hashinfo->bhash_size; i++) {
			struct inet_bind_hashbucket *ibb;
			struct inet_bind2_bucket *tb2;
			struct sock *sk_arr[SKARR_SZ];
			int num_arr[SKARR_SZ];
			int idx, accum, res;

resume_bind_walk:
			num = 0;
			accum = 0;
			ibb = &hashinfo->bhash2[i];

			if (hlist_empty(&ibb->chain)) {
				s_num = 0;
				continue;
			}
			spin_lock_bh(&ibb->lock);
			inet_bind_bucket_for_each(tb2, &ibb->chain) {
				if (!net_eq(ib2_net(tb2), net))
					continue;

				sk_for_each_bound(sk, &tb2->owners) {
					struct inet_sock *inet = inet_sk(sk);

					if (num < s_num)
						goto next_bind;

					if (sk->sk_state != TCP_CLOSE ||
					    !inet->inet_num)
						goto next_bind;

					if (r->sdiag_family != AF_UNSPEC &&
					    r->sdiag_family != sk->sk_family)
						goto next_bind;

					if (!inet_diag_bc_sk(bc, sk))
						goto next_bind;

					sock_hold(sk);
					num_arr[accum] = num;
					sk_arr[accum] = sk;
					if (++accum == SKARR_SZ)
						goto pause_bind_walk;
next_bind:
					num++;
				}
			}
pause_bind_walk:
			spin_unlock_bh(&ibb->lock);

			res = 0;
			for (idx = 0; idx < accum; idx++) {
				if (res >= 0) {
					res = inet_sk_diag_fill(sk_arr[idx],
								NULL, skb, cb,
								r, NLM_F_MULTI,
								net_admin);
					if (res < 0)
						num = num_arr[idx];
				}
				sock_put(sk_arr[idx]);
			}
			if (res < 0)
				goto done;

			cond_resched();

			if (accum == SKARR_SZ) {
				s_num = num + 1;
				goto resume_bind_walk;
			}

			s_num = 0;
		}
skip_bind_ht:
		cb->args[0] = 2;
		s_i = num = s_num = 0;
	}

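	/* Walk the established hash: matching sockets are collected in
	 * batches of SKARR_SZ under the bucket lock, then dumped with the
	 * lock dropped so that filling the netlink skb does not run with
	 * bh disabled.
	 */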
	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);

		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;

		cond_resched();

		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}

static struct sock *tcp_diag_find_one_icsk(struct net *net,
					   const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET) {
		sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
#endif
	} else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}

static int tcp_diag_dump_one(struct netlink_callback *cb,
			     const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	struct sk_buff *rep;
	struct sock *sk;
	struct net *net;
	bool net_admin;
	int err;

	net = sock_net(in_skb->sk);
	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}

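/* SOCK_DESTROY support: look the target socket up the same way as
 * dump_one and abort it with ECONNABORTED. Only built when
 * CONFIG_INET_DIAG_DESTROY is enabled.
 */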
#ifdef CONFIG_INET_DIAG_DESTROY
static int tcp_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct sock *sk;
	int err;

	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	err = sock_diag_destroy(sk, ECONNABORTED);

	sock_gen_put(sk);

	return err;
}
#endif

static const struct inet_diag_handler tcp_diag_handler = {
	.owner			= THIS_MODULE,
	.dump			= tcp_diag_dump,
	.dump_one		= tcp_diag_dump_one,
	.idiag_get_info		= tcp_diag_get_info,
	.idiag_get_aux		= tcp_diag_get_aux,
	.idiag_type		= IPPROTO_TCP,
	.idiag_info_size	= sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy		= tcp_diag_destroy,
#endif
};

static int __init tcp_diag_init(void)
{
	return inet_diag_register(&tcp_diag_handler);
}

static void __exit tcp_diag_exit(void)
{
	inet_diag_unregister(&tcp_diag_handler);
}

module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);