// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet_diag.c	Module for monitoring INET transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
#include <net/bpf_sk_storage.h>
#include <net/netlink.h>

#include <linux/inet.h>
#include <linux/stddef.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct inet_diag_handler __rcu **inet_diag_table;

struct inet_diag_entry {
	const __be32 *saddr;
	const __be32 *daddr;
	u16 sport;
	u16 dport;
	u16 family;
	u16 userlocks;
	u32 ifindex;
	u32 mark;
#ifdef CONFIG_SOCK_CGROUP_DATA
	u64 cgroup_id;
#endif
};

static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
	const struct inet_diag_handler *handler;

	if (proto < 0 || proto >= IPPROTO_MAX)
		return NULL;

	if (!READ_ONCE(inet_diag_table[proto]))
		sock_load_diag_module(AF_INET, proto);

	rcu_read_lock();
	handler = rcu_dereference(inet_diag_table[proto]);
	if (handler && !try_module_get(handler->owner))
		handler = NULL;
	rcu_read_unlock();

	return handler;
}

static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
	module_put(handler->owner);
}

void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
	r->idiag_family = sk->sk_family;

	r->id.idiag_sport = htons(sk->sk_num);
	r->id.idiag_dport = sk->sk_dport;
	r->id.idiag_if = sk->sk_bound_dev_if;
	sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
	} else
#endif
	{
	memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
	memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

	r->id.idiag_src[0] = sk->sk_rcv_saddr;
	r->id.idiag_dst[0] = sk->sk_daddr;
	}
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);

static size_t inet_sk_attr_size(struct sock *sk,
				const struct inet_diag_req_v2 *req,
				bool net_admin)
{
	const struct inet_diag_handler *handler;
	size_t aux = 0;

	rcu_read_lock();
	handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
	DEBUG_NET_WARN_ON_ONCE(!handler);
	if (handler && handler->idiag_get_aux_size)
		aux = handler->idiag_get_aux_size(sk, net_admin);
	rcu_read_unlock();

	return	  nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ aux
		+ 64;
}

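/* Fill the attributes shared by all fullsock replies: shutdown state,
 * TOS/TCLASS, mark, class id, cgroup id, uid/inode and the boolean
 * inet socket options. Returns 0 on success, 1 if the skb ran out of
 * room (the caller then cancels the netlink message).
 */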
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
			     struct inet_diag_msg *r, int ext,
			     struct user_namespace *user_ns,
			     bool net_admin)
{
	const struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_sockopt inet_sockopt;

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		goto errout;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if (ext & (1 << (INET_DIAG_TOS - 1)))
		if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
			goto errout;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
			if (nla_put_u8(skb, INET_DIAG_TCLASS,
				       inet6_sk(sk)->tclass) < 0)
				goto errout;

		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			goto errout;
	}
#endif

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
		goto errout;

	if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
	    ext & (1 << (INET_DIAG_TCLASS - 1))) {
		u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
		/* Fallback to socket priority if class id isn't set.
		 * Classful qdiscs use it as direct reference to class.
		 * For cgroup2 classid is always zero.
		 */
		if (!classid)
			classid = READ_ONCE(sk->sk_priority);

		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
			goto errout;
	}

#ifdef CONFIG_SOCK_CGROUP_DATA
	if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
			      cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
			      INET_DIAG_PAD))
		goto errout;
#endif

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	memset(&inet_sockopt, 0, sizeof(inet_sockopt));
	inet_sockopt.recverr = inet_test_bit(RECVERR, sk);
	inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk);
	inet_sockopt.freebind = inet_test_bit(FREEBIND, sk);
	inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk);
	inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk);
	inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
	inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk);
	inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk);
	inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
	inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
	inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
	if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
		    &inet_sockopt))
		goto errout;

	return 0;
errout:
	return 1;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);

static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
				 struct nlattr **req_nlas)
{
	struct nlattr *nla;
	int remaining;

	nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
		int type = nla_type(nla);

		if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32))
			return -EINVAL;

		if (type < __INET_DIAG_REQ_MAX)
			req_nlas[type] = nla;
	}
	return 0;
}

static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
				  const struct inet_diag_dump_data *data)
{
	if (data->req_nlas[INET_DIAG_REQ_PROTOCOL])
		return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]);
	return req->sdiag_protocol;
}

#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

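/* Build one full-socket reply. @icsk is NULL for sockets that carry no
 * inet_connection_sock part (e.g. bound-but-inactive sockets dumped from
 * the bind hash); the timer, protocol info and congestion control
 * attributes are then skipped.
 */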
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, struct netlink_callback *cb,
		      const struct inet_diag_req_v2 *req,
		      u16 nlmsg_flags, bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	struct inet_diag_dump_data *cb_data;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	void *info = NULL;
	u8 icsk_pending;
	int protocol;

	cb_data = cb->data;
	protocol = inet_diag_get_protocol(req, cb_data);

	/* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
	handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
	DEBUG_NET_WARN_ON_ONCE(!handler);
	if (!handler)
		return -ENXIO;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;
	r->idiag_expires = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
				     sk_user_ns(NETLINK_CB(cb->skb).sk),
				     net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
			.idiag_fmem = sk_forward_alloc_get(sk),
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

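	/* idiag_timer codes reported to userspace (see sock_diag(7)):
	 * 1 retransmit/loss probe, 2 keepalive, 4 zero-window probe
	 * (3, time-wait, is set in inet_twsk_diag_fill()).
	 */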
	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
	}

	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
			goto errout;

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

	/* Keep it at the end for potential retry with a larger skb,
	 * or else do best-effort fitting, which is only done for the
	 * first_nlmsg.
	 */
	if (cb_data->bpf_stg_diag) {
		bool first_nlmsg = ((unsigned char *)nlh == skb->data);
		unsigned int prev_min_dump_alloc;
		unsigned int total_nla_size = 0;
		unsigned int msg_len;
		int err;

		msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
		err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
					      INET_DIAG_SK_BPF_STORAGES,
					      &total_nla_size);

		if (!err)
			goto out;

		total_nla_size += msg_len;
		prev_min_dump_alloc = cb->min_dump_alloc;
		if (total_nla_size > prev_min_dump_alloc)
			cb->min_dump_alloc = min_t(u32, total_nla_size,
						   MAX_DUMP_ALLOC_SIZE);

		if (!first_nlmsg)
			goto errout;

		if (cb->min_dump_alloc > prev_min_dump_alloc)
			/* Retry with pskb_expand_head() with
			 * __GFP_DIRECT_RECLAIM
			 */
			goto errout;

		WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);

		/* Send what we have for this sk
		 * and move on to the next sk in the following
		 * dump()
		 */
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       struct netlink_callback *cb,
			       u16 nlmsg_flags, bool net_admin)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = READ_ONCE(tw->tw_substate);
	r->idiag_timer = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     tw->tw_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

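/* Dispatch on socket state: time-wait and request sockets are not full
 * sockets and get their own minimal fill helpers.
 */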
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}

struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
			       const struct nlmsghdr *nlh,
			       int hdrlen,
			       const struct inet_diag_req_v2 *req)
{
	const struct inet_diag_handler *handler;
	struct inet_diag_dump_data dump_data;
	int err, protocol;

	memset(&dump_data, 0, sizeof(dump_data));
	err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
	if (err)
		return err;

	protocol = inet_diag_get_protocol(req, &dump_data);

	handler = inet_diag_lock_handler(protocol);
	if (!handler)
		return -ENOENT;

	if (cmd == SOCK_DIAG_BY_FAMILY) {
		struct netlink_callback cb = {
			.nlh = nlh,
			.skb = in_skb,
			.data = &dump_data,
		};
		err = handler->dump_one(&cb, req);
	} else if (cmd == SOCK_DESTROY && handler->destroy) {
		err = handler->destroy(in_skb, req);
	} else {
		err = -EOPNOTSUPP;
	}
	inet_diag_unlock_handler(handler);

	return err;
}

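/* Compare the first @bits bits of two big-endian address arrays.
 * Returns 1 on match, 0 otherwise.
 */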
static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int words = bits >> 5;

	bits &= 0x1f;

	if (words) {
		if (memcmp(a1, a2, words << 2))
			return 0;
	}
	if (bits) {
		__be32 w1, w2;
		__be32 mask;

		w1 = a1[words];
		w2 = a2[words];

		mask = htonl((0xffffffff) << (32 - bits));

		if ((w1 ^ w2) & mask)
			return 0;
	}

	return 1;
}

static int inet_diag_bc_run(const struct nlattr *_bc,
			    const struct inet_diag_entry *entry)
{
	const void *bc = nla_data(_bc);
	int len = nla_len(_bc);

	while (len > 0) {
		int yes = 1;
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_NOP:
			break;
		case INET_DIAG_BC_JMP:
			yes = 0;
			break;
		case INET_DIAG_BC_S_EQ:
			yes = entry->sport == op[1].no;
			break;
		case INET_DIAG_BC_S_GE:
			yes = entry->sport >= op[1].no;
			break;
		case INET_DIAG_BC_S_LE:
			yes = entry->sport <= op[1].no;
			break;
		case INET_DIAG_BC_D_EQ:
			yes = entry->dport == op[1].no;
			break;
		case INET_DIAG_BC_D_GE:
			yes = entry->dport >= op[1].no;
			break;
		case INET_DIAG_BC_D_LE:
			yes = entry->dport <= op[1].no;
			break;
		case INET_DIAG_BC_AUTO:
			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
			break;
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND: {
			const struct inet_diag_hostcond *cond;
			const __be32 *addr;

			cond = (const struct inet_diag_hostcond *)(op + 1);
			if (cond->port != -1 &&
			    cond->port != (op->code == INET_DIAG_BC_S_COND ?
					   entry->sport : entry->dport)) {
				yes = 0;
				break;
			}

			if (op->code == INET_DIAG_BC_S_COND)
				addr = entry->saddr;
			else
				addr = entry->daddr;

			if (cond->family != AF_UNSPEC &&
			    cond->family != entry->family) {
				if (entry->family == AF_INET6 &&
				    cond->family == AF_INET) {
					if (addr[0] == 0 && addr[1] == 0 &&
					    addr[2] == htonl(0xffff) &&
					    bitstring_match(addr + 3,
							    cond->addr,
							    cond->prefix_len))
						break;
				}
				yes = 0;
				break;
			}

			if (cond->prefix_len == 0)
				break;
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
				break;
			yes = 0;
			break;
		}
		case INET_DIAG_BC_DEV_COND: {
			u32 ifindex;

			ifindex = *((const u32 *)(op + 1));
			if (ifindex != entry->ifindex)
				yes = 0;
			break;
		}
		case INET_DIAG_BC_MARK_COND: {
			struct inet_diag_markcond *cond;

			cond = (struct inet_diag_markcond *)(op + 1);
			if ((entry->mark & cond->mask) != cond->mark)
				yes = 0;
			break;
		}
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND: {
			u64 cgroup_id;

			cgroup_id = get_unaligned((const u64 *)(op + 1));
			if (cgroup_id != entry->cgroup_id)
				yes = 0;
			break;
		}
#endif
		}

		if (yes) {
			len -= op->yes;
			bc += op->yes;
		} else {
			len -= op->no;
			bc += op->no;
		}
	}
	return len == 0;
}

/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
			     const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
		entry->daddr = sk->sk_v6_daddr.s6_addr32;
	} else
#endif
	{
		entry->saddr = &sk->sk_rcv_saddr;
		entry->daddr = &sk->sk_daddr;
	}
}

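/* Run the filter bytecode against one socket. Returns 1 if the socket
 * matches (or if no filter was supplied), 0 if it should be skipped.
 */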
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.ifindex = sk->sk_bound_dev_if;
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
	if (sk_fullsock(sk))
		entry.mark = READ_ONCE(sk->sk_mark);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else if (sk->sk_state == TCP_TIME_WAIT)
		entry.mark = inet_twsk(sk)->tw_mark;
	else
		entry.mark = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
	entry.cgroup_id = sk_fullsock(sk) ?
		cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
#endif

	return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);

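/* Check a conditional jump: @cc is the target's distance in bytes from
 * the end of the bytecode. Walk the yes-chain from the start and accept
 * only if the target lands exactly on an instruction boundary.
 */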
static int valid_cc(const void *bc, int len, int cc)
{
	while (len >= 0) {
		const struct inet_diag_bc_op *op = bc;

		if (cc > len)
			return 0;
		if (cc == len)
			return 1;
		if (op->yes < 4 || op->yes & 3)
			return 0;
		len -= op->yes;
		bc += op->yes;
	}
	return 0;
}

/* data is u32 ifindex */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
			  int *min_len)
{
	/* Check ifindex space. */
	*min_len += sizeof(u32);
	if (len < *min_len)
		return false;

	return true;
}

/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	struct inet_diag_hostcond *cond;
	int addr_len;

	/* Check hostcond space. */
	*min_len += sizeof(struct inet_diag_hostcond);
	if (len < *min_len)
		return false;
	cond = (struct inet_diag_hostcond *)(op + 1);

	/* Check address family and address length. */
	switch (cond->family) {
	case AF_UNSPEC:
		addr_len = 0;
		break;
	case AF_INET:
		addr_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		addr_len = sizeof(struct in6_addr);
		break;
	default:
		return false;
	}
	*min_len += addr_len;
	if (len < *min_len)
		return false;

	/* Check prefix length (in bits) vs address length (in bytes). */
	if (cond->prefix_len > 8 * addr_len)
		return false;

	return true;
}

/* Validate a port comparison operator. */
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
				  int len, int *min_len)
{
	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
	*min_len += sizeof(struct inet_diag_bc_op);
	if (len < *min_len)
		return false;
	return true;
}

static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	*min_len += sizeof(struct inet_diag_markcond);
	return len >= *min_len;
}

#ifdef CONFIG_SOCK_CGROUP_DATA
static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
			     int *min_len)
{
	*min_len += sizeof(u64);
	return len >= *min_len;
}
#endif

static int inet_diag_bc_audit(const struct nlattr *attr,
			      const struct sk_buff *skb)
{
	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
	const void *bytecode, *bc;
	int bytecode_len, len;

	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
		return -EINVAL;

	bytecode = bc = nla_data(attr);
	len = bytecode_len = nla_len(attr);

	while (len > 0) {
		int min_len = sizeof(struct inet_diag_bc_op);
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
			if (!valid_hostcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_DEV_COND:
			if (!valid_devcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_S_EQ:
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_EQ:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
			if (!valid_port_comparison(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_MARK_COND:
			if (!net_admin)
				return -EPERM;
			if (!valid_markcond(bc, len, &min_len))
				return -EINVAL;
			break;
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND:
			if (!valid_cgroupcond(bc, len, &min_len))
				return -EINVAL;
			break;
#endif
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_JMP:
		case INET_DIAG_BC_NOP:
			break;
		default:
			return -EINVAL;
		}

		if (op->code != INET_DIAG_BC_NOP) {
			if (op->no < min_len || op->no > len + 4 || op->no & 3)
				return -EINVAL;
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
		}

		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
			return -EINVAL;
		bc += op->yes;
		len -= op->yes;
	}
	return len == 0 ? 0 : -EINVAL;
}

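/* Compile-time layout checks: the dump code reads time-wait sockets
 * through the same field offsets as full inet sockets.
 */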
static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
			 struct netlink_callback *cb,
			 const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	int i, num, s_i, s_num;
	struct nlattr *bc;
	struct sock *sk;

	bc = cb_data->inet_diag_nla_bc;
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->lhash2[i];

			if (hlist_nulls_empty(&ilb->nulls_head)) {
				s_num = 0;
				continue;
			}
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(bc, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
	if (cb->args[0] == 1) {
		if (!(idiag_states & TCPF_BOUND_INACTIVE))
			goto skip_bind_ht;

		for (i = s_i; i < hashinfo->bhash_size; i++) {
			struct inet_bind_hashbucket *ibb;
			struct inet_bind2_bucket *tb2;
			struct sock *sk_arr[SKARR_SZ];
			int num_arr[SKARR_SZ];
			int idx, accum, res;

resume_bind_walk:
			num = 0;
			accum = 0;
			ibb = &hashinfo->bhash2[i];

			if (hlist_empty(&ibb->chain)) {
				s_num = 0;
				continue;
			}
			spin_lock_bh(&ibb->lock);
			inet_bind_bucket_for_each(tb2, &ibb->chain) {
				if (!net_eq(ib2_net(tb2), net))
					continue;

				sk_for_each_bound(sk, &tb2->owners) {
					struct inet_sock *inet = inet_sk(sk);

					if (num < s_num)
						goto next_bind;

					if (sk->sk_state != TCP_CLOSE ||
					    !inet->inet_num)
						goto next_bind;

					if (r->sdiag_family != AF_UNSPEC &&
					    r->sdiag_family != sk->sk_family)
						goto next_bind;

					if (!inet_diag_bc_sk(bc, sk))
						goto next_bind;

					sock_hold(sk);
					num_arr[accum] = num;
					sk_arr[accum] = sk;
					if (++accum == SKARR_SZ)
						goto pause_bind_walk;
next_bind:
					num++;
				}
			}
pause_bind_walk:
			spin_unlock_bh(&ibb->lock);

			res = 0;
			for (idx = 0; idx < accum; idx++) {
				if (res >= 0) {
					res = inet_sk_diag_fill(sk_arr[idx],
								NULL, skb, cb,
								r, NLM_F_MULTI,
								net_admin);
					if (res < 0)
						num = num_arr[idx];
				}
				sock_put(sk_arr[idx]);
			}
			if (res < 0)
				goto done;

			cond_resched();

			if (accum == SKARR_SZ) {
				s_num = num + 1;
				goto resume_bind_walk;
			}

			s_num = 0;
		}
skip_bind_ht:
		cb->args[0] = 2;
		s_i = num = s_num = 0;
	}

	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
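			/* Time-wait sockets report tw_substate (TIME_WAIT or
			 * FIN_WAIT2) rather than sk_state.
			 */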
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);
		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;
		cond_resched();
		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);

static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *r)
{
	struct inet_diag_dump_data *cb_data = cb->data;
	const struct inet_diag_handler *handler;
	u32 prev_min_dump_alloc;
	int protocol, err = 0;

	protocol = inet_diag_get_protocol(r, cb_data);

again:
	prev_min_dump_alloc = cb->min_dump_alloc;
	handler = inet_diag_lock_handler(protocol);
	if (handler) {
		handler->dump(skb, cb, r);
		inet_diag_unlock_handler(handler);
	} else {
		err = -ENOENT;
	}
	/* The skb is not large enough to fit one sk info and
	 * inet_sk_diag_fill() has requested for a larger skb.
	 */
	if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) {
		err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL);
		if (!err)
			goto again;
	}

	return err ? : skb->len;
}

static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh));
}

static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_diag_dump_data *cb_data;
	struct sk_buff *skb = cb->skb;
	struct nlattr *nla;
	int err;

	cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
	if (!cb_data)
		return -ENOMEM;

	err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
	if (err) {
		kfree(cb_data);
		return err;
	}
	nla = cb_data->inet_diag_nla_bc;
	if (nla) {
		err = inet_diag_bc_audit(nla, skb);
		if (err) {
			kfree(cb_data);
			return err;
		}
	}

	nla = cb_data->inet_diag_nla_bpf_stgs;
	if (nla) {
		struct bpf_sk_storage_diag *bpf_stg_diag;

		bpf_stg_diag = bpf_sk_storage_diag_alloc(nla);
		if (IS_ERR(bpf_stg_diag)) {
			kfree(cb_data);
			return PTR_ERR(bpf_stg_diag);
		}
		cb_data->bpf_stg_diag = bpf_stg_diag;
	}

	cb->data = cb_data;
	return 0;
}

static int inet_diag_dump_start(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2));
}

static int inet_diag_dump_start_compat(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req));
}

static int inet_diag_dump_done(struct netlink_callback *cb)
{
	struct inet_diag_dump_data *cb_data = cb->data;

	bpf_sk_storage_diag_free(cb_data->bpf_stg_diag);
	kfree(cb->data);

	return 0;
}

static int inet_diag_type2proto(int type)
{
	switch (type) {
	case TCPDIAG_GETSOCK:
		return IPPROTO_TCP;
	case DCCPDIAG_GETSOCK:
		return IPPROTO_DCCP;
	default:
		return 0;
	}
}

static int inet_diag_dump_compat(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct inet_diag_req *rc = nlmsg_data(cb->nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = AF_UNSPEC; /* compatibility */
	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.pad = 0;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return __inet_diag_dump(skb, cb, &req);
}

static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
				      const struct nlmsghdr *nlh)
{
	struct inet_diag_req *rc = nlmsg_data(nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = rc->idiag_family;
	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.pad = 0;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh,
				   sizeof(struct inet_diag_req), &req);
}

static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int hdrlen = sizeof(struct inet_diag_req);
	struct net *net = sock_net(skb->sk);

	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
	    nlmsg_len(nlh) < hdrlen)
		return -EINVAL;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.start = inet_diag_dump_start_compat,
			.done = inet_diag_dump_done,
			.dump = inet_diag_dump_compat,
		};
		return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
	}

	return inet_diag_get_exact_compat(skb, nlh);
}

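/* Main NETLINK_SOCK_DIAG entry point for AF_INET/AF_INET6: start a
 * netlink dump for NLM_F_DUMP requests, otherwise answer a single
 * exact-match query (or SOCK_DESTROY).
 */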
static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
	int hdrlen = sizeof(struct inet_diag_req_v2);
	struct net *net = sock_net(skb->sk);

	if (nlmsg_len(h) < hdrlen)
		return -EINVAL;

	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
	    h->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.start = inet_diag_dump_start,
			.done = inet_diag_dump_done,
			.dump = inet_diag_dump,
		};
		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
	}

	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen,
				   nlmsg_data(h));
}

static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
	const struct inet_diag_handler *handler;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	struct inet_diag_msg *r;
	void *info = NULL;
	int err = 0;

	nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
	if (!nlh)
		return -ENOMEM;

	r = nlmsg_data(nlh);
	memset(r, 0, sizeof(*r));
	inet_diag_msg_common_fill(r, sk);
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
		r->id.idiag_sport = inet_sk(sk)->inet_sport;
	r->idiag_state = sk->sk_state;

	if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
		nlmsg_cancel(skb, nlh);
		return err;
	}

	handler = inet_diag_lock_handler(sk->sk_protocol);
	if (!handler) {
		nlmsg_cancel(skb, nlh);
		return -ENOENT;
	}

	attr = handler->idiag_info_size
		? nla_reserve_64bit(skb, INET_DIAG_INFO,
				    handler->idiag_info_size,
				    INET_DIAG_PAD)
		: NULL;
	if (attr)
		info = nla_data(attr);

	handler->idiag_get_info(sk, r, info);
	inet_diag_unlock_handler(handler);

	nlmsg_end(skb, nlh);
	return 0;
}

static const struct sock_diag_handler inet_diag_handler = {
	.owner = THIS_MODULE,
	.family = AF_INET,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};

static const struct sock_diag_handler inet6_diag_handler = {
	.owner = THIS_MODULE,
	.family = AF_INET6,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};

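/* Protocol diag modules (e.g. tcp_diag, udp_diag) plug into the table
 * here; cmpxchg()/xchg() keep registration lockless with respect to
 * the RCU readers above.
 */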
int inet_diag_register(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;

	if (type >= IPPROTO_MAX)
		return -EINVAL;

	return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type],
			NULL, h) ? 0 : -EEXIST;
}
EXPORT_SYMBOL_GPL(inet_diag_register);

void inet_diag_unregister(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;

	if (type >= IPPROTO_MAX)
		return;

	xchg((const struct inet_diag_handler **)&inet_diag_table[type],
	     NULL);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);

static const struct sock_diag_inet_compat inet_diag_compat = {
	.owner = THIS_MODULE,
	.fn = inet_diag_rcv_msg_compat,
};

static int __init inet_diag_init(void)
{
	const int inet_diag_table_size = (IPPROTO_MAX *
					  sizeof(struct inet_diag_handler *));
	int err = -ENOMEM;

	inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
	if (!inet_diag_table)
		goto out;

	err = sock_diag_register(&inet_diag_handler);
	if (err)
		goto out_free_nl;

	err = sock_diag_register(&inet6_diag_handler);
	if (err)
		goto out_free_inet;

	sock_diag_register_inet_compat(&inet_diag_compat);
out:
	return err;

out_free_inet:
	sock_diag_unregister(&inet_diag_handler);
out_free_nl:
	kfree(inet_diag_table);
	goto out;
}

static void __exit inet_diag_exit(void)
{
	sock_diag_unregister(&inet6_diag_handler);
	sock_diag_unregister(&inet_diag_handler);
	sock_diag_unregister_inet_compat(&inet_diag_compat);
	kfree(inet_diag_table);
}

module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);