// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet_diag.c	Module for monitoring INET transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
#include <net/bpf_sk_storage.h>
#include <net/netlink.h>

#include <linux/inet.h>
#include <linux/stddef.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

/* Table of diag handlers indexed by protocol number (bounded by
 * IPPROTO_MAX in inet_diag_lock_handler()). Entries are read with
 * rcu_dereference()/READ_ONCE().
 */
static const struct inet_diag_handler __rcu **inet_diag_table;

/* Per-socket values that the filter bytecode interpreter
 * (inet_diag_bc_run()) compares against. Filled by inet_diag_bc_sk().
 * dport is stored after ntohs(), i.e. in host byte order.
 */
struct inet_diag_entry {
	const __be32 *saddr;
	const __be32 *daddr;
	u16 sport;
	u16 dport;
	u16 family;
	u16 userlocks;
	u32 ifindex;
	u32 mark;
#ifdef CONFIG_SOCK_CGROUP_DATA
	u64 cgroup_id;
#endif
};

/* Look up the handler for @proto and pin its owning module.
 *
 * If no handler is registered yet, a module load is requested first via
 * sock_load_diag_module(). Returns the handler with a module reference
 * held (release it with inet_diag_unlock_handler()), or NULL when @proto
 * is out of range, no handler is registered, or the module reference
 * could not be taken.
 */
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
	const struct inet_diag_handler *handler;

	if (proto < 0 || proto >= IPPROTO_MAX)
		return NULL;

	if (!READ_ONCE(inet_diag_table[proto]))
		sock_load_diag_module(AF_INET, proto);

	rcu_read_lock();
	handler = rcu_dereference(inet_diag_table[proto]);
	if (handler && !try_module_get(handler->owner))
		handler = NULL;
	rcu_read_unlock();

	return handler;
}

/* Drop the module reference taken by inet_diag_lock_handler(). */
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
	module_put(handler->owner);
}

/* Fill the identity part of a diag message from @sk: address family,
 * source/destination ports, bound interface index, socket cookie and
 * source/destination addresses.
 */
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
	r->idiag_family = sk->sk_family;

	r->id.idiag_sport = htons(sk->sk_num);
	r->id.idiag_dport = sk->sk_dport;
	r->id.idiag_if = sk->sk_bound_dev_if;
	sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
	} else
#endif
	{
	/* Non-IPv6: clear the whole address arrays, then store the
	 * IPv4 address in the first word only.
	 */
	memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
	memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

	r->id.idiag_src[0] = sk->sk_rcv_saddr;
	r->id.idiag_dst[0] = sk->sk_daddr;
	}
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);

/* Compute the buffer size used by inet_diag_dump_one_icsk() for the
 * reply describing @sk.
 *
 * Sums the fixed attribute sizes plus whatever the protocol handler
 * reports through its optional idiag_get_aux_size() hook. The handler is
 * looked up by req->sdiag_protocol under rcu_read_lock().
 */
static size_t inet_sk_attr_size(struct sock *sk,
				const struct inet_diag_req_v2 *req,
				bool net_admin)
{
	const struct inet_diag_handler *handler;
	size_t aux = 0;

	rcu_read_lock();
	handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
	DEBUG_NET_WARN_ON_ONCE(!handler);
	if (handler && handler->idiag_get_aux_size)
		aux = handler->idiag_get_aux_size(sk, net_admin);
	rcu_read_unlock();

	/* NOTE(review): the trailing 64 looks like extra headroom; its
	 * exact purpose is not visible here.
	 */
	return	  nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ aux
		+ 64;
}

/* Append the common per-socket attributes to @skb and fill the uid and
 * inode fields of @r.
 *
 * @ext is the bitmask of requested extensions (bit N-1 selects attribute
 * N). INET_DIAG_MARK is only emitted when @net_admin is true.
 *
 * Returns 0 on success and 1 (not a negative errno) if any attribute
 * could not be added.
 */
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
			     struct inet_diag_msg *r, int ext,
			     struct user_namespace *user_ns,
			     bool net_admin)
{
	const struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_sockopt inet_sockopt;

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		goto errout;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if (ext & (1 << (INET_DIAG_TOS - 1)))
		if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
			goto errout;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
			if (nla_put_u8(skb, INET_DIAG_TCLASS,
				       inet6_sk(sk)->tclass) < 0)
				goto errout;

		/* The v6only flag is reported only for sockets in the
		 * LISTEN or CLOSE state.
		 */
		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			goto errout;
	}
#endif

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
		goto errout;

	/* The class id is emitted when either CLASS_ID or TCLASS was
	 * requested.
	 */
	if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
	    ext & (1 << (INET_DIAG_TCLASS - 1))) {
		u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
		/* Fallback to socket priority if class id isn't set.
		 * Classful qdiscs use it as direct reference to class.
		 * For cgroup2 classid is always zero.
		 */
		if (!classid)
			classid = READ_ONCE(sk->sk_priority);

		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
			goto errout;
	}

#ifdef CONFIG_SOCK_CGROUP_DATA
	if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
			      cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
			      INET_DIAG_PAD))
		goto errout;
#endif

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	/* Zero the whole struct first so fields not set below are not
	 * copied to the message uninitialized.
	 */
	memset(&inet_sockopt, 0, sizeof(inet_sockopt));
	inet_sockopt.recverr	= inet_test_bit(RECVERR, sk);
	inet_sockopt.is_icsk	= inet_test_bit(IS_ICSK, sk);
	inet_sockopt.freebind	= inet_test_bit(FREEBIND, sk);
	inet_sockopt.hdrincl	= inet_test_bit(HDRINCL, sk);
	inet_sockopt.mc_loop	= inet_test_bit(MC_LOOP, sk);
	inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
	inet_sockopt.mc_all	= inet_test_bit(MC_ALL, sk);
	inet_sockopt.nodefrag	= inet_test_bit(NODEFRAG, sk);
	inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
	inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
	inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
	if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
		    &inet_sockopt))
		goto errout;

	return 0;
errout:
	return 1;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);

/* Walk the attributes that follow the @hdrlen-byte request header in
 * @nlh and record a pointer to each one in @req_nlas, indexed by
 * attribute type.
 *
 * Types at or above __INET_DIAG_REQ_MAX are ignored. If a type appears
 * more than once, the last occurrence is the one kept. Returns -EINVAL
 * when an INET_DIAG_REQ_PROTOCOL attribute is not exactly a u32,
 * otherwise 0.
 */
static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
				 struct nlattr **req_nlas)
{
	struct nlattr *nla;
	int remaining;

	nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
		int type = nla_type(nla);

		if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32))
			return -EINVAL;

		if (type < __INET_DIAG_REQ_MAX)
			req_nlas[type] = nla;
	}
	return 0;
}

/* Return the protocol to use for a request: the INET_DIAG_REQ_PROTOCOL
 * attribute when one was supplied, otherwise the sdiag_protocol field of
 * the request header.
 */
static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
				  const struct inet_diag_dump_data *data)
{
	if (data->req_nlas[INET_DIAG_REQ_PROTOCOL])
		return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]);
	return req->sdiag_protocol;
}

/* Upper bound applied to cb->min_dump_alloc in inet_sk_diag_fill(). */
#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

/* Append one diag message describing full socket @sk to @skb.
 *
 * @icsk may be NULL; in that case only the generic attributes are emitted
 * and the handler's idiag_get_info() is called without an info buffer.
 * Otherwise timer state, the optional INET_DIAG_INFO / INET_DIAG_CONG
 * attributes, handler aux data and congestion-control info are added as
 * selected by req->idiag_ext.
 *
 * Returns 0 on success, -ENXIO when no handler is registered for the
 * request's protocol, and -EMSGSIZE when the message does not fit (the
 * partially built message is cancelled). When BPF storage attributes do
 * not fit, cb->min_dump_alloc may be raised so the caller can retry with
 * a larger skb.
 */
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, struct netlink_callback *cb,
		      const struct inet_diag_req_v2 *req,
		      u16 nlmsg_flags, bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	struct inet_diag_dump_data *cb_data;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	void *info = NULL;
	int protocol;

	cb_data = cb->data;
	protocol = inet_diag_get_protocol(req, cb_data);

	/* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
	handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
	DEBUG_NET_WARN_ON_ONCE(!handler);
	if (!handler)
		return -ENXIO;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;
	r->idiag_expires = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
				     sk_user_ns(NETLINK_CB(cb->skb).sk),
				     net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
			.idiag_fmem = sk_forward_alloc_get(sk),
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

	/* Without a connection sock there is no timer or congestion
	 * state to report; let the handler fill @r and finish.
	 */
	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

	/* idiag_timer: 1 for retransmit/REO/loss-probe, 4 for
	 * ICSK_TIME_PROBE0, 2 when sk_timer is pending; it stays 0 when
	 * none of these apply.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
	}

	/* Reserve room for the protocol info now; it is filled in by
	 * idiag_get_info() below.
	 */
	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
			goto errout;

	if (sk->sk_state < TCP_TIME_WAIT) {
		/* These locals intentionally shadow the outer 'info' and
		 * 'attr'; get_info() reports the attribute type and size.
		 */
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

	/* Keep it at the end for potential retry with a larger skb,
	 * or else do best-effort fitting, which is only done for the
	 * first_nlmsg.
	 */
	if (cb_data->bpf_stg_diag) {
		bool first_nlmsg = ((unsigned char *)nlh == skb->data);
		unsigned int prev_min_dump_alloc;
		unsigned int total_nla_size = 0;
		unsigned int msg_len;
		int err;

		msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
		err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
					      INET_DIAG_SK_BPF_STORAGES,
					      &total_nla_size);

		if (!err)
			goto out;

		/* Record how much room this socket needs so the next
		 * attempt can allocate a big enough skb (capped).
		 */
		total_nla_size += msg_len;
		prev_min_dump_alloc = cb->min_dump_alloc;
		if (total_nla_size > prev_min_dump_alloc)
			cb->min_dump_alloc = min_t(u32, total_nla_size,
						   MAX_DUMP_ALLOC_SIZE);

		if (!first_nlmsg)
			goto errout;

		if (cb->min_dump_alloc > prev_min_dump_alloc)
			/* Retry with pskb_expand_head() with
			 * __GFP_DIRECT_RECLAIM
			 */
			goto errout;

		WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);

		/* Send what we have for this sk
		 * and move on to the next sk in the following
		 * dump()
		 */
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

/* Append a diag message for a TIME_WAIT socket.
 *
 * The reported state is tw_substate and idiag_timer is 3; queue sizes,
 * uid and inode are reported as zero. INET_DIAG_MARK is added only for
 * @net_admin. Returns 0 or -EMSGSIZE.
 */
static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       struct netlink_callback *cb,
			       u16 nlmsg_flags, bool net_admin)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans      = 0;

	r->idiag_state	      = tw->tw_substate;
	r->idiag_timer	      = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires      = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue	      = 0;
	r->idiag_wqueue	      = 0;
	r->idiag_uid	      = 0;
	r->idiag_inode	      = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     tw->tw_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

/* Append a diag message for a request socket.
 *
 * The state is always reported as TCP_SYN_RECV with idiag_timer 1 and
 * the request's retransmit count; queue sizes, uid and inode are
 * reported as zero. INET_DIAG_MARK is added only for @net_admin.
 * Returns 0 or -EMSGSIZE.
 */
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	/* Compile-time check that the cookie sits at the same offset in
	 * both structures.
	 */
	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue	= 0;
	r->idiag_wqueue	= 0;
	r->idiag_uid	= 0;
	r->idiag_inode	= 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

/* Dispatch to the fill routine matching the socket's state: TIME_WAIT,
 * NEW_SYN_RECV (request socket), or a full socket.
 */
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}

/* Find the single socket identified by @req in @hashinfo.
 *
 * An AF_INET6 request whose source and destination are both v4-mapped
 * is looked up as IPv4 using the last address word.
 *
 * Returns the socket on success; callers in this file release it with
 * sock_gen_put(). Returns ERR_PTR(-EINVAL) for an unsupported address
 * family and ERR_PTR(-ENOENT) when nothing matches or the cookie check
 * fails.
 */
struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

/* Look up one socket, build its diag message in a freshly allocated skb
 * and unicast it to the requester.
 *
 * Returns the lookup error, -ENOMEM if the reply cannot be allocated,
 * the fill error, or the result of nlmsg_unicast(). The socket found by
 * the lookup is released before returning.
 */
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		/* The buffer was sized by inet_sk_attr_size(), so running
		 * out of room here is unexpected.
		 */
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

/* Handle a request that targets exactly one socket.
 *
 * @cmd selects the operation: SOCK_DIAG_BY_FAMILY calls the handler's
 * dump_one(), SOCK_DESTROY calls its optional destroy(). Returns -ENOENT
 * when no handler is available for the protocol and -EOPNOTSUPP for any
 * other command (or SOCK_DESTROY without a destroy hook).
 */
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
			       const struct nlmsghdr *nlh,
			       int hdrlen,
			       const struct inet_diag_req_v2 *req)
{
	const struct inet_diag_handler *handler;
	struct inet_diag_dump_data dump_data;
	int err, protocol;

	memset(&dump_data, 0, sizeof(dump_data));
	err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
	if (err)
		return err;

	protocol = inet_diag_get_protocol(req, &dump_data);

	handler = inet_diag_lock_handler(protocol);
	if (!handler)
		return -ENOENT;

	if (cmd == SOCK_DIAG_BY_FAMILY) {
		/* Build an on-stack callback so dump_one() sees the same
		 * structure a real dump would provide.
		 */
		struct netlink_callback cb = {
			.nlh = nlh,
			.skb = in_skb,
			.data = &dump_data,
		};
		err = handler->dump_one(&cb, req);
	} else if (cmd == SOCK_DESTROY && handler->destroy) {
		err = handler->destroy(in_skb, req);
	} else {
		err = -EOPNOTSUPP;
	}
	inet_diag_unlock_handler(handler);

	return err;
}

/* Compare the leading @bits bits of two addresses given as arrays of
 * 32-bit big-endian words. Returns 1 when they match, 0 otherwise.
 */
static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int words = bits >> 5;

	bits &= 0x1f;

	/* Whole 32-bit words first ... */
	if (words) {
		if (memcmp(a1, a2, words << 2))
			return 0;
	}
	/* ... then the remaining high-order bits of the next word. */
	if (bits) {
		__be32 w1, w2;
		__be32 mask;

		w1 = a1[words];
		w2 = a2[words];

		mask = htonl((0xffffffff) << (32 - bits));

		if ((w1 ^ w2) & mask)
			return 0;
	}

	return 1;
}

/* Run the filter bytecode in attribute @_bc against @entry.
 *
 * Each op is evaluated to "yes" or "no"; the interpreter then advances
 * by op->yes or op->no bytes respectively. Returns 1 when execution ends
 * exactly at the end of the program (remaining length 0) and 0
 * otherwise.
 */
static int inet_diag_bc_run(const struct nlattr *_bc,
			    const struct inet_diag_entry *entry)
{
	const void *bc = nla_data(_bc);
	int len = nla_len(_bc);

	while (len > 0) {
		int yes = 1;
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_NOP:
			break;
		case INET_DIAG_BC_JMP:
			/* Unconditional jump: always take the "no" offset. */
			yes = 0;
			break;
		/* Port comparisons read the port from the 'no' field of
		 * the op that follows.
		 */
		case INET_DIAG_BC_S_EQ:
			yes = entry->sport == op[1].no;
			break;
		case INET_DIAG_BC_S_GE:
			yes = entry->sport >= op[1].no;
			break;
		case INET_DIAG_BC_S_LE:
			yes = entry->sport <= op[1].no;
			break;
		case INET_DIAG_BC_D_EQ:
			yes = entry->dport == op[1].no;
			break;
		case INET_DIAG_BC_D_GE:
			yes = entry->dport >= op[1].no;
			break;
		case INET_DIAG_BC_D_LE:
			yes = entry->dport <= op[1].no;
			break;
		case INET_DIAG_BC_AUTO:
			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
			break;
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND: {
			const struct inet_diag_hostcond *cond;
			const __be32 *addr;

			cond = (const struct inet_diag_hostcond *)(op + 1);
			/* A port of -1 means "any port". */
			if (cond->port != -1 &&
			    cond->port != (op->code == INET_DIAG_BC_S_COND ?
					     entry->sport : entry->dport)) {
				yes = 0;
				break;
			}

			if (op->code == INET_DIAG_BC_S_COND)
				addr = entry->saddr;
			else
				addr = entry->daddr;

			if (cond->family != AF_UNSPEC &&
			    cond->family != entry->family) {
				/* An IPv4 condition may still match an
				 * IPv6 socket whose address is v4-mapped
				 * (::ffff:a.b.c.d).
				 */
				if (entry->family == AF_INET6 &&
				    cond->family == AF_INET) {
					if (addr[0] == 0 && addr[1] == 0 &&
					    addr[2] == htonl(0xffff) &&
					    bitstring_match(addr + 3,
							    cond->addr,
							    cond->prefix_len))
						break;
				}
				yes = 0;
				break;
			}

			if (cond->prefix_len == 0)
				break;
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
				break;
			yes = 0;
			break;
		}
		case INET_DIAG_BC_DEV_COND: {
			u32 ifindex;

			ifindex = *((const u32 *)(op + 1));
			if (ifindex != entry->ifindex)
				yes = 0;
			break;
		}
		case INET_DIAG_BC_MARK_COND: {
			struct inet_diag_markcond *cond;

			cond = (struct inet_diag_markcond *)(op + 1);
			if ((entry->mark & cond->mask) != cond->mark)
				yes = 0;
			break;
		}
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND: {
			u64 cgroup_id;

			cgroup_id = get_unaligned((const u64 *)(op + 1));
			if (cgroup_id != entry->cgroup_id)
				yes = 0;
			break;
		}
#endif
		}

		if (yes) {
			len -= op->yes;
			bc += op->yes;
		} else {
			len -= op->no;
			bc += op->no;
		}
	}
	return len == 0;
}

/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
			     const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
		entry->daddr = sk->sk_v6_daddr.s6_addr32;
	} else
#endif
	{
		entry->saddr = &sk->sk_rcv_saddr;
		entry->daddr = &sk->sk_daddr;
	}
}

/* Evaluate filter @bc against socket @sk.
 *
 * Returns 1 when there is no filter, otherwise the result of
 * inet_diag_bc_run(). userlocks and cgroup_id are only read from full
 * sockets (0 otherwise); the mark is taken from the request or timewait
 * socket when @sk is one of those.
 */
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.ifindex = sk->sk_bound_dev_if;
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
	if (sk_fullsock(sk))
		entry.mark = READ_ONCE(sk->sk_mark);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else if (sk->sk_state == TCP_TIME_WAIT)
		entry.mark = inet_twsk(sk)->tw_mark;
	else
		entry.mark = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
	entry.cgroup_id = sk_fullsock(sk) ?
		cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
#endif

	return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);

/* Check that a jump lands on an op boundary.
 *
 * Walks the program from its start by following each op's 'yes' offset
 * and returns 1 if a point with exactly @cc bytes remaining is reached.
 * Returns 0 if that point is overshot or an op with an invalid 'yes'
 * (smaller than 4 or not a multiple of 4) is met.
 */
static int valid_cc(const void *bc, int len, int cc)
{
	while (len >= 0) {
		const struct inet_diag_bc_op *op = bc;

		if (cc > len)
			return 0;
		if (cc == len)
			return 1;
		if (op->yes < 4 || op->yes & 3)
			return 0;
		len -= op->yes;
		bc  += op->yes;
	}
	return 0;
}

/* data is u32 ifindex */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
			  int *min_len)
{
	/* Check ifindex space. */
	*min_len += sizeof(u32);
	if (len < *min_len)
		return false;

	return true;
}
/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	struct inet_diag_hostcond *cond;
	int addr_len;

	/* Check hostcond space. */
	*min_len += sizeof(struct inet_diag_hostcond);
	if (len < *min_len)
		return false;
	cond = (struct inet_diag_hostcond *)(op + 1);

	/* Check address family and address length. */
	switch (cond->family) {
	case AF_UNSPEC:
		addr_len = 0;
		break;
	case AF_INET:
		addr_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		addr_len = sizeof(struct in6_addr);
		break;
	default:
		return false;
	}
	*min_len += addr_len;
	if (len < *min_len)
		return false;

	/* Check prefix length (in bits) vs address length (in bytes). */
	if (cond->prefix_len > 8 * addr_len)
		return false;

	return true;
}

/* Validate a port comparison operator. */
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
				  int len, int *min_len)
{
	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
	*min_len += sizeof(struct inet_diag_bc_op);
	if (len < *min_len)
		return false;
	return true;
}

/* Check there is room for the inet_diag_markcond that follows the op. */
static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	*min_len += sizeof(struct inet_diag_markcond);
	return len >= *min_len;
}

#ifdef CONFIG_SOCK_CGROUP_DATA
/* Check there is room for the u64 cgroup id that follows the op. */
static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
			     int *min_len)
{
	*min_len += sizeof(u64);
	return len >= *min_len;
}
#endif

/* Validate user-supplied filter bytecode before it is ever run.
 *
 * For each op this checks that its operand fits in the remaining bytes,
 * that 'yes' and 'no' offsets are 4-byte multiples within bounds, and
 * that a 'no' jump inside the program lands on an op boundary. Mark
 * conditions additionally require CAP_NET_ADMIN.
 *
 * Returns 0 if the program is acceptable, -EPERM for a mark condition
 * without the capability, and -EINVAL otherwise.
 */
static int inet_diag_bc_audit(const struct nlattr *attr,
			      const struct sk_buff *skb)
{
	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
	const void *bytecode, *bc;
	int bytecode_len, len;

	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
		return -EINVAL;

	bytecode = bc = nla_data(attr);
	len = bytecode_len = nla_len(attr);

	while (len > 0) {
		/* min_len grows by the operand size of the current op. */
		int min_len = sizeof(struct inet_diag_bc_op);
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
			if (!valid_hostcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_DEV_COND:
			if (!valid_devcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_S_EQ:
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_EQ:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
			if (!valid_port_comparison(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_MARK_COND:
			if (!net_admin)
				return -EPERM;
			if (!valid_markcond(bc, len, &min_len))
				return -EINVAL;
			break;
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND:
			if (!valid_cgroupcond(bc, len, &min_len))
				return -EINVAL;
			break;
#endif
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_JMP:
		case INET_DIAG_BC_NOP:
			break;
		default:
			return -EINVAL;
		}

		/* NOP never takes its 'no' branch in inet_diag_bc_run(),
		 * so that offset is not validated for it.
		 */
		if (op->code != INET_DIAG_BC_NOP) {
			if (op->no < min_len || op->no > len + 4 || op->no & 3)
				return -EINVAL;
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
		}

		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
			return -EINVAL;
		bc  += op->yes;
		len -= op->yes;
	}
	return len == 0 ? 0 : -EINVAL;
}

/* Compile-time checks that the timewait socket fields used by the dump
 * code sit at the same offsets as the corresponding sock / inet_sock
 * fields. Generates no runtime code beyond the assertions.
 */
static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

/* Dump all sockets in @hashinfo that match request @r into @skb.
 *
 * The walk is resumable across calls through cb->args[]:
 *   args[0] - phase: 0 listening hash, 1 bind hash (bound but inactive
 *             sockets), 2 established hash
 *   args[1] - bucket index to resume at
 *   args[2] - position inside that bucket to resume at
 *
 * When a socket does not fit in @skb the walk stops and the cursor is
 * saved so the next call retries that socket.
 */
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
			 struct netlink_callback *cb,
			 const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	int i, num, s_i, s_num;
	struct nlattr *bc;
	struct sock *sk;

	bc = cb_data->inet_diag_nla_bc;
	/* A request for SYN_RECV also selects NEW_SYN_RECV sockets. */
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		/* Skip the listening hash when LISTEN was not requested or
		 * a destination port filter is set.
		 */
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->lhash2[i];

			if (hlist_nulls_empty(&ilb->nulls_head)) {
				s_num = 0;
				continue;
			}
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				/* Skip entries already dumped by a
				 * previous call.
				 */
				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(bc, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
	if (cb->args[0] == 1) {
		if (!(idiag_states & TCPF_BOUND_INACTIVE))
			goto skip_bind_ht;

		for (i = s_i; i < hashinfo->bhash_size; i++) {
			struct inet_bind_hashbucket *ibb;
			struct inet_bind2_bucket *tb2;
			struct sock *sk_arr[SKARR_SZ];
			int num_arr[SKARR_SZ];
			int idx, accum, res;

resume_bind_walk:
			num = 0;
			accum = 0;
			ibb = &hashinfo->bhash2[i];

			if (hlist_empty(&ibb->chain)) {
				s_num = 0;
				continue;
			}
			spin_lock_bh(&ibb->lock);
			inet_bind_bucket_for_each(tb2, &ibb->chain) {
				if (!net_eq(ib2_net(tb2), net))
					continue;

				sk_for_each_bound(sk, &tb2->owners) {
					struct inet_sock *inet = inet_sk(sk);

					if (num < s_num)
						goto next_bind;

					if (sk->sk_state != TCP_CLOSE ||
					    !inet->inet_num)
						goto next_bind;

					if (r->sdiag_family != AF_UNSPEC &&
					    r->sdiag_family != sk->sk_family)
						goto next_bind;

					if (!inet_diag_bc_sk(bc, sk))
						goto next_bind;

					/* Hold the socket and remember its
					 * position; it is filled in after
					 * the bucket lock is dropped.
					 */
					sock_hold(sk);
					num_arr[accum] = num;
					sk_arr[accum] = sk;
					if (++accum == SKARR_SZ)
						goto pause_bind_walk;
next_bind:
					num++;
				}
			}
pause_bind_walk:
			spin_unlock_bh(&ibb->lock);

			res = 0;
			for (idx = 0; idx < accum; idx++) {
				/* After the first failure stop filling but
				 * keep dropping the held references.
				 */
				if (res >= 0) {
					res = inet_sk_diag_fill(sk_arr[idx],
								NULL, skb, cb,
								r, NLM_F_MULTI,
								net_admin);
					if (res < 0)
						num = num_arr[idx];
				}
				sock_put(sk_arr[idx]);
			}
			if (res < 0)
				goto done;

			cond_resched();

			/* A full batch means the bucket may hold more:
			 * re-walk it starting after the last one taken.
			 */
			if (accum == SKARR_SZ) {
				s_num = num + 1;
				goto resume_bind_walk;
			}

			s_num = 0;
		}
skip_bind_ht:
		cb->args[0] = 2;
		s_i = num = s_num = 0;
	}

	/* Nothing but LISTEN requested: the established hash is not
	 * walked.
	 */
	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		/* The saved in-bucket position only applies to the bucket
		 * we resumed in.
		 */
		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				inet_twsk(sk)->tw_substate : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

			/* Skip sockets whose refcount already hit zero. */
			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);
		res = 0;
		for (idx = 0; idx < accum; idx++) {
			/* After the first failure stop filling but keep
			 * dropping the references taken above.
			 */
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;
		cond_resched();
		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);

/* Run one dump pass through the protocol handler.
 *
 * If the handler produced nothing and inet_sk_diag_fill() raised
 * cb->min_dump_alloc, the skb is expanded and the dump is retried.
 * Returns skb->len on success, -ENOENT when no handler is available, or
 * the pskb_expand_head() error.
 */
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *r)
{
	struct inet_diag_dump_data *cb_data = cb->data;
	const struct inet_diag_handler *handler;
	u32 prev_min_dump_alloc;
	int protocol, err = 0;

	protocol = inet_diag_get_protocol(r, cb_data);

again:
	prev_min_dump_alloc = cb->min_dump_alloc;
	handler = inet_diag_lock_handler(protocol);
	if (handler) {
		handler->dump(skb, cb, r);
		inet_diag_unlock_handler(handler);
	} else {
		err = -ENOENT;
	}
	/* The skb is not large enough to fit one sk info and
	 * inet_sk_diag_fill() has requested for a larger skb.
	 */
	if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) {
		err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL);
		if (!err)
			goto again;
	}

	return err ? : skb->len;
}

/* Dump callback: the request header is the netlink message payload. */
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh));
}

/* Prepare per-dump state and store it in cb->data.
 *
 * Parses the request attributes that follow the @hdrlen-byte header,
 * audits the filter bytecode if one was supplied and sets up the BPF
 * storage diag state if requested. On any failure the allocation is
 * freed and the error returned; on success inet_diag_dump_done() frees
 * the state.
 */
static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_diag_dump_data *cb_data;
	struct sk_buff *skb = cb->skb;
	struct nlattr *nla;
	int err;

	cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
	if (!cb_data)
		return -ENOMEM;

	err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
	if (err) {
		kfree(cb_data);
		return err;
	}
	nla = cb_data->inet_diag_nla_bc;
	if (nla) {
		err = inet_diag_bc_audit(nla, skb);
		if (err) {
			kfree(cb_data);
			return err;
		}
	}

	nla = cb_data->inet_diag_nla_bpf_stgs;
	if (nla) {
		struct bpf_sk_storage_diag *bpf_stg_diag;

		bpf_stg_diag = bpf_sk_storage_diag_alloc(nla);
		if (IS_ERR(bpf_stg_diag)) {
			kfree(cb_data);
			return PTR_ERR(bpf_stg_diag);
		}
		cb_data->bpf_stg_diag = bpf_stg_diag;
	}

	cb->data = cb_data;
	return 0;
}

/* Dump start for requests carrying a struct inet_diag_req_v2 header. */
static int inet_diag_dump_start(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2));
}

/* Dump start for requests carrying the older struct inet_diag_req. */
static int inet_diag_dump_start_compat(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req));
}

/* Release the per-dump state allocated by __inet_diag_dump_start(). */
static int inet_diag_dump_done(struct netlink_callback *cb)
{
	struct inet_diag_dump_data *cb_data = cb->data;

	bpf_sk_storage_diag_free(cb_data->bpf_stg_diag);
	kfree(cb->data);

	return 0;
}

/* Map a compat netlink message type to an IP protocol number; unknown
 * types map to 0.
 */
static int inet_diag_type2proto(int type)
{
	switch (type) {
	case TCPDIAG_GETSOCK:
		return IPPROTO_TCP;
	case DCCPDIAG_GETSOCK:
		return IPPROTO_DCCP;
	default:
		return 0;
	}
}

/* Dump callback for the compat request format: translate the old
 * struct inet_diag_req into an on-stack inet_diag_req_v2 and run the
 * common dump path.
 */
static int inet_diag_dump_compat(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct inet_diag_req *rc = nlmsg_data(cb->nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = AF_UNSPEC; /* compatibility */
	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.pad = 0;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return __inet_diag_dump(skb, cb, &req);
}

/* Single-socket lookup for the compat request format: translate the old
 * struct inet_diag_req into an inet_diag_req_v2 and hand it to
 * inet_diag_cmd_exact().
 */
static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
				      const struct nlmsghdr *nlh)
{
	struct inet_diag_req *rc = nlmsg_data(nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = rc->idiag_family;
	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.pad = 0;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh,
				   sizeof(struct inet_diag_req), &req);
}

static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int hdrlen = sizeof(struct inet_diag_req);
	struct net *net = sock_net(skb->sk);

	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
nlmsg_len(nlh) < hdrlen) 1417 return -EINVAL; 1418 1419 if (nlh->nlmsg_flags & NLM_F_DUMP) { 1420 struct netlink_dump_control c = { 1421 .start = inet_diag_dump_start_compat, 1422 .done = inet_diag_dump_done, 1423 .dump = inet_diag_dump_compat, 1424 }; 1425 return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); 1426 } 1427 1428 return inet_diag_get_exact_compat(skb, nlh); 1429 } 1430 1431 static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) 1432 { 1433 int hdrlen = sizeof(struct inet_diag_req_v2); 1434 struct net *net = sock_net(skb->sk); 1435 1436 if (nlmsg_len(h) < hdrlen) 1437 return -EINVAL; 1438 1439 if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && 1440 h->nlmsg_flags & NLM_F_DUMP) { 1441 struct netlink_dump_control c = { 1442 .start = inet_diag_dump_start, 1443 .done = inet_diag_dump_done, 1444 .dump = inet_diag_dump, 1445 }; 1446 return netlink_dump_start(net->diag_nlsk, skb, h, &c); 1447 } 1448 1449 return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen, 1450 nlmsg_data(h)); 1451 } 1452 1453 static 1454 int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) 1455 { 1456 const struct inet_diag_handler *handler; 1457 struct nlmsghdr *nlh; 1458 struct nlattr *attr; 1459 struct inet_diag_msg *r; 1460 void *info = NULL; 1461 int err = 0; 1462 1463 nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); 1464 if (!nlh) 1465 return -ENOMEM; 1466 1467 r = nlmsg_data(nlh); 1468 memset(r, 0, sizeof(*r)); 1469 inet_diag_msg_common_fill(r, sk); 1470 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM) 1471 r->id.idiag_sport = inet_sk(sk)->inet_sport; 1472 r->idiag_state = sk->sk_state; 1473 1474 if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { 1475 nlmsg_cancel(skb, nlh); 1476 return err; 1477 } 1478 1479 handler = inet_diag_lock_handler(sk->sk_protocol); 1480 if (!handler) { 1481 nlmsg_cancel(skb, nlh); 1482 return -ENOENT; 1483 } 1484 1485 attr = handler->idiag_info_size 1486 ? 
nla_reserve_64bit(skb, INET_DIAG_INFO, 1487 handler->idiag_info_size, 1488 INET_DIAG_PAD) 1489 : NULL; 1490 if (attr) 1491 info = nla_data(attr); 1492 1493 handler->idiag_get_info(sk, r, info); 1494 inet_diag_unlock_handler(handler); 1495 1496 nlmsg_end(skb, nlh); 1497 return 0; 1498 } 1499 1500 static const struct sock_diag_handler inet_diag_handler = { 1501 .owner = THIS_MODULE, 1502 .family = AF_INET, 1503 .dump = inet_diag_handler_cmd, 1504 .get_info = inet_diag_handler_get_info, 1505 .destroy = inet_diag_handler_cmd, 1506 }; 1507 1508 static const struct sock_diag_handler inet6_diag_handler = { 1509 .owner = THIS_MODULE, 1510 .family = AF_INET6, 1511 .dump = inet_diag_handler_cmd, 1512 .get_info = inet_diag_handler_get_info, 1513 .destroy = inet_diag_handler_cmd, 1514 }; 1515 1516 int inet_diag_register(const struct inet_diag_handler *h) 1517 { 1518 const __u16 type = h->idiag_type; 1519 1520 if (type >= IPPROTO_MAX) 1521 return -EINVAL; 1522 1523 return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type], 1524 NULL, h) ? 
0 : -EEXIST; 1525 } 1526 EXPORT_SYMBOL_GPL(inet_diag_register); 1527 1528 void inet_diag_unregister(const struct inet_diag_handler *h) 1529 { 1530 const __u16 type = h->idiag_type; 1531 1532 if (type >= IPPROTO_MAX) 1533 return; 1534 1535 xchg((const struct inet_diag_handler **)&inet_diag_table[type], 1536 NULL); 1537 } 1538 EXPORT_SYMBOL_GPL(inet_diag_unregister); 1539 1540 static const struct sock_diag_inet_compat inet_diag_compat = { 1541 .owner = THIS_MODULE, 1542 .fn = inet_diag_rcv_msg_compat, 1543 }; 1544 1545 static int __init inet_diag_init(void) 1546 { 1547 const int inet_diag_table_size = (IPPROTO_MAX * 1548 sizeof(struct inet_diag_handler *)); 1549 int err = -ENOMEM; 1550 1551 inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL); 1552 if (!inet_diag_table) 1553 goto out; 1554 1555 err = sock_diag_register(&inet_diag_handler); 1556 if (err) 1557 goto out_free_nl; 1558 1559 err = sock_diag_register(&inet6_diag_handler); 1560 if (err) 1561 goto out_free_inet; 1562 1563 sock_diag_register_inet_compat(&inet_diag_compat); 1564 out: 1565 return err; 1566 1567 out_free_inet: 1568 sock_diag_unregister(&inet_diag_handler); 1569 out_free_nl: 1570 kfree(inet_diag_table); 1571 goto out; 1572 } 1573 1574 static void __exit inet_diag_exit(void) 1575 { 1576 sock_diag_unregister(&inet6_diag_handler); 1577 sock_diag_unregister(&inet_diag_handler); 1578 sock_diag_unregister_inet_compat(&inet_diag_compat); 1579 kfree(inet_diag_table); 1580 } 1581 1582 module_init(inet_diag_init); 1583 module_exit(inet_diag_exit); 1584 MODULE_LICENSE("GPL"); 1585 MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); 1586 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); 1587 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); 1588