1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * inet_diag.c Module for monitoring INET transport protocols sockets. 4 * 5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/fcntl.h> 12 #include <linux/random.h> 13 #include <linux/slab.h> 14 #include <linux/cache.h> 15 #include <linux/init.h> 16 #include <linux/time.h> 17 18 #include <net/icmp.h> 19 #include <net/tcp.h> 20 #include <net/ipv6.h> 21 #include <net/inet_common.h> 22 #include <net/inet_connection_sock.h> 23 #include <net/inet_hashtables.h> 24 #include <net/inet_timewait_sock.h> 25 #include <net/inet6_hashtables.h> 26 #include <net/bpf_sk_storage.h> 27 #include <net/netlink.h> 28 29 #include <linux/inet.h> 30 #include <linux/stddef.h> 31 32 #include <linux/inet_diag.h> 33 #include <linux/sock_diag.h> 34 35 static const struct inet_diag_handler **inet_diag_table; 36 37 struct inet_diag_entry { 38 const __be32 *saddr; 39 const __be32 *daddr; 40 u16 sport; 41 u16 dport; 42 u16 family; 43 u16 userlocks; 44 u32 ifindex; 45 u32 mark; 46 #ifdef CONFIG_SOCK_CGROUP_DATA 47 u64 cgroup_id; 48 #endif 49 }; 50 51 static DEFINE_MUTEX(inet_diag_table_mutex); 52 53 static const struct inet_diag_handler *inet_diag_lock_handler(int proto) 54 { 55 if (proto < 0 || proto >= IPPROTO_MAX) { 56 mutex_lock(&inet_diag_table_mutex); 57 return ERR_PTR(-ENOENT); 58 } 59 60 if (!inet_diag_table[proto]) 61 sock_load_diag_module(AF_INET, proto); 62 63 mutex_lock(&inet_diag_table_mutex); 64 if (!inet_diag_table[proto]) 65 return ERR_PTR(-ENOENT); 66 67 return inet_diag_table[proto]; 68 } 69 70 static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) 71 { 72 mutex_unlock(&inet_diag_table_mutex); 73 } 74 75 void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) 76 { 77 r->idiag_family = sk->sk_family; 78 79 r->id.idiag_sport = htons(sk->sk_num); 80 r->id.idiag_dport = sk->sk_dport; 81 r->id.idiag_if = sk->sk_bound_dev_if; 82 sock_diag_save_cookie(sk, r->id.idiag_cookie); 83 84 #if IS_ENABLED(CONFIG_IPV6) 85 if (sk->sk_family == AF_INET6) { 86 *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; 87 *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; 88 } else 89 #endif 90 { 91 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); 92 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); 93 94 r->id.idiag_src[0] = sk->sk_rcv_saddr; 95 r->id.idiag_dst[0] = sk->sk_daddr; 96 } 97 } 98 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); 99 100 static size_t inet_sk_attr_size(struct sock *sk, 101 const struct inet_diag_req_v2 *req, 102 bool net_admin) 103 { 104 const struct inet_diag_handler *handler; 105 size_t aux = 0; 106 107 handler = inet_diag_table[req->sdiag_protocol]; 108 if (handler && handler->idiag_get_aux_size) 109 aux = handler->idiag_get_aux_size(sk, net_admin); 110 111 return nla_total_size(sizeof(struct tcp_info)) 112 + nla_total_size(sizeof(struct inet_diag_msg)) 113 + inet_diag_msg_attrs_size() 114 + nla_total_size(sizeof(struct inet_diag_meminfo)) 115 + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) 116 + nla_total_size(TCP_CA_NAME_MAX) 117 + nla_total_size(sizeof(struct tcpvegas_info)) 118 + aux 119 + 64; 120 } 121 122 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, 123 struct inet_diag_msg *r, int ext, 124 struct user_namespace *user_ns, 125 bool net_admin) 126 { 127 const struct inet_sock *inet = inet_sk(sk); 128 struct inet_diag_sockopt inet_sockopt; 129 130 if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) 131 goto errout; 132 133 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, 134 * hence this needs to be included regardless of socket family. 135 */ 136 if (ext & (1 << (INET_DIAG_TOS - 1))) 137 if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0) 138 goto errout; 139 140 #if IS_ENABLED(CONFIG_IPV6) 141 if (r->idiag_family == AF_INET6) { 142 if (ext & (1 << (INET_DIAG_TCLASS - 1))) 143 if (nla_put_u8(skb, INET_DIAG_TCLASS, 144 inet6_sk(sk)->tclass) < 0) 145 goto errout; 146 147 if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && 148 nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) 149 goto errout; 150 } 151 #endif 152 153 if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark))) 154 goto errout; 155 156 if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || 157 ext & (1 << (INET_DIAG_TCLASS - 1))) { 158 u32 classid = 0; 159 160 #ifdef CONFIG_SOCK_CGROUP_DATA 161 classid = sock_cgroup_classid(&sk->sk_cgrp_data); 162 #endif 163 /* Fallback to socket priority if class id isn't set. 164 * Classful qdiscs use it as direct reference to class. 165 * For cgroup2 classid is always zero. 166 */ 167 if (!classid) 168 classid = READ_ONCE(sk->sk_priority); 169 170 if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) 171 goto errout; 172 } 173 174 #ifdef CONFIG_SOCK_CGROUP_DATA 175 if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID, 176 cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)), 177 INET_DIAG_PAD)) 178 goto errout; 179 #endif 180 181 r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); 182 r->idiag_inode = sock_i_ino(sk); 183 184 memset(&inet_sockopt, 0, sizeof(inet_sockopt)); 185 inet_sockopt.recverr = inet_test_bit(RECVERR, sk); 186 inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk); 187 inet_sockopt.freebind = inet_test_bit(FREEBIND, sk); 188 inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk); 189 inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk); 190 inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk); 191 inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk); 192 inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk); 193 inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); 194 inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk); 195 inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk); 196 if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt), 197 &inet_sockopt)) 198 goto errout; 199 200 return 0; 201 errout: 202 return 1; 203 } 204 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); 205 206 static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, 207 struct nlattr **req_nlas) 208 { 209 struct nlattr *nla; 210 int remaining; 211 212 nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { 213 int type = nla_type(nla); 214 215 if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) 216 return -EINVAL; 217 218 if (type < __INET_DIAG_REQ_MAX) 219 req_nlas[type] = nla; 220 } 221 return 0; 222 } 223 224 static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, 225 const struct inet_diag_dump_data *data) 226 { 227 if (data->req_nlas[INET_DIAG_REQ_PROTOCOL]) 228 return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]); 229 return req->sdiag_protocol; 230 } 231 232 #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 233 234 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, 235 struct sk_buff *skb, struct netlink_callback *cb, 236 const struct inet_diag_req_v2 *req, 237 u16 nlmsg_flags, bool net_admin) 238 { 239 const struct tcp_congestion_ops *ca_ops; 240 const struct inet_diag_handler *handler; 241 struct inet_diag_dump_data *cb_data; 242 int ext = req->idiag_ext; 243 struct inet_diag_msg *r; 244 struct nlmsghdr *nlh; 245 struct nlattr *attr; 246 void *info = NULL; 247 248 cb_data = cb->data; 249 handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)]; 250 BUG_ON(!handler); 251 252 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 253 cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); 254 if (!nlh) 255 return -EMSGSIZE; 256 257 r = nlmsg_data(nlh); 258 BUG_ON(!sk_fullsock(sk)); 259 260 inet_diag_msg_common_fill(r, sk); 261 r->idiag_state = sk->sk_state; 262 r->idiag_timer = 0; 263 r->idiag_retrans = 0; 264 r->idiag_expires = 0; 265 266 if (inet_diag_msg_attrs_fill(sk, skb, r, ext, 267 sk_user_ns(NETLINK_CB(cb->skb).sk), 268 net_admin)) 269 goto errout; 270 271 if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { 272 struct inet_diag_meminfo minfo = { 273 .idiag_rmem = sk_rmem_alloc_get(sk), 274 .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), 275 .idiag_fmem = sk_forward_alloc_get(sk), 276 .idiag_tmem = sk_wmem_alloc_get(sk), 277 }; 278 279 if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0) 280 goto errout; 281 } 282 283 if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) 284 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) 285 goto errout; 286 287 /* 288 * RAW sockets might have user-defined protocols assigned, 289 * so report the one supplied on socket creation. 290 */ 291 if (sk->sk_type == SOCK_RAW) { 292 if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) 293 goto errout; 294 } 295 296 if (!icsk) { 297 handler->idiag_get_info(sk, r, NULL); 298 goto out; 299 } 300 301 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 302 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 303 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 304 r->idiag_timer = 1; 305 r->idiag_retrans = icsk->icsk_retransmits; 306 r->idiag_expires = 307 jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); 308 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 309 r->idiag_timer = 4; 310 r->idiag_retrans = icsk->icsk_probes_out; 311 r->idiag_expires = 312 jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); 313 } else if (timer_pending(&sk->sk_timer)) { 314 r->idiag_timer = 2; 315 r->idiag_retrans = icsk->icsk_probes_out; 316 r->idiag_expires = 317 jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); 318 } 319 320 if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { 321 attr = nla_reserve_64bit(skb, INET_DIAG_INFO, 322 handler->idiag_info_size, 323 INET_DIAG_PAD); 324 if (!attr) 325 goto errout; 326 327 info = nla_data(attr); 328 } 329 330 if (ext & (1 << (INET_DIAG_CONG - 1))) { 331 int err = 0; 332 333 rcu_read_lock(); 334 ca_ops = READ_ONCE(icsk->icsk_ca_ops); 335 if (ca_ops) 336 err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); 337 rcu_read_unlock(); 338 if (err < 0) 339 goto errout; 340 } 341 342 handler->idiag_get_info(sk, r, info); 343 344 if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux) 345 if (handler->idiag_get_aux(sk, net_admin, skb) < 0) 346 goto errout; 347 348 if (sk->sk_state < TCP_TIME_WAIT) { 349 union tcp_cc_info info; 350 size_t sz = 0; 351 int attr; 352 353 rcu_read_lock(); 354 ca_ops = READ_ONCE(icsk->icsk_ca_ops); 355 if (ca_ops && ca_ops->get_info) 356 sz = ca_ops->get_info(sk, ext, &attr, &info); 357 rcu_read_unlock(); 358 if (sz && nla_put(skb, attr, sz, &info) < 0) 359 goto errout; 360 } 361 362 /* Keep it at the end for potential retry with a larger skb, 363 * or else do best-effort fitting, which is only done for the 364 * first_nlmsg. 365 */ 366 if (cb_data->bpf_stg_diag) { 367 bool first_nlmsg = ((unsigned char *)nlh == skb->data); 368 unsigned int prev_min_dump_alloc; 369 unsigned int total_nla_size = 0; 370 unsigned int msg_len; 371 int err; 372 373 msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; 374 err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, 375 INET_DIAG_SK_BPF_STORAGES, 376 &total_nla_size); 377 378 if (!err) 379 goto out; 380 381 total_nla_size += msg_len; 382 prev_min_dump_alloc = cb->min_dump_alloc; 383 if (total_nla_size > prev_min_dump_alloc) 384 cb->min_dump_alloc = min_t(u32, total_nla_size, 385 MAX_DUMP_ALLOC_SIZE); 386 387 if (!first_nlmsg) 388 goto errout; 389 390 if (cb->min_dump_alloc > prev_min_dump_alloc) 391 /* Retry with pskb_expand_head() with 392 * __GFP_DIRECT_RECLAIM 393 */ 394 goto errout; 395 396 WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); 397 398 /* Send what we have for this sk 399 * and move on to the next sk in the following 400 * dump() 401 */ 402 } 403 404 out: 405 nlmsg_end(skb, nlh); 406 return 0; 407 408 errout: 409 nlmsg_cancel(skb, nlh); 410 return -EMSGSIZE; 411 } 412 EXPORT_SYMBOL_GPL(inet_sk_diag_fill); 413 414 static int inet_twsk_diag_fill(struct sock *sk, 415 struct sk_buff *skb, 416 struct netlink_callback *cb, 417 u16 nlmsg_flags, bool net_admin) 418 { 419 struct inet_timewait_sock *tw = inet_twsk(sk); 420 struct inet_diag_msg *r; 421 struct nlmsghdr *nlh; 422 long tmo; 423 424 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 425 cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, 426 sizeof(*r), nlmsg_flags); 427 if (!nlh) 428 return -EMSGSIZE; 429 430 r = nlmsg_data(nlh); 431 BUG_ON(tw->tw_state != TCP_TIME_WAIT); 432 433 inet_diag_msg_common_fill(r, sk); 434 r->idiag_retrans = 0; 435 436 r->idiag_state = tw->tw_substate; 437 r->idiag_timer = 3; 438 tmo = tw->tw_timer.expires - jiffies; 439 r->idiag_expires = jiffies_delta_to_msecs(tmo); 440 r->idiag_rqueue = 0; 441 r->idiag_wqueue = 0; 442 r->idiag_uid = 0; 443 r->idiag_inode = 0; 444 445 if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, 446 tw->tw_mark)) { 447 nlmsg_cancel(skb, nlh); 448 return -EMSGSIZE; 449 } 450 451 nlmsg_end(skb, nlh); 452 return 0; 453 } 454 455 static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, 456 struct netlink_callback *cb, 457 u16 nlmsg_flags, bool net_admin) 458 { 459 struct request_sock *reqsk = inet_reqsk(sk); 460 struct inet_diag_msg *r; 461 struct nlmsghdr *nlh; 462 long tmo; 463 464 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 465 cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); 466 if (!nlh) 467 return -EMSGSIZE; 468 469 r = nlmsg_data(nlh); 470 inet_diag_msg_common_fill(r, sk); 471 r->idiag_state = TCP_SYN_RECV; 472 r->idiag_timer = 1; 473 r->idiag_retrans = reqsk->num_retrans; 474 475 BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != 476 offsetof(struct sock, sk_cookie)); 477 478 tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; 479 r->idiag_expires = jiffies_delta_to_msecs(tmo); 480 r->idiag_rqueue = 0; 481 r->idiag_wqueue = 0; 482 r->idiag_uid = 0; 483 r->idiag_inode = 0; 484 485 if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, 486 inet_rsk(reqsk)->ir_mark)) { 487 nlmsg_cancel(skb, nlh); 488 return -EMSGSIZE; 489 } 490 491 nlmsg_end(skb, nlh); 492 return 0; 493 } 494 495 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, 496 struct netlink_callback *cb, 497 const struct inet_diag_req_v2 *r, 498 u16 nlmsg_flags, bool net_admin) 499 { 500 if (sk->sk_state == TCP_TIME_WAIT) 501 return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); 502 503 if (sk->sk_state == TCP_NEW_SYN_RECV) 504 return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); 505 506 return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, 507 net_admin); 508 } 509 510 struct sock *inet_diag_find_one_icsk(struct net *net, 511 struct inet_hashinfo *hashinfo, 512 const struct inet_diag_req_v2 *req) 513 { 514 struct sock *sk; 515 516 rcu_read_lock(); 517 if (req->sdiag_family == AF_INET) 518 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], 519 req->id.idiag_dport, req->id.idiag_src[0], 520 req->id.idiag_sport, req->id.idiag_if); 521 #if IS_ENABLED(CONFIG_IPV6) 522 else if (req->sdiag_family == AF_INET6) { 523 if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && 524 ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) 525 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3], 526 req->id.idiag_dport, req->id.idiag_src[3], 527 req->id.idiag_sport, req->id.idiag_if); 528 else 529 sk = inet6_lookup(net, hashinfo, NULL, 0, 530 (struct in6_addr *)req->id.idiag_dst, 531 req->id.idiag_dport, 532 (struct in6_addr *)req->id.idiag_src, 533 req->id.idiag_sport, 534 req->id.idiag_if); 535 } 536 #endif 537 else { 538 rcu_read_unlock(); 539 return ERR_PTR(-EINVAL); 540 } 541 rcu_read_unlock(); 542 if (!sk) 543 return ERR_PTR(-ENOENT); 544 545 if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { 546 sock_gen_put(sk); 547 return ERR_PTR(-ENOENT); 548 } 549 550 return sk; 551 } 552 EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); 553 554 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, 555 struct netlink_callback *cb, 556 const struct inet_diag_req_v2 *req) 557 { 558 struct sk_buff *in_skb = cb->skb; 559 bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); 560 struct net *net = sock_net(in_skb->sk); 561 struct sk_buff *rep; 562 struct sock *sk; 563 int err; 564 565 sk = inet_diag_find_one_icsk(net, hashinfo, req); 566 if (IS_ERR(sk)) 567 return PTR_ERR(sk); 568 569 rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL); 570 if (!rep) { 571 err = -ENOMEM; 572 goto out; 573 } 574 575 err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); 576 if (err < 0) { 577 WARN_ON(err == -EMSGSIZE); 578 nlmsg_free(rep); 579 goto out; 580 } 581 err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); 582 583 out: 584 if (sk) 585 sock_gen_put(sk); 586 587 return err; 588 } 589 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); 590 591 static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, 592 const struct nlmsghdr *nlh, 593 int hdrlen, 594 const struct inet_diag_req_v2 *req) 595 { 596 const struct inet_diag_handler *handler; 597 struct inet_diag_dump_data dump_data; 598 int err, protocol; 599 600 memset(&dump_data, 0, sizeof(dump_data)); 601 err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); 602 if (err) 603 return err; 604 605 protocol = inet_diag_get_protocol(req, &dump_data); 606 607 handler = inet_diag_lock_handler(protocol); 608 if (IS_ERR(handler)) { 609 err = PTR_ERR(handler); 610 } else if (cmd == SOCK_DIAG_BY_FAMILY) { 611 struct netlink_callback cb = { 612 .nlh = nlh, 613 .skb = in_skb, 614 .data = &dump_data, 615 }; 616 err = handler->dump_one(&cb, req); 617 } else if (cmd == SOCK_DESTROY && handler->destroy) { 618 err = handler->destroy(in_skb, req); 619 } else { 620 err = -EOPNOTSUPP; 621 } 622 inet_diag_unlock_handler(handler); 623 624 return err; 625 } 626 627 static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) 628 { 629 int words = bits >> 5; 630 631 bits &= 0x1f; 632 633 if (words) { 634 if (memcmp(a1, a2, words << 2)) 635 return 0; 636 } 637 if (bits) { 638 __be32 w1, w2; 639 __be32 mask; 640 641 w1 = a1[words]; 642 w2 = a2[words]; 643 644 mask = htonl((0xffffffff) << (32 - bits)); 645 646 if ((w1 ^ w2) & mask) 647 return 0; 648 } 649 650 return 1; 651 } 652 653 static int inet_diag_bc_run(const struct nlattr *_bc, 654 const struct inet_diag_entry *entry) 655 { 656 const void *bc = nla_data(_bc); 657 int len = nla_len(_bc); 658 659 while (len > 0) { 660 int yes = 1; 661 const struct inet_diag_bc_op *op = bc; 662 663 switch (op->code) { 664 case INET_DIAG_BC_NOP: 665 break; 666 case INET_DIAG_BC_JMP: 667 yes = 0; 668 break; 669 case INET_DIAG_BC_S_EQ: 670 yes = entry->sport == op[1].no; 671 break; 672 case INET_DIAG_BC_S_GE: 673 yes = entry->sport >= op[1].no; 674 break; 675 case INET_DIAG_BC_S_LE: 676 yes = entry->sport <= op[1].no; 677 break; 678 case INET_DIAG_BC_D_EQ: 679 yes = entry->dport == op[1].no; 680 break; 681 case INET_DIAG_BC_D_GE: 682 yes = entry->dport >= op[1].no; 683 break; 684 case INET_DIAG_BC_D_LE: 685 yes = entry->dport <= op[1].no; 686 break; 687 case INET_DIAG_BC_AUTO: 688 yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); 689 break; 690 case INET_DIAG_BC_S_COND: 691 case INET_DIAG_BC_D_COND: { 692 const struct inet_diag_hostcond *cond; 693 const __be32 *addr; 694 695 cond = (const struct inet_diag_hostcond *)(op + 1); 696 if (cond->port != -1 && 697 cond->port != (op->code == INET_DIAG_BC_S_COND ? 698 entry->sport : entry->dport)) { 699 yes = 0; 700 break; 701 } 702 703 if (op->code == INET_DIAG_BC_S_COND) 704 addr = entry->saddr; 705 else 706 addr = entry->daddr; 707 708 if (cond->family != AF_UNSPEC && 709 cond->family != entry->family) { 710 if (entry->family == AF_INET6 && 711 cond->family == AF_INET) { 712 if (addr[0] == 0 && addr[1] == 0 && 713 addr[2] == htonl(0xffff) && 714 bitstring_match(addr + 3, 715 cond->addr, 716 cond->prefix_len)) 717 break; 718 } 719 yes = 0; 720 break; 721 } 722 723 if (cond->prefix_len == 0) 724 break; 725 if (bitstring_match(addr, cond->addr, 726 cond->prefix_len)) 727 break; 728 yes = 0; 729 break; 730 } 731 case INET_DIAG_BC_DEV_COND: { 732 u32 ifindex; 733 734 ifindex = *((const u32 *)(op + 1)); 735 if (ifindex != entry->ifindex) 736 yes = 0; 737 break; 738 } 739 case INET_DIAG_BC_MARK_COND: { 740 struct inet_diag_markcond *cond; 741 742 cond = (struct inet_diag_markcond *)(op + 1); 743 if ((entry->mark & cond->mask) != cond->mark) 744 yes = 0; 745 break; 746 } 747 #ifdef CONFIG_SOCK_CGROUP_DATA 748 case INET_DIAG_BC_CGROUP_COND: { 749 u64 cgroup_id; 750 751 cgroup_id = get_unaligned((const u64 *)(op + 1)); 752 if (cgroup_id != entry->cgroup_id) 753 yes = 0; 754 break; 755 } 756 #endif 757 } 758 759 if (yes) { 760 len -= op->yes; 761 bc += op->yes; 762 } else { 763 len -= op->no; 764 bc += op->no; 765 } 766 } 767 return len == 0; 768 } 769 770 /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV) 771 */ 772 static void entry_fill_addrs(struct inet_diag_entry *entry, 773 const struct sock *sk) 774 { 775 #if IS_ENABLED(CONFIG_IPV6) 776 if (sk->sk_family == AF_INET6) { 777 entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; 778 entry->daddr = sk->sk_v6_daddr.s6_addr32; 779 } else 780 #endif 781 { 782 entry->saddr = &sk->sk_rcv_saddr; 783 entry->daddr = &sk->sk_daddr; 784 } 785 } 786 787 int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) 788 { 789 struct inet_sock *inet = inet_sk(sk); 790 struct inet_diag_entry entry; 791 792 if (!bc) 793 return 1; 794 795 entry.family = sk->sk_family; 796 entry_fill_addrs(&entry, sk); 797 entry.sport = inet->inet_num; 798 entry.dport = ntohs(inet->inet_dport); 799 entry.ifindex = sk->sk_bound_dev_if; 800 entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; 801 if (sk_fullsock(sk)) 802 entry.mark = READ_ONCE(sk->sk_mark); 803 else if (sk->sk_state == TCP_NEW_SYN_RECV) 804 entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; 805 else if (sk->sk_state == TCP_TIME_WAIT) 806 entry.mark = inet_twsk(sk)->tw_mark; 807 else 808 entry.mark = 0; 809 #ifdef CONFIG_SOCK_CGROUP_DATA 810 entry.cgroup_id = sk_fullsock(sk) ? 811 cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; 812 #endif 813 814 return inet_diag_bc_run(bc, &entry); 815 } 816 EXPORT_SYMBOL_GPL(inet_diag_bc_sk); 817 818 static int valid_cc(const void *bc, int len, int cc) 819 { 820 while (len >= 0) { 821 const struct inet_diag_bc_op *op = bc; 822 823 if (cc > len) 824 return 0; 825 if (cc == len) 826 return 1; 827 if (op->yes < 4 || op->yes & 3) 828 return 0; 829 len -= op->yes; 830 bc += op->yes; 831 } 832 return 0; 833 } 834 835 /* data is u32 ifindex */ 836 static bool valid_devcond(const struct inet_diag_bc_op *op, int len, 837 int *min_len) 838 { 839 /* Check ifindex space. */ 840 *min_len += sizeof(u32); 841 if (len < *min_len) 842 return false; 843 844 return true; 845 } 846 /* Validate an inet_diag_hostcond. */ 847 static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, 848 int *min_len) 849 { 850 struct inet_diag_hostcond *cond; 851 int addr_len; 852 853 /* Check hostcond space. */ 854 *min_len += sizeof(struct inet_diag_hostcond); 855 if (len < *min_len) 856 return false; 857 cond = (struct inet_diag_hostcond *)(op + 1); 858 859 /* Check address family and address length. */ 860 switch (cond->family) { 861 case AF_UNSPEC: 862 addr_len = 0; 863 break; 864 case AF_INET: 865 addr_len = sizeof(struct in_addr); 866 break; 867 case AF_INET6: 868 addr_len = sizeof(struct in6_addr); 869 break; 870 default: 871 return false; 872 } 873 *min_len += addr_len; 874 if (len < *min_len) 875 return false; 876 877 /* Check prefix length (in bits) vs address length (in bytes). */ 878 if (cond->prefix_len > 8 * addr_len) 879 return false; 880 881 return true; 882 } 883 884 /* Validate a port comparison operator. */ 885 static bool valid_port_comparison(const struct inet_diag_bc_op *op, 886 int len, int *min_len) 887 { 888 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */ 889 *min_len += sizeof(struct inet_diag_bc_op); 890 if (len < *min_len) 891 return false; 892 return true; 893 } 894 895 static bool valid_markcond(const struct inet_diag_bc_op *op, int len, 896 int *min_len) 897 { 898 *min_len += sizeof(struct inet_diag_markcond); 899 return len >= *min_len; 900 } 901 902 #ifdef CONFIG_SOCK_CGROUP_DATA 903 static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len, 904 int *min_len) 905 { 906 *min_len += sizeof(u64); 907 return len >= *min_len; 908 } 909 #endif 910 911 static int inet_diag_bc_audit(const struct nlattr *attr, 912 const struct sk_buff *skb) 913 { 914 bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); 915 const void *bytecode, *bc; 916 int bytecode_len, len; 917 918 if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) 919 return -EINVAL; 920 921 bytecode = bc = nla_data(attr); 922 len = bytecode_len = nla_len(attr); 923 924 while (len > 0) { 925 int min_len = sizeof(struct inet_diag_bc_op); 926 const struct inet_diag_bc_op *op = bc; 927 928 switch (op->code) { 929 case INET_DIAG_BC_S_COND: 930 case INET_DIAG_BC_D_COND: 931 if (!valid_hostcond(bc, len, &min_len)) 932 return -EINVAL; 933 break; 934 case INET_DIAG_BC_DEV_COND: 935 if (!valid_devcond(bc, len, &min_len)) 936 return -EINVAL; 937 break; 938 case INET_DIAG_BC_S_EQ: 939 case INET_DIAG_BC_S_GE: 940 case INET_DIAG_BC_S_LE: 941 case INET_DIAG_BC_D_EQ: 942 case INET_DIAG_BC_D_GE: 943 case INET_DIAG_BC_D_LE: 944 if (!valid_port_comparison(bc, len, &min_len)) 945 return -EINVAL; 946 break; 947 case INET_DIAG_BC_MARK_COND: 948 if (!net_admin) 949 return -EPERM; 950 if (!valid_markcond(bc, len, &min_len)) 951 return -EINVAL; 952 break; 953 #ifdef CONFIG_SOCK_CGROUP_DATA 954 case INET_DIAG_BC_CGROUP_COND: 955 if (!valid_cgroupcond(bc, len, &min_len)) 956 return -EINVAL; 957 break; 958 #endif 959 case INET_DIAG_BC_AUTO: 960 case INET_DIAG_BC_JMP: 961 case INET_DIAG_BC_NOP: 962 break; 963 default: 964 return -EINVAL; 965 } 966 967 if (op->code != INET_DIAG_BC_NOP) { 968 if (op->no < min_len || op->no > len + 4 || op->no & 3) 969 return -EINVAL; 970 if (op->no < len && 971 !valid_cc(bytecode, bytecode_len, len - op->no)) 972 return -EINVAL; 973 } 974 975 if (op->yes < min_len || op->yes > len + 4 || op->yes & 3) 976 return -EINVAL; 977 bc += op->yes; 978 len -= op->yes; 979 } 980 return len == 0 ? 0 : -EINVAL; 981 } 982 983 static void twsk_build_assert(void) 984 { 985 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != 986 offsetof(struct sock, sk_family)); 987 988 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != 989 offsetof(struct inet_sock, inet_num)); 990 991 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != 992 offsetof(struct inet_sock, inet_dport)); 993 994 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != 995 offsetof(struct inet_sock, inet_rcv_saddr)); 996 997 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != 998 offsetof(struct inet_sock, inet_daddr)); 999 1000 #if IS_ENABLED(CONFIG_IPV6) 1001 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != 1002 offsetof(struct sock, sk_v6_rcv_saddr)); 1003 1004 BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != 1005 offsetof(struct sock, sk_v6_daddr)); 1006 #endif 1007 } 1008 1009 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, 1010 struct netlink_callback *cb, 1011 const struct inet_diag_req_v2 *r) 1012 { 1013 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); 1014 struct inet_diag_dump_data *cb_data = cb->data; 1015 struct net *net = sock_net(skb->sk); 1016 u32 idiag_states = r->idiag_states; 1017 int i, num, s_i, s_num; 1018 struct nlattr *bc; 1019 struct sock *sk; 1020 1021 bc = cb_data->inet_diag_nla_bc; 1022 if (idiag_states & TCPF_SYN_RECV) 1023 idiag_states |= TCPF_NEW_SYN_RECV; 1024 s_i = cb->args[1]; 1025 s_num = num = cb->args[2]; 1026 1027 if (cb->args[0] == 0) { 1028 if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) 1029 goto skip_listen_ht; 1030 1031 for (i = s_i; i <= hashinfo->lhash2_mask; i++) { 1032 struct inet_listen_hashbucket *ilb; 1033 struct hlist_nulls_node *node; 1034 1035 num = 0; 1036 ilb = &hashinfo->lhash2[i]; 1037 1038 spin_lock(&ilb->lock); 1039 sk_nulls_for_each(sk, node, &ilb->nulls_head) { 1040 struct inet_sock *inet = inet_sk(sk); 1041 1042 if (!net_eq(sock_net(sk), net)) 1043 continue; 1044 1045 if (num < s_num) { 1046 num++; 1047 continue; 1048 } 1049 1050 if (r->sdiag_family != AF_UNSPEC && 1051 sk->sk_family != r->sdiag_family) 1052 goto next_listen; 1053 1054 if (r->id.idiag_sport != inet->inet_sport && 1055 r->id.idiag_sport) 1056 goto next_listen; 1057 1058 if (!inet_diag_bc_sk(bc, sk)) 1059 goto next_listen; 1060 1061 if (inet_sk_diag_fill(sk, inet_csk(sk), skb, 1062 cb, r, NLM_F_MULTI, 1063 net_admin) < 0) { 1064 spin_unlock(&ilb->lock); 1065 goto done; 1066 } 1067 1068 next_listen: 1069 ++num; 1070 } 1071 spin_unlock(&ilb->lock); 1072 1073 s_num = 0; 1074 } 1075 skip_listen_ht: 1076 cb->args[0] = 1; 1077 s_i = num = s_num = 0; 1078 } 1079 1080 /* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets 1081 * with bh disabled. 1082 */ 1083 #define SKARR_SZ 16 1084 1085 /* Dump bound but inactive (not listening, connecting, etc.) sockets */ 1086 if (cb->args[0] == 1) { 1087 if (!(idiag_states & TCPF_BOUND_INACTIVE)) 1088 goto skip_bind_ht; 1089 1090 for (i = s_i; i < hashinfo->bhash_size; i++) { 1091 struct inet_bind_hashbucket *ibb; 1092 struct inet_bind2_bucket *tb2; 1093 struct sock *sk_arr[SKARR_SZ]; 1094 int num_arr[SKARR_SZ]; 1095 int idx, accum, res; 1096 1097 resume_bind_walk: 1098 num = 0; 1099 accum = 0; 1100 ibb = &hashinfo->bhash2[i]; 1101 1102 spin_lock_bh(&ibb->lock); 1103 inet_bind_bucket_for_each(tb2, &ibb->chain) { 1104 if (!net_eq(ib2_net(tb2), net)) 1105 continue; 1106 1107 sk_for_each_bound(sk, &tb2->owners) { 1108 struct inet_sock *inet = inet_sk(sk); 1109 1110 if (num < s_num) 1111 goto next_bind; 1112 1113 if (sk->sk_state != TCP_CLOSE || 1114 !inet->inet_num) 1115 goto next_bind; 1116 1117 if (r->sdiag_family != AF_UNSPEC && 1118 r->sdiag_family != sk->sk_family) 1119 goto next_bind; 1120 1121 if (!inet_diag_bc_sk(bc, sk)) 1122 goto next_bind; 1123 1124 sock_hold(sk); 1125 num_arr[accum] = num; 1126 sk_arr[accum] = sk; 1127 if (++accum == SKARR_SZ) 1128 goto pause_bind_walk; 1129 next_bind: 1130 num++; 1131 } 1132 } 1133 pause_bind_walk: 1134 spin_unlock_bh(&ibb->lock); 1135 1136 res = 0; 1137 for (idx = 0; idx < accum; idx++) { 1138 if (res >= 0) { 1139 res = inet_sk_diag_fill(sk_arr[idx], 1140 NULL, skb, cb, 1141 r, NLM_F_MULTI, 1142 net_admin); 1143 if (res < 0) 1144 num = num_arr[idx]; 1145 } 1146 sock_put(sk_arr[idx]); 1147 } 1148 if (res < 0) 1149 goto done; 1150 1151 cond_resched(); 1152 1153 if (accum == SKARR_SZ) { 1154 s_num = num + 1; 1155 goto resume_bind_walk; 1156 } 1157 1158 s_num = 0; 1159 } 1160 skip_bind_ht: 1161 cb->args[0] = 2; 1162 s_i = num = s_num = 0; 1163 } 1164 1165 if (!(idiag_states & ~TCPF_LISTEN)) 1166 goto out; 1167 1168 for (i = s_i; i <= hashinfo->ehash_mask; i++) { 1169 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 1170 spinlock_t *lock = inet_ehash_lockp(hashinfo, i); 1171 struct hlist_nulls_node *node; 1172 struct sock *sk_arr[SKARR_SZ]; 1173 int num_arr[SKARR_SZ]; 1174 int idx, accum, res; 1175 1176 if (hlist_nulls_empty(&head->chain)) 1177 continue; 1178 1179 if (i > s_i) 1180 s_num = 0; 1181 1182 next_chunk: 1183 num = 0; 1184 accum = 0; 1185 spin_lock_bh(lock); 1186 sk_nulls_for_each(sk, node, &head->chain) { 1187 int state; 1188 1189 if (!net_eq(sock_net(sk), net)) 1190 continue; 1191 if (num < s_num) 1192 goto next_normal; 1193 state = (sk->sk_state == TCP_TIME_WAIT) ? 1194 inet_twsk(sk)->tw_substate : sk->sk_state; 1195 if (!(idiag_states & (1 << state))) 1196 goto next_normal; 1197 if (r->sdiag_family != AF_UNSPEC && 1198 sk->sk_family != r->sdiag_family) 1199 goto next_normal; 1200 if (r->id.idiag_sport != htons(sk->sk_num) && 1201 r->id.idiag_sport) 1202 goto next_normal; 1203 if (r->id.idiag_dport != sk->sk_dport && 1204 r->id.idiag_dport) 1205 goto next_normal; 1206 twsk_build_assert(); 1207 1208 if (!inet_diag_bc_sk(bc, sk)) 1209 goto next_normal; 1210 1211 if (!refcount_inc_not_zero(&sk->sk_refcnt)) 1212 goto next_normal; 1213 1214 num_arr[accum] = num; 1215 sk_arr[accum] = sk; 1216 if (++accum == SKARR_SZ) 1217 break; 1218 next_normal: 1219 ++num; 1220 } 1221 spin_unlock_bh(lock); 1222 res = 0; 1223 for (idx = 0; idx < accum; idx++) { 1224 if (res >= 0) { 1225 res = sk_diag_fill(sk_arr[idx], skb, cb, r, 1226 NLM_F_MULTI, net_admin); 1227 if (res < 0) 1228 num = num_arr[idx]; 1229 } 1230 sock_gen_put(sk_arr[idx]); 1231 } 1232 if (res < 0) 1233 break; 1234 cond_resched(); 1235 if (accum == SKARR_SZ) { 1236 s_num = num + 1; 1237 goto next_chunk; 1238 } 1239 } 1240 1241 done: 1242 cb->args[1] = i; 1243 cb->args[2] = num; 1244 out: 1245 ; 1246 } 1247 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); 1248 1249 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, 1250 const struct inet_diag_req_v2 *r) 1251 { 1252 struct inet_diag_dump_data *cb_data = cb->data; 1253 const struct inet_diag_handler *handler; 1254 u32 prev_min_dump_alloc; 1255 int protocol, err = 0; 1256 1257 protocol = inet_diag_get_protocol(r, cb_data); 1258 1259 again: 1260 prev_min_dump_alloc = cb->min_dump_alloc; 1261 handler = inet_diag_lock_handler(protocol); 1262 if (!IS_ERR(handler)) 1263 handler->dump(skb, cb, r); 1264 else 1265 err = PTR_ERR(handler); 1266 inet_diag_unlock_handler(handler); 1267 1268 /* The skb is not large enough to fit one sk info and 1269 * inet_sk_diag_fill() has requested for a larger skb. 1270 */ 1271 if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { 1272 err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); 1273 if (!err) 1274 goto again; 1275 } 1276 1277 return err ? : skb->len; 1278 } 1279 1280 static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) 1281 { 1282 return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); 1283 } 1284 1285 static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) 1286 { 1287 const struct nlmsghdr *nlh = cb->nlh; 1288 struct inet_diag_dump_data *cb_data; 1289 struct sk_buff *skb = cb->skb; 1290 struct nlattr *nla; 1291 int err; 1292 1293 cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); 1294 if (!cb_data) 1295 return -ENOMEM; 1296 1297 err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); 1298 if (err) { 1299 kfree(cb_data); 1300 return err; 1301 } 1302 nla = cb_data->inet_diag_nla_bc; 1303 if (nla) { 1304 err = inet_diag_bc_audit(nla, skb); 1305 if (err) { 1306 kfree(cb_data); 1307 return err; 1308 } 1309 } 1310 1311 nla = cb_data->inet_diag_nla_bpf_stgs; 1312 if (nla) { 1313 struct bpf_sk_storage_diag *bpf_stg_diag; 1314 1315 bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); 1316 if (IS_ERR(bpf_stg_diag)) { 1317 kfree(cb_data); 1318 return PTR_ERR(bpf_stg_diag); 1319 } 1320 cb_data->bpf_stg_diag = bpf_stg_diag; 1321 } 1322 1323 cb->data = cb_data; 1324 return 0; 1325 } 1326 1327 static int inet_diag_dump_start(struct netlink_callback *cb) 1328 { 1329 return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); 1330 } 1331 1332 static int inet_diag_dump_start_compat(struct netlink_callback *cb) 1333 { 1334 return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); 1335 } 1336 1337 static int inet_diag_dump_done(struct netlink_callback *cb) 1338 { 1339 struct inet_diag_dump_data *cb_data = cb->data; 1340 1341 bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); 1342 kfree(cb->data); 1343 1344 return 0; 1345 } 1346 1347 static int inet_diag_type2proto(int type) 1348 { 1349 switch (type) { 1350 case TCPDIAG_GETSOCK: 1351 return IPPROTO_TCP; 1352 case DCCPDIAG_GETSOCK: 1353 return IPPROTO_DCCP; 1354 default: 1355 return 0; 1356 } 1357 } 1358 1359 static int inet_diag_dump_compat(struct sk_buff *skb, 1360 struct netlink_callback *cb) 1361 { 1362 struct inet_diag_req *rc = nlmsg_data(cb->nlh); 1363 struct inet_diag_req_v2 req; 1364 1365 req.sdiag_family = AF_UNSPEC; /* compatibility */ 1366 req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); 1367 req.idiag_ext = rc->idiag_ext; 1368 req.idiag_states = rc->idiag_states; 1369 req.id = rc->id; 1370 1371 return __inet_diag_dump(skb, cb, &req); 1372 } 1373 1374 static int inet_diag_get_exact_compat(struct sk_buff *in_skb, 1375 const struct nlmsghdr *nlh) 1376 { 1377 struct inet_diag_req *rc = nlmsg_data(nlh); 1378 struct inet_diag_req_v2 req; 1379 1380 req.sdiag_family = rc->idiag_family; 1381 req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); 1382 req.idiag_ext = rc->idiag_ext; 1383 req.idiag_states = rc->idiag_states; 1384 req.id = rc->id; 1385 1386 return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, 1387 sizeof(struct inet_diag_req), &req); 1388 } 1389 1390 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) 1391 { 1392 int hdrlen = sizeof(struct inet_diag_req); 1393 struct net *net = sock_net(skb->sk); 1394 1395 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || 1396 nlmsg_len(nlh) < hdrlen) 1397 return -EINVAL; 1398 1399 if (nlh->nlmsg_flags & NLM_F_DUMP) { 1400 struct netlink_dump_control c = { 1401 .start = inet_diag_dump_start_compat, 1402 .done = inet_diag_dump_done, 1403 .dump = inet_diag_dump_compat, 1404 }; 1405 return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); 1406 } 1407 1408 return inet_diag_get_exact_compat(skb, nlh); 1409 } 1410 1411 static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) 1412 { 1413 int hdrlen = sizeof(struct inet_diag_req_v2); 1414 struct net *net = sock_net(skb->sk); 1415 1416 if (nlmsg_len(h) < hdrlen) 1417 return -EINVAL; 1418 1419 if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && 1420 h->nlmsg_flags & NLM_F_DUMP) { 1421 struct netlink_dump_control c = { 1422 .start = inet_diag_dump_start, 1423 .done = inet_diag_dump_done, 1424 .dump = inet_diag_dump, 1425 }; 1426 return netlink_dump_start(net->diag_nlsk, skb, h, &c); 1427 } 1428 1429 return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen, 1430 nlmsg_data(h)); 1431 } 1432 1433 static 1434 int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) 1435 { 1436 const struct inet_diag_handler *handler; 1437 struct nlmsghdr *nlh; 1438 struct nlattr *attr; 1439 struct inet_diag_msg *r; 1440 void *info = NULL; 1441 int err = 0; 1442 1443 nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); 1444 if (!nlh) 1445 return -ENOMEM; 1446 1447 r = nlmsg_data(nlh); 1448 memset(r, 0, sizeof(*r)); 1449 inet_diag_msg_common_fill(r, sk); 1450 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM) 1451 r->id.idiag_sport = inet_sk(sk)->inet_sport; 1452 r->idiag_state = sk->sk_state; 1453 1454 if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { 1455 nlmsg_cancel(skb, nlh); 1456 return err; 1457 } 1458 1459 handler = inet_diag_lock_handler(sk->sk_protocol); 1460 if (IS_ERR(handler)) { 1461 inet_diag_unlock_handler(handler); 1462 nlmsg_cancel(skb, nlh); 1463 return PTR_ERR(handler); 1464 } 1465 1466 attr = handler->idiag_info_size 1467 ? nla_reserve_64bit(skb, INET_DIAG_INFO, 1468 handler->idiag_info_size, 1469 INET_DIAG_PAD) 1470 : NULL; 1471 if (attr) 1472 info = nla_data(attr); 1473 1474 handler->idiag_get_info(sk, r, info); 1475 inet_diag_unlock_handler(handler); 1476 1477 nlmsg_end(skb, nlh); 1478 return 0; 1479 } 1480 1481 static const struct sock_diag_handler inet_diag_handler = { 1482 .family = AF_INET, 1483 .dump = inet_diag_handler_cmd, 1484 .get_info = inet_diag_handler_get_info, 1485 .destroy = inet_diag_handler_cmd, 1486 }; 1487 1488 static const struct sock_diag_handler inet6_diag_handler = { 1489 .family = AF_INET6, 1490 .dump = inet_diag_handler_cmd, 1491 .get_info = inet_diag_handler_get_info, 1492 .destroy = inet_diag_handler_cmd, 1493 }; 1494 1495 int inet_diag_register(const struct inet_diag_handler *h) 1496 { 1497 const __u16 type = h->idiag_type; 1498 int err = -EINVAL; 1499 1500 if (type >= IPPROTO_MAX) 1501 goto out; 1502 1503 mutex_lock(&inet_diag_table_mutex); 1504 err = -EEXIST; 1505 if (!inet_diag_table[type]) { 1506 inet_diag_table[type] = h; 1507 err = 0; 1508 } 1509 mutex_unlock(&inet_diag_table_mutex); 1510 out: 1511 return err; 1512 } 1513 EXPORT_SYMBOL_GPL(inet_diag_register); 1514 1515 void inet_diag_unregister(const struct inet_diag_handler *h) 1516 { 1517 const __u16 type = h->idiag_type; 1518 1519 if (type >= IPPROTO_MAX) 1520 return; 1521 1522 mutex_lock(&inet_diag_table_mutex); 1523 inet_diag_table[type] = NULL; 1524 mutex_unlock(&inet_diag_table_mutex); 1525 } 1526 EXPORT_SYMBOL_GPL(inet_diag_unregister); 1527 1528 static int __init inet_diag_init(void) 1529 { 1530 const int inet_diag_table_size = (IPPROTO_MAX * 1531 sizeof(struct inet_diag_handler *)); 1532 int err = -ENOMEM; 1533 1534 inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL); 1535 if (!inet_diag_table) 1536 goto out; 1537 1538 err = sock_diag_register(&inet_diag_handler); 1539 if (err) 1540 goto out_free_nl; 1541 1542 err = sock_diag_register(&inet6_diag_handler); 1543 if (err) 1544 goto out_free_inet; 1545 1546 sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); 1547 out: 1548 return err; 1549 1550 out_free_inet: 1551 sock_diag_unregister(&inet_diag_handler); 1552 out_free_nl: 1553 kfree(inet_diag_table); 1554 goto out; 1555 } 1556 1557 static void __exit inet_diag_exit(void) 1558 { 1559 sock_diag_unregister(&inet6_diag_handler); 1560 sock_diag_unregister(&inet_diag_handler); 1561 sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); 1562 kfree(inet_diag_table); 1563 } 1564 1565 module_init(inet_diag_init); 1566 module_exit(inet_diag_exit); 1567 MODULE_LICENSE("GPL"); 1568 MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); 1569 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); 1570 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); 1571