1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NETLINK Kernel-user communication protocol. 4 * 5 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 7 * Patrick McHardy <kaber@trash.net> 8 * 9 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 10 * added netlink_proto_exit 11 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 12 * use nlk_sk, as sk->protinfo is on a diet 8) 13 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 14 * - inc module use count of module that owns 15 * the kernel socket in case userspace opens 16 * socket of same protocol 17 * - remove all module support, since netlink is 18 * mandatory if CONFIG_NET=y these days 19 */ 20 21 #include <linux/module.h> 22 23 #include <linux/bpf.h> 24 #include <linux/capability.h> 25 #include <linux/kernel.h> 26 #include <linux/filter.h> 27 #include <linux/init.h> 28 #include <linux/signal.h> 29 #include <linux/sched.h> 30 #include <linux/errno.h> 31 #include <linux/string.h> 32 #include <linux/stat.h> 33 #include <linux/socket.h> 34 #include <linux/un.h> 35 #include <linux/fcntl.h> 36 #include <linux/termios.h> 37 #include <linux/sockios.h> 38 #include <linux/net.h> 39 #include <linux/fs.h> 40 #include <linux/slab.h> 41 #include <linux/uaccess.h> 42 #include <linux/skbuff.h> 43 #include <linux/netdevice.h> 44 #include <linux/rtnetlink.h> 45 #include <linux/proc_fs.h> 46 #include <linux/seq_file.h> 47 #include <linux/notifier.h> 48 #include <linux/security.h> 49 #include <linux/jhash.h> 50 #include <linux/jiffies.h> 51 #include <linux/random.h> 52 #include <linux/bitops.h> 53 #include <linux/mm.h> 54 #include <linux/types.h> 55 #include <linux/audit.h> 56 #include <linux/mutex.h> 57 #include <linux/vmalloc.h> 58 #include <linux/if_arp.h> 59 #include <linux/rhashtable.h> 60 #include <asm/cacheflush.h> 61 #include <linux/hash.h> 62 #include <linux/net_namespace.h> 63 #include <linux/nospec.h> 64 #include <linux/btf_ids.h> 65 66 #include <net/net_namespace.h> 67 #include <net/netns/generic.h> 68 #include <net/sock.h> 69 #include <net/scm.h> 70 #include <net/netlink.h> 71 #define CREATE_TRACE_POINTS 72 #include <trace/events/netlink.h> 73 74 #include "af_netlink.h" 75 #include "genetlink.h" 76 77 struct listeners { 78 struct rcu_head rcu; 79 unsigned long masks[]; 80 }; 81 82 /* state bits */ 83 #define NETLINK_S_CONGESTED 0x0 84 85 static inline int netlink_is_kernel(struct sock *sk) 86 { 87 return nlk_test_bit(KERNEL_SOCKET, sk); 88 } 89 90 struct netlink_table *nl_table __read_mostly; 91 EXPORT_SYMBOL_GPL(nl_table); 92 93 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 94 95 static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; 96 97 static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { 98 "nlk_cb_mutex-ROUTE", 99 "nlk_cb_mutex-1", 100 "nlk_cb_mutex-USERSOCK", 101 "nlk_cb_mutex-FIREWALL", 102 "nlk_cb_mutex-SOCK_DIAG", 103 "nlk_cb_mutex-NFLOG", 104 "nlk_cb_mutex-XFRM", 105 "nlk_cb_mutex-SELINUX", 106 "nlk_cb_mutex-ISCSI", 107 "nlk_cb_mutex-AUDIT", 108 "nlk_cb_mutex-FIB_LOOKUP", 109 "nlk_cb_mutex-CONNECTOR", 110 "nlk_cb_mutex-NETFILTER", 111 "nlk_cb_mutex-IP6_FW", 112 "nlk_cb_mutex-DNRTMSG", 113 "nlk_cb_mutex-KOBJECT_UEVENT", 114 "nlk_cb_mutex-GENERIC", 115 "nlk_cb_mutex-17", 116 "nlk_cb_mutex-SCSITRANSPORT", 117 "nlk_cb_mutex-ECRYPTFS", 118 "nlk_cb_mutex-RDMA", 119 "nlk_cb_mutex-CRYPTO", 120 "nlk_cb_mutex-SMC", 121 "nlk_cb_mutex-23", 122 "nlk_cb_mutex-24", 123 "nlk_cb_mutex-25", 124 "nlk_cb_mutex-26", 125 
"nlk_cb_mutex-27", 126 "nlk_cb_mutex-28", 127 "nlk_cb_mutex-29", 128 "nlk_cb_mutex-30", 129 "nlk_cb_mutex-31", 130 "nlk_cb_mutex-MAX_LINKS" 131 }; 132 133 static int netlink_dump(struct sock *sk, bool lock_taken); 134 135 /* nl_table locking explained: 136 * Lookup and traversal are protected with an RCU read-side lock. Insertion 137 * and removal are protected with per bucket lock while using RCU list 138 * modification primitives and may run in parallel to RCU protected lookups. 139 * Destruction of the Netlink socket may only occur *after* nl_table_lock has 140 * been acquired * either during or after the socket has been removed from 141 * the list and after an RCU grace period. 142 */ 143 DEFINE_RWLOCK(nl_table_lock); 144 EXPORT_SYMBOL_GPL(nl_table_lock); 145 static atomic_t nl_table_users = ATOMIC_INIT(0); 146 147 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 148 149 static BLOCKING_NOTIFIER_HEAD(netlink_chain); 150 151 152 static const struct rhashtable_params netlink_rhashtable_params; 153 154 void do_trace_netlink_extack(const char *msg) 155 { 156 trace_netlink_extack(msg); 157 } 158 EXPORT_SYMBOL(do_trace_netlink_extack); 159 160 static inline u32 netlink_group_mask(u32 group) 161 { 162 if (group > 32) 163 return 0; 164 return group ? 1 << (group - 1) : 0; 165 } 166 167 static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, 168 gfp_t gfp_mask) 169 { 170 unsigned int len = skb->len; 171 struct sk_buff *new; 172 173 new = alloc_skb(len, gfp_mask); 174 if (new == NULL) 175 return NULL; 176 177 NETLINK_CB(new).portid = NETLINK_CB(skb).portid; 178 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; 179 NETLINK_CB(new).creds = NETLINK_CB(skb).creds; 180 181 skb_put_data(new, skb->data, len); 182 return new; 183 } 184 185 static unsigned int netlink_tap_net_id; 186 187 struct netlink_tap_net { 188 struct list_head netlink_tap_all; 189 struct mutex netlink_tap_lock; 190 }; 191 192 int netlink_add_tap(struct netlink_tap *nt) 193 { 194 struct net *net = dev_net(nt->dev); 195 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 196 197 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 198 return -EINVAL; 199 200 mutex_lock(&nn->netlink_tap_lock); 201 list_add_rcu(&nt->list, &nn->netlink_tap_all); 202 mutex_unlock(&nn->netlink_tap_lock); 203 204 __module_get(nt->module); 205 206 return 0; 207 } 208 EXPORT_SYMBOL_GPL(netlink_add_tap); 209 210 static int __netlink_remove_tap(struct netlink_tap *nt) 211 { 212 struct net *net = dev_net(nt->dev); 213 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 214 bool found = false; 215 struct netlink_tap *tmp; 216 217 mutex_lock(&nn->netlink_tap_lock); 218 219 list_for_each_entry(tmp, &nn->netlink_tap_all, list) { 220 if (nt == tmp) { 221 list_del_rcu(&nt->list); 222 found = true; 223 goto out; 224 } 225 } 226 227 pr_warn("__netlink_remove_tap: %p not found\n", nt); 228 out: 229 mutex_unlock(&nn->netlink_tap_lock); 230 231 if (found) 232 module_put(nt->module); 233 234 return found ? 
0 : -ENODEV; 235 } 236 237 int netlink_remove_tap(struct netlink_tap *nt) 238 { 239 int ret; 240 241 ret = __netlink_remove_tap(nt); 242 synchronize_net(); 243 244 return ret; 245 } 246 EXPORT_SYMBOL_GPL(netlink_remove_tap); 247 248 static __net_init int netlink_tap_init_net(struct net *net) 249 { 250 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 251 252 INIT_LIST_HEAD(&nn->netlink_tap_all); 253 mutex_init(&nn->netlink_tap_lock); 254 return 0; 255 } 256 257 static struct pernet_operations netlink_tap_net_ops = { 258 .init = netlink_tap_init_net, 259 .id = &netlink_tap_net_id, 260 .size = sizeof(struct netlink_tap_net), 261 }; 262 263 static bool netlink_filter_tap(const struct sk_buff *skb) 264 { 265 struct sock *sk = skb->sk; 266 267 /* We take the more conservative approach and 268 * whitelist socket protocols that may pass. 269 */ 270 switch (sk->sk_protocol) { 271 case NETLINK_ROUTE: 272 case NETLINK_USERSOCK: 273 case NETLINK_SOCK_DIAG: 274 case NETLINK_NFLOG: 275 case NETLINK_XFRM: 276 case NETLINK_FIB_LOOKUP: 277 case NETLINK_NETFILTER: 278 case NETLINK_GENERIC: 279 return true; 280 } 281 282 return false; 283 } 284 285 static int __netlink_deliver_tap_skb(struct sk_buff *skb, 286 struct net_device *dev) 287 { 288 struct sk_buff *nskb; 289 struct sock *sk = skb->sk; 290 int ret = -ENOMEM; 291 292 if (!net_eq(dev_net(dev), sock_net(sk))) 293 return 0; 294 295 dev_hold(dev); 296 297 if (is_vmalloc_addr(skb->head)) 298 nskb = netlink_to_full_skb(skb, GFP_ATOMIC); 299 else 300 nskb = skb_clone(skb, GFP_ATOMIC); 301 if (nskb) { 302 nskb->dev = dev; 303 nskb->protocol = htons((u16) sk->sk_protocol); 304 nskb->pkt_type = netlink_is_kernel(sk) ? 305 PACKET_KERNEL : PACKET_USER; 306 skb_reset_network_header(nskb); 307 ret = dev_queue_xmit(nskb); 308 if (unlikely(ret > 0)) 309 ret = net_xmit_errno(ret); 310 } 311 312 dev_put(dev); 313 return ret; 314 } 315 316 static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) 317 { 318 int ret; 319 struct netlink_tap *tmp; 320 321 if (!netlink_filter_tap(skb)) 322 return; 323 324 list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { 325 ret = __netlink_deliver_tap_skb(skb, tmp->dev); 326 if (unlikely(ret)) 327 break; 328 } 329 } 330 331 static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) 332 { 333 struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); 334 335 rcu_read_lock(); 336 337 if (unlikely(!list_empty(&nn->netlink_tap_all))) 338 __netlink_deliver_tap(skb, nn); 339 340 rcu_read_unlock(); 341 } 342 343 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, 344 struct sk_buff *skb) 345 { 346 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) 347 netlink_deliver_tap(sock_net(dst), skb); 348 } 349 350 static void netlink_overrun(struct sock *sk) 351 { 352 if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) { 353 if (!test_and_set_bit(NETLINK_S_CONGESTED, 354 &nlk_sk(sk)->state)) { 355 WRITE_ONCE(sk->sk_err, ENOBUFS); 356 sk_error_report(sk); 357 } 358 } 359 sk_drops_inc(sk); 360 } 361 362 static void netlink_rcv_wake(struct sock *sk) 363 { 364 struct netlink_sock *nlk = nlk_sk(sk); 365 366 if (skb_queue_empty_lockless(&sk->sk_receive_queue)) 367 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 368 if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) 369 wake_up_interruptible(&nlk->wait); 370 } 371 372 static void netlink_skb_destructor(struct sk_buff *skb) 373 { 374 if (is_vmalloc_addr(skb->head)) { 375 if (!skb->cloned || 376 
!atomic_dec_return(&(skb_shinfo(skb)->dataref))) 377 vfree_atomic(skb->head); 378 379 skb->head = NULL; 380 } 381 if (skb->sk != NULL) 382 sock_rfree(skb); 383 } 384 385 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 386 { 387 WARN_ON(skb->sk != NULL); 388 skb->sk = sk; 389 skb->destructor = netlink_skb_destructor; 390 sk_mem_charge(sk, skb->truesize); 391 } 392 393 static void netlink_sock_destruct(struct sock *sk) 394 { 395 skb_queue_purge(&sk->sk_receive_queue); 396 397 if (!sock_flag(sk, SOCK_DEAD)) { 398 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 399 return; 400 } 401 402 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 403 WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 404 WARN_ON(nlk_sk(sk)->groups); 405 } 406 407 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 408 * SMP. Look, when several writers sleep and reader wakes them up, all but one 409 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 410 * this, _but_ remember, it adds useless work on UP machines. 411 */ 412 413 void netlink_table_grab(void) 414 __acquires(nl_table_lock) 415 { 416 might_sleep(); 417 418 write_lock_irq(&nl_table_lock); 419 420 if (atomic_read(&nl_table_users)) { 421 DECLARE_WAITQUEUE(wait, current); 422 423 add_wait_queue_exclusive(&nl_table_wait, &wait); 424 for (;;) { 425 set_current_state(TASK_UNINTERRUPTIBLE); 426 if (atomic_read(&nl_table_users) == 0) 427 break; 428 write_unlock_irq(&nl_table_lock); 429 schedule(); 430 write_lock_irq(&nl_table_lock); 431 } 432 433 __set_current_state(TASK_RUNNING); 434 remove_wait_queue(&nl_table_wait, &wait); 435 } 436 } 437 438 void netlink_table_ungrab(void) 439 __releases(nl_table_lock) 440 { 441 write_unlock_irq(&nl_table_lock); 442 wake_up(&nl_table_wait); 443 } 444 445 static inline void 446 netlink_lock_table(void) 447 { 448 unsigned long flags; 449 450 /* read_lock() synchronizes us to netlink_table_grab */ 451 452 read_lock_irqsave(&nl_table_lock, flags); 453 atomic_inc(&nl_table_users); 454 read_unlock_irqrestore(&nl_table_lock, flags); 455 } 456 457 static inline void 458 netlink_unlock_table(void) 459 { 460 if (atomic_dec_and_test(&nl_table_users)) 461 wake_up(&nl_table_wait); 462 } 463 464 struct netlink_compare_arg 465 { 466 possible_net_t pnet; 467 u32 portid; 468 }; 469 470 /* Doing sizeof directly may yield 4 extra bytes on 64-bit. 
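 * With CONFIG_NET_NS on a 64-bit build the structure carries 4 bytes of
 * trailing padding after portid, and that padding must not become part of
 * the hash key.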
*/ 471 #define netlink_compare_arg_len \ 472 (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) 473 474 static inline int netlink_compare(struct rhashtable_compare_arg *arg, 475 const void *ptr) 476 { 477 const struct netlink_compare_arg *x = arg->key; 478 const struct netlink_sock *nlk = ptr; 479 480 return nlk->portid != x->portid || 481 !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); 482 } 483 484 static void netlink_compare_arg_init(struct netlink_compare_arg *arg, 485 struct net *net, u32 portid) 486 { 487 memset(arg, 0, sizeof(*arg)); 488 write_pnet(&arg->pnet, net); 489 arg->portid = portid; 490 } 491 492 static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, 493 struct net *net) 494 { 495 struct netlink_compare_arg arg; 496 497 netlink_compare_arg_init(&arg, net, portid); 498 return rhashtable_lookup_fast(&table->hash, &arg, 499 netlink_rhashtable_params); 500 } 501 502 static int __netlink_insert(struct netlink_table *table, struct sock *sk) 503 { 504 struct netlink_compare_arg arg; 505 506 netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); 507 return rhashtable_lookup_insert_key(&table->hash, &arg, 508 &nlk_sk(sk)->node, 509 netlink_rhashtable_params); 510 } 511 512 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 513 { 514 struct netlink_table *table = &nl_table[protocol]; 515 struct sock *sk; 516 517 rcu_read_lock(); 518 sk = __netlink_lookup(table, portid, net); 519 if (sk) 520 sock_hold(sk); 521 rcu_read_unlock(); 522 523 return sk; 524 } 525 526 static const struct proto_ops netlink_ops; 527 528 static void 529 netlink_update_listeners(struct sock *sk) 530 { 531 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 532 unsigned long mask; 533 unsigned int i; 534 struct listeners *listeners; 535 536 listeners = nl_deref_protected(tbl->listeners); 537 if (!listeners) 538 return; 539 540 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 541 mask = 0; 542 sk_for_each_bound(sk, &tbl->mc_list) { 543 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 544 mask |= nlk_sk(sk)->groups[i]; 545 } 546 listeners->masks[i] = mask; 547 } 548 /* this function is only called with the netlink table "grabbed", which 549 * makes sure updates are visible before bind or setsockopt return. */ 550 } 551 552 static int netlink_insert(struct sock *sk, u32 portid) 553 { 554 struct netlink_table *table = &nl_table[sk->sk_protocol]; 555 int err; 556 557 lock_sock(sk); 558 559 err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; 560 if (nlk_sk(sk)->bound) 561 goto err; 562 563 /* portid can be read locklessly from netlink_getname(). */ 564 WRITE_ONCE(nlk_sk(sk)->portid, portid); 565 566 sock_hold(sk); 567 568 err = __netlink_insert(table, sk); 569 if (err) { 570 /* In case the hashtable backend returns with -EBUSY 571 * from here, it must not escape to the caller. 572 */ 573 if (unlikely(err == -EBUSY)) 574 err = -EOVERFLOW; 575 if (err == -EEXIST) 576 err = -EADDRINUSE; 577 sock_put(sk); 578 goto err; 579 } 580 581 /* We need to ensure that the socket is hashed and visible. */ 582 smp_wmb(); 583 /* Paired with lockless reads from netlink_bind(), 584 * netlink_connect() and netlink_sendmsg(). 
585 */ 586 WRITE_ONCE(nlk_sk(sk)->bound, portid); 587 588 err: 589 release_sock(sk); 590 return err; 591 } 592 593 static void netlink_remove(struct sock *sk) 594 { 595 struct netlink_table *table; 596 597 table = &nl_table[sk->sk_protocol]; 598 if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, 599 netlink_rhashtable_params)) 600 __sock_put(sk); 601 602 netlink_table_grab(); 603 if (nlk_sk(sk)->subscriptions) { 604 __sk_del_bind_node(sk); 605 netlink_update_listeners(sk); 606 } 607 if (sk->sk_protocol == NETLINK_GENERIC) 608 atomic_inc(&genl_sk_destructing_cnt); 609 netlink_table_ungrab(); 610 } 611 612 static struct proto netlink_proto = { 613 .name = "NETLINK", 614 .owner = THIS_MODULE, 615 .obj_size = sizeof(struct netlink_sock), 616 }; 617 618 static int __netlink_create(struct net *net, struct socket *sock, 619 int protocol, int kern) 620 { 621 struct sock *sk; 622 struct netlink_sock *nlk; 623 624 sock->ops = &netlink_ops; 625 626 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); 627 if (!sk) 628 return -ENOMEM; 629 630 sock_init_data(sock, sk); 631 632 nlk = nlk_sk(sk); 633 mutex_init(&nlk->nl_cb_mutex); 634 lockdep_set_class_and_name(&nlk->nl_cb_mutex, 635 nlk_cb_mutex_keys + protocol, 636 nlk_cb_mutex_key_strings[protocol]); 637 init_waitqueue_head(&nlk->wait); 638 639 sk->sk_destruct = netlink_sock_destruct; 640 sk->sk_protocol = protocol; 641 return 0; 642 } 643 644 static int netlink_create(struct net *net, struct socket *sock, int protocol, 645 int kern) 646 { 647 struct module *module = NULL; 648 struct netlink_sock *nlk; 649 int (*bind)(struct net *net, int group); 650 void (*unbind)(struct net *net, int group); 651 void (*release)(struct sock *sock, unsigned long *groups); 652 int err = 0; 653 654 sock->state = SS_UNCONNECTED; 655 656 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 657 return -ESOCKTNOSUPPORT; 658 659 if (protocol < 0 || protocol >= MAX_LINKS) 660 return -EPROTONOSUPPORT; 661 protocol = array_index_nospec(protocol, MAX_LINKS); 662 663 netlink_lock_table(); 664 #ifdef CONFIG_MODULES 665 if (!nl_table[protocol].registered) { 666 netlink_unlock_table(); 667 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 668 netlink_lock_table(); 669 } 670 #endif 671 if (nl_table[protocol].registered && 672 try_module_get(nl_table[protocol].module)) 673 module = nl_table[protocol].module; 674 else 675 err = -EPROTONOSUPPORT; 676 bind = nl_table[protocol].bind; 677 unbind = nl_table[protocol].unbind; 678 release = nl_table[protocol].release; 679 netlink_unlock_table(); 680 681 if (err < 0) 682 goto out; 683 684 err = __netlink_create(net, sock, protocol, kern); 685 if (err < 0) 686 goto out_module; 687 688 sock_prot_inuse_add(net, &netlink_proto, 1); 689 690 nlk = nlk_sk(sock->sk); 691 nlk->module = module; 692 nlk->netlink_bind = bind; 693 nlk->netlink_unbind = unbind; 694 nlk->netlink_release = release; 695 out: 696 return err; 697 698 out_module: 699 module_put(module); 700 goto out; 701 } 702 703 static void deferred_put_nlk_sk(struct rcu_head *head) 704 { 705 struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); 706 struct sock *sk = &nlk->sk; 707 708 kfree(nlk->groups); 709 nlk->groups = NULL; 710 711 if (!refcount_dec_and_test(&sk->sk_refcnt)) 712 return; 713 714 sk_free(sk); 715 } 716 717 static int netlink_release(struct socket *sock) 718 { 719 struct sock *sk = sock->sk; 720 struct netlink_sock *nlk; 721 722 if (!sk) 723 return 0; 724 725 netlink_remove(sk); 726 sock_orphan(sk); 727 nlk = nlk_sk(sk); 
728 729 /* 730 * OK. Socket is unlinked, any packets that arrive now 731 * will be purged. 732 */ 733 if (nlk->netlink_release) 734 nlk->netlink_release(sk, nlk->groups); 735 736 /* must not acquire netlink_table_lock in any way again before unbind 737 * and notifying genetlink is done as otherwise it might deadlock 738 */ 739 if (nlk->netlink_unbind) { 740 int i; 741 742 for (i = 0; i < nlk->ngroups; i++) 743 if (test_bit(i, nlk->groups)) 744 nlk->netlink_unbind(sock_net(sk), i + 1); 745 } 746 if (sk->sk_protocol == NETLINK_GENERIC && 747 atomic_dec_return(&genl_sk_destructing_cnt) == 0) 748 wake_up(&genl_sk_destructing_waitq); 749 750 sock->sk = NULL; 751 wake_up_interruptible_all(&nlk->wait); 752 753 skb_queue_purge(&sk->sk_write_queue); 754 755 if (nlk->portid && nlk->bound) { 756 struct netlink_notify n = { 757 .net = sock_net(sk), 758 .protocol = sk->sk_protocol, 759 .portid = nlk->portid, 760 }; 761 blocking_notifier_call_chain(&netlink_chain, 762 NETLINK_URELEASE, &n); 763 } 764 765 /* Terminate any outstanding dump */ 766 if (nlk->cb_running) { 767 if (nlk->cb.done) 768 nlk->cb.done(&nlk->cb); 769 module_put(nlk->cb.module); 770 kfree_skb(nlk->cb.skb); 771 WRITE_ONCE(nlk->cb_running, false); 772 } 773 774 module_put(nlk->module); 775 776 if (netlink_is_kernel(sk)) { 777 netlink_table_grab(); 778 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 779 if (--nl_table[sk->sk_protocol].registered == 0) { 780 struct listeners *old; 781 782 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); 783 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); 784 kfree_rcu(old, rcu); 785 nl_table[sk->sk_protocol].module = NULL; 786 nl_table[sk->sk_protocol].bind = NULL; 787 nl_table[sk->sk_protocol].unbind = NULL; 788 nl_table[sk->sk_protocol].flags = 0; 789 nl_table[sk->sk_protocol].registered = 0; 790 } 791 netlink_table_ungrab(); 792 } 793 794 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 795 796 call_rcu(&nlk->rcu, deferred_put_nlk_sk); 797 return 0; 798 } 799 800 static int netlink_autobind(struct socket *sock) 801 { 802 struct sock *sk = sock->sk; 803 struct net *net = sock_net(sk); 804 struct netlink_table *table = &nl_table[sk->sk_protocol]; 805 s32 portid = task_tgid_vnr(current); 806 int err; 807 s32 rover = -4096; 808 bool ok; 809 810 retry: 811 cond_resched(); 812 rcu_read_lock(); 813 ok = !__netlink_lookup(table, portid, net); 814 rcu_read_unlock(); 815 if (!ok) { 816 /* Bind collision, search negative portid values. */ 817 if (rover == -4096) 818 /* rover will be in range [S32_MIN, -4097] */ 819 rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN); 820 else if (rover >= -4096) 821 rover = -4097; 822 portid = rover--; 823 goto retry; 824 } 825 826 err = netlink_insert(sk, portid); 827 if (err == -EADDRINUSE) 828 goto retry; 829 830 /* If 2 threads race to autobind, that is fine. */ 831 if (err == -EBUSY) 832 err = 0; 833 834 return err; 835 } 836 837 /** 838 * __netlink_ns_capable - General netlink message capability test 839 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. 840 * @user_ns: The user namespace of the capability to use 841 * @cap: The capability to use 842 * 843 * Test to see if the opener of the socket we received the message 844 * from had when the netlink socket was created and the sender of the 845 * message has the capability @cap in the user namespace @user_ns. 
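 * (That is: unless the message carries an explicit destination
 * (NETLINK_SKB_DST), the process that opened the originating socket must
 * have had @cap at socket-creation time, and the current sender must have
 * @cap in @user_ns.)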
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
			  struct user_namespace *user_ns, int cap)
{
	return ((nsp->flags & NETLINK_SKB_DST) ||
		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from
 * had the capability @cap when the netlink socket was created and that
 * the sender of the message has it in the user namespace @user_ns.
 */
bool netlink_ns_capable(const struct sk_buff *skb,
			struct user_namespace *user_ns, int cap)
{
	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
}
EXPORT_SYMBOL(netlink_ns_capable);

/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from
 * had the capability @cap when the netlink socket was created and that
 * the sender of the message has it in all user namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
	return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from
 * had the capability @cap when the netlink socket was created and that
 * the sender of the message has it over the network namespace of
 * the socket we received the message from.
897 */ 898 bool netlink_net_capable(const struct sk_buff *skb, int cap) 899 { 900 return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); 901 } 902 EXPORT_SYMBOL(netlink_net_capable); 903 904 static inline int netlink_allowed(const struct socket *sock, unsigned int flag) 905 { 906 return (nl_table[sock->sk->sk_protocol].flags & flag) || 907 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); 908 } 909 910 static void 911 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 912 { 913 struct netlink_sock *nlk = nlk_sk(sk); 914 915 if (nlk->subscriptions && !subscriptions) 916 __sk_del_bind_node(sk); 917 else if (!nlk->subscriptions && subscriptions) 918 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 919 nlk->subscriptions = subscriptions; 920 } 921 922 static int netlink_realloc_groups(struct sock *sk) 923 { 924 struct netlink_sock *nlk = nlk_sk(sk); 925 unsigned int groups; 926 unsigned long *new_groups; 927 int err = 0; 928 929 netlink_table_grab(); 930 931 groups = nl_table[sk->sk_protocol].groups; 932 if (!nl_table[sk->sk_protocol].registered) { 933 err = -ENOENT; 934 goto out_unlock; 935 } 936 937 if (nlk->ngroups >= groups) 938 goto out_unlock; 939 940 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 941 if (new_groups == NULL) { 942 err = -ENOMEM; 943 goto out_unlock; 944 } 945 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 946 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 947 948 nlk->groups = new_groups; 949 nlk->ngroups = groups; 950 out_unlock: 951 netlink_table_ungrab(); 952 return err; 953 } 954 955 static void netlink_undo_bind(int group, long unsigned int groups, 956 struct sock *sk) 957 { 958 struct netlink_sock *nlk = nlk_sk(sk); 959 int undo; 960 961 if (!nlk->netlink_unbind) 962 return; 963 964 for (undo = 0; undo < group; undo++) 965 if (test_bit(undo, &groups)) 966 nlk->netlink_unbind(sock_net(sk), undo + 1); 967 } 968 969 static int netlink_bind(struct socket *sock, struct sockaddr_unsized *addr, 970 int addr_len) 971 { 972 struct sock *sk = sock->sk; 973 struct net *net = sock_net(sk); 974 struct netlink_sock *nlk = nlk_sk(sk); 975 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 976 int err = 0; 977 unsigned long groups; 978 bool bound; 979 980 if (addr_len < sizeof(struct sockaddr_nl)) 981 return -EINVAL; 982 983 if (nladdr->nl_family != AF_NETLINK) 984 return -EINVAL; 985 groups = nladdr->nl_groups; 986 987 /* Only superuser is allowed to listen multicasts */ 988 if (groups) { 989 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 990 return -EPERM; 991 err = netlink_realloc_groups(sk); 992 if (err) 993 return err; 994 } 995 996 if (nlk->ngroups < BITS_PER_LONG) 997 groups &= (1UL << nlk->ngroups) - 1; 998 999 /* Paired with WRITE_ONCE() in netlink_insert() */ 1000 bound = READ_ONCE(nlk->bound); 1001 if (bound) { 1002 /* Ensure nlk->portid is up-to-date. */ 1003 smp_rmb(); 1004 1005 if (nladdr->nl_pid != nlk->portid) 1006 return -EINVAL; 1007 } 1008 1009 if (nlk->netlink_bind && groups) { 1010 int group; 1011 1012 /* nl_groups is a u32, so cap the maximum groups we can bind */ 1013 for (group = 0; group < BITS_PER_TYPE(u32); group++) { 1014 if (!test_bit(group, &groups)) 1015 continue; 1016 err = nlk->netlink_bind(net, group + 1); 1017 if (!err) 1018 continue; 1019 netlink_undo_bind(group, groups, sk); 1020 return err; 1021 } 1022 } 1023 1024 /* No need for barriers here as we return to user-space without 1025 * using any of the bound attributes. 
1026 */ 1027 netlink_lock_table(); 1028 if (!bound) { 1029 err = nladdr->nl_pid ? 1030 netlink_insert(sk, nladdr->nl_pid) : 1031 netlink_autobind(sock); 1032 if (err) { 1033 netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); 1034 goto unlock; 1035 } 1036 } 1037 1038 if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 1039 goto unlock; 1040 netlink_unlock_table(); 1041 1042 netlink_table_grab(); 1043 netlink_update_subscriptions(sk, nlk->subscriptions + 1044 hweight32(groups) - 1045 hweight32(nlk->groups[0])); 1046 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; 1047 netlink_update_listeners(sk); 1048 netlink_table_ungrab(); 1049 1050 return 0; 1051 1052 unlock: 1053 netlink_unlock_table(); 1054 return err; 1055 } 1056 1057 static int netlink_connect(struct socket *sock, struct sockaddr_unsized *addr, 1058 int alen, int flags) 1059 { 1060 int err = 0; 1061 struct sock *sk = sock->sk; 1062 struct netlink_sock *nlk = nlk_sk(sk); 1063 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 1064 1065 if (alen < sizeof(addr->sa_family)) 1066 return -EINVAL; 1067 1068 if (addr->sa_family == AF_UNSPEC) { 1069 /* paired with READ_ONCE() in netlink_getsockbyportid() */ 1070 WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); 1071 /* dst_portid and dst_group can be read locklessly */ 1072 WRITE_ONCE(nlk->dst_portid, 0); 1073 WRITE_ONCE(nlk->dst_group, 0); 1074 return 0; 1075 } 1076 if (addr->sa_family != AF_NETLINK) 1077 return -EINVAL; 1078 1079 if (alen < sizeof(struct sockaddr_nl)) 1080 return -EINVAL; 1081 1082 if ((nladdr->nl_groups || nladdr->nl_pid) && 1083 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1084 return -EPERM; 1085 1086 /* No need for barriers here as we return to user-space without 1087 * using any of the bound attributes. 1088 * Paired with WRITE_ONCE() in netlink_insert(). 1089 */ 1090 if (!READ_ONCE(nlk->bound)) 1091 err = netlink_autobind(sock); 1092 1093 if (err == 0) { 1094 /* paired with READ_ONCE() in netlink_getsockbyportid() */ 1095 WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); 1096 /* dst_portid and dst_group can be read locklessly */ 1097 WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); 1098 WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); 1099 } 1100 1101 return err; 1102 } 1103 1104 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 1105 int peer) 1106 { 1107 struct sock *sk = sock->sk; 1108 struct netlink_sock *nlk = nlk_sk(sk); 1109 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); 1110 1111 nladdr->nl_family = AF_NETLINK; 1112 nladdr->nl_pad = 0; 1113 1114 if (peer) { 1115 /* Paired with WRITE_ONCE() in netlink_connect() */ 1116 nladdr->nl_pid = READ_ONCE(nlk->dst_portid); 1117 nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); 1118 } else { 1119 /* Paired with WRITE_ONCE() in netlink_insert() */ 1120 nladdr->nl_pid = READ_ONCE(nlk->portid); 1121 netlink_lock_table(); 1122 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 1123 netlink_unlock_table(); 1124 } 1125 return sizeof(*nladdr); 1126 } 1127 1128 static int netlink_ioctl(struct socket *sock, unsigned int cmd, 1129 unsigned long arg) 1130 { 1131 /* try to hand this ioctl down to the NIC drivers. 
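 * Returning -ENOIOCTLCMD lets the socket core fall back to its generic
 * handling of the request.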
 */
	return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	/* dst_portid and sk_state can be changed in netlink_connect() */
	if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
	    READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfd(int fd)
{
	CLASS(fd, f)(fd);
	struct inode *inode;
	struct sock *sock;

	if (fd_empty(f))
		return ERR_PTR(-EBADF);

	inode = file_inode(fd_file(f));
	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
	size_t head_size = SKB_HEAD_ALIGN(size);
	struct sk_buff *skb;
	void *data;

	if (head_size <= PAGE_SIZE || broadcast)
		return alloc_skb(size, GFP_KERNEL);

	data = kvmalloc(head_size, GFP_KERNEL);
	if (!data)
		return NULL;

	skb = __build_skb(data, head_size);
	if (!skb)
		kvfree(data);
	else if (is_vmalloc_addr(data))
		skb->destructor = netlink_skb_destructor;

	return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
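 *
 * A typical caller therefore loops on the lookup, as netlink_unicast()
 * below does:
 *
 *	retry:
 *		sk = netlink_getsockbyportid(ssk, portid);
 *		...
 *		err = netlink_attachskb(sk, skb, &timeo, ssk);
 *		if (err == 1)
 *			goto retry;
 *		if (err)
 *			return err;
 *		return netlink_sendskb(sk, skb);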
1208 */ 1209 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 1210 long *timeo, struct sock *ssk) 1211 { 1212 DECLARE_WAITQUEUE(wait, current); 1213 struct netlink_sock *nlk; 1214 unsigned int rmem; 1215 1216 nlk = nlk_sk(sk); 1217 rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); 1218 1219 if ((rmem == skb->truesize || rmem <= READ_ONCE(sk->sk_rcvbuf)) && 1220 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1221 netlink_skb_set_owner_r(skb, sk); 1222 return 0; 1223 } 1224 1225 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1226 1227 if (!*timeo) { 1228 if (!ssk || netlink_is_kernel(ssk)) 1229 netlink_overrun(sk); 1230 sock_put(sk); 1231 kfree_skb(skb); 1232 return -EAGAIN; 1233 } 1234 1235 __set_current_state(TASK_INTERRUPTIBLE); 1236 add_wait_queue(&nlk->wait, &wait); 1237 rmem = atomic_read(&sk->sk_rmem_alloc); 1238 1239 if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || 1240 test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1241 !sock_flag(sk, SOCK_DEAD)) 1242 *timeo = schedule_timeout(*timeo); 1243 1244 __set_current_state(TASK_RUNNING); 1245 remove_wait_queue(&nlk->wait, &wait); 1246 sock_put(sk); 1247 1248 if (signal_pending(current)) { 1249 kfree_skb(skb); 1250 return sock_intr_errno(*timeo); 1251 } 1252 1253 return 1; 1254 } 1255 1256 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1257 { 1258 int len = skb->len; 1259 1260 netlink_deliver_tap(sock_net(sk), skb); 1261 1262 skb_queue_tail(&sk->sk_receive_queue, skb); 1263 sk->sk_data_ready(sk); 1264 return len; 1265 } 1266 1267 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 1268 { 1269 int len = __netlink_sendskb(sk, skb); 1270 1271 sock_put(sk); 1272 return len; 1273 } 1274 1275 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 1276 { 1277 kfree_skb(skb); 1278 sock_put(sk); 1279 } 1280 1281 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) 1282 { 1283 int delta; 1284 1285 skb_assert_len(skb); 1286 WARN_ON(skb->sk != NULL); 1287 delta = skb->end - skb->tail; 1288 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) 1289 return skb; 1290 1291 if (skb_shared(skb)) { 1292 struct sk_buff *nskb = skb_clone(skb, allocation); 1293 if (!nskb) 1294 return skb; 1295 consume_skb(skb); 1296 skb = nskb; 1297 } 1298 1299 pskb_expand_head(skb, 0, -delta, 1300 (allocation & ~__GFP_DIRECT_RECLAIM) | 1301 __GFP_NOWARN | __GFP_NORETRY); 1302 return skb; 1303 } 1304 1305 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 1306 struct sock *ssk) 1307 { 1308 int ret; 1309 struct netlink_sock *nlk = nlk_sk(sk); 1310 1311 ret = -ECONNREFUSED; 1312 if (nlk->netlink_rcv != NULL) { 1313 ret = skb->len; 1314 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 1315 netlink_skb_set_owner_r(skb, sk); 1316 NETLINK_CB(skb).sk = ssk; 1317 netlink_deliver_tap_kernel(sk, ssk, skb); 1318 nlk->netlink_rcv(skb); 1319 consume_skb(skb); 1320 } else { 1321 kfree_skb(skb); 1322 } 1323 sock_put(sk); 1324 return ret; 1325 } 1326 1327 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 1328 u32 portid, int nonblock) 1329 { 1330 struct sock *sk; 1331 int err; 1332 long timeo; 1333 1334 skb = netlink_trim(skb, gfp_any()); 1335 1336 timeo = sock_sndtimeo(ssk, nonblock); 1337 retry: 1338 sk = netlink_getsockbyportid(ssk, portid); 1339 if (IS_ERR(sk)) { 1340 kfree_skb(skb); 1341 return PTR_ERR(sk); 1342 } 1343 if (netlink_is_kernel(sk)) 1344 return netlink_unicast_kernel(sk, skb, ssk); 1345 1346 if (sk_filter(sk, skb)) { 1347 err = skb->len; 1348 
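		/* The destination socket's filter rejected the skb: free it
		 * but still report its length, so the caller sees an
		 * ordinary successful send.
		 */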
kfree_skb(skb); 1349 sock_put(sk); 1350 return err; 1351 } 1352 1353 err = netlink_attachskb(sk, skb, &timeo, ssk); 1354 if (err == 1) 1355 goto retry; 1356 if (err) 1357 return err; 1358 1359 return netlink_sendskb(sk, skb); 1360 } 1361 EXPORT_SYMBOL(netlink_unicast); 1362 1363 int netlink_has_listeners(struct sock *sk, unsigned int group) 1364 { 1365 int res = 0; 1366 struct listeners *listeners; 1367 1368 BUG_ON(!netlink_is_kernel(sk)); 1369 1370 rcu_read_lock(); 1371 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 1372 1373 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) 1374 res = test_bit(group - 1, listeners->masks); 1375 1376 rcu_read_unlock(); 1377 1378 return res; 1379 } 1380 EXPORT_SYMBOL_GPL(netlink_has_listeners); 1381 1382 bool netlink_strict_get_check(struct sk_buff *skb) 1383 { 1384 return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); 1385 } 1386 EXPORT_SYMBOL_GPL(netlink_strict_get_check); 1387 1388 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1389 { 1390 struct netlink_sock *nlk = nlk_sk(sk); 1391 unsigned int rmem, rcvbuf; 1392 1393 rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); 1394 rcvbuf = READ_ONCE(sk->sk_rcvbuf); 1395 1396 if ((rmem == skb->truesize || rmem <= rcvbuf) && 1397 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1398 netlink_skb_set_owner_r(skb, sk); 1399 __netlink_sendskb(sk, skb); 1400 return rmem > (rcvbuf >> 1); 1401 } 1402 1403 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1404 return -1; 1405 } 1406 1407 struct netlink_broadcast_data { 1408 struct sock *exclude_sk; 1409 struct net *net; 1410 u32 portid; 1411 u32 group; 1412 int failure; 1413 int delivery_failure; 1414 int congested; 1415 int delivered; 1416 gfp_t allocation; 1417 struct sk_buff *skb, *skb2; 1418 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); 1419 void *tx_data; 1420 }; 1421 1422 static void do_one_broadcast(struct sock *sk, 1423 struct netlink_broadcast_data *p) 1424 { 1425 struct netlink_sock *nlk = nlk_sk(sk); 1426 int val; 1427 1428 if (p->exclude_sk == sk) 1429 return; 1430 1431 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1432 !test_bit(p->group - 1, nlk->groups)) 1433 return; 1434 1435 if (!net_eq(sock_net(sk), p->net)) { 1436 if (!nlk_test_bit(LISTEN_ALL_NSID, sk)) 1437 return; 1438 1439 if (!peernet_has_id(sock_net(sk), p->net)) 1440 return; 1441 1442 if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, 1443 CAP_NET_BROADCAST)) 1444 return; 1445 } 1446 1447 if (p->failure) { 1448 netlink_overrun(sk); 1449 return; 1450 } 1451 1452 sock_hold(sk); 1453 if (p->skb2 == NULL) { 1454 if (skb_shared(p->skb)) { 1455 p->skb2 = skb_clone(p->skb, p->allocation); 1456 } else { 1457 p->skb2 = skb_get(p->skb); 1458 /* 1459 * skb ownership may have been set when 1460 * delivered to a previous socket. 1461 */ 1462 skb_orphan(p->skb2); 1463 } 1464 } 1465 if (p->skb2 == NULL) { 1466 netlink_overrun(sk); 1467 /* Clone failed. Notify ALL listeners. 
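	 * Once p->failure is set, each remaining socket in this walk is
	 * treated as overrun instead of being handed the message.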
*/ 1468 p->failure = 1; 1469 if (nlk_test_bit(BROADCAST_SEND_ERROR, sk)) 1470 p->delivery_failure = 1; 1471 goto out; 1472 } 1473 1474 if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1475 kfree_skb(p->skb2); 1476 p->skb2 = NULL; 1477 goto out; 1478 } 1479 1480 if (sk_filter(sk, p->skb2)) { 1481 kfree_skb(p->skb2); 1482 p->skb2 = NULL; 1483 goto out; 1484 } 1485 NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); 1486 if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) 1487 NETLINK_CB(p->skb2).nsid_is_set = true; 1488 val = netlink_broadcast_deliver(sk, p->skb2); 1489 if (val < 0) { 1490 netlink_overrun(sk); 1491 if (nlk_test_bit(BROADCAST_SEND_ERROR, sk)) 1492 p->delivery_failure = 1; 1493 } else { 1494 p->congested |= val; 1495 p->delivered = 1; 1496 p->skb2 = NULL; 1497 } 1498 out: 1499 sock_put(sk); 1500 } 1501 1502 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, 1503 u32 portid, 1504 u32 group, gfp_t allocation, 1505 netlink_filter_fn filter, 1506 void *filter_data) 1507 { 1508 struct net *net = sock_net(ssk); 1509 struct netlink_broadcast_data info; 1510 struct sock *sk; 1511 1512 skb = netlink_trim(skb, allocation); 1513 1514 info.exclude_sk = ssk; 1515 info.net = net; 1516 info.portid = portid; 1517 info.group = group; 1518 info.failure = 0; 1519 info.delivery_failure = 0; 1520 info.congested = 0; 1521 info.delivered = 0; 1522 info.allocation = allocation; 1523 info.skb = skb; 1524 info.skb2 = NULL; 1525 info.tx_filter = filter; 1526 info.tx_data = filter_data; 1527 1528 /* While we sleep in clone, do not allow to change socket list */ 1529 1530 netlink_lock_table(); 1531 1532 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1533 do_one_broadcast(sk, &info); 1534 1535 consume_skb(skb); 1536 1537 netlink_unlock_table(); 1538 1539 if (info.delivery_failure) { 1540 kfree_skb(info.skb2); 1541 return -ENOBUFS; 1542 } 1543 consume_skb(info.skb2); 1544 1545 if (info.delivered) { 1546 if (info.congested && gfpflags_allow_blocking(allocation)) 1547 yield(); 1548 return 0; 1549 } 1550 return -ESRCH; 1551 } 1552 EXPORT_SYMBOL(netlink_broadcast_filtered); 1553 1554 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, 1555 u32 group, gfp_t allocation) 1556 { 1557 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, 1558 NULL, NULL); 1559 } 1560 EXPORT_SYMBOL(netlink_broadcast); 1561 1562 struct netlink_set_err_data { 1563 struct sock *exclude_sk; 1564 u32 portid; 1565 u32 group; 1566 int code; 1567 }; 1568 1569 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) 1570 { 1571 struct netlink_sock *nlk = nlk_sk(sk); 1572 int ret = 0; 1573 1574 if (sk == p->exclude_sk) 1575 goto out; 1576 1577 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1578 goto out; 1579 1580 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1581 !test_bit(p->group - 1, nlk->groups)) 1582 goto out; 1583 1584 if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) { 1585 ret = 1; 1586 goto out; 1587 } 1588 1589 WRITE_ONCE(sk->sk_err, p->code); 1590 sk_error_report(sk); 1591 out: 1592 return ret; 1593 } 1594 1595 /** 1596 * netlink_set_err - report error to broadcast listeners 1597 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() 1598 * @portid: the PORTID of a process that we want to skip (if any) 1599 * @group: the broadcast group that will notice the error 1600 * @code: error code, must be negative (as usual in kernelspace) 1601 * 1602 * This function returns 
the number of broadcast listeners that have set the 1603 * NETLINK_NO_ENOBUFS socket option. 1604 */ 1605 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1606 { 1607 struct netlink_set_err_data info; 1608 unsigned long flags; 1609 struct sock *sk; 1610 int ret = 0; 1611 1612 info.exclude_sk = ssk; 1613 info.portid = portid; 1614 info.group = group; 1615 /* sk->sk_err wants a positive error value */ 1616 info.code = -code; 1617 1618 read_lock_irqsave(&nl_table_lock, flags); 1619 1620 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1621 ret += do_one_set_err(sk, &info); 1622 1623 read_unlock_irqrestore(&nl_table_lock, flags); 1624 return ret; 1625 } 1626 EXPORT_SYMBOL(netlink_set_err); 1627 1628 /* must be called with netlink table grabbed */ 1629 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1630 unsigned int group, 1631 int is_new) 1632 { 1633 int old, new = !!is_new, subscriptions; 1634 1635 old = test_bit(group - 1, nlk->groups); 1636 subscriptions = nlk->subscriptions - old + new; 1637 __assign_bit(group - 1, nlk->groups, new); 1638 netlink_update_subscriptions(&nlk->sk, subscriptions); 1639 netlink_update_listeners(&nlk->sk); 1640 } 1641 1642 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1643 sockptr_t optval, unsigned int optlen) 1644 { 1645 struct sock *sk = sock->sk; 1646 struct netlink_sock *nlk = nlk_sk(sk); 1647 unsigned int val = 0; 1648 int nr = -1; 1649 1650 if (level != SOL_NETLINK) 1651 return -ENOPROTOOPT; 1652 1653 if (optlen >= sizeof(int) && 1654 copy_from_sockptr(&val, optval, sizeof(val))) 1655 return -EFAULT; 1656 1657 switch (optname) { 1658 case NETLINK_PKTINFO: 1659 nr = NETLINK_F_RECV_PKTINFO; 1660 break; 1661 case NETLINK_ADD_MEMBERSHIP: 1662 case NETLINK_DROP_MEMBERSHIP: { 1663 int err; 1664 1665 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) 1666 return -EPERM; 1667 err = netlink_realloc_groups(sk); 1668 if (err) 1669 return err; 1670 if (!val || val - 1 >= nlk->ngroups) 1671 return -EINVAL; 1672 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { 1673 err = nlk->netlink_bind(sock_net(sk), val); 1674 if (err) 1675 return err; 1676 } 1677 netlink_table_grab(); 1678 netlink_update_socket_mc(nlk, val, 1679 optname == NETLINK_ADD_MEMBERSHIP); 1680 netlink_table_ungrab(); 1681 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) 1682 nlk->netlink_unbind(sock_net(sk), val); 1683 1684 break; 1685 } 1686 case NETLINK_BROADCAST_ERROR: 1687 nr = NETLINK_F_BROADCAST_SEND_ERROR; 1688 break; 1689 case NETLINK_NO_ENOBUFS: 1690 assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val); 1691 if (val) { 1692 clear_bit(NETLINK_S_CONGESTED, &nlk->state); 1693 wake_up_interruptible(&nlk->wait); 1694 } 1695 break; 1696 case NETLINK_LISTEN_ALL_NSID: 1697 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) 1698 return -EPERM; 1699 nr = NETLINK_F_LISTEN_ALL_NSID; 1700 break; 1701 case NETLINK_CAP_ACK: 1702 nr = NETLINK_F_CAP_ACK; 1703 break; 1704 case NETLINK_EXT_ACK: 1705 nr = NETLINK_F_EXT_ACK; 1706 break; 1707 case NETLINK_GET_STRICT_CHK: 1708 nr = NETLINK_F_STRICT_CHK; 1709 break; 1710 default: 1711 return -ENOPROTOOPT; 1712 } 1713 if (nr >= 0) 1714 assign_bit(nr, &nlk->flags, val); 1715 return 0; 1716 } 1717 1718 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1719 char __user *optval, int __user *optlen) 1720 { 1721 struct sock *sk = sock->sk; 1722 struct netlink_sock *nlk = nlk_sk(sk); 1723 unsigned int flag; 1724 int len, val; 1725 1726 if 
(level != SOL_NETLINK) 1727 return -ENOPROTOOPT; 1728 1729 if (get_user(len, optlen)) 1730 return -EFAULT; 1731 if (len < 0) 1732 return -EINVAL; 1733 1734 switch (optname) { 1735 case NETLINK_PKTINFO: 1736 flag = NETLINK_F_RECV_PKTINFO; 1737 break; 1738 case NETLINK_BROADCAST_ERROR: 1739 flag = NETLINK_F_BROADCAST_SEND_ERROR; 1740 break; 1741 case NETLINK_NO_ENOBUFS: 1742 flag = NETLINK_F_RECV_NO_ENOBUFS; 1743 break; 1744 case NETLINK_LIST_MEMBERSHIPS: { 1745 int pos, idx, shift, err = 0; 1746 1747 netlink_lock_table(); 1748 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 1749 if (len - pos < sizeof(u32)) 1750 break; 1751 1752 idx = pos / sizeof(unsigned long); 1753 shift = (pos % sizeof(unsigned long)) * 8; 1754 if (put_user((u32)(nlk->groups[idx] >> shift), 1755 (u32 __user *)(optval + pos))) { 1756 err = -EFAULT; 1757 break; 1758 } 1759 } 1760 if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) 1761 err = -EFAULT; 1762 netlink_unlock_table(); 1763 return err; 1764 } 1765 case NETLINK_LISTEN_ALL_NSID: 1766 flag = NETLINK_F_LISTEN_ALL_NSID; 1767 break; 1768 case NETLINK_CAP_ACK: 1769 flag = NETLINK_F_CAP_ACK; 1770 break; 1771 case NETLINK_EXT_ACK: 1772 flag = NETLINK_F_EXT_ACK; 1773 break; 1774 case NETLINK_GET_STRICT_CHK: 1775 flag = NETLINK_F_STRICT_CHK; 1776 break; 1777 default: 1778 return -ENOPROTOOPT; 1779 } 1780 1781 if (len < sizeof(int)) 1782 return -EINVAL; 1783 1784 len = sizeof(int); 1785 val = test_bit(flag, &nlk->flags); 1786 1787 if (put_user(len, optlen) || 1788 copy_to_user(optval, &val, len)) 1789 return -EFAULT; 1790 1791 return 0; 1792 } 1793 1794 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1795 { 1796 struct nl_pktinfo info; 1797 1798 info.group = NETLINK_CB(skb).dst_group; 1799 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1800 } 1801 1802 static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, 1803 struct sk_buff *skb) 1804 { 1805 if (!NETLINK_CB(skb).nsid_is_set) 1806 return; 1807 1808 put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), 1809 &NETLINK_CB(skb).nsid); 1810 } 1811 1812 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1813 { 1814 struct sock *sk = sock->sk; 1815 struct netlink_sock *nlk = nlk_sk(sk); 1816 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1817 u32 dst_portid; 1818 u32 dst_group; 1819 struct sk_buff *skb; 1820 int err; 1821 struct scm_cookie scm; 1822 u32 netlink_skb_flags = 0; 1823 1824 if (msg->msg_flags & MSG_OOB) 1825 return -EOPNOTSUPP; 1826 1827 if (len == 0) { 1828 pr_warn_once("Zero length message leads to an empty skb\n"); 1829 return -ENODATA; 1830 } 1831 1832 err = scm_send(sock, msg, &scm, true); 1833 if (err < 0) 1834 return err; 1835 1836 if (msg->msg_namelen) { 1837 err = -EINVAL; 1838 if (msg->msg_namelen < sizeof(struct sockaddr_nl)) 1839 goto out; 1840 if (addr->nl_family != AF_NETLINK) 1841 goto out; 1842 dst_portid = addr->nl_pid; 1843 dst_group = ffs(addr->nl_groups); 1844 err = -EPERM; 1845 if ((dst_group || dst_portid) && 1846 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1847 goto out; 1848 netlink_skb_flags |= NETLINK_SKB_DST; 1849 } else { 1850 /* Paired with WRITE_ONCE() in netlink_connect() */ 1851 dst_portid = READ_ONCE(nlk->dst_portid); 1852 dst_group = READ_ONCE(nlk->dst_group); 1853 } 1854 1855 /* Paired with WRITE_ONCE() in netlink_insert() */ 1856 if (!READ_ONCE(nlk->bound)) { 1857 err = netlink_autobind(sock); 1858 if (err) 1859 goto out; 
1860 } else { 1861 /* Ensure nlk is hashed and visible. */ 1862 smp_rmb(); 1863 } 1864 1865 err = -EMSGSIZE; 1866 if (len > sk->sk_sndbuf - 32) 1867 goto out; 1868 err = -ENOBUFS; 1869 skb = netlink_alloc_large_skb(len, dst_group); 1870 if (skb == NULL) 1871 goto out; 1872 1873 NETLINK_CB(skb).portid = nlk->portid; 1874 NETLINK_CB(skb).dst_group = dst_group; 1875 NETLINK_CB(skb).creds = scm.creds; 1876 NETLINK_CB(skb).flags = netlink_skb_flags; 1877 1878 err = -EFAULT; 1879 if (memcpy_from_msg(skb_put(skb, len), msg, len)) { 1880 kfree_skb(skb); 1881 goto out; 1882 } 1883 1884 err = security_netlink_send(sk, skb); 1885 if (err) { 1886 kfree_skb(skb); 1887 goto out; 1888 } 1889 1890 if (dst_group) { 1891 refcount_inc(&skb->users); 1892 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1893 } 1894 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT); 1895 1896 out: 1897 scm_destroy(&scm); 1898 return err; 1899 } 1900 1901 static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1902 int flags) 1903 { 1904 struct scm_cookie scm; 1905 struct sock *sk = sock->sk; 1906 struct netlink_sock *nlk = nlk_sk(sk); 1907 size_t copied, max_recvmsg_len; 1908 struct sk_buff *skb, *data_skb; 1909 int err, ret; 1910 1911 if (flags & MSG_OOB) 1912 return -EOPNOTSUPP; 1913 1914 copied = 0; 1915 1916 skb = skb_recv_datagram(sk, flags, &err); 1917 if (skb == NULL) 1918 goto out; 1919 1920 data_skb = skb; 1921 1922 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1923 if (unlikely(skb_shinfo(skb)->frag_list)) { 1924 /* 1925 * If this skb has a frag_list, then here that means that we 1926 * will have to use the frag_list skb's data for compat tasks 1927 * and the regular skb's data for normal (non-compat) tasks. 1928 * 1929 * If we need to send the compat skb, assign it to the 1930 * 'data_skb' variable so that it will be used below for data 1931 * copying. We keep 'skb' for everything else, including 1932 * freeing both later. 
1933 */ 1934 if (flags & MSG_CMSG_COMPAT) 1935 data_skb = skb_shinfo(skb)->frag_list; 1936 } 1937 #endif 1938 1939 /* Record the max length of recvmsg() calls for future allocations */ 1940 max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); 1941 max_recvmsg_len = min_t(size_t, max_recvmsg_len, 1942 SKB_WITH_OVERHEAD(32768)); 1943 WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); 1944 1945 copied = data_skb->len; 1946 if (len < copied) { 1947 msg->msg_flags |= MSG_TRUNC; 1948 copied = len; 1949 } 1950 1951 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); 1952 1953 if (msg->msg_name) { 1954 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); 1955 addr->nl_family = AF_NETLINK; 1956 addr->nl_pad = 0; 1957 addr->nl_pid = NETLINK_CB(skb).portid; 1958 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1959 msg->msg_namelen = sizeof(*addr); 1960 } 1961 1962 if (nlk_test_bit(RECV_PKTINFO, sk)) 1963 netlink_cmsg_recv_pktinfo(msg, skb); 1964 if (nlk_test_bit(LISTEN_ALL_NSID, sk)) 1965 netlink_cmsg_listen_all_nsid(sk, msg, skb); 1966 1967 memset(&scm, 0, sizeof(scm)); 1968 scm.creds = *NETLINK_CREDS(skb); 1969 if (flags & MSG_TRUNC) 1970 copied = data_skb->len; 1971 1972 skb_free_datagram(sk, skb); 1973 1974 if (READ_ONCE(nlk->cb_running) && 1975 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 1976 ret = netlink_dump(sk, false); 1977 if (ret) { 1978 WRITE_ONCE(sk->sk_err, -ret); 1979 sk_error_report(sk); 1980 } 1981 } 1982 1983 scm_recv(sock, msg, &scm, flags); 1984 out: 1985 netlink_rcv_wake(sk); 1986 return err ? : copied; 1987 } 1988 1989 static void netlink_data_ready(struct sock *sk) 1990 { 1991 BUG(); 1992 } 1993 1994 /* 1995 * We export these functions to other modules. They provide a 1996 * complete set of kernel non-blocking support for message 1997 * queueing. 
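 *
 * A minimal usage sketch (MY_UNIT and my_nl_input() are placeholders, not
 * part of this file; netlink_kernel_create() is the <linux/netlink.h>
 * wrapper around __netlink_kernel_create() below):
 *
 *	static void my_nl_input(struct sk_buff *skb);
 *
 *	struct netlink_kernel_cfg cfg = {
 *		.groups	= 32,
 *		.input	= my_nl_input,
 *	};
 *	struct sock *nlsk = netlink_kernel_create(net, MY_UNIT, &cfg);
 *	...
 *	netlink_kernel_release(nlsk);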
1998 */ 1999 2000 struct sock * 2001 __netlink_kernel_create(struct net *net, int unit, struct module *module, 2002 struct netlink_kernel_cfg *cfg) 2003 { 2004 struct socket *sock; 2005 struct sock *sk; 2006 struct netlink_sock *nlk; 2007 struct listeners *listeners = NULL; 2008 unsigned int groups; 2009 2010 BUG_ON(!nl_table); 2011 2012 if (unit < 0 || unit >= MAX_LINKS) 2013 return NULL; 2014 2015 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 2016 return NULL; 2017 2018 if (__netlink_create(net, sock, unit, 1) < 0) 2019 goto out_sock_release_nosk; 2020 2021 sk = sock->sk; 2022 2023 if (!cfg || cfg->groups < 32) 2024 groups = 32; 2025 else 2026 groups = cfg->groups; 2027 2028 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2029 if (!listeners) 2030 goto out_sock_release; 2031 2032 sk->sk_data_ready = netlink_data_ready; 2033 if (cfg && cfg->input) 2034 nlk_sk(sk)->netlink_rcv = cfg->input; 2035 2036 if (netlink_insert(sk, 0)) 2037 goto out_sock_release; 2038 2039 nlk = nlk_sk(sk); 2040 set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags); 2041 2042 netlink_table_grab(); 2043 if (!nl_table[unit].registered) { 2044 nl_table[unit].groups = groups; 2045 rcu_assign_pointer(nl_table[unit].listeners, listeners); 2046 nl_table[unit].module = module; 2047 if (cfg) { 2048 nl_table[unit].bind = cfg->bind; 2049 nl_table[unit].unbind = cfg->unbind; 2050 nl_table[unit].release = cfg->release; 2051 nl_table[unit].flags = cfg->flags; 2052 } 2053 nl_table[unit].registered = 1; 2054 } else { 2055 kfree(listeners); 2056 nl_table[unit].registered++; 2057 } 2058 netlink_table_ungrab(); 2059 return sk; 2060 2061 out_sock_release: 2062 kfree(listeners); 2063 netlink_kernel_release(sk); 2064 return NULL; 2065 2066 out_sock_release_nosk: 2067 sock_release(sock); 2068 return NULL; 2069 } 2070 EXPORT_SYMBOL(__netlink_kernel_create); 2071 2072 void 2073 netlink_kernel_release(struct sock *sk) 2074 { 2075 if (sk == NULL || sk->sk_socket == NULL) 2076 return; 2077 2078 sock_release(sk->sk_socket); 2079 } 2080 EXPORT_SYMBOL(netlink_kernel_release); 2081 2082 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 2083 { 2084 struct listeners *new, *old; 2085 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 2086 2087 if (groups < 32) 2088 groups = 32; 2089 2090 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 2091 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 2092 if (!new) 2093 return -ENOMEM; 2094 old = nl_deref_protected(tbl->listeners); 2095 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 2096 rcu_assign_pointer(tbl->listeners, new); 2097 2098 kfree_rcu(old, rcu); 2099 } 2100 tbl->groups = groups; 2101 2102 return 0; 2103 } 2104 2105 /** 2106 * netlink_change_ngroups - change number of multicast groups 2107 * 2108 * This changes the number of multicast groups that are available 2109 * on a certain netlink family. Note that it is not possible to 2110 * change the number of groups to below 32. Also note that it does 2111 * not implicitly call netlink_clear_multicast_users() when the 2112 * number of groups is reduced. 2113 * 2114 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 2115 * @groups: The new number of groups. 
2116 */ 2117 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 2118 { 2119 int err; 2120 2121 netlink_table_grab(); 2122 err = __netlink_change_ngroups(sk, groups); 2123 netlink_table_ungrab(); 2124 2125 return err; 2126 } 2127 2128 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 2129 { 2130 struct sock *sk; 2131 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 2132 struct hlist_node *tmp; 2133 2134 sk_for_each_bound_safe(sk, tmp, &tbl->mc_list) 2135 netlink_update_socket_mc(nlk_sk(sk), group, 0); 2136 } 2137 2138 struct nlmsghdr * 2139 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 2140 { 2141 struct nlmsghdr *nlh; 2142 int size = nlmsg_msg_size(len); 2143 2144 nlh = skb_put(skb, NLMSG_ALIGN(size)); 2145 nlh->nlmsg_type = type; 2146 nlh->nlmsg_len = size; 2147 nlh->nlmsg_flags = flags; 2148 nlh->nlmsg_pid = portid; 2149 nlh->nlmsg_seq = seq; 2150 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 2151 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); 2152 return nlh; 2153 } 2154 EXPORT_SYMBOL(__nlmsg_put); 2155 2156 static size_t 2157 netlink_ack_tlv_len(struct netlink_sock *nlk, int err, 2158 const struct netlink_ext_ack *extack) 2159 { 2160 size_t tlvlen; 2161 2162 if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) 2163 return 0; 2164 2165 tlvlen = 0; 2166 if (extack->_msg) 2167 tlvlen += nla_total_size(strlen(extack->_msg) + 1); 2168 if (extack->cookie_len) 2169 tlvlen += nla_total_size(extack->cookie_len); 2170 2171 /* Following attributes are only reported as error (not warning) */ 2172 if (!err) 2173 return tlvlen; 2174 2175 if (extack->bad_attr) 2176 tlvlen += nla_total_size(sizeof(u32)); 2177 if (extack->policy) 2178 tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy); 2179 if (extack->miss_type) 2180 tlvlen += nla_total_size(sizeof(u32)); 2181 if (extack->miss_nest) 2182 tlvlen += nla_total_size(sizeof(u32)); 2183 2184 return tlvlen; 2185 } 2186 2187 static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr) 2188 { 2189 return !WARN_ON(addr < nlmsg_data(nlh) || 2190 addr - (const void *) nlh >= nlh->nlmsg_len); 2191 } 2192 2193 static void 2194 netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err, 2195 const struct netlink_ext_ack *extack) 2196 { 2197 if (extack->_msg) 2198 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)); 2199 if (extack->cookie_len) 2200 WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, 2201 extack->cookie_len, extack->cookie)); 2202 2203 if (!err) 2204 return; 2205 2206 if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr)) 2207 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, 2208 (u8 *)extack->bad_attr - (const u8 *)nlh)); 2209 if (extack->policy) 2210 netlink_policy_dump_write_attr(skb, extack->policy, 2211 NLMSGERR_ATTR_POLICY); 2212 if (extack->miss_type) 2213 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE, 2214 extack->miss_type)); 2215 if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest)) 2216 WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST, 2217 (u8 *)extack->miss_nest - (const u8 *)nlh)); 2218 } 2219 2220 /* 2221 * It looks a bit ugly. 2222 * It would be better to create kernel thread. 
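 *
 * In practice the dump machinery runs in process context:
 * __netlink_dump_start() performs the first cb->dump() pass, and
 * netlink_recvmsg() re-invokes netlink_dump() whenever the socket's
 * receive queue drains below half of sk_rcvbuf, until dump_done_errno
 * reaches zero or below and the closing NLMSG_DONE message has been queued.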
2223 */
2224
2225 static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
2226 struct netlink_callback *cb,
2227 struct netlink_ext_ack *extack)
2228 {
2229 struct nlmsghdr *nlh;
2230 size_t extack_len;
2231
2232 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno),
2233 NLM_F_MULTI | cb->answer_flags);
2234 if (WARN_ON(!nlh))
2235 return -ENOBUFS;
2236
2237 nl_dump_check_consistent(cb, nlh);
2238 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
2239
2240 extack_len = netlink_ack_tlv_len(nlk, nlk->dump_done_errno, extack);
2241 if (extack_len) {
2242 nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
2243 if (skb_tailroom(skb) >= extack_len) {
2244 netlink_ack_tlv_fill(skb, cb->nlh,
2245 nlk->dump_done_errno, extack);
2246 nlmsg_end(skb, nlh);
2247 }
2248 }
2249
2250 return 0;
2251 }
2252
2253 static int netlink_dump(struct sock *sk, bool lock_taken)
2254 {
2255 struct netlink_sock *nlk = nlk_sk(sk);
2256 struct netlink_ext_ack extack = {};
2257 struct netlink_callback *cb;
2258 struct sk_buff *skb = NULL;
2259 unsigned int rmem, rcvbuf;
2260 size_t max_recvmsg_len;
2261 struct module *module;
2262 int err = -ENOBUFS;
2263 int alloc_min_size;
2264 int alloc_size;
2265
2266 if (!lock_taken)
2267 mutex_lock(&nlk->nl_cb_mutex);
2268 if (!nlk->cb_running) {
2269 err = -EINVAL;
2270 goto errout_skb;
2271 }
2272
2273 /* NLMSG_GOODSIZE is small to avoid high order allocations being
2274 * required, but it makes sense to _attempt_ a 32KiB allocation
2275 * to reduce the number of system calls on dump operations, if the
2276 * user ever provided a big enough buffer.
2277 */
2278 cb = &nlk->cb;
2279 alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2280
2281 max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len);
2282 if (alloc_min_size < max_recvmsg_len) {
2283 alloc_size = max_recvmsg_len;
2284 skb = alloc_skb(alloc_size,
2285 (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
2286 __GFP_NOWARN | __GFP_NORETRY);
2287 }
2288 if (!skb) {
2289 alloc_size = alloc_min_size;
2290 skb = alloc_skb(alloc_size, GFP_KERNEL);
2291 }
2292 if (!skb)
2293 goto errout_skb;
2294
2295 rcvbuf = READ_ONCE(sk->sk_rcvbuf);
2296 rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
2297 if (rmem != skb->truesize && rmem >= rcvbuf) {
2298 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
2299 goto errout_skb;
2300 }
2301
2302 /* Trim skb to the allocated size. The user is expected to provide a
2303 * buffer as large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped
2304 * at netlink_recvmsg())). The dump will pack as many smaller messages
2305 * as can fit within the allocated skb. The skb is typically allocated
2306 * with more space than required (possibly close to 2x the requested
2307 * size, because of rounding up to the next power of two). Allowing the
2308 * dump to use the excess space makes it difficult for a user to size a
2309 * reasonable static buffer based on the expected largest dump of a
2310 * single netdev. The outcome is an MSG_TRUNC error.
2311 */
2312 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2313
2314 /* Make sure malicious BPF programs cannot read uninitialized memory
2315 * between skb->head and skb->data.
2316 */
2317 skb_reset_network_header(skb);
2318 skb_reset_mac_header(skb);
2319
2320 netlink_skb_set_owner_r(skb, sk);
2321
2322 if (nlk->dump_done_errno > 0) {
2323 cb->extack = &extack;
2324
2325 nlk->dump_done_errno = cb->dump(skb, cb);
2326
2327 /* EMSGSIZE plus something already in the skb means
2328 * that there's more to dump but the current skb has filled up.
2329 * If the callback really wants to return EMSGSIZE to user space 2330 * it needs to do so again, on the next cb->dump() call, 2331 * without putting data in the skb. 2332 */ 2333 if (nlk->dump_done_errno == -EMSGSIZE && skb->len) 2334 nlk->dump_done_errno = skb->len; 2335 2336 cb->extack = NULL; 2337 } 2338 2339 if (nlk->dump_done_errno > 0 || 2340 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { 2341 mutex_unlock(&nlk->nl_cb_mutex); 2342 2343 if (sk_filter(sk, skb)) 2344 kfree_skb(skb); 2345 else 2346 __netlink_sendskb(sk, skb); 2347 return 0; 2348 } 2349 2350 if (netlink_dump_done(nlk, skb, cb, &extack)) 2351 goto errout_skb; 2352 2353 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 2354 /* frag_list skb's data is used for compat tasks 2355 * and the regular skb's data for normal (non-compat) tasks. 2356 * See netlink_recvmsg(). 2357 */ 2358 if (unlikely(skb_shinfo(skb)->frag_list)) { 2359 if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack)) 2360 goto errout_skb; 2361 } 2362 #endif 2363 2364 if (sk_filter(sk, skb)) 2365 kfree_skb(skb); 2366 else 2367 __netlink_sendskb(sk, skb); 2368 2369 if (cb->done) 2370 cb->done(cb); 2371 2372 WRITE_ONCE(nlk->cb_running, false); 2373 module = cb->module; 2374 skb = cb->skb; 2375 mutex_unlock(&nlk->nl_cb_mutex); 2376 module_put(module); 2377 consume_skb(skb); 2378 return 0; 2379 2380 errout_skb: 2381 mutex_unlock(&nlk->nl_cb_mutex); 2382 kfree_skb(skb); 2383 return err; 2384 } 2385 2386 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 2387 const struct nlmsghdr *nlh, 2388 struct netlink_dump_control *control) 2389 { 2390 struct netlink_callback *cb; 2391 struct netlink_sock *nlk; 2392 struct sock *sk; 2393 int ret; 2394 2395 refcount_inc(&skb->users); 2396 2397 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2398 if (sk == NULL) { 2399 ret = -ECONNREFUSED; 2400 goto error_free; 2401 } 2402 2403 nlk = nlk_sk(sk); 2404 mutex_lock(&nlk->nl_cb_mutex); 2405 /* A dump is in progress... */ 2406 if (nlk->cb_running) { 2407 ret = -EBUSY; 2408 goto error_unlock; 2409 } 2410 /* add reference of module which cb->dump belongs to */ 2411 if (!try_module_get(control->module)) { 2412 ret = -EPROTONOSUPPORT; 2413 goto error_unlock; 2414 } 2415 2416 cb = &nlk->cb; 2417 memset(cb, 0, sizeof(*cb)); 2418 cb->dump = control->dump; 2419 cb->done = control->done; 2420 cb->nlh = nlh; 2421 cb->data = control->data; 2422 cb->module = control->module; 2423 cb->min_dump_alloc = control->min_dump_alloc; 2424 cb->flags = control->flags; 2425 cb->skb = skb; 2426 2427 cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); 2428 2429 if (control->start) { 2430 cb->extack = control->extack; 2431 ret = control->start(cb); 2432 cb->extack = NULL; 2433 if (ret) 2434 goto error_put; 2435 } 2436 2437 WRITE_ONCE(nlk->cb_running, true); 2438 nlk->dump_done_errno = INT_MAX; 2439 2440 ret = netlink_dump(sk, true); 2441 2442 sock_put(sk); 2443 2444 if (ret) 2445 return ret; 2446 2447 /* We successfully started a dump, by returning -EINTR we 2448 * signal not to send ACK even if it was requested. 
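 *
 * netlink_rcv_skb() recognizes -EINTR and skips the ACK for that message.
 *
 * A hypothetical request handler (my_dump, my_done and my_sk are
 * illustrative names) would typically start the dump through the
 * netlink_dump_start() wrapper:
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 *		struct netlink_dump_control c = {
 *			.dump	= my_dump,
 *			.done	= my_done,
 *			.module	= THIS_MODULE,
 *		};
 *
 *		return netlink_dump_start(my_sk, skb, nlh, &c);
 *	}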
2449 */ 2450 return -EINTR; 2451 2452 error_put: 2453 module_put(control->module); 2454 error_unlock: 2455 sock_put(sk); 2456 mutex_unlock(&nlk->nl_cb_mutex); 2457 error_free: 2458 kfree_skb(skb); 2459 return ret; 2460 } 2461 EXPORT_SYMBOL(__netlink_dump_start); 2462 2463 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, 2464 const struct netlink_ext_ack *extack) 2465 { 2466 struct sk_buff *skb; 2467 struct nlmsghdr *rep; 2468 struct nlmsgerr *errmsg; 2469 size_t payload = sizeof(*errmsg); 2470 struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); 2471 unsigned int flags = 0; 2472 size_t tlvlen; 2473 2474 /* Error messages get the original request appended, unless the user 2475 * requests to cap the error message, and get extra error data if 2476 * requested. 2477 */ 2478 if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags)) 2479 payload += nlmsg_len(nlh); 2480 else 2481 flags |= NLM_F_CAPPED; 2482 2483 tlvlen = netlink_ack_tlv_len(nlk, err, extack); 2484 if (tlvlen) 2485 flags |= NLM_F_ACK_TLVS; 2486 2487 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); 2488 if (!skb) 2489 goto err_skb; 2490 2491 rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 2492 NLMSG_ERROR, sizeof(*errmsg), flags); 2493 if (!rep) 2494 goto err_bad_put; 2495 errmsg = nlmsg_data(rep); 2496 errmsg->error = err; 2497 errmsg->msg = *nlh; 2498 2499 if (!(flags & NLM_F_CAPPED)) { 2500 if (!nlmsg_append(skb, nlmsg_len(nlh))) 2501 goto err_bad_put; 2502 2503 memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), 2504 nlmsg_len(nlh)); 2505 } 2506 2507 if (tlvlen) 2508 netlink_ack_tlv_fill(skb, nlh, err, extack); 2509 2510 nlmsg_end(skb, rep); 2511 2512 nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); 2513 2514 return; 2515 2516 err_bad_put: 2517 nlmsg_free(skb); 2518 err_skb: 2519 WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); 2520 sk_error_report(NETLINK_CB(in_skb).sk); 2521 } 2522 EXPORT_SYMBOL(netlink_ack); 2523 2524 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 2525 struct nlmsghdr *, 2526 struct netlink_ext_ack *)) 2527 { 2528 struct netlink_ext_ack extack; 2529 struct nlmsghdr *nlh; 2530 int err; 2531 2532 while (skb->len >= nlmsg_total_size(0)) { 2533 int msglen; 2534 2535 memset(&extack, 0, sizeof(extack)); 2536 nlh = nlmsg_hdr(skb); 2537 err = 0; 2538 2539 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 2540 return 0; 2541 2542 /* Only requests are handled by the kernel */ 2543 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 2544 goto ack; 2545 2546 /* Skip control messages */ 2547 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 2548 goto ack; 2549 2550 err = cb(skb, nlh, &extack); 2551 if (err == -EINTR) 2552 goto skip; 2553 2554 ack: 2555 if (nlh->nlmsg_flags & NLM_F_ACK || err) 2556 netlink_ack(skb, nlh, err, &extack); 2557 2558 skip: 2559 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 2560 if (msglen > skb->len) 2561 msglen = skb->len; 2562 skb_pull(skb, msglen); 2563 } 2564 2565 return 0; 2566 } 2567 EXPORT_SYMBOL(netlink_rcv_skb); 2568 2569 /** 2570 * nlmsg_notify - send a notification netlink message 2571 * @sk: netlink socket to use 2572 * @skb: notification message 2573 * @portid: destination netlink portid for reports or 0 2574 * @group: destination multicast group or 0 2575 * @report: 1 to report back, 0 to disable 2576 * @flags: allocation flags 2577 */ 2578 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, 2579 unsigned int group, int report, gfp_t flags) 2580 { 2581 int err = 0; 2582 2583 if (group) { 2584 int exclude_portid = 
0; 2585 2586 if (report) { 2587 refcount_inc(&skb->users); 2588 exclude_portid = portid; 2589 } 2590 2591 /* errors reported via destination sk->sk_err, but propagate 2592 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 2593 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); 2594 if (err == -ESRCH) 2595 err = 0; 2596 } 2597 2598 if (report) { 2599 int err2; 2600 2601 err2 = nlmsg_unicast(sk, skb, portid); 2602 if (!err) 2603 err = err2; 2604 } 2605 2606 return err; 2607 } 2608 EXPORT_SYMBOL(nlmsg_notify); 2609 2610 #ifdef CONFIG_PROC_FS 2611 struct nl_seq_iter { 2612 struct seq_net_private p; 2613 struct rhashtable_iter hti; 2614 int link; 2615 }; 2616 2617 static void netlink_walk_start(struct nl_seq_iter *iter) 2618 { 2619 rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti); 2620 rhashtable_walk_start(&iter->hti); 2621 } 2622 2623 static void netlink_walk_stop(struct nl_seq_iter *iter) 2624 { 2625 rhashtable_walk_stop(&iter->hti); 2626 rhashtable_walk_exit(&iter->hti); 2627 } 2628 2629 static void *__netlink_seq_next(struct seq_file *seq) 2630 { 2631 struct nl_seq_iter *iter = seq->private; 2632 struct netlink_sock *nlk; 2633 2634 do { 2635 for (;;) { 2636 nlk = rhashtable_walk_next(&iter->hti); 2637 2638 if (IS_ERR(nlk)) { 2639 if (PTR_ERR(nlk) == -EAGAIN) 2640 continue; 2641 2642 return nlk; 2643 } 2644 2645 if (nlk) 2646 break; 2647 2648 netlink_walk_stop(iter); 2649 if (++iter->link >= MAX_LINKS) 2650 return NULL; 2651 2652 netlink_walk_start(iter); 2653 } 2654 } while (sock_net(&nlk->sk) != seq_file_net(seq)); 2655 2656 return nlk; 2657 } 2658 2659 static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) 2660 __acquires(RCU) 2661 { 2662 struct nl_seq_iter *iter = seq->private; 2663 void *obj = SEQ_START_TOKEN; 2664 loff_t pos; 2665 2666 iter->link = 0; 2667 2668 netlink_walk_start(iter); 2669 2670 for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) 2671 obj = __netlink_seq_next(seq); 2672 2673 return obj; 2674 } 2675 2676 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2677 { 2678 ++*pos; 2679 return __netlink_seq_next(seq); 2680 } 2681 2682 static void netlink_native_seq_stop(struct seq_file *seq, void *v) 2683 { 2684 struct nl_seq_iter *iter = seq->private; 2685 2686 if (iter->link >= MAX_LINKS) 2687 return; 2688 2689 netlink_walk_stop(iter); 2690 } 2691 2692 2693 static int netlink_native_seq_show(struct seq_file *seq, void *v) 2694 { 2695 if (v == SEQ_START_TOKEN) { 2696 seq_puts(seq, 2697 "sk Eth Pid Groups " 2698 "Rmem Wmem Dump Locks Drops Inode\n"); 2699 } else { 2700 struct sock *s = v; 2701 struct netlink_sock *nlk = nlk_sk(s); 2702 2703 seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n", 2704 s, 2705 s->sk_protocol, 2706 nlk->portid, 2707 nlk->groups ? 
(u32)nlk->groups[0] : 0, 2708 sk_rmem_alloc_get(s), 2709 sk_wmem_alloc_get(s), 2710 READ_ONCE(nlk->cb_running), 2711 refcount_read(&s->sk_refcnt), 2712 sk_drops_read(s), 2713 sock_i_ino(s) 2714 ); 2715 2716 } 2717 return 0; 2718 } 2719 2720 #ifdef CONFIG_BPF_SYSCALL 2721 struct bpf_iter__netlink { 2722 __bpf_md_ptr(struct bpf_iter_meta *, meta); 2723 __bpf_md_ptr(struct netlink_sock *, sk); 2724 }; 2725 2726 DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk) 2727 2728 static int netlink_prog_seq_show(struct bpf_prog *prog, 2729 struct bpf_iter_meta *meta, 2730 void *v) 2731 { 2732 struct bpf_iter__netlink ctx; 2733 2734 meta->seq_num--; /* skip SEQ_START_TOKEN */ 2735 ctx.meta = meta; 2736 ctx.sk = nlk_sk((struct sock *)v); 2737 return bpf_iter_run_prog(prog, &ctx); 2738 } 2739 2740 static int netlink_seq_show(struct seq_file *seq, void *v) 2741 { 2742 struct bpf_iter_meta meta; 2743 struct bpf_prog *prog; 2744 2745 meta.seq = seq; 2746 prog = bpf_iter_get_info(&meta, false); 2747 if (!prog) 2748 return netlink_native_seq_show(seq, v); 2749 2750 if (v != SEQ_START_TOKEN) 2751 return netlink_prog_seq_show(prog, &meta, v); 2752 2753 return 0; 2754 } 2755 2756 static void netlink_seq_stop(struct seq_file *seq, void *v) 2757 { 2758 struct bpf_iter_meta meta; 2759 struct bpf_prog *prog; 2760 2761 if (!v) { 2762 meta.seq = seq; 2763 prog = bpf_iter_get_info(&meta, true); 2764 if (prog) 2765 (void)netlink_prog_seq_show(prog, &meta, v); 2766 } 2767 2768 netlink_native_seq_stop(seq, v); 2769 } 2770 #else 2771 static int netlink_seq_show(struct seq_file *seq, void *v) 2772 { 2773 return netlink_native_seq_show(seq, v); 2774 } 2775 2776 static void netlink_seq_stop(struct seq_file *seq, void *v) 2777 { 2778 netlink_native_seq_stop(seq, v); 2779 } 2780 #endif 2781 2782 static const struct seq_operations netlink_seq_ops = { 2783 .start = netlink_seq_start, 2784 .next = netlink_seq_next, 2785 .stop = netlink_seq_stop, 2786 .show = netlink_seq_show, 2787 }; 2788 #endif 2789 2790 int netlink_register_notifier(struct notifier_block *nb) 2791 { 2792 return blocking_notifier_chain_register(&netlink_chain, nb); 2793 } 2794 EXPORT_SYMBOL(netlink_register_notifier); 2795 2796 int netlink_unregister_notifier(struct notifier_block *nb) 2797 { 2798 return blocking_notifier_chain_unregister(&netlink_chain, nb); 2799 } 2800 EXPORT_SYMBOL(netlink_unregister_notifier); 2801 2802 static const struct proto_ops netlink_ops = { 2803 .family = PF_NETLINK, 2804 .owner = THIS_MODULE, 2805 .release = netlink_release, 2806 .bind = netlink_bind, 2807 .connect = netlink_connect, 2808 .socketpair = sock_no_socketpair, 2809 .accept = sock_no_accept, 2810 .getname = netlink_getname, 2811 .poll = datagram_poll, 2812 .ioctl = netlink_ioctl, 2813 .listen = sock_no_listen, 2814 .shutdown = sock_no_shutdown, 2815 .setsockopt = netlink_setsockopt, 2816 .getsockopt = netlink_getsockopt, 2817 .sendmsg = netlink_sendmsg, 2818 .recvmsg = netlink_recvmsg, 2819 .mmap = sock_no_mmap, 2820 }; 2821 2822 static const struct net_proto_family netlink_family_ops = { 2823 .family = PF_NETLINK, 2824 .create = netlink_create, 2825 .owner = THIS_MODULE, /* for consistency 8) */ 2826 }; 2827 2828 static int __net_init netlink_net_init(struct net *net) 2829 { 2830 #ifdef CONFIG_PROC_FS 2831 if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops, 2832 sizeof(struct nl_seq_iter))) 2833 return -ENOMEM; 2834 #endif 2835 return 0; 2836 } 2837 2838 static void __net_exit netlink_net_exit(struct net *net) 2839 { 2840 
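	/* Undo netlink_net_init(): drop the per-namespace /proc/net/netlink entry. */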
#ifdef CONFIG_PROC_FS 2841 remove_proc_entry("netlink", net->proc_net); 2842 #endif 2843 } 2844 2845 static void __init netlink_add_usersock_entry(void) 2846 { 2847 struct listeners *listeners; 2848 int groups = 32; 2849 2850 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2851 if (!listeners) 2852 panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); 2853 2854 netlink_table_grab(); 2855 2856 nl_table[NETLINK_USERSOCK].groups = groups; 2857 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); 2858 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2859 nl_table[NETLINK_USERSOCK].registered = 1; 2860 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; 2861 2862 netlink_table_ungrab(); 2863 } 2864 2865 static struct pernet_operations __net_initdata netlink_net_ops = { 2866 .init = netlink_net_init, 2867 .exit = netlink_net_exit, 2868 }; 2869 2870 static inline u32 netlink_hash(const void *data, u32 len, u32 seed) 2871 { 2872 const struct netlink_sock *nlk = data; 2873 struct netlink_compare_arg arg; 2874 2875 netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); 2876 return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); 2877 } 2878 2879 static const struct rhashtable_params netlink_rhashtable_params = { 2880 .head_offset = offsetof(struct netlink_sock, node), 2881 .key_len = netlink_compare_arg_len, 2882 .obj_hashfn = netlink_hash, 2883 .obj_cmpfn = netlink_compare, 2884 .automatic_shrinking = true, 2885 }; 2886 2887 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2888 BTF_ID_LIST_SINGLE(btf_netlink_sock_id, struct, netlink_sock) 2889 2890 static const struct bpf_iter_seq_info netlink_seq_info = { 2891 .seq_ops = &netlink_seq_ops, 2892 .init_seq_private = bpf_iter_init_seq_net, 2893 .fini_seq_private = bpf_iter_fini_seq_net, 2894 .seq_priv_size = sizeof(struct nl_seq_iter), 2895 }; 2896 2897 static struct bpf_iter_reg netlink_reg_info = { 2898 .target = "netlink", 2899 .ctx_arg_info_size = 1, 2900 .ctx_arg_info = { 2901 { offsetof(struct bpf_iter__netlink, sk), 2902 PTR_TO_BTF_ID_OR_NULL }, 2903 }, 2904 .seq_info = &netlink_seq_info, 2905 }; 2906 2907 static int __init bpf_iter_register(void) 2908 { 2909 netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; 2910 return bpf_iter_reg_target(&netlink_reg_info); 2911 } 2912 #endif 2913 2914 static int __init netlink_proto_init(void) 2915 { 2916 int i; 2917 int err = proto_register(&netlink_proto, 0); 2918 2919 if (err != 0) 2920 goto out; 2921 2922 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2923 err = bpf_iter_register(); 2924 if (err) 2925 goto out; 2926 #endif 2927 2928 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb)); 2929 2930 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2931 if (!nl_table) 2932 goto panic; 2933 2934 for (i = 0; i < MAX_LINKS; i++) { 2935 if (rhashtable_init(&nl_table[i].hash, 2936 &netlink_rhashtable_params) < 0) 2937 goto panic; 2938 } 2939 2940 netlink_add_usersock_entry(); 2941 2942 sock_register(&netlink_family_ops); 2943 register_pernet_subsys(&netlink_net_ops); 2944 register_pernet_subsys(&netlink_tap_net_ops); 2945 /* The netlink device handler may be needed early. */ 2946 rtnetlink_init(); 2947 out: 2948 return err; 2949 panic: 2950 panic("netlink_init: Cannot allocate nl_table\n"); 2951 } 2952 2953 core_initcall(netlink_proto_init); 2954