/*
 * NETLINK	Kernel-user communication protocol.
 *
 *		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *				added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *				use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *				- inc module use count of module that owns
 *				  the kernel socket in case userspace opens
 *				  socket of same protocol
 *				- remove all module support, since netlink is
 *				  mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))

struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock sk;
	u32 pid;
	u32 dst_pid;
	u32 dst_group;
	u32 flags;
	u32 subscriptions;
	u32 ngroups;
	unsigned long *groups;
	unsigned long state;
	wait_queue_head_t wait;
	struct netlink_callback *cb;
	struct mutex *cb_mutex;
	struct mutex cb_def_mutex;
	void (*netlink_rcv)(struct sk_buff *skb);
	struct module *module;
};

#define NETLINK_KERNEL_SOCKET	0x1
#define NETLINK_RECV_PKTINFO	0x2

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
	struct hlist_head *table;
	unsigned long rehash_time;

	unsigned int mask;
	unsigned int shift;

	unsigned int entries;
	unsigned int max_shift;

	u32 rnd;
};

struct netlink_table {
	struct nl_pid_hash hash;
	struct hlist_head mc_list;
	unsigned long *listeners;
	unsigned int nl_nonroot;
	unsigned int groups;
	struct mutex *cb_mutex;
	struct module *module;
	int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);
		netlink_destroy_callback(nlk->cb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
		return;
	}

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

static void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

static void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}

static inline struct sock *netlink_lookup(struct net *net, int protocol,
					  u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;
	struct hlist_node *node;

	read_lock(&nl_table_lock);
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(sk, node, head) {
		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
			sock_hold(sk);
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}

static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_ATOMIC);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					 get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(table);
	else
		free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_pid_hash_zalloc(size);
	if (!table)
		return 0;

	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *node, *tmp;

		sk_for_each_safe(sk, node, tmp, &otable[i])
			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
	}

	nl_pid_hash_free(otable, osize);
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
	int avg = hash->entries >> hash->shift;

	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
		return 1;

	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
		nl_pid_hash_rehash(hash, 0);
		return 1;
	}

	return 0;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	struct hlist_node *node;
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, node, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		tbl->listeners[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	struct hlist_node *node;
	int len;

	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	len = 0;
	sk_for_each(osk, node, head) {
		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
			break;
		len++;
	}
	if (node)
		goto err;

	err = -EBUSY;
	if (nlk_sk(sk)->pid)
		goto err;

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	if (len && nl_pid_hash_dilute(hash, len))
		head = nl_pid_hashfn(hash, pid);
	hash->entries++;
	nlk_sk(sk)->pid = pid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}

static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex)
		nlk->cb_mutex = cb_mutex;
	else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	cb_mutex = nl_table[protocol].cb_mutex;
	netlink_unlock_table();

	err = __netlink_create(net, sock, cb_mutex, protocol);
	if (err < 0)
		goto out_module;

	local_bh_disable();
	sock_prot_inuse_add(net, &netlink_proto, 1);
	local_bh_enable();

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}

static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

484 */ 485 486 sock->sk = NULL; 487 wake_up_interruptible_all(&nlk->wait); 488 489 skb_queue_purge(&sk->sk_write_queue); 490 491 if (nlk->pid && !nlk->subscriptions) { 492 struct netlink_notify n = { 493 .net = sock_net(sk), 494 .protocol = sk->sk_protocol, 495 .pid = nlk->pid, 496 }; 497 atomic_notifier_call_chain(&netlink_chain, 498 NETLINK_URELEASE, &n); 499 } 500 501 module_put(nlk->module); 502 503 netlink_table_grab(); 504 if (netlink_is_kernel(sk)) { 505 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 506 if (--nl_table[sk->sk_protocol].registered == 0) { 507 kfree(nl_table[sk->sk_protocol].listeners); 508 nl_table[sk->sk_protocol].module = NULL; 509 nl_table[sk->sk_protocol].registered = 0; 510 } 511 } else if (nlk->subscriptions) 512 netlink_update_listeners(sk); 513 netlink_table_ungrab(); 514 515 kfree(nlk->groups); 516 nlk->groups = NULL; 517 518 local_bh_disable(); 519 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 520 local_bh_enable(); 521 sock_put(sk); 522 return 0; 523 } 524 525 static int netlink_autobind(struct socket *sock) 526 { 527 struct sock *sk = sock->sk; 528 struct net *net = sock_net(sk); 529 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 530 struct hlist_head *head; 531 struct sock *osk; 532 struct hlist_node *node; 533 s32 pid = current->tgid; 534 int err; 535 static s32 rover = -4097; 536 537 retry: 538 cond_resched(); 539 netlink_table_grab(); 540 head = nl_pid_hashfn(hash, pid); 541 sk_for_each(osk, node, head) { 542 if (!net_eq(sock_net(osk), net)) 543 continue; 544 if (nlk_sk(osk)->pid == pid) { 545 /* Bind collision, search negative pid values. */ 546 pid = rover--; 547 if (rover > -4097) 548 rover = -4097; 549 netlink_table_ungrab(); 550 goto retry; 551 } 552 } 553 netlink_table_ungrab(); 554 555 err = netlink_insert(sk, net, pid); 556 if (err == -EADDRINUSE) 557 goto retry; 558 559 /* If 2 threads race to autobind, that is fine. 
	if (err == -EBUSY)
		err = 0;

	return err;
}

static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
	       capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
out_unlock:
	netlink_table_ungrab();
	return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	if (nlk->pid) {
		if (nladdr->nl_pid != nlk->pid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;
}

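/*
 * Illustrative sketch, not compiled: how the bind()/membership paths above
 * are typically exercised from userspace.  The group numbers are made up.
 * sockaddr_nl.nl_groups is a 32-bit mask, so only groups 1-32 can be joined
 * at bind() time; larger group numbers go through setsockopt() with
 * NETLINK_ADD_MEMBERSHIP, which ends up in netlink_realloc_groups() and
 * netlink_update_socket_mc() below.
 *
 *	struct sockaddr_nl snl = {
 *		.nl_family = AF_NETLINK,
 *		.nl_groups = 1 << (7 - 1),	// join group 7
 *	};
 *	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 *
 *	int grp = 40;				// a group above 32
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 */
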
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state = NETLINK_UNCONNECTED;
		nlk->dst_pid = 0;
		nlk->dst_group = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state = NETLINK_CONNECTED;
		nlk->dst_pid = nladdr->nl_pid;
		nlk->dst_group = ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_pid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

static void netlink_overrun(struct sock *sk)
{
	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
		sk->sk_err = ENOBUFS;
		sk->sk_error_report(sk);
	}
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_pid != nlk_sk(ssk)->pid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just all
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    test_bit(0, &nlk->state)) {
		DECLARE_WAITQUEUE(wait, current);
		if (!*timeo) {
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(0, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;
	}
	skb_set_owner_r(skb, sk);
	return 0;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, len);
	sock_put(sk);
	return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}

static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
					   gfp_t allocation)
{
	int delta;

	skb_orphan(skb);

	delta = skb->end - skb->tail;
	if (delta * 2 < skb->truesize)
		return skb;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		kfree_skb(skb);
		skb = nskb;
	}

	if (!pskb_expand_head(skb, 0, -delta, allocation))
		skb->truesize -= delta;

	return skb;
}

static inline void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(0, &nlk->state);
	if (!test_bit(0, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}

static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;
		skb_set_owner_r(skb, sk);
		nlk->netlink_rcv(skb);
	}
	kfree_skb(skb);
	sock_put(sk);
	return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 pid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbypid(ssk, pid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb);

	if (sk_filter(sk, skb)) {
		err = skb->len;
		kfree_skb(skb);
		sock_put(sk);
		return err;
	}

	err = netlink_attachskb(sk, skb, &timeo, ssk);
	if (err == 1)
		goto retry;
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	unsigned long *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static inline int netlink_broadcast_deliver(struct sock *sk,
					    struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}

struct netlink_broadcast_data {
	struct sock *exclude_sk;
	struct net *net;
	u32 pid;
	u32 group;
	int failure;
	int congested;
	int delivered;
	gfp_t allocation;
	struct sk_buff *skb, *skb2;
};

static inline int do_one_broadcast(struct sock *sk,
				   struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (!net_eq(sock_net(sk), p->net))
		goto out;

	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if (sk_filter(sk, p->skb2)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		      u32 group, gfp_t allocation)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	kfree_skb(skb);

	netlink_unlock_table();

	if (info.skb2)
		kfree_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast);

struct netlink_set_err_data {
	struct sock *exclude_sk;
	u32 pid;
	u32 group;
	int code;
};

static inline int do_one_set_err(struct sock *sk,
				 struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (sk == p->exclude_sk)
		goto out;

	if (sock_net(sk) != sock_net(p->exclude_sk))
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return 0;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @pid: the PID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 */
void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct hlist_node *node;
	struct sock *sk;

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	/* sk->sk_err wants a positive error value */
	info.code = -code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
}

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

static int netlink_getsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct nl_pktinfo info;

	info.group = NETLINK_CB(skb).dst_group;
	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *addr = msg->msg_name;
	u32 dst_pid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;

	if (msg->msg_flags&MSG_OOB)
		return -EOPNOTSUPP;

	if (NULL == siocb->scm)
		siocb->scm = &scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
			return -EPERM;
	} else {
		dst_pid = nlk->dst_pid;
		dst_group = nlk->dst_group;
	}

	if (!nlk->pid) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	skb = alloc_skb(len, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).pid = nlk->pid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
	NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
	security_task_getsecid(current, &(NETLINK_CB(skb).sid));
	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so that
	   we will have to save current capabilities to
	   check them, when this message will be delivered
	   to corresponding kernel module.   --ANK (980802)
	 */

	err = -EFAULT;
	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
		kfree_skb(skb);
		goto out;
	}

	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	if (dst_group) {
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
	}
	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
	return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len,
			   int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags&MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(skb);
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad = 0;
		addr->nl_pid = NETLINK_CB(skb).pid;
		addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
	skb_free_datagram(sk, skb);

	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);
out:
	netlink_rcv_wake(sk);
	return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}

/*
 *	We export these functions to other modules. They provide a
 *	complete set of kernel non-blocking support for message
 *	queueing.
 */

struct sock *
netlink_kernel_create(struct net *net, int unit, unsigned int groups,
		      void (*input)(struct sk_buff *skb),
		      struct mutex *cb_mutex, struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	unsigned long *listeners = NULL;

	BUG_ON(!nl_table);

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	/*
	 * We have to just have a reference on the net from sk, but don't
	 * get_net it. Besides, we cannot get and then put the net here.
	 * So we create one inside init_net and then move it to net.
	 */

	if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
		goto out_sock_release_nosk;

	sk = sock->sk;
	sk_change_net(sk, net);

	if (groups < 32)
		groups = 32;

	listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk->sk_data_ready = netlink_data_ready;
	if (input)
		nlk_sk(sk)->netlink_rcv = input;

	if (netlink_insert(sk, net, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	netlink_table_grab();
	if (!nl_table[unit].registered) {
		nl_table[unit].groups = groups;
		nl_table[unit].listeners = listeners;
		nl_table[unit].cb_mutex = cb_mutex;
		nl_table[unit].module = module;
		nl_table[unit].registered = 1;
	} else {
		kfree(listeners);
		nl_table[unit].registered++;
	}
	netlink_table_ungrab();
	return sk;

out_sock_release:
	kfree(listeners);
	netlink_kernel_release(sk);
	return NULL;

out_sock_release_nosk:
	sock_release(sock);
	return NULL;
}
EXPORT_SYMBOL(netlink_kernel_create);


void
netlink_kernel_release(struct sock *sk)
{
	sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);

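/*
 * Illustrative sketch, not compiled: typical use of the kernel-side API
 * above by another subsystem.  MY_NETLINK_UNIT, my_doit() and my_input()
 * are hypothetical placeholders, not symbols defined in this file.
 *
 *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		// handle one NLM_F_REQUEST message
 *		return 0;
 *	}
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		// runs from netlink_unicast_kernel(); netlink_rcv_skb()
 *		// walks the messages and sends ACKs where requested
 *		netlink_rcv_skb(skb, my_doit);
 *	}
 *
 *	sk = netlink_kernel_create(&init_net, MY_NETLINK_UNIT, 0,
 *				   my_input, NULL, THIS_MODULE);
 *	...
 *	netlink_kernel_release(sk);
 */
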
/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	unsigned long *listeners, *old = NULL;
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	int err = 0;

	if (groups < 32)
		groups = 32;

	netlink_table_grab();
	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
		listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
		if (!listeners) {
			err = -ENOMEM;
			goto out_ungrab;
		}
		old = tbl->listeners;
		memcpy(listeners, old, NLGRPSZ(tbl->groups));
		rcu_assign_pointer(tbl->listeners, listeners);
	}
	tbl->groups = groups;

out_ungrab:
	netlink_table_ungrab();
	synchronize_rcu();
	kfree(old);
	return err;
}
EXPORT_SYMBOL(netlink_change_ngroups);

/**
 * netlink_clear_multicast_users - kick off multicast listeners
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	struct sock *sk;
	struct hlist_node *node;
	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

	netlink_table_grab();

	sk_for_each_bound(sk, node, &tbl->mc_list)
		netlink_update_socket_mc(nlk_sk(sk), group, 0);

	netlink_table_ungrab();
}
EXPORT_SYMBOL(netlink_clear_multicast_users);

void netlink_set_nonroot(int protocol, unsigned int flags)
{
	if ((unsigned int)protocol < MAX_LINKS)
		nl_table[protocol].nl_nonroot = flags;
}
EXPORT_SYMBOL(netlink_set_nonroot);

static void netlink_destroy_callback(struct netlink_callback *cb)
{
	if (cb->skb)
		kfree_skb(cb->skb);
	kfree(cb);
}

/*
 * It looks a bit ugly.
 * It would be better to create kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len, err = -ENOBUFS;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		goto errout;

	mutex_lock(nlk->cb_mutex);

	cb = nlk->cb;
	if (cb == NULL) {
		err = -EINVAL;
		goto errout_skb;
	}

	len = cb->dump(skb, cb);

	if (len > 0) {
		mutex_unlock(nlk->cb_mutex);

		if (sk_filter(sk, skb))
			kfree_skb(skb);
		else {
			skb_queue_tail(&sk->sk_receive_queue, skb);
			sk->sk_data_ready(sk, skb->len);
		}
		return 0;
	}

	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
	if (!nlh)
		goto errout_skb;

	memcpy(nlmsg_data(nlh), &len, sizeof(len));

	if (sk_filter(sk, skb))
		kfree_skb(skb);
	else {
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
	}

	if (cb->done)
		cb->done(cb);
	nlk->cb = NULL;
	mutex_unlock(nlk->cb_mutex);

	netlink_destroy_callback(cb);
	return 0;

errout_skb:
	mutex_unlock(nlk->cb_mutex);
	kfree_skb(skb);
errout:
	return err;
}

int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb,
				   struct netlink_callback *),
		       int (*done)(struct netlink_callback *))
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);
	/* A dump is in progress... */
	mutex_lock(nlk->cb_mutex);
	if (nlk->cb) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		sock_put(sk);
		return -EBUSY;
	}
	nlk->cb = cb;
	mutex_unlock(nlk->cb_mutex);

	netlink_dump(sk);
	sock_put(sk);

	/* We successfully started a dump, by returning -EINTR we
	 * signal not to send ACK even if it was requested.
	 */
	return -EINTR;
}
EXPORT_SYMBOL(netlink_dump_start);

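/*
 * Illustrative sketch, not compiled: a handler that answers a request by
 * starting a dump.  my_sk, my_dump() and my_done() are hypothetical.  The
 * handler propagates the -EINTR return value so that netlink_rcv_skb()
 * below skips the ACK for a request whose reply arrives as a multipart
 * dump instead.
 *
 *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		if (nlh->nlmsg_flags & NLM_F_DUMP)
 *			return netlink_dump_start(my_sk, skb, nlh,
 *						  my_dump, my_done);
 *		return -EOPNOTSUPP;
 *	}
 */
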
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	size_t payload = sizeof(*errmsg);

	/* error messages get the original request appended */
	if (err)
		payload += nlmsg_len(nlh);

	skb = nlmsg_new(payload, GFP_KERNEL);
	if (!skb) {
		struct sock *sk;

		sk = netlink_lookup(sock_net(in_skb->sk),
				    in_skb->sk->sk_protocol,
				    NETLINK_CB(in_skb).pid);
		if (sk) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
			sock_put(sk);
		}
		return;
	}

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
	errmsg = nlmsg_data(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);

int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
						   struct nlmsghdr *))
{
	struct nlmsghdr *nlh;
	int err;

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen;

		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
			return 0;

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
			goto ack;

		/* Skip control messages */
		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
			goto ack;

		err = cb(skb, nlh);
		if (err == -EINTR)
			goto skip;

ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err)
			netlink_ack(skb, nlh, err);

skip:
		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}

	return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @pid: destination netlink pid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
		 unsigned int group, int report, gfp_t flags)
{
	int err = 0;

	if (group) {
		int exclude_pid = 0;

		if (report) {
			atomic_inc(&skb->users);
			exclude_pid = pid;
		}

		/* errors reported via destination sk->sk_err */
		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
	}

	if (report)
		err = nlmsg_unicast(sk, skb, pid);

	return err;
}
EXPORT_SYMBOL(nlmsg_notify);

#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
	struct seq_net_private p;
	int link;
	int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
	struct nl_seq_iter *iter = seq->private;
	int i, j;
	struct sock *s;
	struct hlist_node *node;
	loff_t off = 0;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (j = 0; j <= hash->mask; j++) {
			sk_for_each(s, node, &hash->table[j]) {
				if (sock_net(s) != seq_file_net(seq))
					continue;
				if (off == pos) {
					iter->link = i;
					iter->hash_idx = j;
					return s;
				}
				++off;
			}
		}
	}
	return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(nl_table_lock)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	iter = seq->private;
	s = v;
	do {
		s = sk_next(s);
	} while (s && sock_net(s) != seq_file_net(seq));
	if (s)
		return s;

	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			while (s && sock_net(s) != seq_file_net(seq))
				s = sk_next(s);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(nl_table_lock)
{
	read_unlock(&nl_table_lock);
}


static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks\n");
	else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
			   s,
			   s->sk_protocol,
			   nlk->pid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   atomic_read(&s->sk_rmem_alloc),
			   atomic_read(&s->sk_wmem_alloc),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt)
			);

	}
	return 0;
}

static const struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};


static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
			    sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= netlink_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
	.family =	PF_NETLINK,
	.owner =	THIS_MODULE,
	.release =	netlink_release,
	.bind =		netlink_bind,
	.connect =	netlink_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	netlink_getname,
	.poll =		datagram_poll,
	.ioctl =	sock_no_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	netlink_setsockopt,
	.getsockopt =	netlink_getsockopt,
	.sendmsg =	netlink_sendmsg,
	.recvmsg =	netlink_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "netlink");
#endif
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	if (num_physpages >= (128 * 1024))
		limit = num_physpages >> (21 - PAGE_SHIFT);
	else
		limit = num_physpages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_pid_hash_free(nl_table[i].hash.table,
						 1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);