/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 * 				 use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 * 				 - inc module use count of module that owns
 * 				   the kernel socket in case userspace opens
 * 				   socket of same protocol
 * 				 - remove all module support, since netlink is
 * 				   mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
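/*
 * A small worked example of the two macros above: multicast group n is
 * stored as bit (n - 1) of a "groups" bitmap, so group 1 lives in bit 0 of
 * groups[0].  NLGRPSZ() rounds the group count up to a whole number of
 * longs and yields a size in bytes; with 64-bit longs NLGRPSZ(32) == 8 and
 * NLGRPLONGS(32) == 1, while NLGRPSZ(65) == 16 and NLGRPLONGS(65) == 2.
 */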
struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock		sk;
	u32			pid;
	u32			dst_pid;
	u32			dst_group;
	u32			flags;
	u32			subscriptions;
	u32			ngroups;
	unsigned long		*groups;
	unsigned long		state;
	wait_queue_head_t	wait;
	struct netlink_callback	*cb;
	struct mutex		*cb_mutex;
	struct mutex		cb_def_mutex;
	void			(*netlink_rcv)(struct sk_buff *skb);
	struct module		*module;
};

#define NETLINK_KERNEL_SOCKET	0x1
#define NETLINK_RECV_PKTINFO	0x2

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
	struct hlist_head *table;
	unsigned long rehash_time;

	unsigned int mask;
	unsigned int shift;

	unsigned int entries;
	unsigned int max_shift;

	u32 rnd;
};

struct netlink_table {
	struct nl_pid_hash hash;
	struct hlist_head mc_list;
	unsigned long *listeners;
	unsigned int nl_nonroot;
	unsigned int groups;
	struct mutex *cb_mutex;
	struct module *module;
	int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static u32 netlink_group_mask(u32 group)
{
	return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);
		netlink_destroy_callback(nlk->cb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
		return;
	}

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP: when several writers sleep and the reader wakes them up, all but one
 * immediately hit the write lock and grab all the CPUs. Exclusive sleep
 * solves this, _but_ remember that it adds useless work on UP machines.
 */

static void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}

static void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}
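/*
 * A minimal usage sketch of the table locking scheme above (illustrative
 * only): writers that modify nl_table must "grab" it, which waits for all
 * read-side users to drain; sleepable read-side sections take a usage
 * reference via netlink_lock_table(), while short non-sleeping lookups may
 * take nl_table_lock directly, as netlink_lookup() below does.
 *
 *	netlink_table_grab();
 *	nl_table[protocol].groups = new_groups;	// writer: exclusive access
 *	netlink_table_ungrab();
 *
 *	netlink_lock_table();
 *	// reader: may sleep here, the table cannot be changed meanwhile
 *	netlink_unlock_table();
 */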
static inline struct sock *netlink_lookup(struct net *net, int protocol,
					  u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;
	struct hlist_node *node;

	read_lock(&nl_table_lock);
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(sk, node, head) {
		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
			sock_hold(sk);
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}

static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_ATOMIC);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					 get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(table);
	else
		free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_pid_hash_zalloc(size);
	if (!table)
		return 0;

	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *node, *tmp;

		sk_for_each_safe(sk, node, tmp, &otable[i])
			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
	}

	nl_pid_hash_free(otable, osize);
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
	int avg = hash->entries >> hash->shift;

	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
		return 1;

	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
		nl_pid_hash_rehash(hash, 0);
		return 1;
	}

	return 0;
}
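/*
 * A short worked example of the heuristic above (illustrative): with
 * shift == 4 there are 16 buckets, and avg == entries >> 4 exceeds 1 once
 * the table already holds 32 entries, i.e. an average chain length of two,
 * so the next insert grows it to 32 buckets (mask goes from 15 to 31) up
 * to max_shift.  If growth is not possible but the probed chain (len) is
 * longer than the average and rehash_time has passed, the table is merely
 * rehashed at the same size with a fresh random seed, and rehash_time is
 * pushed ten minutes into the future.
 */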
static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	struct hlist_node *node;
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, node, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		tbl->listeners[i] = mask;
	}
	/*
	 * this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return.
	 */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	struct hlist_node *node;
	int len;

	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	len = 0;
	sk_for_each(osk, node, head) {
		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
			break;
		len++;
	}
	if (node)
		goto err;

	err = -EBUSY;
	if (nlk_sk(sk)->pid)
		goto err;

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	if (len && nl_pid_hash_dilute(hash, len))
		head = nl_pid_hashfn(hash, pid);
	hash->entries++;
	nlk_sk(sk)->pid = pid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}

static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}

static struct proto netlink_proto = {
	.name	  = "NETLINK",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex)
		nlk->cb_mutex = cb_mutex;
	else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	cb_mutex = nl_table[protocol].cb_mutex;
	netlink_unlock_table();

	err = __netlink_create(net, sock, cb_mutex, protocol);
	if (err < 0)
		goto out_module;

	nlk = nlk_sk(sock->sk);
	nlk->module = module;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}
480 */ 481 482 sock->sk = NULL; 483 wake_up_interruptible_all(&nlk->wait); 484 485 skb_queue_purge(&sk->sk_write_queue); 486 487 if (nlk->pid && !nlk->subscriptions) { 488 struct netlink_notify n = { 489 .net = sock_net(sk), 490 .protocol = sk->sk_protocol, 491 .pid = nlk->pid, 492 }; 493 atomic_notifier_call_chain(&netlink_chain, 494 NETLINK_URELEASE, &n); 495 } 496 497 module_put(nlk->module); 498 499 netlink_table_grab(); 500 if (netlink_is_kernel(sk)) { 501 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 502 if (--nl_table[sk->sk_protocol].registered == 0) { 503 kfree(nl_table[sk->sk_protocol].listeners); 504 nl_table[sk->sk_protocol].module = NULL; 505 nl_table[sk->sk_protocol].registered = 0; 506 } 507 } else if (nlk->subscriptions) 508 netlink_update_listeners(sk); 509 netlink_table_ungrab(); 510 511 kfree(nlk->groups); 512 nlk->groups = NULL; 513 514 sock_put(sk); 515 return 0; 516 } 517 518 static int netlink_autobind(struct socket *sock) 519 { 520 struct sock *sk = sock->sk; 521 struct net *net = sock_net(sk); 522 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 523 struct hlist_head *head; 524 struct sock *osk; 525 struct hlist_node *node; 526 s32 pid = current->tgid; 527 int err; 528 static s32 rover = -4097; 529 530 retry: 531 cond_resched(); 532 netlink_table_grab(); 533 head = nl_pid_hashfn(hash, pid); 534 sk_for_each(osk, node, head) { 535 if (!net_eq(sock_net(osk), net)) 536 continue; 537 if (nlk_sk(osk)->pid == pid) { 538 /* Bind collision, search negative pid values. */ 539 pid = rover--; 540 if (rover > -4097) 541 rover = -4097; 542 netlink_table_ungrab(); 543 goto retry; 544 } 545 } 546 netlink_table_ungrab(); 547 548 err = netlink_insert(sk, net, pid); 549 if (err == -EADDRINUSE) 550 goto retry; 551 552 /* If 2 threads race to autobind, that is fine. 
static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	struct sock *osk;
	struct hlist_node *node;
	s32 pid = current->tgid;
	int err;
	static s32 rover = -4097;

retry:
	cond_resched();
	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(osk, node, head) {
		if (!net_eq(sock_net(osk), net))
			continue;
		if (nlk_sk(osk)->pid == pid) {
			/* Bind collision, search negative pid values. */
			pid = rover--;
			if (rover > -4097)
				rover = -4097;
			netlink_table_ungrab();
			goto retry;
		}
	}
	netlink_table_ungrab();

	err = netlink_insert(sk, net, pid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine.  */
	if (err == -EBUSY)
		err = 0;

	return err;
}

static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
	       capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
out_unlock:
	netlink_table_ungrab();
	return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	if (nlk->pid) {
		if (nladdr->nl_pid != nlk->pid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;
}
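/*
 * A minimal userspace sketch of the bind paths above (illustrative only,
 * not part of this file).  Passing nl_pid == 0 triggers netlink_autobind()
 * so the kernel picks a unique port id; nl_groups subscribes to the first
 * 32 multicast groups, while larger group numbers need the
 * NETLINK_ADD_MEMBERSHIP socket option handled further below:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl sa = {
 *		.nl_family = AF_NETLINK,
 *		.nl_pid    = 0,			// let the kernel choose
 *		.nl_groups = RTMGRP_LINK,	// subscribe to link events
 *	};
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */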
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state	= NETLINK_UNCONNECTED;
		nlk->dst_pid	= 0;
		nlk->dst_group	= 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state	= NETLINK_CONNECTED;
		nlk->dst_pid	= nladdr->nl_pid;
		nlk->dst_group	= ffs(nladdr->nl_groups);
	}

	return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_pid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}

static void netlink_overrun(struct sock *sk)
{
	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
		sk->sk_err = ENOBUFS;
		sk->sk_error_report(sk);
	}
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_pid != nlk_sk(ssk)->pid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}
762 */ 763 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 764 long *timeo, struct sock *ssk) 765 { 766 struct netlink_sock *nlk; 767 768 nlk = nlk_sk(sk); 769 770 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 771 test_bit(0, &nlk->state)) { 772 DECLARE_WAITQUEUE(wait, current); 773 if (!*timeo) { 774 if (!ssk || netlink_is_kernel(ssk)) 775 netlink_overrun(sk); 776 sock_put(sk); 777 kfree_skb(skb); 778 return -EAGAIN; 779 } 780 781 __set_current_state(TASK_INTERRUPTIBLE); 782 add_wait_queue(&nlk->wait, &wait); 783 784 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 785 test_bit(0, &nlk->state)) && 786 !sock_flag(sk, SOCK_DEAD)) 787 *timeo = schedule_timeout(*timeo); 788 789 __set_current_state(TASK_RUNNING); 790 remove_wait_queue(&nlk->wait, &wait); 791 sock_put(sk); 792 793 if (signal_pending(current)) { 794 kfree_skb(skb); 795 return sock_intr_errno(*timeo); 796 } 797 return 1; 798 } 799 skb_set_owner_r(skb, sk); 800 return 0; 801 } 802 803 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 804 { 805 int len = skb->len; 806 807 skb_queue_tail(&sk->sk_receive_queue, skb); 808 sk->sk_data_ready(sk, len); 809 sock_put(sk); 810 return len; 811 } 812 813 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 814 { 815 kfree_skb(skb); 816 sock_put(sk); 817 } 818 819 static inline struct sk_buff *netlink_trim(struct sk_buff *skb, 820 gfp_t allocation) 821 { 822 int delta; 823 824 skb_orphan(skb); 825 826 delta = skb->end - skb->tail; 827 if (delta * 2 < skb->truesize) 828 return skb; 829 830 if (skb_shared(skb)) { 831 struct sk_buff *nskb = skb_clone(skb, allocation); 832 if (!nskb) 833 return skb; 834 kfree_skb(skb); 835 skb = nskb; 836 } 837 838 if (!pskb_expand_head(skb, 0, -delta, allocation)) 839 skb->truesize -= delta; 840 841 return skb; 842 } 843 844 static inline void netlink_rcv_wake(struct sock *sk) 845 { 846 struct netlink_sock *nlk = nlk_sk(sk); 847 848 if (skb_queue_empty(&sk->sk_receive_queue)) 849 clear_bit(0, &nlk->state); 850 if (!test_bit(0, &nlk->state)) 851 wake_up_interruptible(&nlk->wait); 852 } 853 854 static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) 855 { 856 int ret; 857 struct netlink_sock *nlk = nlk_sk(sk); 858 859 ret = -ECONNREFUSED; 860 if (nlk->netlink_rcv != NULL) { 861 ret = skb->len; 862 skb_set_owner_r(skb, sk); 863 nlk->netlink_rcv(skb); 864 } 865 kfree_skb(skb); 866 sock_put(sk); 867 return ret; 868 } 869 870 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 871 u32 pid, int nonblock) 872 { 873 struct sock *sk; 874 int err; 875 long timeo; 876 877 skb = netlink_trim(skb, gfp_any()); 878 879 timeo = sock_sndtimeo(ssk, nonblock); 880 retry: 881 sk = netlink_getsockbypid(ssk, pid); 882 if (IS_ERR(sk)) { 883 kfree_skb(skb); 884 return PTR_ERR(sk); 885 } 886 if (netlink_is_kernel(sk)) 887 return netlink_unicast_kernel(sk, skb); 888 889 if (sk_filter(sk, skb)) { 890 err = skb->len; 891 kfree_skb(skb); 892 sock_put(sk); 893 return err; 894 } 895 896 err = netlink_attachskb(sk, skb, &timeo, ssk); 897 if (err == 1) 898 goto retry; 899 if (err) 900 return err; 901 902 return netlink_sendskb(sk, skb); 903 } 904 EXPORT_SYMBOL(netlink_unicast); 905 906 int netlink_has_listeners(struct sock *sk, unsigned int group) 907 { 908 int res = 0; 909 unsigned long *listeners; 910 911 BUG_ON(!netlink_is_kernel(sk)); 912 913 rcu_read_lock(); 914 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 915 916 if (group - 1 < nl_table[sk->sk_protocol].groups) 917 res = test_bit(group - 1, 
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	unsigned long *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	if (group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static inline int netlink_broadcast_deliver(struct sock *sk,
					    struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}

struct netlink_broadcast_data {
	struct sock *exclude_sk;
	struct net *net;
	u32 pid;
	u32 group;
	int failure;
	int congested;
	int delivered;
	gfp_t allocation;
	struct sk_buff *skb, *skb2;
};

static inline int do_one_broadcast(struct sock *sk,
				   struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (!net_eq(sock_net(sk), p->net))
		goto out;

	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if (sk_filter(sk, p->skb2)) {
		kfree_skb(p->skb2);
		p->skb2 = NULL;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		      u32 group, gfp_t allocation)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	kfree_skb(skb);

	netlink_unlock_table();

	if (info.skb2)
		kfree_skb(info.skb2);

	if (info.delivered) {
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast);
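/*
 * A minimal sketch of the multicast send path (illustrative; the group and
 * message constructor are hypothetical).  Event producers typically check
 * netlink_has_listeners() first so no skb is built when nobody subscribed,
 * then hand the message to netlink_broadcast(), which returns 0 on
 * delivery, -ESRCH when there were no listeners and -ENOBUFS on overrun:
 *
 *	if (netlink_has_listeners(nlsk, MY_GRP)) {
 *		struct sk_buff *skb = build_event_skb();	// hypothetical
 *
 *		if (skb)
 *			netlink_broadcast(nlsk, skb, 0, MY_GRP, GFP_KERNEL);
 *	}
 */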
struct netlink_set_err_data {
	struct sock *exclude_sk;
	u32 pid;
	u32 group;
	int code;
};

static inline int do_one_set_err(struct sock *sk,
				 struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (sk == p->exclude_sk)
		goto out;

	if (sock_net(sk) != sock_net(p->exclude_sk))
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return 0;
}

void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct hlist_node *node;
	struct sock *sk;

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.code = code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
}

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
				     unsigned int group,
				     int is_new)
{
	int old, new = !!is_new, subscriptions;

	old = test_bit(group - 1, nlk->groups);
	subscriptions = nlk->subscriptions - old + new;
	if (new)
		__set_bit(group - 1, nlk->groups);
	else
		__clear_bit(group - 1, nlk->groups);
	netlink_update_subscriptions(&nlk->sk, subscriptions);
	netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int val = 0;
	int err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (unsigned int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		netlink_update_socket_mc(nlk, val,
					 optname == NETLINK_ADD_MEMBERSHIP);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
static int netlink_getsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int len, val, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (len < sizeof(int))
			return -EINVAL;
		len = sizeof(int);
		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
		if (put_user(len, optlen) ||
		    put_user(val, optval))
			return -EFAULT;
		err = 0;
		break;
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct nl_pktinfo info;

	info.group = NETLINK_CB(skb).dst_group;
	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
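/*
 * A minimal userspace sketch of the NETLINK_PKTINFO option handled above
 * (illustrative only).  Once enabled, every received message carries a
 * struct nl_pktinfo control message identifying the multicast group it was
 * delivered on:
 *
 *	int on = 1;
 *	struct msghdr msg;	// assumed to be set up for recvmsg()
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_PKTINFO, &on, sizeof(on));
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_NETLINK &&
 *		    cmsg->cmsg_type == NETLINK_PKTINFO) {
 *			struct nl_pktinfo *pi = (void *)CMSG_DATA(cmsg);
 *			// pi->group is the delivering multicast group
 *		}
 */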
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *addr = msg->msg_name;
	u32 dst_pid;
	u32 dst_group;
	struct sk_buff *skb;
	int err;
	struct scm_cookie scm;

	if (msg->msg_flags&MSG_OOB)
		return -EOPNOTSUPP;

	if (NULL == siocb->scm)
		siocb->scm = &scm;
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_group = ffs(addr->nl_groups);
		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
			return -EPERM;
	} else {
		dst_pid = nlk->dst_pid;
		dst_group = nlk->dst_group;
	}

	if (!nlk->pid) {
		err = netlink_autobind(sock);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;
	err = -ENOBUFS;
	skb = alloc_skb(len, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	NETLINK_CB(skb).pid	= nlk->pid;
	NETLINK_CB(skb).dst_group = dst_group;
	NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
	NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
	security_task_getsecid(current, &(NETLINK_CB(skb).sid));
	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so we have to save the
	 * current capabilities and check them when this message is delivered
	 * to the corresponding kernel module.   --ANK (980802)
	 */

	err = -EFAULT;
	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
		kfree_skb(skb);
		goto out;
	}

	err = security_netlink_send(sk, skb);
	if (err) {
		kfree_skb(skb);
		goto out;
	}

	if (dst_group) {
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
	}
	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
	return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len,
			   int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags&MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	if (flags&MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(skb);
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad	= 0;
		addr->nl_pid	= NETLINK_CB(skb).pid;
		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	if (flags & MSG_TRUNC)
		copied = skb->len;
	skb_free_datagram(sk, skb);

	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);
out:
	netlink_rcv_wake(sk);
	return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}

/*
 * We export these functions to other modules. They provide a
 * complete set of kernel non-blocking support for message
 * queueing.
 */
1380 */ 1381 1382 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) 1383 goto out_sock_release_nosk; 1384 1385 sk = sock->sk; 1386 sk_change_net(sk, net); 1387 1388 if (groups < 32) 1389 groups = 32; 1390 1391 listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL); 1392 if (!listeners) 1393 goto out_sock_release; 1394 1395 sk->sk_data_ready = netlink_data_ready; 1396 if (input) 1397 nlk_sk(sk)->netlink_rcv = input; 1398 1399 if (netlink_insert(sk, net, 0)) 1400 goto out_sock_release; 1401 1402 nlk = nlk_sk(sk); 1403 nlk->flags |= NETLINK_KERNEL_SOCKET; 1404 1405 netlink_table_grab(); 1406 if (!nl_table[unit].registered) { 1407 nl_table[unit].groups = groups; 1408 nl_table[unit].listeners = listeners; 1409 nl_table[unit].cb_mutex = cb_mutex; 1410 nl_table[unit].module = module; 1411 nl_table[unit].registered = 1; 1412 } else { 1413 kfree(listeners); 1414 nl_table[unit].registered++; 1415 } 1416 netlink_table_ungrab(); 1417 return sk; 1418 1419 out_sock_release: 1420 kfree(listeners); 1421 netlink_kernel_release(sk); 1422 return NULL; 1423 1424 out_sock_release_nosk: 1425 sock_release(sock); 1426 return NULL; 1427 } 1428 EXPORT_SYMBOL(netlink_kernel_create); 1429 1430 1431 void 1432 netlink_kernel_release(struct sock *sk) 1433 { 1434 sk_release_kernel(sk); 1435 } 1436 EXPORT_SYMBOL(netlink_kernel_release); 1437 1438 1439 /** 1440 * netlink_change_ngroups - change number of multicast groups 1441 * 1442 * This changes the number of multicast groups that are available 1443 * on a certain netlink family. Note that it is not possible to 1444 * change the number of groups to below 32. Also note that it does 1445 * not implicitly call netlink_clear_multicast_users() when the 1446 * number of groups is reduced. 1447 * 1448 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1449 * @groups: The new number of groups. 1450 */ 1451 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1452 { 1453 unsigned long *listeners, *old = NULL; 1454 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1455 int err = 0; 1456 1457 if (groups < 32) 1458 groups = 32; 1459 1460 netlink_table_grab(); 1461 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1462 listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC); 1463 if (!listeners) { 1464 err = -ENOMEM; 1465 goto out_ungrab; 1466 } 1467 old = tbl->listeners; 1468 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1469 rcu_assign_pointer(tbl->listeners, listeners); 1470 } 1471 tbl->groups = groups; 1472 1473 out_ungrab: 1474 netlink_table_ungrab(); 1475 synchronize_rcu(); 1476 kfree(old); 1477 return err; 1478 } 1479 EXPORT_SYMBOL(netlink_change_ngroups); 1480 1481 /** 1482 * netlink_clear_multicast_users - kick off multicast listeners 1483 * 1484 * This function removes all listeners from the given group. 1485 * @ksk: The kernel netlink socket, as returned by 1486 * netlink_kernel_create(). 1487 * @group: The multicast group to clear. 
1488 */ 1489 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1490 { 1491 struct sock *sk; 1492 struct hlist_node *node; 1493 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 1494 1495 netlink_table_grab(); 1496 1497 sk_for_each_bound(sk, node, &tbl->mc_list) 1498 netlink_update_socket_mc(nlk_sk(sk), group, 0); 1499 1500 netlink_table_ungrab(); 1501 } 1502 EXPORT_SYMBOL(netlink_clear_multicast_users); 1503 1504 void netlink_set_nonroot(int protocol, unsigned int flags) 1505 { 1506 if ((unsigned int)protocol < MAX_LINKS) 1507 nl_table[protocol].nl_nonroot = flags; 1508 } 1509 EXPORT_SYMBOL(netlink_set_nonroot); 1510 1511 static void netlink_destroy_callback(struct netlink_callback *cb) 1512 { 1513 if (cb->skb) 1514 kfree_skb(cb->skb); 1515 kfree(cb); 1516 } 1517 1518 /* 1519 * It looks a bit ugly. 1520 * It would be better to create kernel thread. 1521 */ 1522 1523 static int netlink_dump(struct sock *sk) 1524 { 1525 struct netlink_sock *nlk = nlk_sk(sk); 1526 struct netlink_callback *cb; 1527 struct sk_buff *skb; 1528 struct nlmsghdr *nlh; 1529 int len, err = -ENOBUFS; 1530 1531 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); 1532 if (!skb) 1533 goto errout; 1534 1535 mutex_lock(nlk->cb_mutex); 1536 1537 cb = nlk->cb; 1538 if (cb == NULL) { 1539 err = -EINVAL; 1540 goto errout_skb; 1541 } 1542 1543 len = cb->dump(skb, cb); 1544 1545 if (len > 0) { 1546 mutex_unlock(nlk->cb_mutex); 1547 1548 if (sk_filter(sk, skb)) 1549 kfree_skb(skb); 1550 else { 1551 skb_queue_tail(&sk->sk_receive_queue, skb); 1552 sk->sk_data_ready(sk, skb->len); 1553 } 1554 return 0; 1555 } 1556 1557 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 1558 if (!nlh) 1559 goto errout_skb; 1560 1561 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 1562 1563 if (sk_filter(sk, skb)) 1564 kfree_skb(skb); 1565 else { 1566 skb_queue_tail(&sk->sk_receive_queue, skb); 1567 sk->sk_data_ready(sk, skb->len); 1568 } 1569 1570 if (cb->done) 1571 cb->done(cb); 1572 nlk->cb = NULL; 1573 mutex_unlock(nlk->cb_mutex); 1574 1575 netlink_destroy_callback(cb); 1576 return 0; 1577 1578 errout_skb: 1579 mutex_unlock(nlk->cb_mutex); 1580 kfree_skb(skb); 1581 errout: 1582 return err; 1583 } 1584 1585 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1586 struct nlmsghdr *nlh, 1587 int (*dump)(struct sk_buff *skb, 1588 struct netlink_callback *), 1589 int (*done)(struct netlink_callback *)) 1590 { 1591 struct netlink_callback *cb; 1592 struct sock *sk; 1593 struct netlink_sock *nlk; 1594 1595 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1596 if (cb == NULL) 1597 return -ENOBUFS; 1598 1599 cb->dump = dump; 1600 cb->done = done; 1601 cb->nlh = nlh; 1602 atomic_inc(&skb->users); 1603 cb->skb = skb; 1604 1605 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); 1606 if (sk == NULL) { 1607 netlink_destroy_callback(cb); 1608 return -ECONNREFUSED; 1609 } 1610 nlk = nlk_sk(sk); 1611 /* A dump is in progress... */ 1612 mutex_lock(nlk->cb_mutex); 1613 if (nlk->cb) { 1614 mutex_unlock(nlk->cb_mutex); 1615 netlink_destroy_callback(cb); 1616 sock_put(sk); 1617 return -EBUSY; 1618 } 1619 nlk->cb = cb; 1620 mutex_unlock(nlk->cb_mutex); 1621 1622 netlink_dump(sk); 1623 sock_put(sk); 1624 1625 /* We successfully started a dump, by returning -EINTR we 1626 * signal not to send ACK even if it was requested. 
1627 */ 1628 return -EINTR; 1629 } 1630 EXPORT_SYMBOL(netlink_dump_start); 1631 1632 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1633 { 1634 struct sk_buff *skb; 1635 struct nlmsghdr *rep; 1636 struct nlmsgerr *errmsg; 1637 size_t payload = sizeof(*errmsg); 1638 1639 /* error messages get the original request appened */ 1640 if (err) 1641 payload += nlmsg_len(nlh); 1642 1643 skb = nlmsg_new(payload, GFP_KERNEL); 1644 if (!skb) { 1645 struct sock *sk; 1646 1647 sk = netlink_lookup(sock_net(in_skb->sk), 1648 in_skb->sk->sk_protocol, 1649 NETLINK_CB(in_skb).pid); 1650 if (sk) { 1651 sk->sk_err = ENOBUFS; 1652 sk->sk_error_report(sk); 1653 sock_put(sk); 1654 } 1655 return; 1656 } 1657 1658 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1659 NLMSG_ERROR, sizeof(struct nlmsgerr), 0); 1660 errmsg = nlmsg_data(rep); 1661 errmsg->error = err; 1662 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1663 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1664 } 1665 EXPORT_SYMBOL(netlink_ack); 1666 1667 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1668 struct nlmsghdr *)) 1669 { 1670 struct nlmsghdr *nlh; 1671 int err; 1672 1673 while (skb->len >= nlmsg_total_size(0)) { 1674 int msglen; 1675 1676 nlh = nlmsg_hdr(skb); 1677 err = 0; 1678 1679 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1680 return 0; 1681 1682 /* Only requests are handled by the kernel */ 1683 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 1684 goto ack; 1685 1686 /* Skip control messages */ 1687 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 1688 goto ack; 1689 1690 err = cb(skb, nlh); 1691 if (err == -EINTR) 1692 goto skip; 1693 1694 ack: 1695 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1696 netlink_ack(skb, nlh, err); 1697 1698 skip: 1699 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1700 if (msglen > skb->len) 1701 msglen = skb->len; 1702 skb_pull(skb, msglen); 1703 } 1704 1705 return 0; 1706 } 1707 EXPORT_SYMBOL(netlink_rcv_skb); 1708 1709 /** 1710 * nlmsg_notify - send a notification netlink message 1711 * @sk: netlink socket to use 1712 * @skb: notification message 1713 * @pid: destination netlink pid for reports or 0 1714 * @group: destination multicast group or 0 1715 * @report: 1 to report back, 0 to disable 1716 * @flags: allocation flags 1717 */ 1718 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, 1719 unsigned int group, int report, gfp_t flags) 1720 { 1721 int err = 0; 1722 1723 if (group) { 1724 int exclude_pid = 0; 1725 1726 if (report) { 1727 atomic_inc(&skb->users); 1728 exclude_pid = pid; 1729 } 1730 1731 /* errors reported via destination sk->sk_err */ 1732 nlmsg_multicast(sk, skb, exclude_pid, group, flags); 1733 } 1734 1735 if (report) 1736 err = nlmsg_unicast(sk, skb, pid); 1737 1738 return err; 1739 } 1740 EXPORT_SYMBOL(nlmsg_notify); 1741 1742 #ifdef CONFIG_PROC_FS 1743 struct nl_seq_iter { 1744 struct seq_net_private p; 1745 int link; 1746 int hash_idx; 1747 }; 1748 1749 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) 1750 { 1751 struct nl_seq_iter *iter = seq->private; 1752 int i, j; 1753 struct sock *s; 1754 struct hlist_node *node; 1755 loff_t off = 0; 1756 1757 for (i = 0; i < MAX_LINKS; i++) { 1758 struct nl_pid_hash *hash = &nl_table[i].hash; 1759 1760 for (j = 0; j <= hash->mask; j++) { 1761 sk_for_each(s, node, &hash->table[j]) { 1762 if (sock_net(s) != seq_file_net(seq)) 1763 continue; 1764 if (off == pos) { 1765 iter->link = i; 1766 iter->hash_idx = j; 1767 
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(nl_table_lock)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	iter = seq->private;
	s = v;
	do {
		s = sk_next(s);
	} while (s && sock_net(s) != seq_file_net(seq));
	if (s)
		return s;

	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			while (s && sock_net(s) != seq_file_net(seq))
				s = sk_next(s);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(nl_table_lock)
{
	read_unlock(&nl_table_lock);
}


static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq,
			 "sk       Eth Pid    Groups   "
			 "Rmem     Wmem     Dump     Locks\n");
	else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
			   s,
			   s->sk_protocol,
			   nlk->pid,
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   atomic_read(&s->sk_rmem_alloc),
			   atomic_read(&s->sk_wmem_alloc),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt)
			);

	}
	return 0;
}
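/*
 * For illustration, /proc/net/netlink lines produced by the show function
 * above look roughly like this (the pointer values and counters below are
 * made-up examples):
 *
 *	sk       Eth Pid    Groups   Rmem     Wmem     Dump     Locks
 *	f5d09400 0   3546   00000440 0        0        00000000 2
 *	f5e07800 15  0      00000000 0        0        00000000 2
 *
 * "Eth" is the netlink protocol number, "Pid" the socket's port id, and
 * "Groups" the low 32 bits of its multicast subscription bitmap.
 */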
static const struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};


static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
				sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= netlink_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
	.family =	PF_NETLINK,
	.owner =	THIS_MODULE,
	.release =	netlink_release,
	.bind =		netlink_bind,
	.connect =	netlink_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	netlink_getname,
	.poll =		datagram_poll,
	.ioctl =	sock_no_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	netlink_setsockopt,
	.getsockopt =	netlink_getsockopt,
	.sendmsg =	netlink_sendmsg,
	.recvmsg =	netlink_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
		return -ENOMEM;
#endif
	return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "netlink");
#endif
}

static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	if (num_physpages >= (128 * 1024))
		limit = num_physpages >> (21 - PAGE_SHIFT);
	else
		limit = num_physpages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_pid_hash_free(nl_table[i].hash.table,
						 1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;

panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);