1 /* 2 * NETLINK Kernel-user communication protocol. 3 * 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 13 * added netlink_proto_exit 14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 15 * use nlk_sk, as sk->protinfo is on a diet 8) 16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 17 * - inc module use count of module that owns 18 * the kernel socket in case userspace opens 19 * socket of same protocol 20 * - remove all module support, since netlink is 21 * mandatory if CONFIG_NET=y these days 22 */ 23 24 #include <linux/module.h> 25 26 #include <linux/capability.h> 27 #include <linux/kernel.h> 28 #include <linux/init.h> 29 #include <linux/signal.h> 30 #include <linux/sched.h> 31 #include <linux/errno.h> 32 #include <linux/string.h> 33 #include <linux/stat.h> 34 #include <linux/socket.h> 35 #include <linux/un.h> 36 #include <linux/fcntl.h> 37 #include <linux/termios.h> 38 #include <linux/sockios.h> 39 #include <linux/net.h> 40 #include <linux/fs.h> 41 #include <linux/slab.h> 42 #include <asm/uaccess.h> 43 #include <linux/skbuff.h> 44 #include <linux/netdevice.h> 45 #include <linux/rtnetlink.h> 46 #include <linux/proc_fs.h> 47 #include <linux/seq_file.h> 48 #include <linux/notifier.h> 49 #include <linux/security.h> 50 #include <linux/jhash.h> 51 #include <linux/jiffies.h> 52 #include <linux/random.h> 53 #include <linux/bitops.h> 54 #include <linux/mm.h> 55 #include <linux/types.h> 56 #include <linux/audit.h> 57 #include <linux/mutex.h> 58 59 #include <net/net_namespace.h> 60 #include <net/sock.h> 61 #include <net/scm.h> 62 #include <net/netlink.h> 63 64 #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 65 #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) 66 67 struct netlink_sock { 68 /* struct sock has to be the first member of netlink_sock */ 69 struct sock sk; 70 u32 pid; 71 u32 dst_pid; 72 u32 dst_group; 73 u32 flags; 74 u32 subscriptions; 75 u32 ngroups; 76 unsigned long *groups; 77 unsigned long state; 78 wait_queue_head_t wait; 79 struct netlink_callback *cb; 80 struct mutex *cb_mutex; 81 struct mutex cb_def_mutex; 82 void (*netlink_rcv)(struct sk_buff *skb); 83 struct module *module; 84 }; 85 86 #define NETLINK_KERNEL_SOCKET 0x1 87 #define NETLINK_RECV_PKTINFO 0x2 88 89 static inline struct netlink_sock *nlk_sk(struct sock *sk) 90 { 91 return container_of(sk, struct netlink_sock, sk); 92 } 93 94 static inline int netlink_is_kernel(struct sock *sk) 95 { 96 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 97 } 98 99 struct nl_pid_hash { 100 struct hlist_head *table; 101 unsigned long rehash_time; 102 103 unsigned int mask; 104 unsigned int shift; 105 106 unsigned int entries; 107 unsigned int max_shift; 108 109 u32 rnd; 110 }; 111 112 struct netlink_table { 113 struct nl_pid_hash hash; 114 struct hlist_head mc_list; 115 unsigned long *listeners; 116 unsigned int nl_nonroot; 117 unsigned int groups; 118 struct mutex *cb_mutex; 119 struct module *module; 120 int registered; 121 }; 122 123 static struct netlink_table *nl_table; 124 125 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 126 127 static int netlink_dump(struct sock *sk); 128 static void netlink_destroy_callback(struct netlink_callback *cb); 129 130 static DEFINE_RWLOCK(nl_table_lock); 131 static atomic_t nl_table_users = ATOMIC_INIT(0); 132 133 static ATOMIC_NOTIFIER_HEAD(netlink_chain); 134 135 static u32 netlink_group_mask(u32 group) 136 { 137 return group ? 1 << (group - 1) : 0; 138 } 139 140 static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) 141 { 142 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; 143 } 144 145 static void netlink_sock_destruct(struct sock *sk) 146 { 147 struct netlink_sock *nlk = nlk_sk(sk); 148 149 if (nlk->cb) { 150 if (nlk->cb->done) 151 nlk->cb->done(nlk->cb); 152 netlink_destroy_callback(nlk->cb); 153 } 154 155 skb_queue_purge(&sk->sk_receive_queue); 156 157 if (!sock_flag(sk, SOCK_DEAD)) { 158 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 159 return; 160 } 161 162 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 163 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 164 WARN_ON(nlk_sk(sk)->groups); 165 } 166 167 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 168 * SMP. Look, when several writers sleep and reader wakes them up, all but one 169 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 170 * this, _but_ remember, it adds useless work on UP machines. 171 */ 172 173 static void netlink_table_grab(void) 174 __acquires(nl_table_lock) 175 { 176 write_lock_irq(&nl_table_lock); 177 178 if (atomic_read(&nl_table_users)) { 179 DECLARE_WAITQUEUE(wait, current); 180 181 add_wait_queue_exclusive(&nl_table_wait, &wait); 182 for (;;) { 183 set_current_state(TASK_UNINTERRUPTIBLE); 184 if (atomic_read(&nl_table_users) == 0) 185 break; 186 write_unlock_irq(&nl_table_lock); 187 schedule(); 188 write_lock_irq(&nl_table_lock); 189 } 190 191 __set_current_state(TASK_RUNNING); 192 remove_wait_queue(&nl_table_wait, &wait); 193 } 194 } 195 196 static void netlink_table_ungrab(void) 197 __releases(nl_table_lock) 198 { 199 write_unlock_irq(&nl_table_lock); 200 wake_up(&nl_table_wait); 201 } 202 203 static inline void 204 netlink_lock_table(void) 205 { 206 /* read_lock() synchronizes us to netlink_table_grab */ 207 208 read_lock(&nl_table_lock); 209 atomic_inc(&nl_table_users); 210 read_unlock(&nl_table_lock); 211 } 212 213 static inline void 214 netlink_unlock_table(void) 215 { 216 if (atomic_dec_and_test(&nl_table_users)) 217 wake_up(&nl_table_wait); 218 } 219 220 static inline struct sock *netlink_lookup(struct net *net, int protocol, 221 u32 pid) 222 { 223 struct nl_pid_hash *hash = &nl_table[protocol].hash; 224 struct hlist_head *head; 225 struct sock *sk; 226 struct hlist_node *node; 227 228 read_lock(&nl_table_lock); 229 head = nl_pid_hashfn(hash, pid); 230 sk_for_each(sk, node, head) { 231 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { 232 sock_hold(sk); 233 goto found; 234 } 235 } 236 sk = NULL; 237 found: 238 read_unlock(&nl_table_lock); 239 return sk; 240 } 241 242 static inline struct hlist_head *nl_pid_hash_zalloc(size_t size) 243 { 244 if (size <= PAGE_SIZE) 245 return kzalloc(size, GFP_ATOMIC); 246 else 247 return (struct hlist_head *) 248 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 249 get_order(size)); 250 } 251 252 static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) 253 { 254 if (size <= PAGE_SIZE) 255 kfree(table); 256 else 257 free_pages((unsigned long)table, get_order(size)); 258 } 259 260 static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) 261 { 262 unsigned int omask, mask, shift; 263 size_t osize, size; 264 struct hlist_head *otable, *table; 265 int i; 266 267 omask = mask = hash->mask; 268 osize = size = (mask + 1) * sizeof(*table); 269 shift = hash->shift; 270 271 if (grow) { 272 if (++shift > hash->max_shift) 273 return 0; 274 mask = mask * 2 + 1; 275 size *= 2; 276 } 277 278 table = nl_pid_hash_zalloc(size); 279 if (!table) 280 return 0; 281 282 otable = hash->table; 283 hash->table = table; 284 hash->mask = mask; 285 hash->shift = shift; 286 get_random_bytes(&hash->rnd, sizeof(hash->rnd)); 287 288 for (i = 0; i <= omask; i++) { 289 struct sock *sk; 290 struct hlist_node *node, *tmp; 291 292 sk_for_each_safe(sk, node, tmp, &otable[i]) 293 __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); 294 } 295 296 nl_pid_hash_free(otable, osize); 297 hash->rehash_time = jiffies + 10 * 60 * HZ; 298 return 1; 299 } 300 301 static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) 302 { 303 int avg = hash->entries >> hash->shift; 304 305 if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) 306 return 1; 307 308 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { 309 nl_pid_hash_rehash(hash, 0); 310 return 1; 311 } 312 313 return 0; 314 } 315 316 static const struct proto_ops netlink_ops; 317 318 static void 319 netlink_update_listeners(struct sock *sk) 320 { 321 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 322 struct hlist_node *node; 323 unsigned long mask; 324 unsigned int i; 325 326 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 327 mask = 0; 328 sk_for_each_bound(sk, node, &tbl->mc_list) { 329 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 330 mask |= nlk_sk(sk)->groups[i]; 331 } 332 tbl->listeners[i] = mask; 333 } 334 /* this function is only called with the netlink table "grabbed", which 335 * makes sure updates are visible before bind or setsockopt return. */ 336 } 337 338 static int netlink_insert(struct sock *sk, struct net *net, u32 pid) 339 { 340 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 341 struct hlist_head *head; 342 int err = -EADDRINUSE; 343 struct sock *osk; 344 struct hlist_node *node; 345 int len; 346 347 netlink_table_grab(); 348 head = nl_pid_hashfn(hash, pid); 349 len = 0; 350 sk_for_each(osk, node, head) { 351 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) 352 break; 353 len++; 354 } 355 if (node) 356 goto err; 357 358 err = -EBUSY; 359 if (nlk_sk(sk)->pid) 360 goto err; 361 362 err = -ENOMEM; 363 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) 364 goto err; 365 366 if (len && nl_pid_hash_dilute(hash, len)) 367 head = nl_pid_hashfn(hash, pid); 368 hash->entries++; 369 nlk_sk(sk)->pid = pid; 370 sk_add_node(sk, head); 371 err = 0; 372 373 err: 374 netlink_table_ungrab(); 375 return err; 376 } 377 378 static void netlink_remove(struct sock *sk) 379 { 380 netlink_table_grab(); 381 if (sk_del_node_init(sk)) 382 nl_table[sk->sk_protocol].hash.entries--; 383 if (nlk_sk(sk)->subscriptions) 384 __sk_del_bind_node(sk); 385 netlink_table_ungrab(); 386 } 387 388 static struct proto netlink_proto = { 389 .name = "NETLINK", 390 .owner = THIS_MODULE, 391 .obj_size = sizeof(struct netlink_sock), 392 }; 393 394 static int __netlink_create(struct net *net, struct socket *sock, 395 struct mutex *cb_mutex, int protocol) 396 { 397 struct sock *sk; 398 struct netlink_sock *nlk; 399 400 sock->ops = &netlink_ops; 401 402 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); 403 if (!sk) 404 return -ENOMEM; 405 406 sock_init_data(sock, sk); 407 408 nlk = nlk_sk(sk); 409 if (cb_mutex) 410 nlk->cb_mutex = cb_mutex; 411 else { 412 nlk->cb_mutex = &nlk->cb_def_mutex; 413 mutex_init(nlk->cb_mutex); 414 } 415 init_waitqueue_head(&nlk->wait); 416 417 sk->sk_destruct = netlink_sock_destruct; 418 sk->sk_protocol = protocol; 419 return 0; 420 } 421 422 static int netlink_create(struct net *net, struct socket *sock, int protocol) 423 { 424 struct module *module = NULL; 425 struct mutex *cb_mutex; 426 struct netlink_sock *nlk; 427 int err = 0; 428 429 sock->state = SS_UNCONNECTED; 430 431 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 432 return -ESOCKTNOSUPPORT; 433 434 if (protocol < 0 || protocol >= MAX_LINKS) 435 return -EPROTONOSUPPORT; 436 437 netlink_lock_table(); 438 #ifdef CONFIG_MODULES 439 if (!nl_table[protocol].registered) { 440 netlink_unlock_table(); 441 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 442 netlink_lock_table(); 443 } 444 #endif 445 if (nl_table[protocol].registered && 446 try_module_get(nl_table[protocol].module)) 447 module = nl_table[protocol].module; 448 cb_mutex = nl_table[protocol].cb_mutex; 449 netlink_unlock_table(); 450 451 err = __netlink_create(net, sock, cb_mutex, protocol); 452 if (err < 0) 453 goto out_module; 454 455 local_bh_disable(); 456 sock_prot_inuse_add(net, &netlink_proto, 1); 457 local_bh_enable(); 458 459 nlk = nlk_sk(sock->sk); 460 nlk->module = module; 461 out: 462 return err; 463 464 out_module: 465 module_put(module); 466 goto out; 467 } 468 469 static int netlink_release(struct socket *sock) 470 { 471 struct sock *sk = sock->sk; 472 struct netlink_sock *nlk; 473 474 if (!sk) 475 return 0; 476 477 netlink_remove(sk); 478 sock_orphan(sk); 479 nlk = nlk_sk(sk); 480 481 /* 482 * OK. Socket is unlinked, any packets that arrive now 483 * will be purged. 484 */ 485 486 sock->sk = NULL; 487 wake_up_interruptible_all(&nlk->wait); 488 489 skb_queue_purge(&sk->sk_write_queue); 490 491 if (nlk->pid && !nlk->subscriptions) { 492 struct netlink_notify n = { 493 .net = sock_net(sk), 494 .protocol = sk->sk_protocol, 495 .pid = nlk->pid, 496 }; 497 atomic_notifier_call_chain(&netlink_chain, 498 NETLINK_URELEASE, &n); 499 } 500 501 module_put(nlk->module); 502 503 netlink_table_grab(); 504 if (netlink_is_kernel(sk)) { 505 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 506 if (--nl_table[sk->sk_protocol].registered == 0) { 507 kfree(nl_table[sk->sk_protocol].listeners); 508 nl_table[sk->sk_protocol].module = NULL; 509 nl_table[sk->sk_protocol].registered = 0; 510 } 511 } else if (nlk->subscriptions) 512 netlink_update_listeners(sk); 513 netlink_table_ungrab(); 514 515 kfree(nlk->groups); 516 nlk->groups = NULL; 517 518 local_bh_disable(); 519 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 520 local_bh_enable(); 521 sock_put(sk); 522 return 0; 523 } 524 525 static int netlink_autobind(struct socket *sock) 526 { 527 struct sock *sk = sock->sk; 528 struct net *net = sock_net(sk); 529 struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; 530 struct hlist_head *head; 531 struct sock *osk; 532 struct hlist_node *node; 533 s32 pid = current->tgid; 534 int err; 535 static s32 rover = -4097; 536 537 retry: 538 cond_resched(); 539 netlink_table_grab(); 540 head = nl_pid_hashfn(hash, pid); 541 sk_for_each(osk, node, head) { 542 if (!net_eq(sock_net(osk), net)) 543 continue; 544 if (nlk_sk(osk)->pid == pid) { 545 /* Bind collision, search negative pid values. */ 546 pid = rover--; 547 if (rover > -4097) 548 rover = -4097; 549 netlink_table_ungrab(); 550 goto retry; 551 } 552 } 553 netlink_table_ungrab(); 554 555 err = netlink_insert(sk, net, pid); 556 if (err == -EADDRINUSE) 557 goto retry; 558 559 /* If 2 threads race to autobind, that is fine. */ 560 if (err == -EBUSY) 561 err = 0; 562 563 return err; 564 } 565 566 static inline int netlink_capable(struct socket *sock, unsigned int flag) 567 { 568 return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || 569 capable(CAP_NET_ADMIN); 570 } 571 572 static void 573 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 574 { 575 struct netlink_sock *nlk = nlk_sk(sk); 576 577 if (nlk->subscriptions && !subscriptions) 578 __sk_del_bind_node(sk); 579 else if (!nlk->subscriptions && subscriptions) 580 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 581 nlk->subscriptions = subscriptions; 582 } 583 584 static int netlink_realloc_groups(struct sock *sk) 585 { 586 struct netlink_sock *nlk = nlk_sk(sk); 587 unsigned int groups; 588 unsigned long *new_groups; 589 int err = 0; 590 591 netlink_table_grab(); 592 593 groups = nl_table[sk->sk_protocol].groups; 594 if (!nl_table[sk->sk_protocol].registered) { 595 err = -ENOENT; 596 goto out_unlock; 597 } 598 599 if (nlk->ngroups >= groups) 600 goto out_unlock; 601 602 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 603 if (new_groups == NULL) { 604 err = -ENOMEM; 605 goto out_unlock; 606 } 607 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 608 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 609 610 nlk->groups = new_groups; 611 nlk->ngroups = groups; 612 out_unlock: 613 netlink_table_ungrab(); 614 return err; 615 } 616 617 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 618 int addr_len) 619 { 620 struct sock *sk = sock->sk; 621 struct net *net = sock_net(sk); 622 struct netlink_sock *nlk = nlk_sk(sk); 623 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 624 int err; 625 626 if (nladdr->nl_family != AF_NETLINK) 627 return -EINVAL; 628 629 /* Only superuser is allowed to listen multicasts */ 630 if (nladdr->nl_groups) { 631 if (!netlink_capable(sock, NL_NONROOT_RECV)) 632 return -EPERM; 633 err = netlink_realloc_groups(sk); 634 if (err) 635 return err; 636 } 637 638 if (nlk->pid) { 639 if (nladdr->nl_pid != nlk->pid) 640 return -EINVAL; 641 } else { 642 err = nladdr->nl_pid ? 643 netlink_insert(sk, net, nladdr->nl_pid) : 644 netlink_autobind(sock); 645 if (err) 646 return err; 647 } 648 649 if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 650 return 0; 651 652 netlink_table_grab(); 653 netlink_update_subscriptions(sk, nlk->subscriptions + 654 hweight32(nladdr->nl_groups) - 655 hweight32(nlk->groups[0])); 656 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; 657 netlink_update_listeners(sk); 658 netlink_table_ungrab(); 659 660 return 0; 661 } 662 663 static int netlink_connect(struct socket *sock, struct sockaddr *addr, 664 int alen, int flags) 665 { 666 int err = 0; 667 struct sock *sk = sock->sk; 668 struct netlink_sock *nlk = nlk_sk(sk); 669 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 670 671 if (addr->sa_family == AF_UNSPEC) { 672 sk->sk_state = NETLINK_UNCONNECTED; 673 nlk->dst_pid = 0; 674 nlk->dst_group = 0; 675 return 0; 676 } 677 if (addr->sa_family != AF_NETLINK) 678 return -EINVAL; 679 680 /* Only superuser is allowed to send multicasts */ 681 if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) 682 return -EPERM; 683 684 if (!nlk->pid) 685 err = netlink_autobind(sock); 686 687 if (err == 0) { 688 sk->sk_state = NETLINK_CONNECTED; 689 nlk->dst_pid = nladdr->nl_pid; 690 nlk->dst_group = ffs(nladdr->nl_groups); 691 } 692 693 return err; 694 } 695 696 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 697 int *addr_len, int peer) 698 { 699 struct sock *sk = sock->sk; 700 struct netlink_sock *nlk = nlk_sk(sk); 701 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 702 703 nladdr->nl_family = AF_NETLINK; 704 nladdr->nl_pad = 0; 705 *addr_len = sizeof(*nladdr); 706 707 if (peer) { 708 nladdr->nl_pid = nlk->dst_pid; 709 nladdr->nl_groups = netlink_group_mask(nlk->dst_group); 710 } else { 711 nladdr->nl_pid = nlk->pid; 712 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 713 } 714 return 0; 715 } 716 717 static void netlink_overrun(struct sock *sk) 718 { 719 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { 720 sk->sk_err = ENOBUFS; 721 sk->sk_error_report(sk); 722 } 723 } 724 725 static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) 726 { 727 struct sock *sock; 728 struct netlink_sock *nlk; 729 730 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); 731 if (!sock) 732 return ERR_PTR(-ECONNREFUSED); 733 734 /* Don't bother queuing skb if kernel socket has no input function */ 735 nlk = nlk_sk(sock); 736 if (sock->sk_state == NETLINK_CONNECTED && 737 nlk->dst_pid != nlk_sk(ssk)->pid) { 738 sock_put(sock); 739 return ERR_PTR(-ECONNREFUSED); 740 } 741 return sock; 742 } 743 744 struct sock *netlink_getsockbyfilp(struct file *filp) 745 { 746 struct inode *inode = filp->f_path.dentry->d_inode; 747 struct sock *sock; 748 749 if (!S_ISSOCK(inode->i_mode)) 750 return ERR_PTR(-ENOTSOCK); 751 752 sock = SOCKET_I(inode)->sk; 753 if (sock->sk_family != AF_NETLINK) 754 return ERR_PTR(-EINVAL); 755 756 sock_hold(sock); 757 return sock; 758 } 759 760 /* 761 * Attach a skb to a netlink socket. 762 * The caller must hold a reference to the destination socket. On error, the 763 * reference is dropped. The skb is not send to the destination, just all 764 * all error checks are performed and memory in the queue is reserved. 765 * Return values: 766 * < 0: error. skb freed, reference to sock dropped. 767 * 0: continue 768 * 1: repeat lookup - reference dropped while waiting for socket memory. 769 */ 770 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 771 long *timeo, struct sock *ssk) 772 { 773 struct netlink_sock *nlk; 774 775 nlk = nlk_sk(sk); 776 777 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 778 test_bit(0, &nlk->state)) { 779 DECLARE_WAITQUEUE(wait, current); 780 if (!*timeo) { 781 if (!ssk || netlink_is_kernel(ssk)) 782 netlink_overrun(sk); 783 sock_put(sk); 784 kfree_skb(skb); 785 return -EAGAIN; 786 } 787 788 __set_current_state(TASK_INTERRUPTIBLE); 789 add_wait_queue(&nlk->wait, &wait); 790 791 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 792 test_bit(0, &nlk->state)) && 793 !sock_flag(sk, SOCK_DEAD)) 794 *timeo = schedule_timeout(*timeo); 795 796 __set_current_state(TASK_RUNNING); 797 remove_wait_queue(&nlk->wait, &wait); 798 sock_put(sk); 799 800 if (signal_pending(current)) { 801 kfree_skb(skb); 802 return sock_intr_errno(*timeo); 803 } 804 return 1; 805 } 806 skb_set_owner_r(skb, sk); 807 return 0; 808 } 809 810 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 811 { 812 int len = skb->len; 813 814 skb_queue_tail(&sk->sk_receive_queue, skb); 815 sk->sk_data_ready(sk, len); 816 sock_put(sk); 817 return len; 818 } 819 820 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 821 { 822 kfree_skb(skb); 823 sock_put(sk); 824 } 825 826 static inline struct sk_buff *netlink_trim(struct sk_buff *skb, 827 gfp_t allocation) 828 { 829 int delta; 830 831 skb_orphan(skb); 832 833 delta = skb->end - skb->tail; 834 if (delta * 2 < skb->truesize) 835 return skb; 836 837 if (skb_shared(skb)) { 838 struct sk_buff *nskb = skb_clone(skb, allocation); 839 if (!nskb) 840 return skb; 841 kfree_skb(skb); 842 skb = nskb; 843 } 844 845 if (!pskb_expand_head(skb, 0, -delta, allocation)) 846 skb->truesize -= delta; 847 848 return skb; 849 } 850 851 static inline void netlink_rcv_wake(struct sock *sk) 852 { 853 struct netlink_sock *nlk = nlk_sk(sk); 854 855 if (skb_queue_empty(&sk->sk_receive_queue)) 856 clear_bit(0, &nlk->state); 857 if (!test_bit(0, &nlk->state)) 858 wake_up_interruptible(&nlk->wait); 859 } 860 861 static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) 862 { 863 int ret; 864 struct netlink_sock *nlk = nlk_sk(sk); 865 866 ret = -ECONNREFUSED; 867 if (nlk->netlink_rcv != NULL) { 868 ret = skb->len; 869 skb_set_owner_r(skb, sk); 870 nlk->netlink_rcv(skb); 871 } 872 kfree_skb(skb); 873 sock_put(sk); 874 return ret; 875 } 876 877 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 878 u32 pid, int nonblock) 879 { 880 struct sock *sk; 881 int err; 882 long timeo; 883 884 skb = netlink_trim(skb, gfp_any()); 885 886 timeo = sock_sndtimeo(ssk, nonblock); 887 retry: 888 sk = netlink_getsockbypid(ssk, pid); 889 if (IS_ERR(sk)) { 890 kfree_skb(skb); 891 return PTR_ERR(sk); 892 } 893 if (netlink_is_kernel(sk)) 894 return netlink_unicast_kernel(sk, skb); 895 896 if (sk_filter(sk, skb)) { 897 err = skb->len; 898 kfree_skb(skb); 899 sock_put(sk); 900 return err; 901 } 902 903 err = netlink_attachskb(sk, skb, &timeo, ssk); 904 if (err == 1) 905 goto retry; 906 if (err) 907 return err; 908 909 return netlink_sendskb(sk, skb); 910 } 911 EXPORT_SYMBOL(netlink_unicast); 912 913 int netlink_has_listeners(struct sock *sk, unsigned int group) 914 { 915 int res = 0; 916 unsigned long *listeners; 917 918 BUG_ON(!netlink_is_kernel(sk)); 919 920 rcu_read_lock(); 921 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 922 923 if (group - 1 < nl_table[sk->sk_protocol].groups) 924 res = test_bit(group - 1, listeners); 925 926 rcu_read_unlock(); 927 928 return res; 929 } 930 EXPORT_SYMBOL_GPL(netlink_has_listeners); 931 932 static inline int netlink_broadcast_deliver(struct sock *sk, 933 struct sk_buff *skb) 934 { 935 struct netlink_sock *nlk = nlk_sk(sk); 936 937 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 938 !test_bit(0, &nlk->state)) { 939 skb_set_owner_r(skb, sk); 940 skb_queue_tail(&sk->sk_receive_queue, skb); 941 sk->sk_data_ready(sk, skb->len); 942 return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; 943 } 944 return -1; 945 } 946 947 struct netlink_broadcast_data { 948 struct sock *exclude_sk; 949 struct net *net; 950 u32 pid; 951 u32 group; 952 int failure; 953 int congested; 954 int delivered; 955 gfp_t allocation; 956 struct sk_buff *skb, *skb2; 957 }; 958 959 static inline int do_one_broadcast(struct sock *sk, 960 struct netlink_broadcast_data *p) 961 { 962 struct netlink_sock *nlk = nlk_sk(sk); 963 int val; 964 965 if (p->exclude_sk == sk) 966 goto out; 967 968 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || 969 !test_bit(p->group - 1, nlk->groups)) 970 goto out; 971 972 if (!net_eq(sock_net(sk), p->net)) 973 goto out; 974 975 if (p->failure) { 976 netlink_overrun(sk); 977 goto out; 978 } 979 980 sock_hold(sk); 981 if (p->skb2 == NULL) { 982 if (skb_shared(p->skb)) { 983 p->skb2 = skb_clone(p->skb, p->allocation); 984 } else { 985 p->skb2 = skb_get(p->skb); 986 /* 987 * skb ownership may have been set when 988 * delivered to a previous socket. 989 */ 990 skb_orphan(p->skb2); 991 } 992 } 993 if (p->skb2 == NULL) { 994 netlink_overrun(sk); 995 /* Clone failed. Notify ALL listeners. */ 996 p->failure = 1; 997 } else if (sk_filter(sk, p->skb2)) { 998 kfree_skb(p->skb2); 999 p->skb2 = NULL; 1000 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1001 netlink_overrun(sk); 1002 } else { 1003 p->congested |= val; 1004 p->delivered = 1; 1005 p->skb2 = NULL; 1006 } 1007 sock_put(sk); 1008 1009 out: 1010 return 0; 1011 } 1012 1013 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, 1014 u32 group, gfp_t allocation) 1015 { 1016 struct net *net = sock_net(ssk); 1017 struct netlink_broadcast_data info; 1018 struct hlist_node *node; 1019 struct sock *sk; 1020 1021 skb = netlink_trim(skb, allocation); 1022 1023 info.exclude_sk = ssk; 1024 info.net = net; 1025 info.pid = pid; 1026 info.group = group; 1027 info.failure = 0; 1028 info.congested = 0; 1029 info.delivered = 0; 1030 info.allocation = allocation; 1031 info.skb = skb; 1032 info.skb2 = NULL; 1033 1034 /* While we sleep in clone, do not allow to change socket list */ 1035 1036 netlink_lock_table(); 1037 1038 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 1039 do_one_broadcast(sk, &info); 1040 1041 kfree_skb(skb); 1042 1043 netlink_unlock_table(); 1044 1045 if (info.skb2) 1046 kfree_skb(info.skb2); 1047 1048 if (info.delivered) { 1049 if (info.congested && (allocation & __GFP_WAIT)) 1050 yield(); 1051 return 0; 1052 } 1053 if (info.failure) 1054 return -ENOBUFS; 1055 return -ESRCH; 1056 } 1057 EXPORT_SYMBOL(netlink_broadcast); 1058 1059 struct netlink_set_err_data { 1060 struct sock *exclude_sk; 1061 u32 pid; 1062 u32 group; 1063 int code; 1064 }; 1065 1066 static inline int do_one_set_err(struct sock *sk, 1067 struct netlink_set_err_data *p) 1068 { 1069 struct netlink_sock *nlk = nlk_sk(sk); 1070 1071 if (sk == p->exclude_sk) 1072 goto out; 1073 1074 if (sock_net(sk) != sock_net(p->exclude_sk)) 1075 goto out; 1076 1077 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || 1078 !test_bit(p->group - 1, nlk->groups)) 1079 goto out; 1080 1081 sk->sk_err = p->code; 1082 sk->sk_error_report(sk); 1083 out: 1084 return 0; 1085 } 1086 1087 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) 1088 { 1089 struct netlink_set_err_data info; 1090 struct hlist_node *node; 1091 struct sock *sk; 1092 1093 info.exclude_sk = ssk; 1094 info.pid = pid; 1095 info.group = group; 1096 info.code = code; 1097 1098 read_lock(&nl_table_lock); 1099 1100 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 1101 do_one_set_err(sk, &info); 1102 1103 read_unlock(&nl_table_lock); 1104 } 1105 1106 /* must be called with netlink table grabbed */ 1107 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1108 unsigned int group, 1109 int is_new) 1110 { 1111 int old, new = !!is_new, subscriptions; 1112 1113 old = test_bit(group - 1, nlk->groups); 1114 subscriptions = nlk->subscriptions - old + new; 1115 if (new) 1116 __set_bit(group - 1, nlk->groups); 1117 else 1118 __clear_bit(group - 1, nlk->groups); 1119 netlink_update_subscriptions(&nlk->sk, subscriptions); 1120 netlink_update_listeners(&nlk->sk); 1121 } 1122 1123 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1124 char __user *optval, int optlen) 1125 { 1126 struct sock *sk = sock->sk; 1127 struct netlink_sock *nlk = nlk_sk(sk); 1128 unsigned int val = 0; 1129 int err; 1130 1131 if (level != SOL_NETLINK) 1132 return -ENOPROTOOPT; 1133 1134 if (optlen >= sizeof(int) && 1135 get_user(val, (unsigned int __user *)optval)) 1136 return -EFAULT; 1137 1138 switch (optname) { 1139 case NETLINK_PKTINFO: 1140 if (val) 1141 nlk->flags |= NETLINK_RECV_PKTINFO; 1142 else 1143 nlk->flags &= ~NETLINK_RECV_PKTINFO; 1144 err = 0; 1145 break; 1146 case NETLINK_ADD_MEMBERSHIP: 1147 case NETLINK_DROP_MEMBERSHIP: { 1148 if (!netlink_capable(sock, NL_NONROOT_RECV)) 1149 return -EPERM; 1150 err = netlink_realloc_groups(sk); 1151 if (err) 1152 return err; 1153 if (!val || val - 1 >= nlk->ngroups) 1154 return -EINVAL; 1155 netlink_table_grab(); 1156 netlink_update_socket_mc(nlk, val, 1157 optname == NETLINK_ADD_MEMBERSHIP); 1158 netlink_table_ungrab(); 1159 err = 0; 1160 break; 1161 } 1162 default: 1163 err = -ENOPROTOOPT; 1164 } 1165 return err; 1166 } 1167 1168 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1169 char __user *optval, int __user *optlen) 1170 { 1171 struct sock *sk = sock->sk; 1172 struct netlink_sock *nlk = nlk_sk(sk); 1173 int len, val, err; 1174 1175 if (level != SOL_NETLINK) 1176 return -ENOPROTOOPT; 1177 1178 if (get_user(len, optlen)) 1179 return -EFAULT; 1180 if (len < 0) 1181 return -EINVAL; 1182 1183 switch (optname) { 1184 case NETLINK_PKTINFO: 1185 if (len < sizeof(int)) 1186 return -EINVAL; 1187 len = sizeof(int); 1188 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; 1189 if (put_user(len, optlen) || 1190 put_user(val, optval)) 1191 return -EFAULT; 1192 err = 0; 1193 break; 1194 default: 1195 err = -ENOPROTOOPT; 1196 } 1197 return err; 1198 } 1199 1200 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1201 { 1202 struct nl_pktinfo info; 1203 1204 info.group = NETLINK_CB(skb).dst_group; 1205 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1206 } 1207 1208 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 1209 struct msghdr *msg, size_t len) 1210 { 1211 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1212 struct sock *sk = sock->sk; 1213 struct netlink_sock *nlk = nlk_sk(sk); 1214 struct sockaddr_nl *addr = msg->msg_name; 1215 u32 dst_pid; 1216 u32 dst_group; 1217 struct sk_buff *skb; 1218 int err; 1219 struct scm_cookie scm; 1220 1221 if (msg->msg_flags&MSG_OOB) 1222 return -EOPNOTSUPP; 1223 1224 if (NULL == siocb->scm) 1225 siocb->scm = &scm; 1226 err = scm_send(sock, msg, siocb->scm); 1227 if (err < 0) 1228 return err; 1229 1230 if (msg->msg_namelen) { 1231 if (addr->nl_family != AF_NETLINK) 1232 return -EINVAL; 1233 dst_pid = addr->nl_pid; 1234 dst_group = ffs(addr->nl_groups); 1235 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) 1236 return -EPERM; 1237 } else { 1238 dst_pid = nlk->dst_pid; 1239 dst_group = nlk->dst_group; 1240 } 1241 1242 if (!nlk->pid) { 1243 err = netlink_autobind(sock); 1244 if (err) 1245 goto out; 1246 } 1247 1248 err = -EMSGSIZE; 1249 if (len > sk->sk_sndbuf - 32) 1250 goto out; 1251 err = -ENOBUFS; 1252 skb = alloc_skb(len, GFP_KERNEL); 1253 if (skb == NULL) 1254 goto out; 1255 1256 NETLINK_CB(skb).pid = nlk->pid; 1257 NETLINK_CB(skb).dst_group = dst_group; 1258 NETLINK_CB(skb).loginuid = audit_get_loginuid(current); 1259 NETLINK_CB(skb).sessionid = audit_get_sessionid(current); 1260 security_task_getsecid(current, &(NETLINK_CB(skb).sid)); 1261 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1262 1263 /* What can I do? Netlink is asynchronous, so that 1264 we will have to save current capabilities to 1265 check them, when this message will be delivered 1266 to corresponding kernel module. --ANK (980802) 1267 */ 1268 1269 err = -EFAULT; 1270 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1271 kfree_skb(skb); 1272 goto out; 1273 } 1274 1275 err = security_netlink_send(sk, skb); 1276 if (err) { 1277 kfree_skb(skb); 1278 goto out; 1279 } 1280 1281 if (dst_group) { 1282 atomic_inc(&skb->users); 1283 netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); 1284 } 1285 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); 1286 1287 out: 1288 return err; 1289 } 1290 1291 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, 1292 struct msghdr *msg, size_t len, 1293 int flags) 1294 { 1295 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1296 struct scm_cookie scm; 1297 struct sock *sk = sock->sk; 1298 struct netlink_sock *nlk = nlk_sk(sk); 1299 int noblock = flags&MSG_DONTWAIT; 1300 size_t copied; 1301 struct sk_buff *skb; 1302 int err; 1303 1304 if (flags&MSG_OOB) 1305 return -EOPNOTSUPP; 1306 1307 copied = 0; 1308 1309 skb = skb_recv_datagram(sk, flags, noblock, &err); 1310 if (skb == NULL) 1311 goto out; 1312 1313 msg->msg_namelen = 0; 1314 1315 copied = skb->len; 1316 if (len < copied) { 1317 msg->msg_flags |= MSG_TRUNC; 1318 copied = len; 1319 } 1320 1321 skb_reset_transport_header(skb); 1322 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1323 1324 if (msg->msg_name) { 1325 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; 1326 addr->nl_family = AF_NETLINK; 1327 addr->nl_pad = 0; 1328 addr->nl_pid = NETLINK_CB(skb).pid; 1329 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1330 msg->msg_namelen = sizeof(*addr); 1331 } 1332 1333 if (nlk->flags & NETLINK_RECV_PKTINFO) 1334 netlink_cmsg_recv_pktinfo(msg, skb); 1335 1336 if (NULL == siocb->scm) { 1337 memset(&scm, 0, sizeof(scm)); 1338 siocb->scm = &scm; 1339 } 1340 siocb->scm->creds = *NETLINK_CREDS(skb); 1341 if (flags & MSG_TRUNC) 1342 copied = skb->len; 1343 skb_free_datagram(sk, skb); 1344 1345 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) 1346 netlink_dump(sk); 1347 1348 scm_recv(sock, msg, siocb->scm, flags); 1349 out: 1350 netlink_rcv_wake(sk); 1351 return err ? : copied; 1352 } 1353 1354 static void netlink_data_ready(struct sock *sk, int len) 1355 { 1356 BUG(); 1357 } 1358 1359 /* 1360 * We export these functions to other modules. They provide a 1361 * complete set of kernel non-blocking support for message 1362 * queueing. 1363 */ 1364 1365 struct sock * 1366 netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1367 void (*input)(struct sk_buff *skb), 1368 struct mutex *cb_mutex, struct module *module) 1369 { 1370 struct socket *sock; 1371 struct sock *sk; 1372 struct netlink_sock *nlk; 1373 unsigned long *listeners = NULL; 1374 1375 BUG_ON(!nl_table); 1376 1377 if (unit < 0 || unit >= MAX_LINKS) 1378 return NULL; 1379 1380 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1381 return NULL; 1382 1383 /* 1384 * We have to just have a reference on the net from sk, but don't 1385 * get_net it. Besides, we cannot get and then put the net here. 1386 * So we create one inside init_net and the move it to net. 1387 */ 1388 1389 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) 1390 goto out_sock_release_nosk; 1391 1392 sk = sock->sk; 1393 sk_change_net(sk, net); 1394 1395 if (groups < 32) 1396 groups = 32; 1397 1398 listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL); 1399 if (!listeners) 1400 goto out_sock_release; 1401 1402 sk->sk_data_ready = netlink_data_ready; 1403 if (input) 1404 nlk_sk(sk)->netlink_rcv = input; 1405 1406 if (netlink_insert(sk, net, 0)) 1407 goto out_sock_release; 1408 1409 nlk = nlk_sk(sk); 1410 nlk->flags |= NETLINK_KERNEL_SOCKET; 1411 1412 netlink_table_grab(); 1413 if (!nl_table[unit].registered) { 1414 nl_table[unit].groups = groups; 1415 nl_table[unit].listeners = listeners; 1416 nl_table[unit].cb_mutex = cb_mutex; 1417 nl_table[unit].module = module; 1418 nl_table[unit].registered = 1; 1419 } else { 1420 kfree(listeners); 1421 nl_table[unit].registered++; 1422 } 1423 netlink_table_ungrab(); 1424 return sk; 1425 1426 out_sock_release: 1427 kfree(listeners); 1428 netlink_kernel_release(sk); 1429 return NULL; 1430 1431 out_sock_release_nosk: 1432 sock_release(sock); 1433 return NULL; 1434 } 1435 EXPORT_SYMBOL(netlink_kernel_create); 1436 1437 1438 void 1439 netlink_kernel_release(struct sock *sk) 1440 { 1441 sk_release_kernel(sk); 1442 } 1443 EXPORT_SYMBOL(netlink_kernel_release); 1444 1445 1446 /** 1447 * netlink_change_ngroups - change number of multicast groups 1448 * 1449 * This changes the number of multicast groups that are available 1450 * on a certain netlink family. Note that it is not possible to 1451 * change the number of groups to below 32. Also note that it does 1452 * not implicitly call netlink_clear_multicast_users() when the 1453 * number of groups is reduced. 1454 * 1455 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1456 * @groups: The new number of groups. 1457 */ 1458 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1459 { 1460 unsigned long *listeners, *old = NULL; 1461 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1462 int err = 0; 1463 1464 if (groups < 32) 1465 groups = 32; 1466 1467 netlink_table_grab(); 1468 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1469 listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC); 1470 if (!listeners) { 1471 err = -ENOMEM; 1472 goto out_ungrab; 1473 } 1474 old = tbl->listeners; 1475 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1476 rcu_assign_pointer(tbl->listeners, listeners); 1477 } 1478 tbl->groups = groups; 1479 1480 out_ungrab: 1481 netlink_table_ungrab(); 1482 synchronize_rcu(); 1483 kfree(old); 1484 return err; 1485 } 1486 EXPORT_SYMBOL(netlink_change_ngroups); 1487 1488 /** 1489 * netlink_clear_multicast_users - kick off multicast listeners 1490 * 1491 * This function removes all listeners from the given group. 1492 * @ksk: The kernel netlink socket, as returned by 1493 * netlink_kernel_create(). 1494 * @group: The multicast group to clear. 1495 */ 1496 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1497 { 1498 struct sock *sk; 1499 struct hlist_node *node; 1500 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 1501 1502 netlink_table_grab(); 1503 1504 sk_for_each_bound(sk, node, &tbl->mc_list) 1505 netlink_update_socket_mc(nlk_sk(sk), group, 0); 1506 1507 netlink_table_ungrab(); 1508 } 1509 EXPORT_SYMBOL(netlink_clear_multicast_users); 1510 1511 void netlink_set_nonroot(int protocol, unsigned int flags) 1512 { 1513 if ((unsigned int)protocol < MAX_LINKS) 1514 nl_table[protocol].nl_nonroot = flags; 1515 } 1516 EXPORT_SYMBOL(netlink_set_nonroot); 1517 1518 static void netlink_destroy_callback(struct netlink_callback *cb) 1519 { 1520 if (cb->skb) 1521 kfree_skb(cb->skb); 1522 kfree(cb); 1523 } 1524 1525 /* 1526 * It looks a bit ugly. 1527 * It would be better to create kernel thread. 1528 */ 1529 1530 static int netlink_dump(struct sock *sk) 1531 { 1532 struct netlink_sock *nlk = nlk_sk(sk); 1533 struct netlink_callback *cb; 1534 struct sk_buff *skb; 1535 struct nlmsghdr *nlh; 1536 int len, err = -ENOBUFS; 1537 1538 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); 1539 if (!skb) 1540 goto errout; 1541 1542 mutex_lock(nlk->cb_mutex); 1543 1544 cb = nlk->cb; 1545 if (cb == NULL) { 1546 err = -EINVAL; 1547 goto errout_skb; 1548 } 1549 1550 len = cb->dump(skb, cb); 1551 1552 if (len > 0) { 1553 mutex_unlock(nlk->cb_mutex); 1554 1555 if (sk_filter(sk, skb)) 1556 kfree_skb(skb); 1557 else { 1558 skb_queue_tail(&sk->sk_receive_queue, skb); 1559 sk->sk_data_ready(sk, skb->len); 1560 } 1561 return 0; 1562 } 1563 1564 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 1565 if (!nlh) 1566 goto errout_skb; 1567 1568 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 1569 1570 if (sk_filter(sk, skb)) 1571 kfree_skb(skb); 1572 else { 1573 skb_queue_tail(&sk->sk_receive_queue, skb); 1574 sk->sk_data_ready(sk, skb->len); 1575 } 1576 1577 if (cb->done) 1578 cb->done(cb); 1579 nlk->cb = NULL; 1580 mutex_unlock(nlk->cb_mutex); 1581 1582 netlink_destroy_callback(cb); 1583 return 0; 1584 1585 errout_skb: 1586 mutex_unlock(nlk->cb_mutex); 1587 kfree_skb(skb); 1588 errout: 1589 return err; 1590 } 1591 1592 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1593 struct nlmsghdr *nlh, 1594 int (*dump)(struct sk_buff *skb, 1595 struct netlink_callback *), 1596 int (*done)(struct netlink_callback *)) 1597 { 1598 struct netlink_callback *cb; 1599 struct sock *sk; 1600 struct netlink_sock *nlk; 1601 1602 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1603 if (cb == NULL) 1604 return -ENOBUFS; 1605 1606 cb->dump = dump; 1607 cb->done = done; 1608 cb->nlh = nlh; 1609 atomic_inc(&skb->users); 1610 cb->skb = skb; 1611 1612 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); 1613 if (sk == NULL) { 1614 netlink_destroy_callback(cb); 1615 return -ECONNREFUSED; 1616 } 1617 nlk = nlk_sk(sk); 1618 /* A dump is in progress... */ 1619 mutex_lock(nlk->cb_mutex); 1620 if (nlk->cb) { 1621 mutex_unlock(nlk->cb_mutex); 1622 netlink_destroy_callback(cb); 1623 sock_put(sk); 1624 return -EBUSY; 1625 } 1626 nlk->cb = cb; 1627 mutex_unlock(nlk->cb_mutex); 1628 1629 netlink_dump(sk); 1630 sock_put(sk); 1631 1632 /* We successfully started a dump, by returning -EINTR we 1633 * signal not to send ACK even if it was requested. 1634 */ 1635 return -EINTR; 1636 } 1637 EXPORT_SYMBOL(netlink_dump_start); 1638 1639 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1640 { 1641 struct sk_buff *skb; 1642 struct nlmsghdr *rep; 1643 struct nlmsgerr *errmsg; 1644 size_t payload = sizeof(*errmsg); 1645 1646 /* error messages get the original request appened */ 1647 if (err) 1648 payload += nlmsg_len(nlh); 1649 1650 skb = nlmsg_new(payload, GFP_KERNEL); 1651 if (!skb) { 1652 struct sock *sk; 1653 1654 sk = netlink_lookup(sock_net(in_skb->sk), 1655 in_skb->sk->sk_protocol, 1656 NETLINK_CB(in_skb).pid); 1657 if (sk) { 1658 sk->sk_err = ENOBUFS; 1659 sk->sk_error_report(sk); 1660 sock_put(sk); 1661 } 1662 return; 1663 } 1664 1665 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1666 NLMSG_ERROR, sizeof(struct nlmsgerr), 0); 1667 errmsg = nlmsg_data(rep); 1668 errmsg->error = err; 1669 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1670 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1671 } 1672 EXPORT_SYMBOL(netlink_ack); 1673 1674 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1675 struct nlmsghdr *)) 1676 { 1677 struct nlmsghdr *nlh; 1678 int err; 1679 1680 while (skb->len >= nlmsg_total_size(0)) { 1681 int msglen; 1682 1683 nlh = nlmsg_hdr(skb); 1684 err = 0; 1685 1686 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1687 return 0; 1688 1689 /* Only requests are handled by the kernel */ 1690 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 1691 goto ack; 1692 1693 /* Skip control messages */ 1694 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 1695 goto ack; 1696 1697 err = cb(skb, nlh); 1698 if (err == -EINTR) 1699 goto skip; 1700 1701 ack: 1702 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1703 netlink_ack(skb, nlh, err); 1704 1705 skip: 1706 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1707 if (msglen > skb->len) 1708 msglen = skb->len; 1709 skb_pull(skb, msglen); 1710 } 1711 1712 return 0; 1713 } 1714 EXPORT_SYMBOL(netlink_rcv_skb); 1715 1716 /** 1717 * nlmsg_notify - send a notification netlink message 1718 * @sk: netlink socket to use 1719 * @skb: notification message 1720 * @pid: destination netlink pid for reports or 0 1721 * @group: destination multicast group or 0 1722 * @report: 1 to report back, 0 to disable 1723 * @flags: allocation flags 1724 */ 1725 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, 1726 unsigned int group, int report, gfp_t flags) 1727 { 1728 int err = 0; 1729 1730 if (group) { 1731 int exclude_pid = 0; 1732 1733 if (report) { 1734 atomic_inc(&skb->users); 1735 exclude_pid = pid; 1736 } 1737 1738 /* errors reported via destination sk->sk_err */ 1739 nlmsg_multicast(sk, skb, exclude_pid, group, flags); 1740 } 1741 1742 if (report) 1743 err = nlmsg_unicast(sk, skb, pid); 1744 1745 return err; 1746 } 1747 EXPORT_SYMBOL(nlmsg_notify); 1748 1749 #ifdef CONFIG_PROC_FS 1750 struct nl_seq_iter { 1751 struct seq_net_private p; 1752 int link; 1753 int hash_idx; 1754 }; 1755 1756 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) 1757 { 1758 struct nl_seq_iter *iter = seq->private; 1759 int i, j; 1760 struct sock *s; 1761 struct hlist_node *node; 1762 loff_t off = 0; 1763 1764 for (i = 0; i < MAX_LINKS; i++) { 1765 struct nl_pid_hash *hash = &nl_table[i].hash; 1766 1767 for (j = 0; j <= hash->mask; j++) { 1768 sk_for_each(s, node, &hash->table[j]) { 1769 if (sock_net(s) != seq_file_net(seq)) 1770 continue; 1771 if (off == pos) { 1772 iter->link = i; 1773 iter->hash_idx = j; 1774 return s; 1775 } 1776 ++off; 1777 } 1778 } 1779 } 1780 return NULL; 1781 } 1782 1783 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 1784 __acquires(nl_table_lock) 1785 { 1786 read_lock(&nl_table_lock); 1787 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1788 } 1789 1790 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1791 { 1792 struct sock *s; 1793 struct nl_seq_iter *iter; 1794 int i, j; 1795 1796 ++*pos; 1797 1798 if (v == SEQ_START_TOKEN) 1799 return netlink_seq_socket_idx(seq, 0); 1800 1801 iter = seq->private; 1802 s = v; 1803 do { 1804 s = sk_next(s); 1805 } while (s && sock_net(s) != seq_file_net(seq)); 1806 if (s) 1807 return s; 1808 1809 i = iter->link; 1810 j = iter->hash_idx + 1; 1811 1812 do { 1813 struct nl_pid_hash *hash = &nl_table[i].hash; 1814 1815 for (; j <= hash->mask; j++) { 1816 s = sk_head(&hash->table[j]); 1817 while (s && sock_net(s) != seq_file_net(seq)) 1818 s = sk_next(s); 1819 if (s) { 1820 iter->link = i; 1821 iter->hash_idx = j; 1822 return s; 1823 } 1824 } 1825 1826 j = 0; 1827 } while (++i < MAX_LINKS); 1828 1829 return NULL; 1830 } 1831 1832 static void netlink_seq_stop(struct seq_file *seq, void *v) 1833 __releases(nl_table_lock) 1834 { 1835 read_unlock(&nl_table_lock); 1836 } 1837 1838 1839 static int netlink_seq_show(struct seq_file *seq, void *v) 1840 { 1841 if (v == SEQ_START_TOKEN) 1842 seq_puts(seq, 1843 "sk Eth Pid Groups " 1844 "Rmem Wmem Dump Locks\n"); 1845 else { 1846 struct sock *s = v; 1847 struct netlink_sock *nlk = nlk_sk(s); 1848 1849 seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", 1850 s, 1851 s->sk_protocol, 1852 nlk->pid, 1853 nlk->groups ? (u32)nlk->groups[0] : 0, 1854 atomic_read(&s->sk_rmem_alloc), 1855 atomic_read(&s->sk_wmem_alloc), 1856 nlk->cb, 1857 atomic_read(&s->sk_refcnt) 1858 ); 1859 1860 } 1861 return 0; 1862 } 1863 1864 static const struct seq_operations netlink_seq_ops = { 1865 .start = netlink_seq_start, 1866 .next = netlink_seq_next, 1867 .stop = netlink_seq_stop, 1868 .show = netlink_seq_show, 1869 }; 1870 1871 1872 static int netlink_seq_open(struct inode *inode, struct file *file) 1873 { 1874 return seq_open_net(inode, file, &netlink_seq_ops, 1875 sizeof(struct nl_seq_iter)); 1876 } 1877 1878 static const struct file_operations netlink_seq_fops = { 1879 .owner = THIS_MODULE, 1880 .open = netlink_seq_open, 1881 .read = seq_read, 1882 .llseek = seq_lseek, 1883 .release = seq_release_net, 1884 }; 1885 1886 #endif 1887 1888 int netlink_register_notifier(struct notifier_block *nb) 1889 { 1890 return atomic_notifier_chain_register(&netlink_chain, nb); 1891 } 1892 EXPORT_SYMBOL(netlink_register_notifier); 1893 1894 int netlink_unregister_notifier(struct notifier_block *nb) 1895 { 1896 return atomic_notifier_chain_unregister(&netlink_chain, nb); 1897 } 1898 EXPORT_SYMBOL(netlink_unregister_notifier); 1899 1900 static const struct proto_ops netlink_ops = { 1901 .family = PF_NETLINK, 1902 .owner = THIS_MODULE, 1903 .release = netlink_release, 1904 .bind = netlink_bind, 1905 .connect = netlink_connect, 1906 .socketpair = sock_no_socketpair, 1907 .accept = sock_no_accept, 1908 .getname = netlink_getname, 1909 .poll = datagram_poll, 1910 .ioctl = sock_no_ioctl, 1911 .listen = sock_no_listen, 1912 .shutdown = sock_no_shutdown, 1913 .setsockopt = netlink_setsockopt, 1914 .getsockopt = netlink_getsockopt, 1915 .sendmsg = netlink_sendmsg, 1916 .recvmsg = netlink_recvmsg, 1917 .mmap = sock_no_mmap, 1918 .sendpage = sock_no_sendpage, 1919 }; 1920 1921 static struct net_proto_family netlink_family_ops = { 1922 .family = PF_NETLINK, 1923 .create = netlink_create, 1924 .owner = THIS_MODULE, /* for consistency 8) */ 1925 }; 1926 1927 static int __net_init netlink_net_init(struct net *net) 1928 { 1929 #ifdef CONFIG_PROC_FS 1930 if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) 1931 return -ENOMEM; 1932 #endif 1933 return 0; 1934 } 1935 1936 static void __net_exit netlink_net_exit(struct net *net) 1937 { 1938 #ifdef CONFIG_PROC_FS 1939 proc_net_remove(net, "netlink"); 1940 #endif 1941 } 1942 1943 static struct pernet_operations __net_initdata netlink_net_ops = { 1944 .init = netlink_net_init, 1945 .exit = netlink_net_exit, 1946 }; 1947 1948 static int __init netlink_proto_init(void) 1949 { 1950 struct sk_buff *dummy_skb; 1951 int i; 1952 unsigned long limit; 1953 unsigned int order; 1954 int err = proto_register(&netlink_proto, 0); 1955 1956 if (err != 0) 1957 goto out; 1958 1959 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); 1960 1961 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 1962 if (!nl_table) 1963 goto panic; 1964 1965 if (num_physpages >= (128 * 1024)) 1966 limit = num_physpages >> (21 - PAGE_SHIFT); 1967 else 1968 limit = num_physpages >> (23 - PAGE_SHIFT); 1969 1970 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; 1971 limit = (1UL << order) / sizeof(struct hlist_head); 1972 order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; 1973 1974 for (i = 0; i < MAX_LINKS; i++) { 1975 struct nl_pid_hash *hash = &nl_table[i].hash; 1976 1977 hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); 1978 if (!hash->table) { 1979 while (i-- > 0) 1980 nl_pid_hash_free(nl_table[i].hash.table, 1981 1 * sizeof(*hash->table)); 1982 kfree(nl_table); 1983 goto panic; 1984 } 1985 hash->max_shift = order; 1986 hash->shift = 0; 1987 hash->mask = 0; 1988 hash->rehash_time = jiffies; 1989 } 1990 1991 sock_register(&netlink_family_ops); 1992 register_pernet_subsys(&netlink_net_ops); 1993 /* The netlink device handler may be needed early. */ 1994 rtnetlink_init(); 1995 out: 1996 return err; 1997 panic: 1998 panic("netlink_init: Cannot allocate nl_table\n"); 1999 } 2000 2001 core_initcall(netlink_proto_init); 2002