1 /* 2 * NETLINK Kernel-user communication protocol. 3 * 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith 13 * added netlink_proto_exit 14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 15 * use nlk_sk, as sk->protinfo is on a diet 8) 16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> 17 * - inc module use count of module that owns 18 * the kernel socket in case userspace opens 19 * socket of same protocol 20 * - remove all module support, since netlink is 21 * mandatory if CONFIG_NET=y these days 22 */ 23 24 #include <linux/module.h> 25 26 #include <linux/capability.h> 27 #include <linux/kernel.h> 28 #include <linux/init.h> 29 #include <linux/signal.h> 30 #include <linux/sched.h> 31 #include <linux/errno.h> 32 #include <linux/string.h> 33 #include <linux/stat.h> 34 #include <linux/socket.h> 35 #include <linux/un.h> 36 #include <linux/fcntl.h> 37 #include <linux/termios.h> 38 #include <linux/sockios.h> 39 #include <linux/net.h> 40 #include <linux/fs.h> 41 #include <linux/slab.h> 42 #include <asm/uaccess.h> 43 #include <linux/skbuff.h> 44 #include <linux/netdevice.h> 45 #include <linux/rtnetlink.h> 46 #include <linux/proc_fs.h> 47 #include <linux/seq_file.h> 48 #include <linux/notifier.h> 49 #include <linux/security.h> 50 #include <linux/jhash.h> 51 #include <linux/jiffies.h> 52 #include <linux/random.h> 53 #include <linux/bitops.h> 54 #include <linux/mm.h> 55 #include <linux/types.h> 56 #include <linux/audit.h> 57 #include <linux/mutex.h> 58 59 #include <net/net_namespace.h> 60 #include <net/sock.h> 61 #include <net/scm.h> 62 #include <net/netlink.h> 63 64 #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 65 #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) 66 67 struct netlink_sock { 68 /* struct sock has to be the first member of netlink_sock */ 69 struct sock sk; 70 u32 portid; 71 u32 dst_portid; 72 u32 dst_group; 73 u32 flags; 74 u32 subscriptions; 75 u32 ngroups; 76 unsigned long *groups; 77 unsigned long state; 78 wait_queue_head_t wait; 79 struct netlink_callback *cb; 80 struct mutex *cb_mutex; 81 struct mutex cb_def_mutex; 82 void (*netlink_rcv)(struct sk_buff *skb); 83 void (*netlink_bind)(int group); 84 struct module *module; 85 }; 86 87 struct listeners { 88 struct rcu_head rcu; 89 unsigned long masks[0]; 90 }; 91 92 #define NETLINK_KERNEL_SOCKET 0x1 93 #define NETLINK_RECV_PKTINFO 0x2 94 #define NETLINK_BROADCAST_SEND_ERROR 0x4 95 #define NETLINK_RECV_NO_ENOBUFS 0x8 96 97 static inline struct netlink_sock *nlk_sk(struct sock *sk) 98 { 99 return container_of(sk, struct netlink_sock, sk); 100 } 101 102 static inline int netlink_is_kernel(struct sock *sk) 103 { 104 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 105 } 106 107 struct nl_portid_hash { 108 struct hlist_head *table; 109 unsigned long rehash_time; 110 111 unsigned int mask; 112 unsigned int shift; 113 114 unsigned int entries; 115 unsigned int max_shift; 116 117 u32 rnd; 118 }; 119 120 struct netlink_table { 121 struct nl_portid_hash hash; 122 struct hlist_head mc_list; 123 struct listeners __rcu *listeners; 124 unsigned int flags; 125 unsigned int groups; 126 struct mutex *cb_mutex; 127 struct module *module; 128 void (*bind)(int group); 129 int registered; 130 }; 131 132 static struct netlink_table *nl_table; 133 134 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 135 136 static int netlink_dump(struct sock *sk); 137 138 static DEFINE_RWLOCK(nl_table_lock); 139 static atomic_t nl_table_users = ATOMIC_INIT(0); 140 141 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 142 143 static ATOMIC_NOTIFIER_HEAD(netlink_chain); 144 145 static inline u32 netlink_group_mask(u32 group) 146 { 147 return group ? 1 << (group - 1) : 0; 148 } 149 150 static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) 151 { 152 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; 153 } 154 155 static void netlink_destroy_callback(struct netlink_callback *cb) 156 { 157 kfree_skb(cb->skb); 158 kfree(cb); 159 } 160 161 static void netlink_consume_callback(struct netlink_callback *cb) 162 { 163 consume_skb(cb->skb); 164 kfree(cb); 165 } 166 167 static void netlink_sock_destruct(struct sock *sk) 168 { 169 struct netlink_sock *nlk = nlk_sk(sk); 170 171 if (nlk->cb) { 172 if (nlk->cb->done) 173 nlk->cb->done(nlk->cb); 174 175 module_put(nlk->cb->module); 176 netlink_destroy_callback(nlk->cb); 177 } 178 179 skb_queue_purge(&sk->sk_receive_queue); 180 181 if (!sock_flag(sk, SOCK_DEAD)) { 182 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 183 return; 184 } 185 186 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 187 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 188 WARN_ON(nlk_sk(sk)->groups); 189 } 190 191 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on 192 * SMP. Look, when several writers sleep and reader wakes them up, all but one 193 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 194 * this, _but_ remember, it adds useless work on UP machines. 195 */ 196 197 void netlink_table_grab(void) 198 __acquires(nl_table_lock) 199 { 200 might_sleep(); 201 202 write_lock_irq(&nl_table_lock); 203 204 if (atomic_read(&nl_table_users)) { 205 DECLARE_WAITQUEUE(wait, current); 206 207 add_wait_queue_exclusive(&nl_table_wait, &wait); 208 for (;;) { 209 set_current_state(TASK_UNINTERRUPTIBLE); 210 if (atomic_read(&nl_table_users) == 0) 211 break; 212 write_unlock_irq(&nl_table_lock); 213 schedule(); 214 write_lock_irq(&nl_table_lock); 215 } 216 217 __set_current_state(TASK_RUNNING); 218 remove_wait_queue(&nl_table_wait, &wait); 219 } 220 } 221 222 void netlink_table_ungrab(void) 223 __releases(nl_table_lock) 224 { 225 write_unlock_irq(&nl_table_lock); 226 wake_up(&nl_table_wait); 227 } 228 229 static inline void 230 netlink_lock_table(void) 231 { 232 /* read_lock() synchronizes us to netlink_table_grab */ 233 234 read_lock(&nl_table_lock); 235 atomic_inc(&nl_table_users); 236 read_unlock(&nl_table_lock); 237 } 238 239 static inline void 240 netlink_unlock_table(void) 241 { 242 if (atomic_dec_and_test(&nl_table_users)) 243 wake_up(&nl_table_wait); 244 } 245 246 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 247 { 248 struct nl_portid_hash *hash = &nl_table[protocol].hash; 249 struct hlist_head *head; 250 struct sock *sk; 251 252 read_lock(&nl_table_lock); 253 head = nl_portid_hashfn(hash, portid); 254 sk_for_each(sk, head) { 255 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { 256 sock_hold(sk); 257 goto found; 258 } 259 } 260 sk = NULL; 261 found: 262 read_unlock(&nl_table_lock); 263 return sk; 264 } 265 266 static struct hlist_head *nl_portid_hash_zalloc(size_t size) 267 { 268 if (size <= PAGE_SIZE) 269 return kzalloc(size, GFP_ATOMIC); 270 else 271 return (struct hlist_head *) 272 __get_free_pages(GFP_ATOMIC | __GFP_ZERO, 273 get_order(size)); 274 } 275 276 static void nl_portid_hash_free(struct hlist_head *table, size_t size) 277 { 278 if (size <= PAGE_SIZE) 279 kfree(table); 280 else 281 free_pages((unsigned long)table, get_order(size)); 282 } 283 284 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) 285 { 286 unsigned int omask, mask, shift; 287 size_t osize, size; 288 struct hlist_head *otable, *table; 289 int i; 290 291 omask = mask = hash->mask; 292 osize = size = (mask + 1) * sizeof(*table); 293 shift = hash->shift; 294 295 if (grow) { 296 if (++shift > hash->max_shift) 297 return 0; 298 mask = mask * 2 + 1; 299 size *= 2; 300 } 301 302 table = nl_portid_hash_zalloc(size); 303 if (!table) 304 return 0; 305 306 otable = hash->table; 307 hash->table = table; 308 hash->mask = mask; 309 hash->shift = shift; 310 get_random_bytes(&hash->rnd, sizeof(hash->rnd)); 311 312 for (i = 0; i <= omask; i++) { 313 struct sock *sk; 314 struct hlist_node *tmp; 315 316 sk_for_each_safe(sk, tmp, &otable[i]) 317 __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); 318 } 319 320 nl_portid_hash_free(otable, osize); 321 hash->rehash_time = jiffies + 10 * 60 * HZ; 322 return 1; 323 } 324 325 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) 326 { 327 int avg = hash->entries >> hash->shift; 328 329 if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) 330 return 1; 331 332 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { 333 nl_portid_hash_rehash(hash, 0); 334 return 1; 335 } 336 337 return 0; 338 } 339 340 static const struct proto_ops netlink_ops; 341 342 static void 343 netlink_update_listeners(struct sock *sk) 344 { 345 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 346 unsigned long mask; 347 unsigned int i; 348 struct listeners *listeners; 349 350 listeners = nl_deref_protected(tbl->listeners); 351 if (!listeners) 352 return; 353 354 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { 355 mask = 0; 356 sk_for_each_bound(sk, &tbl->mc_list) { 357 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 358 mask |= nlk_sk(sk)->groups[i]; 359 } 360 listeners->masks[i] = mask; 361 } 362 /* this function is only called with the netlink table "grabbed", which 363 * makes sure updates are visible before bind or setsockopt return. */ 364 } 365 366 static int netlink_insert(struct sock *sk, struct net *net, u32 portid) 367 { 368 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 369 struct hlist_head *head; 370 int err = -EADDRINUSE; 371 struct sock *osk; 372 int len; 373 374 netlink_table_grab(); 375 head = nl_portid_hashfn(hash, portid); 376 len = 0; 377 sk_for_each(osk, head) { 378 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) 379 break; 380 len++; 381 } 382 if (osk) 383 goto err; 384 385 err = -EBUSY; 386 if (nlk_sk(sk)->portid) 387 goto err; 388 389 err = -ENOMEM; 390 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) 391 goto err; 392 393 if (len && nl_portid_hash_dilute(hash, len)) 394 head = nl_portid_hashfn(hash, portid); 395 hash->entries++; 396 nlk_sk(sk)->portid = portid; 397 sk_add_node(sk, head); 398 err = 0; 399 400 err: 401 netlink_table_ungrab(); 402 return err; 403 } 404 405 static void netlink_remove(struct sock *sk) 406 { 407 netlink_table_grab(); 408 if (sk_del_node_init(sk)) 409 nl_table[sk->sk_protocol].hash.entries--; 410 if (nlk_sk(sk)->subscriptions) 411 __sk_del_bind_node(sk); 412 netlink_table_ungrab(); 413 } 414 415 static struct proto netlink_proto = { 416 .name = "NETLINK", 417 .owner = THIS_MODULE, 418 .obj_size = sizeof(struct netlink_sock), 419 }; 420 421 static int __netlink_create(struct net *net, struct socket *sock, 422 struct mutex *cb_mutex, int protocol) 423 { 424 struct sock *sk; 425 struct netlink_sock *nlk; 426 427 sock->ops = &netlink_ops; 428 429 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); 430 if (!sk) 431 return -ENOMEM; 432 433 sock_init_data(sock, sk); 434 435 nlk = nlk_sk(sk); 436 if (cb_mutex) { 437 nlk->cb_mutex = cb_mutex; 438 } else { 439 nlk->cb_mutex = &nlk->cb_def_mutex; 440 mutex_init(nlk->cb_mutex); 441 } 442 init_waitqueue_head(&nlk->wait); 443 444 sk->sk_destruct = netlink_sock_destruct; 445 sk->sk_protocol = protocol; 446 return 0; 447 } 448 449 static int netlink_create(struct net *net, struct socket *sock, int protocol, 450 int kern) 451 { 452 struct module *module = NULL; 453 struct mutex *cb_mutex; 454 struct netlink_sock *nlk; 455 void (*bind)(int group); 456 int err = 0; 457 458 sock->state = SS_UNCONNECTED; 459 460 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 461 return -ESOCKTNOSUPPORT; 462 463 if (protocol < 0 || protocol >= MAX_LINKS) 464 return -EPROTONOSUPPORT; 465 466 netlink_lock_table(); 467 #ifdef CONFIG_MODULES 468 if (!nl_table[protocol].registered) { 469 netlink_unlock_table(); 470 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); 471 netlink_lock_table(); 472 } 473 #endif 474 if (nl_table[protocol].registered && 475 try_module_get(nl_table[protocol].module)) 476 module = nl_table[protocol].module; 477 else 478 err = -EPROTONOSUPPORT; 479 cb_mutex = nl_table[protocol].cb_mutex; 480 bind = nl_table[protocol].bind; 481 netlink_unlock_table(); 482 483 if (err < 0) 484 goto out; 485 486 err = __netlink_create(net, sock, cb_mutex, protocol); 487 if (err < 0) 488 goto out_module; 489 490 local_bh_disable(); 491 sock_prot_inuse_add(net, &netlink_proto, 1); 492 local_bh_enable(); 493 494 nlk = nlk_sk(sock->sk); 495 nlk->module = module; 496 nlk->netlink_bind = bind; 497 out: 498 return err; 499 500 out_module: 501 module_put(module); 502 goto out; 503 } 504 505 static int netlink_release(struct socket *sock) 506 { 507 struct sock *sk = sock->sk; 508 struct netlink_sock *nlk; 509 510 if (!sk) 511 return 0; 512 513 netlink_remove(sk); 514 sock_orphan(sk); 515 nlk = nlk_sk(sk); 516 517 /* 518 * OK. Socket is unlinked, any packets that arrive now 519 * will be purged. 520 */ 521 522 sock->sk = NULL; 523 wake_up_interruptible_all(&nlk->wait); 524 525 skb_queue_purge(&sk->sk_write_queue); 526 527 if (nlk->portid) { 528 struct netlink_notify n = { 529 .net = sock_net(sk), 530 .protocol = sk->sk_protocol, 531 .portid = nlk->portid, 532 }; 533 atomic_notifier_call_chain(&netlink_chain, 534 NETLINK_URELEASE, &n); 535 } 536 537 module_put(nlk->module); 538 539 netlink_table_grab(); 540 if (netlink_is_kernel(sk)) { 541 BUG_ON(nl_table[sk->sk_protocol].registered == 0); 542 if (--nl_table[sk->sk_protocol].registered == 0) { 543 struct listeners *old; 544 545 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); 546 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); 547 kfree_rcu(old, rcu); 548 nl_table[sk->sk_protocol].module = NULL; 549 nl_table[sk->sk_protocol].bind = NULL; 550 nl_table[sk->sk_protocol].flags = 0; 551 nl_table[sk->sk_protocol].registered = 0; 552 } 553 } else if (nlk->subscriptions) { 554 netlink_update_listeners(sk); 555 } 556 netlink_table_ungrab(); 557 558 kfree(nlk->groups); 559 nlk->groups = NULL; 560 561 local_bh_disable(); 562 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); 563 local_bh_enable(); 564 sock_put(sk); 565 return 0; 566 } 567 568 static int netlink_autobind(struct socket *sock) 569 { 570 struct sock *sk = sock->sk; 571 struct net *net = sock_net(sk); 572 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 573 struct hlist_head *head; 574 struct sock *osk; 575 s32 portid = task_tgid_vnr(current); 576 int err; 577 static s32 rover = -4097; 578 579 retry: 580 cond_resched(); 581 netlink_table_grab(); 582 head = nl_portid_hashfn(hash, portid); 583 sk_for_each(osk, head) { 584 if (!net_eq(sock_net(osk), net)) 585 continue; 586 if (nlk_sk(osk)->portid == portid) { 587 /* Bind collision, search negative portid values. */ 588 portid = rover--; 589 if (rover > -4097) 590 rover = -4097; 591 netlink_table_ungrab(); 592 goto retry; 593 } 594 } 595 netlink_table_ungrab(); 596 597 err = netlink_insert(sk, net, portid); 598 if (err == -EADDRINUSE) 599 goto retry; 600 601 /* If 2 threads race to autobind, that is fine. */ 602 if (err == -EBUSY) 603 err = 0; 604 605 return err; 606 } 607 608 static inline int netlink_capable(const struct socket *sock, unsigned int flag) 609 { 610 return (nl_table[sock->sk->sk_protocol].flags & flag) || 611 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); 612 } 613 614 static void 615 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) 616 { 617 struct netlink_sock *nlk = nlk_sk(sk); 618 619 if (nlk->subscriptions && !subscriptions) 620 __sk_del_bind_node(sk); 621 else if (!nlk->subscriptions && subscriptions) 622 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 623 nlk->subscriptions = subscriptions; 624 } 625 626 static int netlink_realloc_groups(struct sock *sk) 627 { 628 struct netlink_sock *nlk = nlk_sk(sk); 629 unsigned int groups; 630 unsigned long *new_groups; 631 int err = 0; 632 633 netlink_table_grab(); 634 635 groups = nl_table[sk->sk_protocol].groups; 636 if (!nl_table[sk->sk_protocol].registered) { 637 err = -ENOENT; 638 goto out_unlock; 639 } 640 641 if (nlk->ngroups >= groups) 642 goto out_unlock; 643 644 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); 645 if (new_groups == NULL) { 646 err = -ENOMEM; 647 goto out_unlock; 648 } 649 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, 650 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 651 652 nlk->groups = new_groups; 653 nlk->ngroups = groups; 654 out_unlock: 655 netlink_table_ungrab(); 656 return err; 657 } 658 659 static int netlink_bind(struct socket *sock, struct sockaddr *addr, 660 int addr_len) 661 { 662 struct sock *sk = sock->sk; 663 struct net *net = sock_net(sk); 664 struct netlink_sock *nlk = nlk_sk(sk); 665 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 666 int err; 667 668 if (addr_len < sizeof(struct sockaddr_nl)) 669 return -EINVAL; 670 671 if (nladdr->nl_family != AF_NETLINK) 672 return -EINVAL; 673 674 /* Only superuser is allowed to listen multicasts */ 675 if (nladdr->nl_groups) { 676 if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) 677 return -EPERM; 678 err = netlink_realloc_groups(sk); 679 if (err) 680 return err; 681 } 682 683 if (nlk->portid) { 684 if (nladdr->nl_pid != nlk->portid) 685 return -EINVAL; 686 } else { 687 err = nladdr->nl_pid ? 688 netlink_insert(sk, net, nladdr->nl_pid) : 689 netlink_autobind(sock); 690 if (err) 691 return err; 692 } 693 694 if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) 695 return 0; 696 697 netlink_table_grab(); 698 netlink_update_subscriptions(sk, nlk->subscriptions + 699 hweight32(nladdr->nl_groups) - 700 hweight32(nlk->groups[0])); 701 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; 702 netlink_update_listeners(sk); 703 netlink_table_ungrab(); 704 705 if (nlk->netlink_bind && nlk->groups[0]) { 706 int i; 707 708 for (i=0; i<nlk->ngroups; i++) { 709 if (test_bit(i, nlk->groups)) 710 nlk->netlink_bind(i); 711 } 712 } 713 714 return 0; 715 } 716 717 static int netlink_connect(struct socket *sock, struct sockaddr *addr, 718 int alen, int flags) 719 { 720 int err = 0; 721 struct sock *sk = sock->sk; 722 struct netlink_sock *nlk = nlk_sk(sk); 723 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 724 725 if (alen < sizeof(addr->sa_family)) 726 return -EINVAL; 727 728 if (addr->sa_family == AF_UNSPEC) { 729 sk->sk_state = NETLINK_UNCONNECTED; 730 nlk->dst_portid = 0; 731 nlk->dst_group = 0; 732 return 0; 733 } 734 if (addr->sa_family != AF_NETLINK) 735 return -EINVAL; 736 737 /* Only superuser is allowed to send multicasts */ 738 if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) 739 return -EPERM; 740 741 if (!nlk->portid) 742 err = netlink_autobind(sock); 743 744 if (err == 0) { 745 sk->sk_state = NETLINK_CONNECTED; 746 nlk->dst_portid = nladdr->nl_pid; 747 nlk->dst_group = ffs(nladdr->nl_groups); 748 } 749 750 return err; 751 } 752 753 static int netlink_getname(struct socket *sock, struct sockaddr *addr, 754 int *addr_len, int peer) 755 { 756 struct sock *sk = sock->sk; 757 struct netlink_sock *nlk = nlk_sk(sk); 758 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); 759 760 nladdr->nl_family = AF_NETLINK; 761 nladdr->nl_pad = 0; 762 *addr_len = sizeof(*nladdr); 763 764 if (peer) { 765 nladdr->nl_pid = nlk->dst_portid; 766 nladdr->nl_groups = netlink_group_mask(nlk->dst_group); 767 } else { 768 nladdr->nl_pid = nlk->portid; 769 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; 770 } 771 return 0; 772 } 773 774 static void netlink_overrun(struct sock *sk) 775 { 776 struct netlink_sock *nlk = nlk_sk(sk); 777 778 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { 779 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { 780 sk->sk_err = ENOBUFS; 781 sk->sk_error_report(sk); 782 } 783 } 784 atomic_inc(&sk->sk_drops); 785 } 786 787 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) 788 { 789 struct sock *sock; 790 struct netlink_sock *nlk; 791 792 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); 793 if (!sock) 794 return ERR_PTR(-ECONNREFUSED); 795 796 /* Don't bother queuing skb if kernel socket has no input function */ 797 nlk = nlk_sk(sock); 798 if (sock->sk_state == NETLINK_CONNECTED && 799 nlk->dst_portid != nlk_sk(ssk)->portid) { 800 sock_put(sock); 801 return ERR_PTR(-ECONNREFUSED); 802 } 803 return sock; 804 } 805 806 struct sock *netlink_getsockbyfilp(struct file *filp) 807 { 808 struct inode *inode = file_inode(filp); 809 struct sock *sock; 810 811 if (!S_ISSOCK(inode->i_mode)) 812 return ERR_PTR(-ENOTSOCK); 813 814 sock = SOCKET_I(inode)->sk; 815 if (sock->sk_family != AF_NETLINK) 816 return ERR_PTR(-EINVAL); 817 818 sock_hold(sock); 819 return sock; 820 } 821 822 /* 823 * Attach a skb to a netlink socket. 824 * The caller must hold a reference to the destination socket. On error, the 825 * reference is dropped. The skb is not send to the destination, just all 826 * all error checks are performed and memory in the queue is reserved. 827 * Return values: 828 * < 0: error. skb freed, reference to sock dropped. 829 * 0: continue 830 * 1: repeat lookup - reference dropped while waiting for socket memory. 831 */ 832 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 833 long *timeo, struct sock *ssk) 834 { 835 struct netlink_sock *nlk; 836 837 nlk = nlk_sk(sk); 838 839 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 840 test_bit(0, &nlk->state)) { 841 DECLARE_WAITQUEUE(wait, current); 842 if (!*timeo) { 843 if (!ssk || netlink_is_kernel(ssk)) 844 netlink_overrun(sk); 845 sock_put(sk); 846 kfree_skb(skb); 847 return -EAGAIN; 848 } 849 850 __set_current_state(TASK_INTERRUPTIBLE); 851 add_wait_queue(&nlk->wait, &wait); 852 853 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 854 test_bit(0, &nlk->state)) && 855 !sock_flag(sk, SOCK_DEAD)) 856 *timeo = schedule_timeout(*timeo); 857 858 __set_current_state(TASK_RUNNING); 859 remove_wait_queue(&nlk->wait, &wait); 860 sock_put(sk); 861 862 if (signal_pending(current)) { 863 kfree_skb(skb); 864 return sock_intr_errno(*timeo); 865 } 866 return 1; 867 } 868 skb_set_owner_r(skb, sk); 869 return 0; 870 } 871 872 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) 873 { 874 int len = skb->len; 875 876 skb_queue_tail(&sk->sk_receive_queue, skb); 877 sk->sk_data_ready(sk, len); 878 return len; 879 } 880 881 int netlink_sendskb(struct sock *sk, struct sk_buff *skb) 882 { 883 int len = __netlink_sendskb(sk, skb); 884 885 sock_put(sk); 886 return len; 887 } 888 889 void netlink_detachskb(struct sock *sk, struct sk_buff *skb) 890 { 891 kfree_skb(skb); 892 sock_put(sk); 893 } 894 895 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) 896 { 897 int delta; 898 899 skb_orphan(skb); 900 901 delta = skb->end - skb->tail; 902 if (delta * 2 < skb->truesize) 903 return skb; 904 905 if (skb_shared(skb)) { 906 struct sk_buff *nskb = skb_clone(skb, allocation); 907 if (!nskb) 908 return skb; 909 consume_skb(skb); 910 skb = nskb; 911 } 912 913 if (!pskb_expand_head(skb, 0, -delta, allocation)) 914 skb->truesize -= delta; 915 916 return skb; 917 } 918 919 static void netlink_rcv_wake(struct sock *sk) 920 { 921 struct netlink_sock *nlk = nlk_sk(sk); 922 923 if (skb_queue_empty(&sk->sk_receive_queue)) 924 clear_bit(0, &nlk->state); 925 if (!test_bit(0, &nlk->state)) 926 wake_up_interruptible(&nlk->wait); 927 } 928 929 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 930 struct sock *ssk) 931 { 932 int ret; 933 struct netlink_sock *nlk = nlk_sk(sk); 934 935 ret = -ECONNREFUSED; 936 if (nlk->netlink_rcv != NULL) { 937 ret = skb->len; 938 skb_set_owner_r(skb, sk); 939 NETLINK_CB(skb).ssk = ssk; 940 nlk->netlink_rcv(skb); 941 consume_skb(skb); 942 } else { 943 kfree_skb(skb); 944 } 945 sock_put(sk); 946 return ret; 947 } 948 949 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, 950 u32 portid, int nonblock) 951 { 952 struct sock *sk; 953 int err; 954 long timeo; 955 956 skb = netlink_trim(skb, gfp_any()); 957 958 timeo = sock_sndtimeo(ssk, nonblock); 959 retry: 960 sk = netlink_getsockbyportid(ssk, portid); 961 if (IS_ERR(sk)) { 962 kfree_skb(skb); 963 return PTR_ERR(sk); 964 } 965 if (netlink_is_kernel(sk)) 966 return netlink_unicast_kernel(sk, skb, ssk); 967 968 if (sk_filter(sk, skb)) { 969 err = skb->len; 970 kfree_skb(skb); 971 sock_put(sk); 972 return err; 973 } 974 975 err = netlink_attachskb(sk, skb, &timeo, ssk); 976 if (err == 1) 977 goto retry; 978 if (err) 979 return err; 980 981 return netlink_sendskb(sk, skb); 982 } 983 EXPORT_SYMBOL(netlink_unicast); 984 985 int netlink_has_listeners(struct sock *sk, unsigned int group) 986 { 987 int res = 0; 988 struct listeners *listeners; 989 990 BUG_ON(!netlink_is_kernel(sk)); 991 992 rcu_read_lock(); 993 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 994 995 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) 996 res = test_bit(group - 1, listeners->masks); 997 998 rcu_read_unlock(); 999 1000 return res; 1001 } 1002 EXPORT_SYMBOL_GPL(netlink_has_listeners); 1003 1004 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1005 { 1006 struct netlink_sock *nlk = nlk_sk(sk); 1007 1008 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1009 !test_bit(0, &nlk->state)) { 1010 skb_set_owner_r(skb, sk); 1011 __netlink_sendskb(sk, skb); 1012 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1013 } 1014 return -1; 1015 } 1016 1017 struct netlink_broadcast_data { 1018 struct sock *exclude_sk; 1019 struct net *net; 1020 u32 portid; 1021 u32 group; 1022 int failure; 1023 int delivery_failure; 1024 int congested; 1025 int delivered; 1026 gfp_t allocation; 1027 struct sk_buff *skb, *skb2; 1028 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); 1029 void *tx_data; 1030 }; 1031 1032 static int do_one_broadcast(struct sock *sk, 1033 struct netlink_broadcast_data *p) 1034 { 1035 struct netlink_sock *nlk = nlk_sk(sk); 1036 int val; 1037 1038 if (p->exclude_sk == sk) 1039 goto out; 1040 1041 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1042 !test_bit(p->group - 1, nlk->groups)) 1043 goto out; 1044 1045 if (!net_eq(sock_net(sk), p->net)) 1046 goto out; 1047 1048 if (p->failure) { 1049 netlink_overrun(sk); 1050 goto out; 1051 } 1052 1053 sock_hold(sk); 1054 if (p->skb2 == NULL) { 1055 if (skb_shared(p->skb)) { 1056 p->skb2 = skb_clone(p->skb, p->allocation); 1057 } else { 1058 p->skb2 = skb_get(p->skb); 1059 /* 1060 * skb ownership may have been set when 1061 * delivered to a previous socket. 1062 */ 1063 skb_orphan(p->skb2); 1064 } 1065 } 1066 if (p->skb2 == NULL) { 1067 netlink_overrun(sk); 1068 /* Clone failed. Notify ALL listeners. */ 1069 p->failure = 1; 1070 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1071 p->delivery_failure = 1; 1072 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { 1073 kfree_skb(p->skb2); 1074 p->skb2 = NULL; 1075 } else if (sk_filter(sk, p->skb2)) { 1076 kfree_skb(p->skb2); 1077 p->skb2 = NULL; 1078 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1079 netlink_overrun(sk); 1080 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1081 p->delivery_failure = 1; 1082 } else { 1083 p->congested |= val; 1084 p->delivered = 1; 1085 p->skb2 = NULL; 1086 } 1087 sock_put(sk); 1088 1089 out: 1090 return 0; 1091 } 1092 1093 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, 1094 u32 group, gfp_t allocation, 1095 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), 1096 void *filter_data) 1097 { 1098 struct net *net = sock_net(ssk); 1099 struct netlink_broadcast_data info; 1100 struct sock *sk; 1101 1102 skb = netlink_trim(skb, allocation); 1103 1104 info.exclude_sk = ssk; 1105 info.net = net; 1106 info.portid = portid; 1107 info.group = group; 1108 info.failure = 0; 1109 info.delivery_failure = 0; 1110 info.congested = 0; 1111 info.delivered = 0; 1112 info.allocation = allocation; 1113 info.skb = skb; 1114 info.skb2 = NULL; 1115 info.tx_filter = filter; 1116 info.tx_data = filter_data; 1117 1118 /* While we sleep in clone, do not allow to change socket list */ 1119 1120 netlink_lock_table(); 1121 1122 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1123 do_one_broadcast(sk, &info); 1124 1125 consume_skb(skb); 1126 1127 netlink_unlock_table(); 1128 1129 if (info.delivery_failure) { 1130 kfree_skb(info.skb2); 1131 return -ENOBUFS; 1132 } 1133 consume_skb(info.skb2); 1134 1135 if (info.delivered) { 1136 if (info.congested && (allocation & __GFP_WAIT)) 1137 yield(); 1138 return 0; 1139 } 1140 return -ESRCH; 1141 } 1142 EXPORT_SYMBOL(netlink_broadcast_filtered); 1143 1144 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, 1145 u32 group, gfp_t allocation) 1146 { 1147 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, 1148 NULL, NULL); 1149 } 1150 EXPORT_SYMBOL(netlink_broadcast); 1151 1152 struct netlink_set_err_data { 1153 struct sock *exclude_sk; 1154 u32 portid; 1155 u32 group; 1156 int code; 1157 }; 1158 1159 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) 1160 { 1161 struct netlink_sock *nlk = nlk_sk(sk); 1162 int ret = 0; 1163 1164 if (sk == p->exclude_sk) 1165 goto out; 1166 1167 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) 1168 goto out; 1169 1170 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || 1171 !test_bit(p->group - 1, nlk->groups)) 1172 goto out; 1173 1174 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { 1175 ret = 1; 1176 goto out; 1177 } 1178 1179 sk->sk_err = p->code; 1180 sk->sk_error_report(sk); 1181 out: 1182 return ret; 1183 } 1184 1185 /** 1186 * netlink_set_err - report error to broadcast listeners 1187 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() 1188 * @portid: the PORTID of a process that we want to skip (if any) 1189 * @groups: the broadcast group that will notice the error 1190 * @code: error code, must be negative (as usual in kernelspace) 1191 * 1192 * This function returns the number of broadcast listeners that have set the 1193 * NETLINK_RECV_NO_ENOBUFS socket option. 1194 */ 1195 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) 1196 { 1197 struct netlink_set_err_data info; 1198 struct sock *sk; 1199 int ret = 0; 1200 1201 info.exclude_sk = ssk; 1202 info.portid = portid; 1203 info.group = group; 1204 /* sk->sk_err wants a positive error value */ 1205 info.code = -code; 1206 1207 read_lock(&nl_table_lock); 1208 1209 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) 1210 ret += do_one_set_err(sk, &info); 1211 1212 read_unlock(&nl_table_lock); 1213 return ret; 1214 } 1215 EXPORT_SYMBOL(netlink_set_err); 1216 1217 /* must be called with netlink table grabbed */ 1218 static void netlink_update_socket_mc(struct netlink_sock *nlk, 1219 unsigned int group, 1220 int is_new) 1221 { 1222 int old, new = !!is_new, subscriptions; 1223 1224 old = test_bit(group - 1, nlk->groups); 1225 subscriptions = nlk->subscriptions - old + new; 1226 if (new) 1227 __set_bit(group - 1, nlk->groups); 1228 else 1229 __clear_bit(group - 1, nlk->groups); 1230 netlink_update_subscriptions(&nlk->sk, subscriptions); 1231 netlink_update_listeners(&nlk->sk); 1232 } 1233 1234 static int netlink_setsockopt(struct socket *sock, int level, int optname, 1235 char __user *optval, unsigned int optlen) 1236 { 1237 struct sock *sk = sock->sk; 1238 struct netlink_sock *nlk = nlk_sk(sk); 1239 unsigned int val = 0; 1240 int err; 1241 1242 if (level != SOL_NETLINK) 1243 return -ENOPROTOOPT; 1244 1245 if (optlen >= sizeof(int) && 1246 get_user(val, (unsigned int __user *)optval)) 1247 return -EFAULT; 1248 1249 switch (optname) { 1250 case NETLINK_PKTINFO: 1251 if (val) 1252 nlk->flags |= NETLINK_RECV_PKTINFO; 1253 else 1254 nlk->flags &= ~NETLINK_RECV_PKTINFO; 1255 err = 0; 1256 break; 1257 case NETLINK_ADD_MEMBERSHIP: 1258 case NETLINK_DROP_MEMBERSHIP: { 1259 if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) 1260 return -EPERM; 1261 err = netlink_realloc_groups(sk); 1262 if (err) 1263 return err; 1264 if (!val || val - 1 >= nlk->ngroups) 1265 return -EINVAL; 1266 netlink_table_grab(); 1267 netlink_update_socket_mc(nlk, val, 1268 optname == NETLINK_ADD_MEMBERSHIP); 1269 netlink_table_ungrab(); 1270 1271 if (nlk->netlink_bind) 1272 nlk->netlink_bind(val); 1273 1274 err = 0; 1275 break; 1276 } 1277 case NETLINK_BROADCAST_ERROR: 1278 if (val) 1279 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; 1280 else 1281 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; 1282 err = 0; 1283 break; 1284 case NETLINK_NO_ENOBUFS: 1285 if (val) { 1286 nlk->flags |= NETLINK_RECV_NO_ENOBUFS; 1287 clear_bit(0, &nlk->state); 1288 wake_up_interruptible(&nlk->wait); 1289 } else { 1290 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; 1291 } 1292 err = 0; 1293 break; 1294 default: 1295 err = -ENOPROTOOPT; 1296 } 1297 return err; 1298 } 1299 1300 static int netlink_getsockopt(struct socket *sock, int level, int optname, 1301 char __user *optval, int __user *optlen) 1302 { 1303 struct sock *sk = sock->sk; 1304 struct netlink_sock *nlk = nlk_sk(sk); 1305 int len, val, err; 1306 1307 if (level != SOL_NETLINK) 1308 return -ENOPROTOOPT; 1309 1310 if (get_user(len, optlen)) 1311 return -EFAULT; 1312 if (len < 0) 1313 return -EINVAL; 1314 1315 switch (optname) { 1316 case NETLINK_PKTINFO: 1317 if (len < sizeof(int)) 1318 return -EINVAL; 1319 len = sizeof(int); 1320 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; 1321 if (put_user(len, optlen) || 1322 put_user(val, optval)) 1323 return -EFAULT; 1324 err = 0; 1325 break; 1326 case NETLINK_BROADCAST_ERROR: 1327 if (len < sizeof(int)) 1328 return -EINVAL; 1329 len = sizeof(int); 1330 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; 1331 if (put_user(len, optlen) || 1332 put_user(val, optval)) 1333 return -EFAULT; 1334 err = 0; 1335 break; 1336 case NETLINK_NO_ENOBUFS: 1337 if (len < sizeof(int)) 1338 return -EINVAL; 1339 len = sizeof(int); 1340 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; 1341 if (put_user(len, optlen) || 1342 put_user(val, optval)) 1343 return -EFAULT; 1344 err = 0; 1345 break; 1346 default: 1347 err = -ENOPROTOOPT; 1348 } 1349 return err; 1350 } 1351 1352 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 1353 { 1354 struct nl_pktinfo info; 1355 1356 info.group = NETLINK_CB(skb).dst_group; 1357 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); 1358 } 1359 1360 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, 1361 struct msghdr *msg, size_t len) 1362 { 1363 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1364 struct sock *sk = sock->sk; 1365 struct netlink_sock *nlk = nlk_sk(sk); 1366 struct sockaddr_nl *addr = msg->msg_name; 1367 u32 dst_portid; 1368 u32 dst_group; 1369 struct sk_buff *skb; 1370 int err; 1371 struct scm_cookie scm; 1372 1373 if (msg->msg_flags&MSG_OOB) 1374 return -EOPNOTSUPP; 1375 1376 if (NULL == siocb->scm) 1377 siocb->scm = &scm; 1378 1379 err = scm_send(sock, msg, siocb->scm, true); 1380 if (err < 0) 1381 return err; 1382 1383 if (msg->msg_namelen) { 1384 err = -EINVAL; 1385 if (addr->nl_family != AF_NETLINK) 1386 goto out; 1387 dst_portid = addr->nl_pid; 1388 dst_group = ffs(addr->nl_groups); 1389 err = -EPERM; 1390 if ((dst_group || dst_portid) && 1391 !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) 1392 goto out; 1393 } else { 1394 dst_portid = nlk->dst_portid; 1395 dst_group = nlk->dst_group; 1396 } 1397 1398 if (!nlk->portid) { 1399 err = netlink_autobind(sock); 1400 if (err) 1401 goto out; 1402 } 1403 1404 err = -EMSGSIZE; 1405 if (len > sk->sk_sndbuf - 32) 1406 goto out; 1407 err = -ENOBUFS; 1408 skb = alloc_skb(len, GFP_KERNEL); 1409 if (skb == NULL) 1410 goto out; 1411 1412 NETLINK_CB(skb).portid = nlk->portid; 1413 NETLINK_CB(skb).dst_group = dst_group; 1414 NETLINK_CB(skb).creds = siocb->scm->creds; 1415 1416 err = -EFAULT; 1417 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1418 kfree_skb(skb); 1419 goto out; 1420 } 1421 1422 err = security_netlink_send(sk, skb); 1423 if (err) { 1424 kfree_skb(skb); 1425 goto out; 1426 } 1427 1428 if (dst_group) { 1429 atomic_inc(&skb->users); 1430 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); 1431 } 1432 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); 1433 1434 out: 1435 scm_destroy(siocb->scm); 1436 return err; 1437 } 1438 1439 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, 1440 struct msghdr *msg, size_t len, 1441 int flags) 1442 { 1443 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1444 struct scm_cookie scm; 1445 struct sock *sk = sock->sk; 1446 struct netlink_sock *nlk = nlk_sk(sk); 1447 int noblock = flags&MSG_DONTWAIT; 1448 size_t copied; 1449 struct sk_buff *skb, *data_skb; 1450 int err, ret; 1451 1452 if (flags&MSG_OOB) 1453 return -EOPNOTSUPP; 1454 1455 copied = 0; 1456 1457 skb = skb_recv_datagram(sk, flags, noblock, &err); 1458 if (skb == NULL) 1459 goto out; 1460 1461 data_skb = skb; 1462 1463 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES 1464 if (unlikely(skb_shinfo(skb)->frag_list)) { 1465 /* 1466 * If this skb has a frag_list, then here that means that we 1467 * will have to use the frag_list skb's data for compat tasks 1468 * and the regular skb's data for normal (non-compat) tasks. 1469 * 1470 * If we need to send the compat skb, assign it to the 1471 * 'data_skb' variable so that it will be used below for data 1472 * copying. We keep 'skb' for everything else, including 1473 * freeing both later. 1474 */ 1475 if (flags & MSG_CMSG_COMPAT) 1476 data_skb = skb_shinfo(skb)->frag_list; 1477 } 1478 #endif 1479 1480 msg->msg_namelen = 0; 1481 1482 copied = data_skb->len; 1483 if (len < copied) { 1484 msg->msg_flags |= MSG_TRUNC; 1485 copied = len; 1486 } 1487 1488 skb_reset_transport_header(data_skb); 1489 err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); 1490 1491 if (msg->msg_name) { 1492 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; 1493 addr->nl_family = AF_NETLINK; 1494 addr->nl_pad = 0; 1495 addr->nl_pid = NETLINK_CB(skb).portid; 1496 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); 1497 msg->msg_namelen = sizeof(*addr); 1498 } 1499 1500 if (nlk->flags & NETLINK_RECV_PKTINFO) 1501 netlink_cmsg_recv_pktinfo(msg, skb); 1502 1503 if (NULL == siocb->scm) { 1504 memset(&scm, 0, sizeof(scm)); 1505 siocb->scm = &scm; 1506 } 1507 siocb->scm->creds = *NETLINK_CREDS(skb); 1508 if (flags & MSG_TRUNC) 1509 copied = data_skb->len; 1510 1511 skb_free_datagram(sk, skb); 1512 1513 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 1514 ret = netlink_dump(sk); 1515 if (ret) { 1516 sk->sk_err = ret; 1517 sk->sk_error_report(sk); 1518 } 1519 } 1520 1521 scm_recv(sock, msg, siocb->scm, flags); 1522 out: 1523 netlink_rcv_wake(sk); 1524 return err ? : copied; 1525 } 1526 1527 static void netlink_data_ready(struct sock *sk, int len) 1528 { 1529 BUG(); 1530 } 1531 1532 /* 1533 * We export these functions to other modules. They provide a 1534 * complete set of kernel non-blocking support for message 1535 * queueing. 1536 */ 1537 1538 struct sock * 1539 __netlink_kernel_create(struct net *net, int unit, struct module *module, 1540 struct netlink_kernel_cfg *cfg) 1541 { 1542 struct socket *sock; 1543 struct sock *sk; 1544 struct netlink_sock *nlk; 1545 struct listeners *listeners = NULL; 1546 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; 1547 unsigned int groups; 1548 1549 BUG_ON(!nl_table); 1550 1551 if (unit < 0 || unit >= MAX_LINKS) 1552 return NULL; 1553 1554 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1555 return NULL; 1556 1557 /* 1558 * We have to just have a reference on the net from sk, but don't 1559 * get_net it. Besides, we cannot get and then put the net here. 1560 * So we create one inside init_net and the move it to net. 1561 */ 1562 1563 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) 1564 goto out_sock_release_nosk; 1565 1566 sk = sock->sk; 1567 sk_change_net(sk, net); 1568 1569 if (!cfg || cfg->groups < 32) 1570 groups = 32; 1571 else 1572 groups = cfg->groups; 1573 1574 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 1575 if (!listeners) 1576 goto out_sock_release; 1577 1578 sk->sk_data_ready = netlink_data_ready; 1579 if (cfg && cfg->input) 1580 nlk_sk(sk)->netlink_rcv = cfg->input; 1581 1582 if (netlink_insert(sk, net, 0)) 1583 goto out_sock_release; 1584 1585 nlk = nlk_sk(sk); 1586 nlk->flags |= NETLINK_KERNEL_SOCKET; 1587 1588 netlink_table_grab(); 1589 if (!nl_table[unit].registered) { 1590 nl_table[unit].groups = groups; 1591 rcu_assign_pointer(nl_table[unit].listeners, listeners); 1592 nl_table[unit].cb_mutex = cb_mutex; 1593 nl_table[unit].module = module; 1594 if (cfg) { 1595 nl_table[unit].bind = cfg->bind; 1596 nl_table[unit].flags = cfg->flags; 1597 } 1598 nl_table[unit].registered = 1; 1599 } else { 1600 kfree(listeners); 1601 nl_table[unit].registered++; 1602 } 1603 netlink_table_ungrab(); 1604 return sk; 1605 1606 out_sock_release: 1607 kfree(listeners); 1608 netlink_kernel_release(sk); 1609 return NULL; 1610 1611 out_sock_release_nosk: 1612 sock_release(sock); 1613 return NULL; 1614 } 1615 EXPORT_SYMBOL(__netlink_kernel_create); 1616 1617 void 1618 netlink_kernel_release(struct sock *sk) 1619 { 1620 sk_release_kernel(sk); 1621 } 1622 EXPORT_SYMBOL(netlink_kernel_release); 1623 1624 int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1625 { 1626 struct listeners *new, *old; 1627 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1628 1629 if (groups < 32) 1630 groups = 32; 1631 1632 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1633 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 1634 if (!new) 1635 return -ENOMEM; 1636 old = nl_deref_protected(tbl->listeners); 1637 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 1638 rcu_assign_pointer(tbl->listeners, new); 1639 1640 kfree_rcu(old, rcu); 1641 } 1642 tbl->groups = groups; 1643 1644 return 0; 1645 } 1646 1647 /** 1648 * netlink_change_ngroups - change number of multicast groups 1649 * 1650 * This changes the number of multicast groups that are available 1651 * on a certain netlink family. Note that it is not possible to 1652 * change the number of groups to below 32. Also note that it does 1653 * not implicitly call netlink_clear_multicast_users() when the 1654 * number of groups is reduced. 1655 * 1656 * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1657 * @groups: The new number of groups. 1658 */ 1659 int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1660 { 1661 int err; 1662 1663 netlink_table_grab(); 1664 err = __netlink_change_ngroups(sk, groups); 1665 netlink_table_ungrab(); 1666 1667 return err; 1668 } 1669 1670 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1671 { 1672 struct sock *sk; 1673 struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; 1674 1675 sk_for_each_bound(sk, &tbl->mc_list) 1676 netlink_update_socket_mc(nlk_sk(sk), group, 0); 1677 } 1678 1679 /** 1680 * netlink_clear_multicast_users - kick off multicast listeners 1681 * 1682 * This function removes all listeners from the given group. 1683 * @ksk: The kernel netlink socket, as returned by 1684 * netlink_kernel_create(). 1685 * @group: The multicast group to clear. 1686 */ 1687 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) 1688 { 1689 netlink_table_grab(); 1690 __netlink_clear_multicast_users(ksk, group); 1691 netlink_table_ungrab(); 1692 } 1693 1694 struct nlmsghdr * 1695 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 1696 { 1697 struct nlmsghdr *nlh; 1698 int size = NLMSG_LENGTH(len); 1699 1700 nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); 1701 nlh->nlmsg_type = type; 1702 nlh->nlmsg_len = size; 1703 nlh->nlmsg_flags = flags; 1704 nlh->nlmsg_pid = portid; 1705 nlh->nlmsg_seq = seq; 1706 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 1707 memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); 1708 return nlh; 1709 } 1710 EXPORT_SYMBOL(__nlmsg_put); 1711 1712 /* 1713 * It looks a bit ugly. 1714 * It would be better to create kernel thread. 1715 */ 1716 1717 static int netlink_dump(struct sock *sk) 1718 { 1719 struct netlink_sock *nlk = nlk_sk(sk); 1720 struct netlink_callback *cb; 1721 struct sk_buff *skb = NULL; 1722 struct nlmsghdr *nlh; 1723 int len, err = -ENOBUFS; 1724 int alloc_size; 1725 1726 mutex_lock(nlk->cb_mutex); 1727 1728 cb = nlk->cb; 1729 if (cb == NULL) { 1730 err = -EINVAL; 1731 goto errout_skb; 1732 } 1733 1734 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 1735 1736 skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); 1737 if (!skb) 1738 goto errout_skb; 1739 1740 len = cb->dump(skb, cb); 1741 1742 if (len > 0) { 1743 mutex_unlock(nlk->cb_mutex); 1744 1745 if (sk_filter(sk, skb)) 1746 kfree_skb(skb); 1747 else 1748 __netlink_sendskb(sk, skb); 1749 return 0; 1750 } 1751 1752 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 1753 if (!nlh) 1754 goto errout_skb; 1755 1756 nl_dump_check_consistent(cb, nlh); 1757 1758 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 1759 1760 if (sk_filter(sk, skb)) 1761 kfree_skb(skb); 1762 else 1763 __netlink_sendskb(sk, skb); 1764 1765 if (cb->done) 1766 cb->done(cb); 1767 nlk->cb = NULL; 1768 mutex_unlock(nlk->cb_mutex); 1769 1770 module_put(cb->module); 1771 netlink_consume_callback(cb); 1772 return 0; 1773 1774 errout_skb: 1775 mutex_unlock(nlk->cb_mutex); 1776 kfree_skb(skb); 1777 return err; 1778 } 1779 1780 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1781 const struct nlmsghdr *nlh, 1782 struct netlink_dump_control *control) 1783 { 1784 struct netlink_callback *cb; 1785 struct sock *sk; 1786 struct netlink_sock *nlk; 1787 int ret; 1788 1789 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1790 if (cb == NULL) 1791 return -ENOBUFS; 1792 1793 cb->dump = control->dump; 1794 cb->done = control->done; 1795 cb->nlh = nlh; 1796 cb->data = control->data; 1797 cb->module = control->module; 1798 cb->min_dump_alloc = control->min_dump_alloc; 1799 atomic_inc(&skb->users); 1800 cb->skb = skb; 1801 1802 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 1803 if (sk == NULL) { 1804 netlink_destroy_callback(cb); 1805 return -ECONNREFUSED; 1806 } 1807 nlk = nlk_sk(sk); 1808 1809 mutex_lock(nlk->cb_mutex); 1810 /* A dump is in progress... */ 1811 if (nlk->cb) { 1812 mutex_unlock(nlk->cb_mutex); 1813 netlink_destroy_callback(cb); 1814 ret = -EBUSY; 1815 goto out; 1816 } 1817 /* add reference of module which cb->dump belongs to */ 1818 if (!try_module_get(cb->module)) { 1819 mutex_unlock(nlk->cb_mutex); 1820 netlink_destroy_callback(cb); 1821 ret = -EPROTONOSUPPORT; 1822 goto out; 1823 } 1824 1825 nlk->cb = cb; 1826 mutex_unlock(nlk->cb_mutex); 1827 1828 ret = netlink_dump(sk); 1829 out: 1830 sock_put(sk); 1831 1832 if (ret) 1833 return ret; 1834 1835 /* We successfully started a dump, by returning -EINTR we 1836 * signal not to send ACK even if it was requested. 1837 */ 1838 return -EINTR; 1839 } 1840 EXPORT_SYMBOL(__netlink_dump_start); 1841 1842 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1843 { 1844 struct sk_buff *skb; 1845 struct nlmsghdr *rep; 1846 struct nlmsgerr *errmsg; 1847 size_t payload = sizeof(*errmsg); 1848 1849 /* error messages get the original request appened */ 1850 if (err) 1851 payload += nlmsg_len(nlh); 1852 1853 skb = nlmsg_new(payload, GFP_KERNEL); 1854 if (!skb) { 1855 struct sock *sk; 1856 1857 sk = netlink_lookup(sock_net(in_skb->sk), 1858 in_skb->sk->sk_protocol, 1859 NETLINK_CB(in_skb).portid); 1860 if (sk) { 1861 sk->sk_err = ENOBUFS; 1862 sk->sk_error_report(sk); 1863 sock_put(sk); 1864 } 1865 return; 1866 } 1867 1868 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 1869 NLMSG_ERROR, payload, 0); 1870 errmsg = nlmsg_data(rep); 1871 errmsg->error = err; 1872 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1873 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); 1874 } 1875 EXPORT_SYMBOL(netlink_ack); 1876 1877 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1878 struct nlmsghdr *)) 1879 { 1880 struct nlmsghdr *nlh; 1881 int err; 1882 1883 while (skb->len >= nlmsg_total_size(0)) { 1884 int msglen; 1885 1886 nlh = nlmsg_hdr(skb); 1887 err = 0; 1888 1889 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1890 return 0; 1891 1892 /* Only requests are handled by the kernel */ 1893 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) 1894 goto ack; 1895 1896 /* Skip control messages */ 1897 if (nlh->nlmsg_type < NLMSG_MIN_TYPE) 1898 goto ack; 1899 1900 err = cb(skb, nlh); 1901 if (err == -EINTR) 1902 goto skip; 1903 1904 ack: 1905 if (nlh->nlmsg_flags & NLM_F_ACK || err) 1906 netlink_ack(skb, nlh, err); 1907 1908 skip: 1909 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1910 if (msglen > skb->len) 1911 msglen = skb->len; 1912 skb_pull(skb, msglen); 1913 } 1914 1915 return 0; 1916 } 1917 EXPORT_SYMBOL(netlink_rcv_skb); 1918 1919 /** 1920 * nlmsg_notify - send a notification netlink message 1921 * @sk: netlink socket to use 1922 * @skb: notification message 1923 * @portid: destination netlink portid for reports or 0 1924 * @group: destination multicast group or 0 1925 * @report: 1 to report back, 0 to disable 1926 * @flags: allocation flags 1927 */ 1928 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, 1929 unsigned int group, int report, gfp_t flags) 1930 { 1931 int err = 0; 1932 1933 if (group) { 1934 int exclude_portid = 0; 1935 1936 if (report) { 1937 atomic_inc(&skb->users); 1938 exclude_portid = portid; 1939 } 1940 1941 /* errors reported via destination sk->sk_err, but propagate 1942 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ 1943 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); 1944 } 1945 1946 if (report) { 1947 int err2; 1948 1949 err2 = nlmsg_unicast(sk, skb, portid); 1950 if (!err || err == -ESRCH) 1951 err = err2; 1952 } 1953 1954 return err; 1955 } 1956 EXPORT_SYMBOL(nlmsg_notify); 1957 1958 #ifdef CONFIG_PROC_FS 1959 struct nl_seq_iter { 1960 struct seq_net_private p; 1961 int link; 1962 int hash_idx; 1963 }; 1964 1965 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) 1966 { 1967 struct nl_seq_iter *iter = seq->private; 1968 int i, j; 1969 struct sock *s; 1970 loff_t off = 0; 1971 1972 for (i = 0; i < MAX_LINKS; i++) { 1973 struct nl_portid_hash *hash = &nl_table[i].hash; 1974 1975 for (j = 0; j <= hash->mask; j++) { 1976 sk_for_each(s, &hash->table[j]) { 1977 if (sock_net(s) != seq_file_net(seq)) 1978 continue; 1979 if (off == pos) { 1980 iter->link = i; 1981 iter->hash_idx = j; 1982 return s; 1983 } 1984 ++off; 1985 } 1986 } 1987 } 1988 return NULL; 1989 } 1990 1991 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 1992 __acquires(nl_table_lock) 1993 { 1994 read_lock(&nl_table_lock); 1995 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1996 } 1997 1998 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1999 { 2000 struct sock *s; 2001 struct nl_seq_iter *iter; 2002 int i, j; 2003 2004 ++*pos; 2005 2006 if (v == SEQ_START_TOKEN) 2007 return netlink_seq_socket_idx(seq, 0); 2008 2009 iter = seq->private; 2010 s = v; 2011 do { 2012 s = sk_next(s); 2013 } while (s && sock_net(s) != seq_file_net(seq)); 2014 if (s) 2015 return s; 2016 2017 i = iter->link; 2018 j = iter->hash_idx + 1; 2019 2020 do { 2021 struct nl_portid_hash *hash = &nl_table[i].hash; 2022 2023 for (; j <= hash->mask; j++) { 2024 s = sk_head(&hash->table[j]); 2025 while (s && sock_net(s) != seq_file_net(seq)) 2026 s = sk_next(s); 2027 if (s) { 2028 iter->link = i; 2029 iter->hash_idx = j; 2030 return s; 2031 } 2032 } 2033 2034 j = 0; 2035 } while (++i < MAX_LINKS); 2036 2037 return NULL; 2038 } 2039 2040 static void netlink_seq_stop(struct seq_file *seq, void *v) 2041 __releases(nl_table_lock) 2042 { 2043 read_unlock(&nl_table_lock); 2044 } 2045 2046 2047 static int netlink_seq_show(struct seq_file *seq, void *v) 2048 { 2049 if (v == SEQ_START_TOKEN) { 2050 seq_puts(seq, 2051 "sk Eth Pid Groups " 2052 "Rmem Wmem Dump Locks Drops Inode\n"); 2053 } else { 2054 struct sock *s = v; 2055 struct netlink_sock *nlk = nlk_sk(s); 2056 2057 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", 2058 s, 2059 s->sk_protocol, 2060 nlk->portid, 2061 nlk->groups ? (u32)nlk->groups[0] : 0, 2062 sk_rmem_alloc_get(s), 2063 sk_wmem_alloc_get(s), 2064 nlk->cb, 2065 atomic_read(&s->sk_refcnt), 2066 atomic_read(&s->sk_drops), 2067 sock_i_ino(s) 2068 ); 2069 2070 } 2071 return 0; 2072 } 2073 2074 static const struct seq_operations netlink_seq_ops = { 2075 .start = netlink_seq_start, 2076 .next = netlink_seq_next, 2077 .stop = netlink_seq_stop, 2078 .show = netlink_seq_show, 2079 }; 2080 2081 2082 static int netlink_seq_open(struct inode *inode, struct file *file) 2083 { 2084 return seq_open_net(inode, file, &netlink_seq_ops, 2085 sizeof(struct nl_seq_iter)); 2086 } 2087 2088 static const struct file_operations netlink_seq_fops = { 2089 .owner = THIS_MODULE, 2090 .open = netlink_seq_open, 2091 .read = seq_read, 2092 .llseek = seq_lseek, 2093 .release = seq_release_net, 2094 }; 2095 2096 #endif 2097 2098 int netlink_register_notifier(struct notifier_block *nb) 2099 { 2100 return atomic_notifier_chain_register(&netlink_chain, nb); 2101 } 2102 EXPORT_SYMBOL(netlink_register_notifier); 2103 2104 int netlink_unregister_notifier(struct notifier_block *nb) 2105 { 2106 return atomic_notifier_chain_unregister(&netlink_chain, nb); 2107 } 2108 EXPORT_SYMBOL(netlink_unregister_notifier); 2109 2110 static const struct proto_ops netlink_ops = { 2111 .family = PF_NETLINK, 2112 .owner = THIS_MODULE, 2113 .release = netlink_release, 2114 .bind = netlink_bind, 2115 .connect = netlink_connect, 2116 .socketpair = sock_no_socketpair, 2117 .accept = sock_no_accept, 2118 .getname = netlink_getname, 2119 .poll = datagram_poll, 2120 .ioctl = sock_no_ioctl, 2121 .listen = sock_no_listen, 2122 .shutdown = sock_no_shutdown, 2123 .setsockopt = netlink_setsockopt, 2124 .getsockopt = netlink_getsockopt, 2125 .sendmsg = netlink_sendmsg, 2126 .recvmsg = netlink_recvmsg, 2127 .mmap = sock_no_mmap, 2128 .sendpage = sock_no_sendpage, 2129 }; 2130 2131 static const struct net_proto_family netlink_family_ops = { 2132 .family = PF_NETLINK, 2133 .create = netlink_create, 2134 .owner = THIS_MODULE, /* for consistency 8) */ 2135 }; 2136 2137 static int __net_init netlink_net_init(struct net *net) 2138 { 2139 #ifdef CONFIG_PROC_FS 2140 if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) 2141 return -ENOMEM; 2142 #endif 2143 return 0; 2144 } 2145 2146 static void __net_exit netlink_net_exit(struct net *net) 2147 { 2148 #ifdef CONFIG_PROC_FS 2149 remove_proc_entry("netlink", net->proc_net); 2150 #endif 2151 } 2152 2153 static void __init netlink_add_usersock_entry(void) 2154 { 2155 struct listeners *listeners; 2156 int groups = 32; 2157 2158 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); 2159 if (!listeners) 2160 panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); 2161 2162 netlink_table_grab(); 2163 2164 nl_table[NETLINK_USERSOCK].groups = groups; 2165 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); 2166 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2167 nl_table[NETLINK_USERSOCK].registered = 1; 2168 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; 2169 2170 netlink_table_ungrab(); 2171 } 2172 2173 static struct pernet_operations __net_initdata netlink_net_ops = { 2174 .init = netlink_net_init, 2175 .exit = netlink_net_exit, 2176 }; 2177 2178 static int __init netlink_proto_init(void) 2179 { 2180 int i; 2181 unsigned long limit; 2182 unsigned int order; 2183 int err = proto_register(&netlink_proto, 0); 2184 2185 if (err != 0) 2186 goto out; 2187 2188 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); 2189 2190 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 2191 if (!nl_table) 2192 goto panic; 2193 2194 if (totalram_pages >= (128 * 1024)) 2195 limit = totalram_pages >> (21 - PAGE_SHIFT); 2196 else 2197 limit = totalram_pages >> (23 - PAGE_SHIFT); 2198 2199 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; 2200 limit = (1UL << order) / sizeof(struct hlist_head); 2201 order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; 2202 2203 for (i = 0; i < MAX_LINKS; i++) { 2204 struct nl_portid_hash *hash = &nl_table[i].hash; 2205 2206 hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); 2207 if (!hash->table) { 2208 while (i-- > 0) 2209 nl_portid_hash_free(nl_table[i].hash.table, 2210 1 * sizeof(*hash->table)); 2211 kfree(nl_table); 2212 goto panic; 2213 } 2214 hash->max_shift = order; 2215 hash->shift = 0; 2216 hash->mask = 0; 2217 hash->rehash_time = jiffies; 2218 } 2219 2220 netlink_add_usersock_entry(); 2221 2222 sock_register(&netlink_family_ops); 2223 register_pernet_subsys(&netlink_net_ops); 2224 /* The netlink device handler may be needed early. */ 2225 rtnetlink_init(); 2226 out: 2227 return err; 2228 panic: 2229 panic("netlink_init: Cannot allocate nl_table\n"); 2230 } 2231 2232 core_initcall(netlink_proto_init); 2233