1 /* 2 * NET3 Protocol independent device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the non IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Florian la Roche <rzsfl@rz.uni-sb.de> 16 * Alan Cox <gw4pts@gw4pts.ampr.org> 17 * David Hinds <dahinds@users.sourceforge.net> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 19 * Adam Sulmicki <adam@cfar.umd.edu> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 21 * 22 * Changes: 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 24 * to 2 if register_netdev gets called 25 * before net_dev_init & also removed a 26 * few lines of code in the process. 27 * Alan Cox : device private ioctl copies fields back. 28 * Alan Cox : Transmit queue code does relevant 29 * stunts to keep the queue safe. 30 * Alan Cox : Fixed double lock. 31 * Alan Cox : Fixed promisc NULL pointer trap 32 * ???????? : Support the full private ioctl range 33 * Alan Cox : Moved ioctl permission check into 34 * drivers 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 36 * Alan Cox : 100 backlog just doesn't cut it when 37 * you start doing multicast video 8) 38 * Alan Cox : Rewrote net_bh and list manager. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 40 * Alan Cox : Took out transmit every packet pass 41 * Saved a few bytes in the ioctl handler 42 * Alan Cox : Network driver sets packet type before 43 * calling netif_rx. Saves a function 44 * call a packet. 45 * Alan Cox : Hashed net_bh() 46 * Richard Kooijman: Timestamp fixes. 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 48 * Alan Cox : Device lock protection. 49 * Alan Cox : Fixed nasty side effect of device close 50 * changes. 51 * Rudi Cilibrasi : Pass the right thing to 52 * set_mac_address() 53 * Dave Miller : 32bit quantity for the device lock to 54 * make it work out on a Sparc. 55 * Bjorn Ekwall : Added KERNELD hack. 56 * Alan Cox : Cleaned up the backlog initialise. 57 * Craig Metz : SIOCGIFCONF fix if space for under 58 * 1 device. 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 60 * is no device open function. 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 63 * Cyrus Durgin : Cleaned for KMOD 64 * Adam Sulmicki : Bug Fix : Network Device Unload 65 * A network device unload needs to purge 66 * the backlog queue. 67 * Paul Rusty Russell : SIOCSIFNAME 68 * Pekka Riikonen : Netdev boot-time settings code 69 * Andrew Morton : Make unregister_netdevice wait 70 * indefinitely on dev->refcnt 71 * J Hadi Salim : - Backlog queue sampling 72 * - netif_rx() feedback 73 */ 74 75 #include <asm/uaccess.h> 76 #include <asm/system.h> 77 #include <linux/bitops.h> 78 #include <linux/capability.h> 79 #include <linux/config.h> 80 #include <linux/cpu.h> 81 #include <linux/types.h> 82 #include <linux/kernel.h> 83 #include <linux/sched.h> 84 #include <linux/mutex.h> 85 #include <linux/string.h> 86 #include <linux/mm.h> 87 #include <linux/socket.h> 88 #include <linux/sockios.h> 89 #include <linux/errno.h> 90 #include <linux/interrupt.h> 91 #include <linux/if_ether.h> 92 #include <linux/netdevice.h> 93 #include <linux/etherdevice.h> 94 #include <linux/notifier.h> 95 #include <linux/skbuff.h> 96 #include <net/sock.h> 97 #include <linux/rtnetlink.h> 98 #include <linux/proc_fs.h> 99 #include <linux/seq_file.h> 100 #include <linux/stat.h> 101 #include <linux/if_bridge.h> 102 #include <linux/divert.h> 103 #include <net/dst.h> 104 #include <net/pkt_sched.h> 105 #include <net/checksum.h> 106 #include <linux/highmem.h> 107 #include <linux/init.h> 108 #include <linux/kmod.h> 109 #include <linux/module.h> 110 #include <linux/kallsyms.h> 111 #include <linux/netpoll.h> 112 #include <linux/rcupdate.h> 113 #include <linux/delay.h> 114 #include <linux/wireless.h> 115 #include <net/iw_handler.h> 116 #include <asm/current.h> 117 #include <linux/audit.h> 118 119 /* 120 * The list of packet types we will receive (as opposed to discard) 121 * and the routines to invoke. 122 * 123 * Why 16. Because with 16 the only overlap we get on a hash of the 124 * low nibble of the protocol value is RARP/SNAP/X.25. 125 * 126 * NOTE: That is no longer true with the addition of VLAN tags. Not 127 * sure which should go first, but I bet it won't make much 128 * difference if we are running VLANs. The good news is that 129 * this protocol won't be in the list unless compiled in, so 130 * the average user (w/out VLANs) will not be adversly affected. 131 * --BLG 132 * 133 * 0800 IP 134 * 8100 802.1Q VLAN 135 * 0001 802.3 136 * 0002 AX.25 137 * 0004 802.2 138 * 8035 RARP 139 * 0005 SNAP 140 * 0805 X.25 141 * 0806 ARP 142 * 8137 IPX 143 * 0009 Localtalk 144 * 86DD IPv6 145 */ 146 147 static DEFINE_SPINLOCK(ptype_lock); 148 static struct list_head ptype_base[16]; /* 16 way hashed list */ 149 static struct list_head ptype_all; /* Taps */ 150 151 /* 152 * The @dev_base list is protected by @dev_base_lock and the rtln 153 * semaphore. 154 * 155 * Pure readers hold dev_base_lock for reading. 156 * 157 * Writers must hold the rtnl semaphore while they loop through the 158 * dev_base list, and hold dev_base_lock for writing when they do the 159 * actual updates. This allows pure readers to access the list even 160 * while a writer is preparing to update it. 161 * 162 * To put it another way, dev_base_lock is held for writing only to 163 * protect against pure readers; the rtnl semaphore provides the 164 * protection against other writers. 165 * 166 * See, for example usages, register_netdevice() and 167 * unregister_netdevice(), which must be called with the rtnl 168 * semaphore held. 169 */ 170 struct net_device *dev_base; 171 static struct net_device **dev_tail = &dev_base; 172 DEFINE_RWLOCK(dev_base_lock); 173 174 EXPORT_SYMBOL(dev_base); 175 EXPORT_SYMBOL(dev_base_lock); 176 177 #define NETDEV_HASHBITS 8 178 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; 179 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; 180 181 static inline struct hlist_head *dev_name_hash(const char *name) 182 { 183 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 184 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; 185 } 186 187 static inline struct hlist_head *dev_index_hash(int ifindex) 188 { 189 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; 190 } 191 192 /* 193 * Our notifier list 194 */ 195 196 static BLOCKING_NOTIFIER_HEAD(netdev_chain); 197 198 /* 199 * Device drivers call our routines to queue packets here. We empty the 200 * queue in the local softnet handler. 201 */ 202 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; 203 204 #ifdef CONFIG_SYSFS 205 extern int netdev_sysfs_init(void); 206 extern int netdev_register_sysfs(struct net_device *); 207 extern void netdev_unregister_sysfs(struct net_device *); 208 #else 209 #define netdev_sysfs_init() (0) 210 #define netdev_register_sysfs(dev) (0) 211 #define netdev_unregister_sysfs(dev) do { } while(0) 212 #endif 213 214 215 /******************************************************************************* 216 217 Protocol management and registration routines 218 219 *******************************************************************************/ 220 221 /* 222 * For efficiency 223 */ 224 225 int netdev_nit; 226 227 /* 228 * Add a protocol ID to the list. Now that the input handler is 229 * smarter we can dispense with all the messy stuff that used to be 230 * here. 231 * 232 * BEWARE!!! Protocol handlers, mangling input packets, 233 * MUST BE last in hash buckets and checking protocol handlers 234 * MUST start from promiscuous ptype_all chain in net_bh. 235 * It is true now, do not change it. 236 * Explanation follows: if protocol handler, mangling packet, will 237 * be the first on list, it is not able to sense, that packet 238 * is cloned and should be copied-on-write, so that it will 239 * change it and subsequent readers will get broken packet. 240 * --ANK (980803) 241 */ 242 243 /** 244 * dev_add_pack - add packet handler 245 * @pt: packet type declaration 246 * 247 * Add a protocol handler to the networking stack. The passed &packet_type 248 * is linked into kernel lists and may not be freed until it has been 249 * removed from the kernel lists. 250 * 251 * This call does not sleep therefore it can not 252 * guarantee all CPU's that are in middle of receiving packets 253 * will see the new packet type (until the next received packet). 254 */ 255 256 void dev_add_pack(struct packet_type *pt) 257 { 258 int hash; 259 260 spin_lock_bh(&ptype_lock); 261 if (pt->type == htons(ETH_P_ALL)) { 262 netdev_nit++; 263 list_add_rcu(&pt->list, &ptype_all); 264 } else { 265 hash = ntohs(pt->type) & 15; 266 list_add_rcu(&pt->list, &ptype_base[hash]); 267 } 268 spin_unlock_bh(&ptype_lock); 269 } 270 271 /** 272 * __dev_remove_pack - remove packet handler 273 * @pt: packet type declaration 274 * 275 * Remove a protocol handler that was previously added to the kernel 276 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 277 * from the kernel lists and can be freed or reused once this function 278 * returns. 279 * 280 * The packet type might still be in use by receivers 281 * and must not be freed until after all the CPU's have gone 282 * through a quiescent state. 283 */ 284 void __dev_remove_pack(struct packet_type *pt) 285 { 286 struct list_head *head; 287 struct packet_type *pt1; 288 289 spin_lock_bh(&ptype_lock); 290 291 if (pt->type == htons(ETH_P_ALL)) { 292 netdev_nit--; 293 head = &ptype_all; 294 } else 295 head = &ptype_base[ntohs(pt->type) & 15]; 296 297 list_for_each_entry(pt1, head, list) { 298 if (pt == pt1) { 299 list_del_rcu(&pt->list); 300 goto out; 301 } 302 } 303 304 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 305 out: 306 spin_unlock_bh(&ptype_lock); 307 } 308 /** 309 * dev_remove_pack - remove packet handler 310 * @pt: packet type declaration 311 * 312 * Remove a protocol handler that was previously added to the kernel 313 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 314 * from the kernel lists and can be freed or reused once this function 315 * returns. 316 * 317 * This call sleeps to guarantee that no CPU is looking at the packet 318 * type after return. 319 */ 320 void dev_remove_pack(struct packet_type *pt) 321 { 322 __dev_remove_pack(pt); 323 324 synchronize_net(); 325 } 326 327 /****************************************************************************** 328 329 Device Boot-time Settings Routines 330 331 *******************************************************************************/ 332 333 /* Boot time configuration table */ 334 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 335 336 /** 337 * netdev_boot_setup_add - add new setup entry 338 * @name: name of the device 339 * @map: configured settings for the device 340 * 341 * Adds new setup entry to the dev_boot_setup list. The function 342 * returns 0 on error and 1 on success. This is a generic routine to 343 * all netdevices. 344 */ 345 static int netdev_boot_setup_add(char *name, struct ifmap *map) 346 { 347 struct netdev_boot_setup *s; 348 int i; 349 350 s = dev_boot_setup; 351 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 352 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 353 memset(s[i].name, 0, sizeof(s[i].name)); 354 strcpy(s[i].name, name); 355 memcpy(&s[i].map, map, sizeof(s[i].map)); 356 break; 357 } 358 } 359 360 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 361 } 362 363 /** 364 * netdev_boot_setup_check - check boot time settings 365 * @dev: the netdevice 366 * 367 * Check boot time settings for the device. 368 * The found settings are set for the device to be used 369 * later in the device probing. 370 * Returns 0 if no settings found, 1 if they are. 371 */ 372 int netdev_boot_setup_check(struct net_device *dev) 373 { 374 struct netdev_boot_setup *s = dev_boot_setup; 375 int i; 376 377 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 378 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 379 !strncmp(dev->name, s[i].name, strlen(s[i].name))) { 380 dev->irq = s[i].map.irq; 381 dev->base_addr = s[i].map.base_addr; 382 dev->mem_start = s[i].map.mem_start; 383 dev->mem_end = s[i].map.mem_end; 384 return 1; 385 } 386 } 387 return 0; 388 } 389 390 391 /** 392 * netdev_boot_base - get address from boot time settings 393 * @prefix: prefix for network device 394 * @unit: id for network device 395 * 396 * Check boot time settings for the base address of device. 397 * The found settings are set for the device to be used 398 * later in the device probing. 399 * Returns 0 if no settings found. 400 */ 401 unsigned long netdev_boot_base(const char *prefix, int unit) 402 { 403 const struct netdev_boot_setup *s = dev_boot_setup; 404 char name[IFNAMSIZ]; 405 int i; 406 407 sprintf(name, "%s%d", prefix, unit); 408 409 /* 410 * If device already registered then return base of 1 411 * to indicate not to probe for this interface 412 */ 413 if (__dev_get_by_name(name)) 414 return 1; 415 416 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 417 if (!strcmp(name, s[i].name)) 418 return s[i].map.base_addr; 419 return 0; 420 } 421 422 /* 423 * Saves at boot time configured settings for any netdevice. 424 */ 425 int __init netdev_boot_setup(char *str) 426 { 427 int ints[5]; 428 struct ifmap map; 429 430 str = get_options(str, ARRAY_SIZE(ints), ints); 431 if (!str || !*str) 432 return 0; 433 434 /* Save settings */ 435 memset(&map, 0, sizeof(map)); 436 if (ints[0] > 0) 437 map.irq = ints[1]; 438 if (ints[0] > 1) 439 map.base_addr = ints[2]; 440 if (ints[0] > 2) 441 map.mem_start = ints[3]; 442 if (ints[0] > 3) 443 map.mem_end = ints[4]; 444 445 /* Add new entry to the list */ 446 return netdev_boot_setup_add(str, &map); 447 } 448 449 __setup("netdev=", netdev_boot_setup); 450 451 /******************************************************************************* 452 453 Device Interface Subroutines 454 455 *******************************************************************************/ 456 457 /** 458 * __dev_get_by_name - find a device by its name 459 * @name: name to find 460 * 461 * Find an interface by name. Must be called under RTNL semaphore 462 * or @dev_base_lock. If the name is found a pointer to the device 463 * is returned. If the name is not found then %NULL is returned. The 464 * reference counters are not incremented so the caller must be 465 * careful with locks. 466 */ 467 468 struct net_device *__dev_get_by_name(const char *name) 469 { 470 struct hlist_node *p; 471 472 hlist_for_each(p, dev_name_hash(name)) { 473 struct net_device *dev 474 = hlist_entry(p, struct net_device, name_hlist); 475 if (!strncmp(dev->name, name, IFNAMSIZ)) 476 return dev; 477 } 478 return NULL; 479 } 480 481 /** 482 * dev_get_by_name - find a device by its name 483 * @name: name to find 484 * 485 * Find an interface by name. This can be called from any 486 * context and does its own locking. The returned handle has 487 * the usage count incremented and the caller must use dev_put() to 488 * release it when it is no longer needed. %NULL is returned if no 489 * matching device is found. 490 */ 491 492 struct net_device *dev_get_by_name(const char *name) 493 { 494 struct net_device *dev; 495 496 read_lock(&dev_base_lock); 497 dev = __dev_get_by_name(name); 498 if (dev) 499 dev_hold(dev); 500 read_unlock(&dev_base_lock); 501 return dev; 502 } 503 504 /** 505 * __dev_get_by_index - find a device by its ifindex 506 * @ifindex: index of device 507 * 508 * Search for an interface by index. Returns %NULL if the device 509 * is not found or a pointer to the device. The device has not 510 * had its reference counter increased so the caller must be careful 511 * about locking. The caller must hold either the RTNL semaphore 512 * or @dev_base_lock. 513 */ 514 515 struct net_device *__dev_get_by_index(int ifindex) 516 { 517 struct hlist_node *p; 518 519 hlist_for_each(p, dev_index_hash(ifindex)) { 520 struct net_device *dev 521 = hlist_entry(p, struct net_device, index_hlist); 522 if (dev->ifindex == ifindex) 523 return dev; 524 } 525 return NULL; 526 } 527 528 529 /** 530 * dev_get_by_index - find a device by its ifindex 531 * @ifindex: index of device 532 * 533 * Search for an interface by index. Returns NULL if the device 534 * is not found or a pointer to the device. The device returned has 535 * had a reference added and the pointer is safe until the user calls 536 * dev_put to indicate they have finished with it. 537 */ 538 539 struct net_device *dev_get_by_index(int ifindex) 540 { 541 struct net_device *dev; 542 543 read_lock(&dev_base_lock); 544 dev = __dev_get_by_index(ifindex); 545 if (dev) 546 dev_hold(dev); 547 read_unlock(&dev_base_lock); 548 return dev; 549 } 550 551 /** 552 * dev_getbyhwaddr - find a device by its hardware address 553 * @type: media type of device 554 * @ha: hardware address 555 * 556 * Search for an interface by MAC address. Returns NULL if the device 557 * is not found or a pointer to the device. The caller must hold the 558 * rtnl semaphore. The returned device has not had its ref count increased 559 * and the caller must therefore be careful about locking 560 * 561 * BUGS: 562 * If the API was consistent this would be __dev_get_by_hwaddr 563 */ 564 565 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) 566 { 567 struct net_device *dev; 568 569 ASSERT_RTNL(); 570 571 for (dev = dev_base; dev; dev = dev->next) 572 if (dev->type == type && 573 !memcmp(dev->dev_addr, ha, dev->addr_len)) 574 break; 575 return dev; 576 } 577 578 EXPORT_SYMBOL(dev_getbyhwaddr); 579 580 struct net_device *dev_getfirstbyhwtype(unsigned short type) 581 { 582 struct net_device *dev; 583 584 rtnl_lock(); 585 for (dev = dev_base; dev; dev = dev->next) { 586 if (dev->type == type) { 587 dev_hold(dev); 588 break; 589 } 590 } 591 rtnl_unlock(); 592 return dev; 593 } 594 595 EXPORT_SYMBOL(dev_getfirstbyhwtype); 596 597 /** 598 * dev_get_by_flags - find any device with given flags 599 * @if_flags: IFF_* values 600 * @mask: bitmask of bits in if_flags to check 601 * 602 * Search for any interface with the given flags. Returns NULL if a device 603 * is not found or a pointer to the device. The device returned has 604 * had a reference added and the pointer is safe until the user calls 605 * dev_put to indicate they have finished with it. 606 */ 607 608 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) 609 { 610 struct net_device *dev; 611 612 read_lock(&dev_base_lock); 613 for (dev = dev_base; dev != NULL; dev = dev->next) { 614 if (((dev->flags ^ if_flags) & mask) == 0) { 615 dev_hold(dev); 616 break; 617 } 618 } 619 read_unlock(&dev_base_lock); 620 return dev; 621 } 622 623 /** 624 * dev_valid_name - check if name is okay for network device 625 * @name: name string 626 * 627 * Network device names need to be valid file names to 628 * to allow sysfs to work 629 */ 630 int dev_valid_name(const char *name) 631 { 632 return !(*name == '\0' 633 || !strcmp(name, ".") 634 || !strcmp(name, "..") 635 || strchr(name, '/')); 636 } 637 638 /** 639 * dev_alloc_name - allocate a name for a device 640 * @dev: device 641 * @name: name format string 642 * 643 * Passed a format string - eg "lt%d" it will try and find a suitable 644 * id. Not efficient for many devices, not called a lot. The caller 645 * must hold the dev_base or rtnl lock while allocating the name and 646 * adding the device in order to avoid duplicates. Returns the number 647 * of the unit assigned or a negative errno code. 648 */ 649 650 int dev_alloc_name(struct net_device *dev, const char *name) 651 { 652 int i = 0; 653 char buf[IFNAMSIZ]; 654 const char *p; 655 const int max_netdevices = 8*PAGE_SIZE; 656 long *inuse; 657 struct net_device *d; 658 659 p = strnchr(name, IFNAMSIZ-1, '%'); 660 if (p) { 661 /* 662 * Verify the string as this thing may have come from 663 * the user. There must be either one "%d" and no other "%" 664 * characters. 665 */ 666 if (p[1] != 'd' || strchr(p + 2, '%')) 667 return -EINVAL; 668 669 /* Use one page as a bit array of possible slots */ 670 inuse = (long *) get_zeroed_page(GFP_ATOMIC); 671 if (!inuse) 672 return -ENOMEM; 673 674 for (d = dev_base; d; d = d->next) { 675 if (!sscanf(d->name, name, &i)) 676 continue; 677 if (i < 0 || i >= max_netdevices) 678 continue; 679 680 /* avoid cases where sscanf is not exact inverse of printf */ 681 snprintf(buf, sizeof(buf), name, i); 682 if (!strncmp(buf, d->name, IFNAMSIZ)) 683 set_bit(i, inuse); 684 } 685 686 i = find_first_zero_bit(inuse, max_netdevices); 687 free_page((unsigned long) inuse); 688 } 689 690 snprintf(buf, sizeof(buf), name, i); 691 if (!__dev_get_by_name(buf)) { 692 strlcpy(dev->name, buf, IFNAMSIZ); 693 return i; 694 } 695 696 /* It is possible to run out of possible slots 697 * when the name is long and there isn't enough space left 698 * for the digits, or if all bits are used. 699 */ 700 return -ENFILE; 701 } 702 703 704 /** 705 * dev_change_name - change name of a device 706 * @dev: device 707 * @newname: name (or format string) must be at least IFNAMSIZ 708 * 709 * Change name of a device, can pass format strings "eth%d". 710 * for wildcarding. 711 */ 712 int dev_change_name(struct net_device *dev, char *newname) 713 { 714 int err = 0; 715 716 ASSERT_RTNL(); 717 718 if (dev->flags & IFF_UP) 719 return -EBUSY; 720 721 if (!dev_valid_name(newname)) 722 return -EINVAL; 723 724 if (strchr(newname, '%')) { 725 err = dev_alloc_name(dev, newname); 726 if (err < 0) 727 return err; 728 strcpy(newname, dev->name); 729 } 730 else if (__dev_get_by_name(newname)) 731 return -EEXIST; 732 else 733 strlcpy(dev->name, newname, IFNAMSIZ); 734 735 err = class_device_rename(&dev->class_dev, dev->name); 736 if (!err) { 737 hlist_del(&dev->name_hlist); 738 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); 739 blocking_notifier_call_chain(&netdev_chain, 740 NETDEV_CHANGENAME, dev); 741 } 742 743 return err; 744 } 745 746 /** 747 * netdev_features_change - device changes fatures 748 * @dev: device to cause notification 749 * 750 * Called to indicate a device has changed features. 751 */ 752 void netdev_features_change(struct net_device *dev) 753 { 754 blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); 755 } 756 EXPORT_SYMBOL(netdev_features_change); 757 758 /** 759 * netdev_state_change - device changes state 760 * @dev: device to cause notification 761 * 762 * Called to indicate a device has changed state. This function calls 763 * the notifier chains for netdev_chain and sends a NEWLINK message 764 * to the routing socket. 765 */ 766 void netdev_state_change(struct net_device *dev) 767 { 768 if (dev->flags & IFF_UP) { 769 blocking_notifier_call_chain(&netdev_chain, 770 NETDEV_CHANGE, dev); 771 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 772 } 773 } 774 775 /** 776 * dev_load - load a network module 777 * @name: name of interface 778 * 779 * If a network interface is not present and the process has suitable 780 * privileges this function loads the module. If module loading is not 781 * available in this kernel then it becomes a nop. 782 */ 783 784 void dev_load(const char *name) 785 { 786 struct net_device *dev; 787 788 read_lock(&dev_base_lock); 789 dev = __dev_get_by_name(name); 790 read_unlock(&dev_base_lock); 791 792 if (!dev && capable(CAP_SYS_MODULE)) 793 request_module("%s", name); 794 } 795 796 static int default_rebuild_header(struct sk_buff *skb) 797 { 798 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", 799 skb->dev ? skb->dev->name : "NULL!!!"); 800 kfree_skb(skb); 801 return 1; 802 } 803 804 805 /** 806 * dev_open - prepare an interface for use. 807 * @dev: device to open 808 * 809 * Takes a device from down to up state. The device's private open 810 * function is invoked and then the multicast lists are loaded. Finally 811 * the device is moved into the up state and a %NETDEV_UP message is 812 * sent to the netdev notifier chain. 813 * 814 * Calling this function on an active interface is a nop. On a failure 815 * a negative errno code is returned. 816 */ 817 int dev_open(struct net_device *dev) 818 { 819 int ret = 0; 820 821 /* 822 * Is it already up? 823 */ 824 825 if (dev->flags & IFF_UP) 826 return 0; 827 828 /* 829 * Is it even present? 830 */ 831 if (!netif_device_present(dev)) 832 return -ENODEV; 833 834 /* 835 * Call device private open method 836 */ 837 set_bit(__LINK_STATE_START, &dev->state); 838 if (dev->open) { 839 ret = dev->open(dev); 840 if (ret) 841 clear_bit(__LINK_STATE_START, &dev->state); 842 } 843 844 /* 845 * If it went open OK then: 846 */ 847 848 if (!ret) { 849 /* 850 * Set the flags. 851 */ 852 dev->flags |= IFF_UP; 853 854 /* 855 * Initialize multicasting status 856 */ 857 dev_mc_upload(dev); 858 859 /* 860 * Wakeup transmit queue engine 861 */ 862 dev_activate(dev); 863 864 /* 865 * ... and announce new interface. 866 */ 867 blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); 868 } 869 return ret; 870 } 871 872 /** 873 * dev_close - shutdown an interface. 874 * @dev: device to shutdown 875 * 876 * This function moves an active device into down state. A 877 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 878 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 879 * chain. 880 */ 881 int dev_close(struct net_device *dev) 882 { 883 if (!(dev->flags & IFF_UP)) 884 return 0; 885 886 /* 887 * Tell people we are going down, so that they can 888 * prepare to death, when device is still operating. 889 */ 890 blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); 891 892 dev_deactivate(dev); 893 894 clear_bit(__LINK_STATE_START, &dev->state); 895 896 /* Synchronize to scheduled poll. We cannot touch poll list, 897 * it can be even on different cpu. So just clear netif_running(), 898 * and wait when poll really will happen. Actually, the best place 899 * for this is inside dev->stop() after device stopped its irq 900 * engine, but this requires more changes in devices. */ 901 902 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 903 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { 904 /* No hurry. */ 905 msleep(1); 906 } 907 908 /* 909 * Call the device specific close. This cannot fail. 910 * Only if device is UP 911 * 912 * We allow it to be called even after a DETACH hot-plug 913 * event. 914 */ 915 if (dev->stop) 916 dev->stop(dev); 917 918 /* 919 * Device is now down. 920 */ 921 922 dev->flags &= ~IFF_UP; 923 924 /* 925 * Tell people we are down 926 */ 927 blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); 928 929 return 0; 930 } 931 932 933 /* 934 * Device change register/unregister. These are not inline or static 935 * as we export them to the world. 936 */ 937 938 /** 939 * register_netdevice_notifier - register a network notifier block 940 * @nb: notifier 941 * 942 * Register a notifier to be called when network device events occur. 943 * The notifier passed is linked into the kernel structures and must 944 * not be reused until it has been unregistered. A negative errno code 945 * is returned on a failure. 946 * 947 * When registered all registration and up events are replayed 948 * to the new notifier to allow device to have a race free 949 * view of the network device list. 950 */ 951 952 int register_netdevice_notifier(struct notifier_block *nb) 953 { 954 struct net_device *dev; 955 int err; 956 957 rtnl_lock(); 958 err = blocking_notifier_chain_register(&netdev_chain, nb); 959 if (!err) { 960 for (dev = dev_base; dev; dev = dev->next) { 961 nb->notifier_call(nb, NETDEV_REGISTER, dev); 962 963 if (dev->flags & IFF_UP) 964 nb->notifier_call(nb, NETDEV_UP, dev); 965 } 966 } 967 rtnl_unlock(); 968 return err; 969 } 970 971 /** 972 * unregister_netdevice_notifier - unregister a network notifier block 973 * @nb: notifier 974 * 975 * Unregister a notifier previously registered by 976 * register_netdevice_notifier(). The notifier is unlinked into the 977 * kernel structures and may then be reused. A negative errno code 978 * is returned on a failure. 979 */ 980 981 int unregister_netdevice_notifier(struct notifier_block *nb) 982 { 983 int err; 984 985 rtnl_lock(); 986 err = blocking_notifier_chain_unregister(&netdev_chain, nb); 987 rtnl_unlock(); 988 return err; 989 } 990 991 /** 992 * call_netdevice_notifiers - call all network notifier blocks 993 * @val: value passed unmodified to notifier function 994 * @v: pointer passed unmodified to notifier function 995 * 996 * Call all network notifier blocks. Parameters and return value 997 * are as for blocking_notifier_call_chain(). 998 */ 999 1000 int call_netdevice_notifiers(unsigned long val, void *v) 1001 { 1002 return blocking_notifier_call_chain(&netdev_chain, val, v); 1003 } 1004 1005 /* When > 0 there are consumers of rx skb time stamps */ 1006 static atomic_t netstamp_needed = ATOMIC_INIT(0); 1007 1008 void net_enable_timestamp(void) 1009 { 1010 atomic_inc(&netstamp_needed); 1011 } 1012 1013 void net_disable_timestamp(void) 1014 { 1015 atomic_dec(&netstamp_needed); 1016 } 1017 1018 void __net_timestamp(struct sk_buff *skb) 1019 { 1020 struct timeval tv; 1021 1022 do_gettimeofday(&tv); 1023 skb_set_timestamp(skb, &tv); 1024 } 1025 EXPORT_SYMBOL(__net_timestamp); 1026 1027 static inline void net_timestamp(struct sk_buff *skb) 1028 { 1029 if (atomic_read(&netstamp_needed)) 1030 __net_timestamp(skb); 1031 else { 1032 skb->tstamp.off_sec = 0; 1033 skb->tstamp.off_usec = 0; 1034 } 1035 } 1036 1037 /* 1038 * Support routine. Sends outgoing frames to any network 1039 * taps currently in use. 1040 */ 1041 1042 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1043 { 1044 struct packet_type *ptype; 1045 1046 net_timestamp(skb); 1047 1048 rcu_read_lock(); 1049 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1050 /* Never send packets back to the socket 1051 * they originated from - MvS (miquels@drinkel.ow.org) 1052 */ 1053 if ((ptype->dev == dev || !ptype->dev) && 1054 (ptype->af_packet_priv == NULL || 1055 (struct sock *)ptype->af_packet_priv != skb->sk)) { 1056 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); 1057 if (!skb2) 1058 break; 1059 1060 /* skb->nh should be correctly 1061 set by sender, so that the second statement is 1062 just protection against buggy protocols. 1063 */ 1064 skb2->mac.raw = skb2->data; 1065 1066 if (skb2->nh.raw < skb2->data || 1067 skb2->nh.raw > skb2->tail) { 1068 if (net_ratelimit()) 1069 printk(KERN_CRIT "protocol %04x is " 1070 "buggy, dev %s\n", 1071 skb2->protocol, dev->name); 1072 skb2->nh.raw = skb2->data; 1073 } 1074 1075 skb2->h.raw = skb2->nh.raw; 1076 skb2->pkt_type = PACKET_OUTGOING; 1077 ptype->func(skb2, skb->dev, ptype, skb->dev); 1078 } 1079 } 1080 rcu_read_unlock(); 1081 } 1082 1083 /* 1084 * Invalidate hardware checksum when packet is to be mangled, and 1085 * complete checksum manually on outgoing path. 1086 */ 1087 int skb_checksum_help(struct sk_buff *skb, int inward) 1088 { 1089 unsigned int csum; 1090 int ret = 0, offset = skb->h.raw - skb->data; 1091 1092 if (inward) { 1093 skb->ip_summed = CHECKSUM_NONE; 1094 goto out; 1095 } 1096 1097 if (skb_cloned(skb)) { 1098 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 1099 if (ret) 1100 goto out; 1101 } 1102 1103 BUG_ON(offset > (int)skb->len); 1104 csum = skb_checksum(skb, offset, skb->len-offset, 0); 1105 1106 offset = skb->tail - skb->h.raw; 1107 BUG_ON(offset <= 0); 1108 BUG_ON(skb->csum + 2 > offset); 1109 1110 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); 1111 skb->ip_summed = CHECKSUM_NONE; 1112 out: 1113 return ret; 1114 } 1115 1116 /* Take action when hardware reception checksum errors are detected. */ 1117 #ifdef CONFIG_BUG 1118 void netdev_rx_csum_fault(struct net_device *dev) 1119 { 1120 if (net_ratelimit()) { 1121 printk(KERN_ERR "%s: hw csum failure.\n", 1122 dev ? dev->name : "<unknown>"); 1123 dump_stack(); 1124 } 1125 } 1126 EXPORT_SYMBOL(netdev_rx_csum_fault); 1127 #endif 1128 1129 #ifdef CONFIG_HIGHMEM 1130 /* Actually, we should eliminate this check as soon as we know, that: 1131 * 1. IOMMU is present and allows to map all the memory. 1132 * 2. No high memory really exists on this machine. 1133 */ 1134 1135 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1136 { 1137 int i; 1138 1139 if (dev->features & NETIF_F_HIGHDMA) 1140 return 0; 1141 1142 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1143 if (PageHighMem(skb_shinfo(skb)->frags[i].page)) 1144 return 1; 1145 1146 return 0; 1147 } 1148 #else 1149 #define illegal_highdma(dev, skb) (0) 1150 #endif 1151 1152 /* Keep head the same: replace data */ 1153 int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) 1154 { 1155 unsigned int size; 1156 u8 *data; 1157 long offset; 1158 struct skb_shared_info *ninfo; 1159 int headerlen = skb->data - skb->head; 1160 int expand = (skb->tail + skb->data_len) - skb->end; 1161 1162 if (skb_shared(skb)) 1163 BUG(); 1164 1165 if (expand <= 0) 1166 expand = 0; 1167 1168 size = skb->end - skb->head + expand; 1169 size = SKB_DATA_ALIGN(size); 1170 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 1171 if (!data) 1172 return -ENOMEM; 1173 1174 /* Copy entire thing */ 1175 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) 1176 BUG(); 1177 1178 /* Set up shinfo */ 1179 ninfo = (struct skb_shared_info*)(data + size); 1180 atomic_set(&ninfo->dataref, 1); 1181 ninfo->tso_size = skb_shinfo(skb)->tso_size; 1182 ninfo->tso_segs = skb_shinfo(skb)->tso_segs; 1183 ninfo->nr_frags = 0; 1184 ninfo->frag_list = NULL; 1185 1186 /* Offset between the two in bytes */ 1187 offset = data - skb->head; 1188 1189 /* Free old data. */ 1190 skb_release_data(skb); 1191 1192 skb->head = data; 1193 skb->end = data + size; 1194 1195 /* Set up new pointers */ 1196 skb->h.raw += offset; 1197 skb->nh.raw += offset; 1198 skb->mac.raw += offset; 1199 skb->tail += offset; 1200 skb->data += offset; 1201 1202 /* We are no longer a clone, even if we were. */ 1203 skb->cloned = 0; 1204 1205 skb->tail += skb->data_len; 1206 skb->data_len = 0; 1207 return 0; 1208 } 1209 1210 #define HARD_TX_LOCK(dev, cpu) { \ 1211 if ((dev->features & NETIF_F_LLTX) == 0) { \ 1212 spin_lock(&dev->xmit_lock); \ 1213 dev->xmit_lock_owner = cpu; \ 1214 } \ 1215 } 1216 1217 #define HARD_TX_UNLOCK(dev) { \ 1218 if ((dev->features & NETIF_F_LLTX) == 0) { \ 1219 dev->xmit_lock_owner = -1; \ 1220 spin_unlock(&dev->xmit_lock); \ 1221 } \ 1222 } 1223 1224 /** 1225 * dev_queue_xmit - transmit a buffer 1226 * @skb: buffer to transmit 1227 * 1228 * Queue a buffer for transmission to a network device. The caller must 1229 * have set the device and priority and built the buffer before calling 1230 * this function. The function can be called from an interrupt. 1231 * 1232 * A negative errno code is returned on a failure. A success does not 1233 * guarantee the frame will be transmitted as it may be dropped due 1234 * to congestion or traffic shaping. 1235 * 1236 * ----------------------------------------------------------------------------------- 1237 * I notice this method can also return errors from the queue disciplines, 1238 * including NET_XMIT_DROP, which is a positive value. So, errors can also 1239 * be positive. 1240 * 1241 * Regardless of the return value, the skb is consumed, so it is currently 1242 * difficult to retry a send to this method. (You can bump the ref count 1243 * before sending to hold a reference for retry if you are careful.) 1244 * 1245 * When calling this method, interrupts MUST be enabled. This is because 1246 * the BH enable code must have IRQs enabled so that it will not deadlock. 1247 * --BLG 1248 */ 1249 1250 int dev_queue_xmit(struct sk_buff *skb) 1251 { 1252 struct net_device *dev = skb->dev; 1253 struct Qdisc *q; 1254 int rc = -ENOMEM; 1255 1256 if (skb_shinfo(skb)->frag_list && 1257 !(dev->features & NETIF_F_FRAGLIST) && 1258 __skb_linearize(skb, GFP_ATOMIC)) 1259 goto out_kfree_skb; 1260 1261 /* Fragmented skb is linearized if device does not support SG, 1262 * or if at least one of fragments is in highmem and device 1263 * does not support DMA from it. 1264 */ 1265 if (skb_shinfo(skb)->nr_frags && 1266 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && 1267 __skb_linearize(skb, GFP_ATOMIC)) 1268 goto out_kfree_skb; 1269 1270 /* If packet is not checksummed and device does not support 1271 * checksumming for this protocol, complete checksumming here. 1272 */ 1273 if (skb->ip_summed == CHECKSUM_HW && 1274 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && 1275 (!(dev->features & NETIF_F_IP_CSUM) || 1276 skb->protocol != htons(ETH_P_IP)))) 1277 if (skb_checksum_help(skb, 0)) 1278 goto out_kfree_skb; 1279 1280 spin_lock_prefetch(&dev->queue_lock); 1281 1282 /* Disable soft irqs for various locks below. Also 1283 * stops preemption for RCU. 1284 */ 1285 local_bh_disable(); 1286 1287 /* Updates of qdisc are serialized by queue_lock. 1288 * The struct Qdisc which is pointed to by qdisc is now a 1289 * rcu structure - it may be accessed without acquiring 1290 * a lock (but the structure may be stale.) The freeing of the 1291 * qdisc will be deferred until it's known that there are no 1292 * more references to it. 1293 * 1294 * If the qdisc has an enqueue function, we still need to 1295 * hold the queue_lock before calling it, since queue_lock 1296 * also serializes access to the device queue. 1297 */ 1298 1299 q = rcu_dereference(dev->qdisc); 1300 #ifdef CONFIG_NET_CLS_ACT 1301 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); 1302 #endif 1303 if (q->enqueue) { 1304 /* Grab device queue */ 1305 spin_lock(&dev->queue_lock); 1306 1307 rc = q->enqueue(skb, q); 1308 1309 qdisc_run(dev); 1310 1311 spin_unlock(&dev->queue_lock); 1312 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; 1313 goto out; 1314 } 1315 1316 /* The device has no queue. Common case for software devices: 1317 loopback, all the sorts of tunnels... 1318 1319 Really, it is unlikely that xmit_lock protection is necessary here. 1320 (f.e. loopback and IP tunnels are clean ignoring statistics 1321 counters.) 1322 However, it is possible, that they rely on protection 1323 made by us here. 1324 1325 Check this and shot the lock. It is not prone from deadlocks. 1326 Either shot noqueue qdisc, it is even simpler 8) 1327 */ 1328 if (dev->flags & IFF_UP) { 1329 int cpu = smp_processor_id(); /* ok because BHs are off */ 1330 1331 if (dev->xmit_lock_owner != cpu) { 1332 1333 HARD_TX_LOCK(dev, cpu); 1334 1335 if (!netif_queue_stopped(dev)) { 1336 if (netdev_nit) 1337 dev_queue_xmit_nit(skb, dev); 1338 1339 rc = 0; 1340 if (!dev->hard_start_xmit(skb, dev)) { 1341 HARD_TX_UNLOCK(dev); 1342 goto out; 1343 } 1344 } 1345 HARD_TX_UNLOCK(dev); 1346 if (net_ratelimit()) 1347 printk(KERN_CRIT "Virtual device %s asks to " 1348 "queue packet!\n", dev->name); 1349 } else { 1350 /* Recursion is detected! It is possible, 1351 * unfortunately */ 1352 if (net_ratelimit()) 1353 printk(KERN_CRIT "Dead loop on virtual device " 1354 "%s, fix it urgently!\n", dev->name); 1355 } 1356 } 1357 1358 rc = -ENETDOWN; 1359 local_bh_enable(); 1360 1361 out_kfree_skb: 1362 kfree_skb(skb); 1363 return rc; 1364 out: 1365 local_bh_enable(); 1366 return rc; 1367 } 1368 1369 1370 /*======================================================================= 1371 Receiver routines 1372 =======================================================================*/ 1373 1374 int netdev_max_backlog = 1000; 1375 int netdev_budget = 300; 1376 int weight_p = 64; /* old backlog weight */ 1377 1378 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 1379 1380 1381 /** 1382 * netif_rx - post buffer to the network code 1383 * @skb: buffer to post 1384 * 1385 * This function receives a packet from a device driver and queues it for 1386 * the upper (protocol) levels to process. It always succeeds. The buffer 1387 * may be dropped during processing for congestion control or by the 1388 * protocol layers. 1389 * 1390 * return values: 1391 * NET_RX_SUCCESS (no congestion) 1392 * NET_RX_CN_LOW (low congestion) 1393 * NET_RX_CN_MOD (moderate congestion) 1394 * NET_RX_CN_HIGH (high congestion) 1395 * NET_RX_DROP (packet was dropped) 1396 * 1397 */ 1398 1399 int netif_rx(struct sk_buff *skb) 1400 { 1401 struct softnet_data *queue; 1402 unsigned long flags; 1403 1404 /* if netpoll wants it, pretend we never saw it */ 1405 if (netpoll_rx(skb)) 1406 return NET_RX_DROP; 1407 1408 if (!skb->tstamp.off_sec) 1409 net_timestamp(skb); 1410 1411 /* 1412 * The code is rearranged so that the path is the most 1413 * short when CPU is congested, but is still operating. 1414 */ 1415 local_irq_save(flags); 1416 queue = &__get_cpu_var(softnet_data); 1417 1418 __get_cpu_var(netdev_rx_stat).total++; 1419 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { 1420 if (queue->input_pkt_queue.qlen) { 1421 enqueue: 1422 dev_hold(skb->dev); 1423 __skb_queue_tail(&queue->input_pkt_queue, skb); 1424 local_irq_restore(flags); 1425 return NET_RX_SUCCESS; 1426 } 1427 1428 netif_rx_schedule(&queue->backlog_dev); 1429 goto enqueue; 1430 } 1431 1432 __get_cpu_var(netdev_rx_stat).dropped++; 1433 local_irq_restore(flags); 1434 1435 kfree_skb(skb); 1436 return NET_RX_DROP; 1437 } 1438 1439 int netif_rx_ni(struct sk_buff *skb) 1440 { 1441 int err; 1442 1443 preempt_disable(); 1444 err = netif_rx(skb); 1445 if (local_softirq_pending()) 1446 do_softirq(); 1447 preempt_enable(); 1448 1449 return err; 1450 } 1451 1452 EXPORT_SYMBOL(netif_rx_ni); 1453 1454 static inline struct net_device *skb_bond(struct sk_buff *skb) 1455 { 1456 struct net_device *dev = skb->dev; 1457 1458 if (dev->master) { 1459 /* 1460 * On bonding slaves other than the currently active 1461 * slave, suppress duplicates except for 802.3ad 1462 * ETH_P_SLOW and alb non-mcast/bcast. 1463 */ 1464 if (dev->priv_flags & IFF_SLAVE_INACTIVE) { 1465 if (dev->master->priv_flags & IFF_MASTER_ALB) { 1466 if (skb->pkt_type != PACKET_BROADCAST && 1467 skb->pkt_type != PACKET_MULTICAST) 1468 goto keep; 1469 } 1470 1471 if (dev->master->priv_flags & IFF_MASTER_8023AD && 1472 skb->protocol == __constant_htons(ETH_P_SLOW)) 1473 goto keep; 1474 1475 kfree_skb(skb); 1476 return NULL; 1477 } 1478 keep: 1479 skb->dev = dev->master; 1480 } 1481 1482 return dev; 1483 } 1484 1485 static void net_tx_action(struct softirq_action *h) 1486 { 1487 struct softnet_data *sd = &__get_cpu_var(softnet_data); 1488 1489 if (sd->completion_queue) { 1490 struct sk_buff *clist; 1491 1492 local_irq_disable(); 1493 clist = sd->completion_queue; 1494 sd->completion_queue = NULL; 1495 local_irq_enable(); 1496 1497 while (clist) { 1498 struct sk_buff *skb = clist; 1499 clist = clist->next; 1500 1501 BUG_TRAP(!atomic_read(&skb->users)); 1502 __kfree_skb(skb); 1503 } 1504 } 1505 1506 if (sd->output_queue) { 1507 struct net_device *head; 1508 1509 local_irq_disable(); 1510 head = sd->output_queue; 1511 sd->output_queue = NULL; 1512 local_irq_enable(); 1513 1514 while (head) { 1515 struct net_device *dev = head; 1516 head = head->next_sched; 1517 1518 smp_mb__before_clear_bit(); 1519 clear_bit(__LINK_STATE_SCHED, &dev->state); 1520 1521 if (spin_trylock(&dev->queue_lock)) { 1522 qdisc_run(dev); 1523 spin_unlock(&dev->queue_lock); 1524 } else { 1525 netif_schedule(dev); 1526 } 1527 } 1528 } 1529 } 1530 1531 static __inline__ int deliver_skb(struct sk_buff *skb, 1532 struct packet_type *pt_prev, 1533 struct net_device *orig_dev) 1534 { 1535 atomic_inc(&skb->users); 1536 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1537 } 1538 1539 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 1540 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); 1541 struct net_bridge; 1542 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, 1543 unsigned char *addr); 1544 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); 1545 1546 static __inline__ int handle_bridge(struct sk_buff **pskb, 1547 struct packet_type **pt_prev, int *ret, 1548 struct net_device *orig_dev) 1549 { 1550 struct net_bridge_port *port; 1551 1552 if ((*pskb)->pkt_type == PACKET_LOOPBACK || 1553 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) 1554 return 0; 1555 1556 if (*pt_prev) { 1557 *ret = deliver_skb(*pskb, *pt_prev, orig_dev); 1558 *pt_prev = NULL; 1559 } 1560 1561 return br_handle_frame_hook(port, pskb); 1562 } 1563 #else 1564 #define handle_bridge(skb, pt_prev, ret, orig_dev) (0) 1565 #endif 1566 1567 #ifdef CONFIG_NET_CLS_ACT 1568 /* TODO: Maybe we should just force sch_ingress to be compiled in 1569 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 1570 * a compare and 2 stores extra right now if we dont have it on 1571 * but have CONFIG_NET_CLS_ACT 1572 * NOTE: This doesnt stop any functionality; if you dont have 1573 * the ingress scheduler, you just cant add policies on ingress. 1574 * 1575 */ 1576 static int ing_filter(struct sk_buff *skb) 1577 { 1578 struct Qdisc *q; 1579 struct net_device *dev = skb->dev; 1580 int result = TC_ACT_OK; 1581 1582 if (dev->qdisc_ingress) { 1583 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); 1584 if (MAX_RED_LOOP < ttl++) { 1585 printk("Redir loop detected Dropping packet (%s->%s)\n", 1586 skb->input_dev->name, skb->dev->name); 1587 return TC_ACT_SHOT; 1588 } 1589 1590 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); 1591 1592 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); 1593 1594 spin_lock(&dev->ingress_lock); 1595 if ((q = dev->qdisc_ingress) != NULL) 1596 result = q->enqueue(skb, q); 1597 spin_unlock(&dev->ingress_lock); 1598 1599 } 1600 1601 return result; 1602 } 1603 #endif 1604 1605 int netif_receive_skb(struct sk_buff *skb) 1606 { 1607 struct packet_type *ptype, *pt_prev; 1608 struct net_device *orig_dev; 1609 int ret = NET_RX_DROP; 1610 unsigned short type; 1611 1612 /* if we've gotten here through NAPI, check netpoll */ 1613 if (skb->dev->poll && netpoll_rx(skb)) 1614 return NET_RX_DROP; 1615 1616 if (!skb->tstamp.off_sec) 1617 net_timestamp(skb); 1618 1619 if (!skb->input_dev) 1620 skb->input_dev = skb->dev; 1621 1622 orig_dev = skb_bond(skb); 1623 1624 if (!orig_dev) 1625 return NET_RX_DROP; 1626 1627 __get_cpu_var(netdev_rx_stat).total++; 1628 1629 skb->h.raw = skb->nh.raw = skb->data; 1630 skb->mac_len = skb->nh.raw - skb->mac.raw; 1631 1632 pt_prev = NULL; 1633 1634 rcu_read_lock(); 1635 1636 #ifdef CONFIG_NET_CLS_ACT 1637 if (skb->tc_verd & TC_NCLS) { 1638 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 1639 goto ncls; 1640 } 1641 #endif 1642 1643 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1644 if (!ptype->dev || ptype->dev == skb->dev) { 1645 if (pt_prev) 1646 ret = deliver_skb(skb, pt_prev, orig_dev); 1647 pt_prev = ptype; 1648 } 1649 } 1650 1651 #ifdef CONFIG_NET_CLS_ACT 1652 if (pt_prev) { 1653 ret = deliver_skb(skb, pt_prev, orig_dev); 1654 pt_prev = NULL; /* noone else should process this after*/ 1655 } else { 1656 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 1657 } 1658 1659 ret = ing_filter(skb); 1660 1661 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { 1662 kfree_skb(skb); 1663 goto out; 1664 } 1665 1666 skb->tc_verd = 0; 1667 ncls: 1668 #endif 1669 1670 handle_diverter(skb); 1671 1672 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) 1673 goto out; 1674 1675 type = skb->protocol; 1676 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { 1677 if (ptype->type == type && 1678 (!ptype->dev || ptype->dev == skb->dev)) { 1679 if (pt_prev) 1680 ret = deliver_skb(skb, pt_prev, orig_dev); 1681 pt_prev = ptype; 1682 } 1683 } 1684 1685 if (pt_prev) { 1686 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1687 } else { 1688 kfree_skb(skb); 1689 /* Jamal, now you will not able to escape explaining 1690 * me how you were going to use this. :-) 1691 */ 1692 ret = NET_RX_DROP; 1693 } 1694 1695 out: 1696 rcu_read_unlock(); 1697 return ret; 1698 } 1699 1700 static int process_backlog(struct net_device *backlog_dev, int *budget) 1701 { 1702 int work = 0; 1703 int quota = min(backlog_dev->quota, *budget); 1704 struct softnet_data *queue = &__get_cpu_var(softnet_data); 1705 unsigned long start_time = jiffies; 1706 1707 backlog_dev->weight = weight_p; 1708 for (;;) { 1709 struct sk_buff *skb; 1710 struct net_device *dev; 1711 1712 local_irq_disable(); 1713 skb = __skb_dequeue(&queue->input_pkt_queue); 1714 if (!skb) 1715 goto job_done; 1716 local_irq_enable(); 1717 1718 dev = skb->dev; 1719 1720 netif_receive_skb(skb); 1721 1722 dev_put(dev); 1723 1724 work++; 1725 1726 if (work >= quota || jiffies - start_time > 1) 1727 break; 1728 1729 } 1730 1731 backlog_dev->quota -= work; 1732 *budget -= work; 1733 return -1; 1734 1735 job_done: 1736 backlog_dev->quota -= work; 1737 *budget -= work; 1738 1739 list_del(&backlog_dev->poll_list); 1740 smp_mb__before_clear_bit(); 1741 netif_poll_enable(backlog_dev); 1742 1743 local_irq_enable(); 1744 return 0; 1745 } 1746 1747 static void net_rx_action(struct softirq_action *h) 1748 { 1749 struct softnet_data *queue = &__get_cpu_var(softnet_data); 1750 unsigned long start_time = jiffies; 1751 int budget = netdev_budget; 1752 void *have; 1753 1754 local_irq_disable(); 1755 1756 while (!list_empty(&queue->poll_list)) { 1757 struct net_device *dev; 1758 1759 if (budget <= 0 || jiffies - start_time > 1) 1760 goto softnet_break; 1761 1762 local_irq_enable(); 1763 1764 dev = list_entry(queue->poll_list.next, 1765 struct net_device, poll_list); 1766 have = netpoll_poll_lock(dev); 1767 1768 if (dev->quota <= 0 || dev->poll(dev, &budget)) { 1769 netpoll_poll_unlock(have); 1770 local_irq_disable(); 1771 list_move_tail(&dev->poll_list, &queue->poll_list); 1772 if (dev->quota < 0) 1773 dev->quota += dev->weight; 1774 else 1775 dev->quota = dev->weight; 1776 } else { 1777 netpoll_poll_unlock(have); 1778 dev_put(dev); 1779 local_irq_disable(); 1780 } 1781 } 1782 out: 1783 local_irq_enable(); 1784 return; 1785 1786 softnet_break: 1787 __get_cpu_var(netdev_rx_stat).time_squeeze++; 1788 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 1789 goto out; 1790 } 1791 1792 static gifconf_func_t * gifconf_list [NPROTO]; 1793 1794 /** 1795 * register_gifconf - register a SIOCGIF handler 1796 * @family: Address family 1797 * @gifconf: Function handler 1798 * 1799 * Register protocol dependent address dumping routines. The handler 1800 * that is passed must not be freed or reused until it has been replaced 1801 * by another handler. 1802 */ 1803 int register_gifconf(unsigned int family, gifconf_func_t * gifconf) 1804 { 1805 if (family >= NPROTO) 1806 return -EINVAL; 1807 gifconf_list[family] = gifconf; 1808 return 0; 1809 } 1810 1811 1812 /* 1813 * Map an interface index to its name (SIOCGIFNAME) 1814 */ 1815 1816 /* 1817 * We need this ioctl for efficient implementation of the 1818 * if_indextoname() function required by the IPv6 API. Without 1819 * it, we would have to search all the interfaces to find a 1820 * match. --pb 1821 */ 1822 1823 static int dev_ifname(struct ifreq __user *arg) 1824 { 1825 struct net_device *dev; 1826 struct ifreq ifr; 1827 1828 /* 1829 * Fetch the caller's info block. 1830 */ 1831 1832 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 1833 return -EFAULT; 1834 1835 read_lock(&dev_base_lock); 1836 dev = __dev_get_by_index(ifr.ifr_ifindex); 1837 if (!dev) { 1838 read_unlock(&dev_base_lock); 1839 return -ENODEV; 1840 } 1841 1842 strcpy(ifr.ifr_name, dev->name); 1843 read_unlock(&dev_base_lock); 1844 1845 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 1846 return -EFAULT; 1847 return 0; 1848 } 1849 1850 /* 1851 * Perform a SIOCGIFCONF call. This structure will change 1852 * size eventually, and there is nothing I can do about it. 1853 * Thus we will need a 'compatibility mode'. 1854 */ 1855 1856 static int dev_ifconf(char __user *arg) 1857 { 1858 struct ifconf ifc; 1859 struct net_device *dev; 1860 char __user *pos; 1861 int len; 1862 int total; 1863 int i; 1864 1865 /* 1866 * Fetch the caller's info block. 1867 */ 1868 1869 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 1870 return -EFAULT; 1871 1872 pos = ifc.ifc_buf; 1873 len = ifc.ifc_len; 1874 1875 /* 1876 * Loop over the interfaces, and write an info block for each. 1877 */ 1878 1879 total = 0; 1880 for (dev = dev_base; dev; dev = dev->next) { 1881 for (i = 0; i < NPROTO; i++) { 1882 if (gifconf_list[i]) { 1883 int done; 1884 if (!pos) 1885 done = gifconf_list[i](dev, NULL, 0); 1886 else 1887 done = gifconf_list[i](dev, pos + total, 1888 len - total); 1889 if (done < 0) 1890 return -EFAULT; 1891 total += done; 1892 } 1893 } 1894 } 1895 1896 /* 1897 * All done. Write the updated control block back to the caller. 1898 */ 1899 ifc.ifc_len = total; 1900 1901 /* 1902 * Both BSD and Solaris return 0 here, so we do too. 1903 */ 1904 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; 1905 } 1906 1907 #ifdef CONFIG_PROC_FS 1908 /* 1909 * This is invoked by the /proc filesystem handler to display a device 1910 * in detail. 1911 */ 1912 static __inline__ struct net_device *dev_get_idx(loff_t pos) 1913 { 1914 struct net_device *dev; 1915 loff_t i; 1916 1917 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); 1918 1919 return i == pos ? dev : NULL; 1920 } 1921 1922 void *dev_seq_start(struct seq_file *seq, loff_t *pos) 1923 { 1924 read_lock(&dev_base_lock); 1925 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; 1926 } 1927 1928 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1929 { 1930 ++*pos; 1931 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; 1932 } 1933 1934 void dev_seq_stop(struct seq_file *seq, void *v) 1935 { 1936 read_unlock(&dev_base_lock); 1937 } 1938 1939 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 1940 { 1941 if (dev->get_stats) { 1942 struct net_device_stats *stats = dev->get_stats(dev); 1943 1944 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 1945 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 1946 dev->name, stats->rx_bytes, stats->rx_packets, 1947 stats->rx_errors, 1948 stats->rx_dropped + stats->rx_missed_errors, 1949 stats->rx_fifo_errors, 1950 stats->rx_length_errors + stats->rx_over_errors + 1951 stats->rx_crc_errors + stats->rx_frame_errors, 1952 stats->rx_compressed, stats->multicast, 1953 stats->tx_bytes, stats->tx_packets, 1954 stats->tx_errors, stats->tx_dropped, 1955 stats->tx_fifo_errors, stats->collisions, 1956 stats->tx_carrier_errors + 1957 stats->tx_aborted_errors + 1958 stats->tx_window_errors + 1959 stats->tx_heartbeat_errors, 1960 stats->tx_compressed); 1961 } else 1962 seq_printf(seq, "%6s: No statistics available.\n", dev->name); 1963 } 1964 1965 /* 1966 * Called from the PROCfs module. This now uses the new arbitrary sized 1967 * /proc/net interface to create /proc/net/dev 1968 */ 1969 static int dev_seq_show(struct seq_file *seq, void *v) 1970 { 1971 if (v == SEQ_START_TOKEN) 1972 seq_puts(seq, "Inter-| Receive " 1973 " | Transmit\n" 1974 " face |bytes packets errs drop fifo frame " 1975 "compressed multicast|bytes packets errs " 1976 "drop fifo colls carrier compressed\n"); 1977 else 1978 dev_seq_printf_stats(seq, v); 1979 return 0; 1980 } 1981 1982 static struct netif_rx_stats *softnet_get_online(loff_t *pos) 1983 { 1984 struct netif_rx_stats *rc = NULL; 1985 1986 while (*pos < NR_CPUS) 1987 if (cpu_online(*pos)) { 1988 rc = &per_cpu(netdev_rx_stat, *pos); 1989 break; 1990 } else 1991 ++*pos; 1992 return rc; 1993 } 1994 1995 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 1996 { 1997 return softnet_get_online(pos); 1998 } 1999 2000 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2001 { 2002 ++*pos; 2003 return softnet_get_online(pos); 2004 } 2005 2006 static void softnet_seq_stop(struct seq_file *seq, void *v) 2007 { 2008 } 2009 2010 static int softnet_seq_show(struct seq_file *seq, void *v) 2011 { 2012 struct netif_rx_stats *s = v; 2013 2014 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 2015 s->total, s->dropped, s->time_squeeze, 0, 2016 0, 0, 0, 0, /* was fastroute */ 2017 s->cpu_collision ); 2018 return 0; 2019 } 2020 2021 static struct seq_operations dev_seq_ops = { 2022 .start = dev_seq_start, 2023 .next = dev_seq_next, 2024 .stop = dev_seq_stop, 2025 .show = dev_seq_show, 2026 }; 2027 2028 static int dev_seq_open(struct inode *inode, struct file *file) 2029 { 2030 return seq_open(file, &dev_seq_ops); 2031 } 2032 2033 static struct file_operations dev_seq_fops = { 2034 .owner = THIS_MODULE, 2035 .open = dev_seq_open, 2036 .read = seq_read, 2037 .llseek = seq_lseek, 2038 .release = seq_release, 2039 }; 2040 2041 static struct seq_operations softnet_seq_ops = { 2042 .start = softnet_seq_start, 2043 .next = softnet_seq_next, 2044 .stop = softnet_seq_stop, 2045 .show = softnet_seq_show, 2046 }; 2047 2048 static int softnet_seq_open(struct inode *inode, struct file *file) 2049 { 2050 return seq_open(file, &softnet_seq_ops); 2051 } 2052 2053 static struct file_operations softnet_seq_fops = { 2054 .owner = THIS_MODULE, 2055 .open = softnet_seq_open, 2056 .read = seq_read, 2057 .llseek = seq_lseek, 2058 .release = seq_release, 2059 }; 2060 2061 #ifdef CONFIG_WIRELESS_EXT 2062 extern int wireless_proc_init(void); 2063 #else 2064 #define wireless_proc_init() 0 2065 #endif 2066 2067 static int __init dev_proc_init(void) 2068 { 2069 int rc = -ENOMEM; 2070 2071 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) 2072 goto out; 2073 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) 2074 goto out_dev; 2075 if (wireless_proc_init()) 2076 goto out_softnet; 2077 rc = 0; 2078 out: 2079 return rc; 2080 out_softnet: 2081 proc_net_remove("softnet_stat"); 2082 out_dev: 2083 proc_net_remove("dev"); 2084 goto out; 2085 } 2086 #else 2087 #define dev_proc_init() 0 2088 #endif /* CONFIG_PROC_FS */ 2089 2090 2091 /** 2092 * netdev_set_master - set up master/slave pair 2093 * @slave: slave device 2094 * @master: new master device 2095 * 2096 * Changes the master device of the slave. Pass %NULL to break the 2097 * bonding. The caller must hold the RTNL semaphore. On a failure 2098 * a negative errno code is returned. On success the reference counts 2099 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the 2100 * function returns zero. 2101 */ 2102 int netdev_set_master(struct net_device *slave, struct net_device *master) 2103 { 2104 struct net_device *old = slave->master; 2105 2106 ASSERT_RTNL(); 2107 2108 if (master) { 2109 if (old) 2110 return -EBUSY; 2111 dev_hold(master); 2112 } 2113 2114 slave->master = master; 2115 2116 synchronize_net(); 2117 2118 if (old) 2119 dev_put(old); 2120 2121 if (master) 2122 slave->flags |= IFF_SLAVE; 2123 else 2124 slave->flags &= ~IFF_SLAVE; 2125 2126 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); 2127 return 0; 2128 } 2129 2130 /** 2131 * dev_set_promiscuity - update promiscuity count on a device 2132 * @dev: device 2133 * @inc: modifier 2134 * 2135 * Add or remove promsicuity from a device. While the count in the device 2136 * remains above zero the interface remains promiscuous. Once it hits zero 2137 * the device reverts back to normal filtering operation. A negative inc 2138 * value is used to drop promiscuity on the device. 2139 */ 2140 void dev_set_promiscuity(struct net_device *dev, int inc) 2141 { 2142 unsigned short old_flags = dev->flags; 2143 2144 if ((dev->promiscuity += inc) == 0) 2145 dev->flags &= ~IFF_PROMISC; 2146 else 2147 dev->flags |= IFF_PROMISC; 2148 if (dev->flags != old_flags) { 2149 dev_mc_upload(dev); 2150 printk(KERN_INFO "device %s %s promiscuous mode\n", 2151 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2152 "left"); 2153 audit_log(current->audit_context, GFP_ATOMIC, 2154 AUDIT_ANOM_PROMISCUOUS, 2155 "dev=%s prom=%d old_prom=%d auid=%u", 2156 dev->name, (dev->flags & IFF_PROMISC), 2157 (old_flags & IFF_PROMISC), 2158 audit_get_loginuid(current->audit_context)); 2159 } 2160 } 2161 2162 /** 2163 * dev_set_allmulti - update allmulti count on a device 2164 * @dev: device 2165 * @inc: modifier 2166 * 2167 * Add or remove reception of all multicast frames to a device. While the 2168 * count in the device remains above zero the interface remains listening 2169 * to all interfaces. Once it hits zero the device reverts back to normal 2170 * filtering operation. A negative @inc value is used to drop the counter 2171 * when releasing a resource needing all multicasts. 2172 */ 2173 2174 void dev_set_allmulti(struct net_device *dev, int inc) 2175 { 2176 unsigned short old_flags = dev->flags; 2177 2178 dev->flags |= IFF_ALLMULTI; 2179 if ((dev->allmulti += inc) == 0) 2180 dev->flags &= ~IFF_ALLMULTI; 2181 if (dev->flags ^ old_flags) 2182 dev_mc_upload(dev); 2183 } 2184 2185 unsigned dev_get_flags(const struct net_device *dev) 2186 { 2187 unsigned flags; 2188 2189 flags = (dev->flags & ~(IFF_PROMISC | 2190 IFF_ALLMULTI | 2191 IFF_RUNNING | 2192 IFF_LOWER_UP | 2193 IFF_DORMANT)) | 2194 (dev->gflags & (IFF_PROMISC | 2195 IFF_ALLMULTI)); 2196 2197 if (netif_running(dev)) { 2198 if (netif_oper_up(dev)) 2199 flags |= IFF_RUNNING; 2200 if (netif_carrier_ok(dev)) 2201 flags |= IFF_LOWER_UP; 2202 if (netif_dormant(dev)) 2203 flags |= IFF_DORMANT; 2204 } 2205 2206 return flags; 2207 } 2208 2209 int dev_change_flags(struct net_device *dev, unsigned flags) 2210 { 2211 int ret; 2212 int old_flags = dev->flags; 2213 2214 /* 2215 * Set the flags on our device. 2216 */ 2217 2218 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 2219 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 2220 IFF_AUTOMEDIA)) | 2221 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 2222 IFF_ALLMULTI)); 2223 2224 /* 2225 * Load in the correct multicast list now the flags have changed. 2226 */ 2227 2228 dev_mc_upload(dev); 2229 2230 /* 2231 * Have we downed the interface. We handle IFF_UP ourselves 2232 * according to user attempts to set it, rather than blindly 2233 * setting it. 2234 */ 2235 2236 ret = 0; 2237 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 2238 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); 2239 2240 if (!ret) 2241 dev_mc_upload(dev); 2242 } 2243 2244 if (dev->flags & IFF_UP && 2245 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | 2246 IFF_VOLATILE))) 2247 blocking_notifier_call_chain(&netdev_chain, 2248 NETDEV_CHANGE, dev); 2249 2250 if ((flags ^ dev->gflags) & IFF_PROMISC) { 2251 int inc = (flags & IFF_PROMISC) ? +1 : -1; 2252 dev->gflags ^= IFF_PROMISC; 2253 dev_set_promiscuity(dev, inc); 2254 } 2255 2256 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 2257 is important. Some (broken) drivers set IFF_PROMISC, when 2258 IFF_ALLMULTI is requested not asking us and not reporting. 2259 */ 2260 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 2261 int inc = (flags & IFF_ALLMULTI) ? +1 : -1; 2262 dev->gflags ^= IFF_ALLMULTI; 2263 dev_set_allmulti(dev, inc); 2264 } 2265 2266 if (old_flags ^ dev->flags) 2267 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); 2268 2269 return ret; 2270 } 2271 2272 int dev_set_mtu(struct net_device *dev, int new_mtu) 2273 { 2274 int err; 2275 2276 if (new_mtu == dev->mtu) 2277 return 0; 2278 2279 /* MTU must be positive. */ 2280 if (new_mtu < 0) 2281 return -EINVAL; 2282 2283 if (!netif_device_present(dev)) 2284 return -ENODEV; 2285 2286 err = 0; 2287 if (dev->change_mtu) 2288 err = dev->change_mtu(dev, new_mtu); 2289 else 2290 dev->mtu = new_mtu; 2291 if (!err && dev->flags & IFF_UP) 2292 blocking_notifier_call_chain(&netdev_chain, 2293 NETDEV_CHANGEMTU, dev); 2294 return err; 2295 } 2296 2297 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 2298 { 2299 int err; 2300 2301 if (!dev->set_mac_address) 2302 return -EOPNOTSUPP; 2303 if (sa->sa_family != dev->type) 2304 return -EINVAL; 2305 if (!netif_device_present(dev)) 2306 return -ENODEV; 2307 err = dev->set_mac_address(dev, sa); 2308 if (!err) 2309 blocking_notifier_call_chain(&netdev_chain, 2310 NETDEV_CHANGEADDR, dev); 2311 return err; 2312 } 2313 2314 /* 2315 * Perform the SIOCxIFxxx calls. 2316 */ 2317 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) 2318 { 2319 int err; 2320 struct net_device *dev = __dev_get_by_name(ifr->ifr_name); 2321 2322 if (!dev) 2323 return -ENODEV; 2324 2325 switch (cmd) { 2326 case SIOCGIFFLAGS: /* Get interface flags */ 2327 ifr->ifr_flags = dev_get_flags(dev); 2328 return 0; 2329 2330 case SIOCSIFFLAGS: /* Set interface flags */ 2331 return dev_change_flags(dev, ifr->ifr_flags); 2332 2333 case SIOCGIFMETRIC: /* Get the metric on the interface 2334 (currently unused) */ 2335 ifr->ifr_metric = 0; 2336 return 0; 2337 2338 case SIOCSIFMETRIC: /* Set the metric on the interface 2339 (currently unused) */ 2340 return -EOPNOTSUPP; 2341 2342 case SIOCGIFMTU: /* Get the MTU of a device */ 2343 ifr->ifr_mtu = dev->mtu; 2344 return 0; 2345 2346 case SIOCSIFMTU: /* Set the MTU of a device */ 2347 return dev_set_mtu(dev, ifr->ifr_mtu); 2348 2349 case SIOCGIFHWADDR: 2350 if (!dev->addr_len) 2351 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); 2352 else 2353 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, 2354 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 2355 ifr->ifr_hwaddr.sa_family = dev->type; 2356 return 0; 2357 2358 case SIOCSIFHWADDR: 2359 return dev_set_mac_address(dev, &ifr->ifr_hwaddr); 2360 2361 case SIOCSIFHWBROADCAST: 2362 if (ifr->ifr_hwaddr.sa_family != dev->type) 2363 return -EINVAL; 2364 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, 2365 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 2366 blocking_notifier_call_chain(&netdev_chain, 2367 NETDEV_CHANGEADDR, dev); 2368 return 0; 2369 2370 case SIOCGIFMAP: 2371 ifr->ifr_map.mem_start = dev->mem_start; 2372 ifr->ifr_map.mem_end = dev->mem_end; 2373 ifr->ifr_map.base_addr = dev->base_addr; 2374 ifr->ifr_map.irq = dev->irq; 2375 ifr->ifr_map.dma = dev->dma; 2376 ifr->ifr_map.port = dev->if_port; 2377 return 0; 2378 2379 case SIOCSIFMAP: 2380 if (dev->set_config) { 2381 if (!netif_device_present(dev)) 2382 return -ENODEV; 2383 return dev->set_config(dev, &ifr->ifr_map); 2384 } 2385 return -EOPNOTSUPP; 2386 2387 case SIOCADDMULTI: 2388 if (!dev->set_multicast_list || 2389 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 2390 return -EINVAL; 2391 if (!netif_device_present(dev)) 2392 return -ENODEV; 2393 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 2394 dev->addr_len, 1); 2395 2396 case SIOCDELMULTI: 2397 if (!dev->set_multicast_list || 2398 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 2399 return -EINVAL; 2400 if (!netif_device_present(dev)) 2401 return -ENODEV; 2402 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 2403 dev->addr_len, 1); 2404 2405 case SIOCGIFINDEX: 2406 ifr->ifr_ifindex = dev->ifindex; 2407 return 0; 2408 2409 case SIOCGIFTXQLEN: 2410 ifr->ifr_qlen = dev->tx_queue_len; 2411 return 0; 2412 2413 case SIOCSIFTXQLEN: 2414 if (ifr->ifr_qlen < 0) 2415 return -EINVAL; 2416 dev->tx_queue_len = ifr->ifr_qlen; 2417 return 0; 2418 2419 case SIOCSIFNAME: 2420 ifr->ifr_newname[IFNAMSIZ-1] = '\0'; 2421 return dev_change_name(dev, ifr->ifr_newname); 2422 2423 /* 2424 * Unknown or private ioctl 2425 */ 2426 2427 default: 2428 if ((cmd >= SIOCDEVPRIVATE && 2429 cmd <= SIOCDEVPRIVATE + 15) || 2430 cmd == SIOCBONDENSLAVE || 2431 cmd == SIOCBONDRELEASE || 2432 cmd == SIOCBONDSETHWADDR || 2433 cmd == SIOCBONDSLAVEINFOQUERY || 2434 cmd == SIOCBONDINFOQUERY || 2435 cmd == SIOCBONDCHANGEACTIVE || 2436 cmd == SIOCGMIIPHY || 2437 cmd == SIOCGMIIREG || 2438 cmd == SIOCSMIIREG || 2439 cmd == SIOCBRADDIF || 2440 cmd == SIOCBRDELIF || 2441 cmd == SIOCWANDEV) { 2442 err = -EOPNOTSUPP; 2443 if (dev->do_ioctl) { 2444 if (netif_device_present(dev)) 2445 err = dev->do_ioctl(dev, ifr, 2446 cmd); 2447 else 2448 err = -ENODEV; 2449 } 2450 } else 2451 err = -EINVAL; 2452 2453 } 2454 return err; 2455 } 2456 2457 /* 2458 * This function handles all "interface"-type I/O control requests. The actual 2459 * 'doing' part of this is dev_ifsioc above. 2460 */ 2461 2462 /** 2463 * dev_ioctl - network device ioctl 2464 * @cmd: command to issue 2465 * @arg: pointer to a struct ifreq in user space 2466 * 2467 * Issue ioctl functions to devices. This is normally called by the 2468 * user space syscall interfaces but can sometimes be useful for 2469 * other purposes. The return value is the return from the syscall if 2470 * positive or a negative errno code on error. 2471 */ 2472 2473 int dev_ioctl(unsigned int cmd, void __user *arg) 2474 { 2475 struct ifreq ifr; 2476 int ret; 2477 char *colon; 2478 2479 /* One special case: SIOCGIFCONF takes ifconf argument 2480 and requires shared lock, because it sleeps writing 2481 to user space. 2482 */ 2483 2484 if (cmd == SIOCGIFCONF) { 2485 rtnl_lock(); 2486 ret = dev_ifconf((char __user *) arg); 2487 rtnl_unlock(); 2488 return ret; 2489 } 2490 if (cmd == SIOCGIFNAME) 2491 return dev_ifname((struct ifreq __user *)arg); 2492 2493 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 2494 return -EFAULT; 2495 2496 ifr.ifr_name[IFNAMSIZ-1] = 0; 2497 2498 colon = strchr(ifr.ifr_name, ':'); 2499 if (colon) 2500 *colon = 0; 2501 2502 /* 2503 * See which interface the caller is talking about. 2504 */ 2505 2506 switch (cmd) { 2507 /* 2508 * These ioctl calls: 2509 * - can be done by all. 2510 * - atomic and do not require locking. 2511 * - return a value 2512 */ 2513 case SIOCGIFFLAGS: 2514 case SIOCGIFMETRIC: 2515 case SIOCGIFMTU: 2516 case SIOCGIFHWADDR: 2517 case SIOCGIFSLAVE: 2518 case SIOCGIFMAP: 2519 case SIOCGIFINDEX: 2520 case SIOCGIFTXQLEN: 2521 dev_load(ifr.ifr_name); 2522 read_lock(&dev_base_lock); 2523 ret = dev_ifsioc(&ifr, cmd); 2524 read_unlock(&dev_base_lock); 2525 if (!ret) { 2526 if (colon) 2527 *colon = ':'; 2528 if (copy_to_user(arg, &ifr, 2529 sizeof(struct ifreq))) 2530 ret = -EFAULT; 2531 } 2532 return ret; 2533 2534 case SIOCETHTOOL: 2535 dev_load(ifr.ifr_name); 2536 rtnl_lock(); 2537 ret = dev_ethtool(&ifr); 2538 rtnl_unlock(); 2539 if (!ret) { 2540 if (colon) 2541 *colon = ':'; 2542 if (copy_to_user(arg, &ifr, 2543 sizeof(struct ifreq))) 2544 ret = -EFAULT; 2545 } 2546 return ret; 2547 2548 /* 2549 * These ioctl calls: 2550 * - require superuser power. 2551 * - require strict serialization. 2552 * - return a value 2553 */ 2554 case SIOCGMIIPHY: 2555 case SIOCGMIIREG: 2556 case SIOCSIFNAME: 2557 if (!capable(CAP_NET_ADMIN)) 2558 return -EPERM; 2559 dev_load(ifr.ifr_name); 2560 rtnl_lock(); 2561 ret = dev_ifsioc(&ifr, cmd); 2562 rtnl_unlock(); 2563 if (!ret) { 2564 if (colon) 2565 *colon = ':'; 2566 if (copy_to_user(arg, &ifr, 2567 sizeof(struct ifreq))) 2568 ret = -EFAULT; 2569 } 2570 return ret; 2571 2572 /* 2573 * These ioctl calls: 2574 * - require superuser power. 2575 * - require strict serialization. 2576 * - do not return a value 2577 */ 2578 case SIOCSIFFLAGS: 2579 case SIOCSIFMETRIC: 2580 case SIOCSIFMTU: 2581 case SIOCSIFMAP: 2582 case SIOCSIFHWADDR: 2583 case SIOCSIFSLAVE: 2584 case SIOCADDMULTI: 2585 case SIOCDELMULTI: 2586 case SIOCSIFHWBROADCAST: 2587 case SIOCSIFTXQLEN: 2588 case SIOCSMIIREG: 2589 case SIOCBONDENSLAVE: 2590 case SIOCBONDRELEASE: 2591 case SIOCBONDSETHWADDR: 2592 case SIOCBONDCHANGEACTIVE: 2593 case SIOCBRADDIF: 2594 case SIOCBRDELIF: 2595 if (!capable(CAP_NET_ADMIN)) 2596 return -EPERM; 2597 /* fall through */ 2598 case SIOCBONDSLAVEINFOQUERY: 2599 case SIOCBONDINFOQUERY: 2600 dev_load(ifr.ifr_name); 2601 rtnl_lock(); 2602 ret = dev_ifsioc(&ifr, cmd); 2603 rtnl_unlock(); 2604 return ret; 2605 2606 case SIOCGIFMEM: 2607 /* Get the per device memory space. We can add this but 2608 * currently do not support it */ 2609 case SIOCSIFMEM: 2610 /* Set the per device memory buffer space. 2611 * Not applicable in our case */ 2612 case SIOCSIFLINK: 2613 return -EINVAL; 2614 2615 /* 2616 * Unknown or private ioctl. 2617 */ 2618 default: 2619 if (cmd == SIOCWANDEV || 2620 (cmd >= SIOCDEVPRIVATE && 2621 cmd <= SIOCDEVPRIVATE + 15)) { 2622 dev_load(ifr.ifr_name); 2623 rtnl_lock(); 2624 ret = dev_ifsioc(&ifr, cmd); 2625 rtnl_unlock(); 2626 if (!ret && copy_to_user(arg, &ifr, 2627 sizeof(struct ifreq))) 2628 ret = -EFAULT; 2629 return ret; 2630 } 2631 #ifdef CONFIG_WIRELESS_EXT 2632 /* Take care of Wireless Extensions */ 2633 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 2634 /* If command is `set a parameter', or 2635 * `get the encoding parameters', check if 2636 * the user has the right to do it */ 2637 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) { 2638 if (!capable(CAP_NET_ADMIN)) 2639 return -EPERM; 2640 } 2641 dev_load(ifr.ifr_name); 2642 rtnl_lock(); 2643 /* Follow me in net/core/wireless.c */ 2644 ret = wireless_process_ioctl(&ifr, cmd); 2645 rtnl_unlock(); 2646 if (IW_IS_GET(cmd) && 2647 copy_to_user(arg, &ifr, 2648 sizeof(struct ifreq))) 2649 ret = -EFAULT; 2650 return ret; 2651 } 2652 #endif /* CONFIG_WIRELESS_EXT */ 2653 return -EINVAL; 2654 } 2655 } 2656 2657 2658 /** 2659 * dev_new_index - allocate an ifindex 2660 * 2661 * Returns a suitable unique value for a new device interface 2662 * number. The caller must hold the rtnl semaphore or the 2663 * dev_base_lock to be sure it remains unique. 2664 */ 2665 static int dev_new_index(void) 2666 { 2667 static int ifindex; 2668 for (;;) { 2669 if (++ifindex <= 0) 2670 ifindex = 1; 2671 if (!__dev_get_by_index(ifindex)) 2672 return ifindex; 2673 } 2674 } 2675 2676 static int dev_boot_phase = 1; 2677 2678 /* Delayed registration/unregisteration */ 2679 static DEFINE_SPINLOCK(net_todo_list_lock); 2680 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); 2681 2682 static inline void net_set_todo(struct net_device *dev) 2683 { 2684 spin_lock(&net_todo_list_lock); 2685 list_add_tail(&dev->todo_list, &net_todo_list); 2686 spin_unlock(&net_todo_list_lock); 2687 } 2688 2689 /** 2690 * register_netdevice - register a network device 2691 * @dev: device to register 2692 * 2693 * Take a completed network device structure and add it to the kernel 2694 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 2695 * chain. 0 is returned on success. A negative errno code is returned 2696 * on a failure to set up the device, or if the name is a duplicate. 2697 * 2698 * Callers must hold the rtnl semaphore. You may want 2699 * register_netdev() instead of this. 2700 * 2701 * BUGS: 2702 * The locking appears insufficient to guarantee two parallel registers 2703 * will not get the same name. 2704 */ 2705 2706 int register_netdevice(struct net_device *dev) 2707 { 2708 struct hlist_head *head; 2709 struct hlist_node *p; 2710 int ret; 2711 2712 BUG_ON(dev_boot_phase); 2713 ASSERT_RTNL(); 2714 2715 /* When net_device's are persistent, this will be fatal. */ 2716 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 2717 2718 spin_lock_init(&dev->queue_lock); 2719 spin_lock_init(&dev->xmit_lock); 2720 dev->xmit_lock_owner = -1; 2721 #ifdef CONFIG_NET_CLS_ACT 2722 spin_lock_init(&dev->ingress_lock); 2723 #endif 2724 2725 ret = alloc_divert_blk(dev); 2726 if (ret) 2727 goto out; 2728 2729 dev->iflink = -1; 2730 2731 /* Init, if this function is available */ 2732 if (dev->init) { 2733 ret = dev->init(dev); 2734 if (ret) { 2735 if (ret > 0) 2736 ret = -EIO; 2737 goto out_err; 2738 } 2739 } 2740 2741 if (!dev_valid_name(dev->name)) { 2742 ret = -EINVAL; 2743 goto out_err; 2744 } 2745 2746 dev->ifindex = dev_new_index(); 2747 if (dev->iflink == -1) 2748 dev->iflink = dev->ifindex; 2749 2750 /* Check for existence of name */ 2751 head = dev_name_hash(dev->name); 2752 hlist_for_each(p, head) { 2753 struct net_device *d 2754 = hlist_entry(p, struct net_device, name_hlist); 2755 if (!strncmp(d->name, dev->name, IFNAMSIZ)) { 2756 ret = -EEXIST; 2757 goto out_err; 2758 } 2759 } 2760 2761 /* Fix illegal SG+CSUM combinations. */ 2762 if ((dev->features & NETIF_F_SG) && 2763 !(dev->features & (NETIF_F_IP_CSUM | 2764 NETIF_F_NO_CSUM | 2765 NETIF_F_HW_CSUM))) { 2766 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", 2767 dev->name); 2768 dev->features &= ~NETIF_F_SG; 2769 } 2770 2771 /* TSO requires that SG is present as well. */ 2772 if ((dev->features & NETIF_F_TSO) && 2773 !(dev->features & NETIF_F_SG)) { 2774 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", 2775 dev->name); 2776 dev->features &= ~NETIF_F_TSO; 2777 } 2778 if (dev->features & NETIF_F_UFO) { 2779 if (!(dev->features & NETIF_F_HW_CSUM)) { 2780 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " 2781 "NETIF_F_HW_CSUM feature.\n", 2782 dev->name); 2783 dev->features &= ~NETIF_F_UFO; 2784 } 2785 if (!(dev->features & NETIF_F_SG)) { 2786 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " 2787 "NETIF_F_SG feature.\n", 2788 dev->name); 2789 dev->features &= ~NETIF_F_UFO; 2790 } 2791 } 2792 2793 /* 2794 * nil rebuild_header routine, 2795 * that should be never called and used as just bug trap. 2796 */ 2797 2798 if (!dev->rebuild_header) 2799 dev->rebuild_header = default_rebuild_header; 2800 2801 /* 2802 * Default initial state at registry is that the 2803 * device is present. 2804 */ 2805 2806 set_bit(__LINK_STATE_PRESENT, &dev->state); 2807 2808 dev->next = NULL; 2809 dev_init_scheduler(dev); 2810 write_lock_bh(&dev_base_lock); 2811 *dev_tail = dev; 2812 dev_tail = &dev->next; 2813 hlist_add_head(&dev->name_hlist, head); 2814 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); 2815 dev_hold(dev); 2816 dev->reg_state = NETREG_REGISTERING; 2817 write_unlock_bh(&dev_base_lock); 2818 2819 /* Notify protocols, that a new device appeared. */ 2820 blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); 2821 2822 /* Finish registration after unlock */ 2823 net_set_todo(dev); 2824 ret = 0; 2825 2826 out: 2827 return ret; 2828 out_err: 2829 free_divert_blk(dev); 2830 goto out; 2831 } 2832 2833 /** 2834 * register_netdev - register a network device 2835 * @dev: device to register 2836 * 2837 * Take a completed network device structure and add it to the kernel 2838 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 2839 * chain. 0 is returned on success. A negative errno code is returned 2840 * on a failure to set up the device, or if the name is a duplicate. 2841 * 2842 * This is a wrapper around register_netdev that takes the rtnl semaphore 2843 * and expands the device name if you passed a format string to 2844 * alloc_netdev. 2845 */ 2846 int register_netdev(struct net_device *dev) 2847 { 2848 int err; 2849 2850 rtnl_lock(); 2851 2852 /* 2853 * If the name is a format string the caller wants us to do a 2854 * name allocation. 2855 */ 2856 if (strchr(dev->name, '%')) { 2857 err = dev_alloc_name(dev, dev->name); 2858 if (err < 0) 2859 goto out; 2860 } 2861 2862 /* 2863 * Back compatibility hook. Kill this one in 2.5 2864 */ 2865 if (dev->name[0] == 0 || dev->name[0] == ' ') { 2866 err = dev_alloc_name(dev, "eth%d"); 2867 if (err < 0) 2868 goto out; 2869 } 2870 2871 err = register_netdevice(dev); 2872 out: 2873 rtnl_unlock(); 2874 return err; 2875 } 2876 EXPORT_SYMBOL(register_netdev); 2877 2878 /* 2879 * netdev_wait_allrefs - wait until all references are gone. 2880 * 2881 * This is called when unregistering network devices. 2882 * 2883 * Any protocol or device that holds a reference should register 2884 * for netdevice notification, and cleanup and put back the 2885 * reference if they receive an UNREGISTER event. 2886 * We can get stuck here if buggy protocols don't correctly 2887 * call dev_put. 2888 */ 2889 static void netdev_wait_allrefs(struct net_device *dev) 2890 { 2891 unsigned long rebroadcast_time, warning_time; 2892 2893 rebroadcast_time = warning_time = jiffies; 2894 while (atomic_read(&dev->refcnt) != 0) { 2895 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 2896 rtnl_lock(); 2897 2898 /* Rebroadcast unregister notification */ 2899 blocking_notifier_call_chain(&netdev_chain, 2900 NETDEV_UNREGISTER, dev); 2901 2902 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 2903 &dev->state)) { 2904 /* We must not have linkwatch events 2905 * pending on unregister. If this 2906 * happens, we simply run the queue 2907 * unscheduled, resulting in a noop 2908 * for this device. 2909 */ 2910 linkwatch_run_queue(); 2911 } 2912 2913 __rtnl_unlock(); 2914 2915 rebroadcast_time = jiffies; 2916 } 2917 2918 msleep(250); 2919 2920 if (time_after(jiffies, warning_time + 10 * HZ)) { 2921 printk(KERN_EMERG "unregister_netdevice: " 2922 "waiting for %s to become free. Usage " 2923 "count = %d\n", 2924 dev->name, atomic_read(&dev->refcnt)); 2925 warning_time = jiffies; 2926 } 2927 } 2928 } 2929 2930 /* The sequence is: 2931 * 2932 * rtnl_lock(); 2933 * ... 2934 * register_netdevice(x1); 2935 * register_netdevice(x2); 2936 * ... 2937 * unregister_netdevice(y1); 2938 * unregister_netdevice(y2); 2939 * ... 2940 * rtnl_unlock(); 2941 * free_netdev(y1); 2942 * free_netdev(y2); 2943 * 2944 * We are invoked by rtnl_unlock() after it drops the semaphore. 2945 * This allows us to deal with problems: 2946 * 1) We can create/delete sysfs objects which invoke hotplug 2947 * without deadlocking with linkwatch via keventd. 2948 * 2) Since we run with the RTNL semaphore not held, we can sleep 2949 * safely in order to wait for the netdev refcnt to drop to zero. 2950 */ 2951 static DEFINE_MUTEX(net_todo_run_mutex); 2952 void netdev_run_todo(void) 2953 { 2954 struct list_head list = LIST_HEAD_INIT(list); 2955 int err; 2956 2957 2958 /* Need to guard against multiple cpu's getting out of order. */ 2959 mutex_lock(&net_todo_run_mutex); 2960 2961 /* Not safe to do outside the semaphore. We must not return 2962 * until all unregister events invoked by the local processor 2963 * have been completed (either by this todo run, or one on 2964 * another cpu). 2965 */ 2966 if (list_empty(&net_todo_list)) 2967 goto out; 2968 2969 /* Snapshot list, allow later requests */ 2970 spin_lock(&net_todo_list_lock); 2971 list_splice_init(&net_todo_list, &list); 2972 spin_unlock(&net_todo_list_lock); 2973 2974 while (!list_empty(&list)) { 2975 struct net_device *dev 2976 = list_entry(list.next, struct net_device, todo_list); 2977 list_del(&dev->todo_list); 2978 2979 switch(dev->reg_state) { 2980 case NETREG_REGISTERING: 2981 err = netdev_register_sysfs(dev); 2982 if (err) 2983 printk(KERN_ERR "%s: failed sysfs registration (%d)\n", 2984 dev->name, err); 2985 dev->reg_state = NETREG_REGISTERED; 2986 break; 2987 2988 case NETREG_UNREGISTERING: 2989 netdev_unregister_sysfs(dev); 2990 dev->reg_state = NETREG_UNREGISTERED; 2991 2992 netdev_wait_allrefs(dev); 2993 2994 /* paranoia */ 2995 BUG_ON(atomic_read(&dev->refcnt)); 2996 BUG_TRAP(!dev->ip_ptr); 2997 BUG_TRAP(!dev->ip6_ptr); 2998 BUG_TRAP(!dev->dn_ptr); 2999 3000 3001 /* It must be the very last action, 3002 * after this 'dev' may point to freed up memory. 3003 */ 3004 if (dev->destructor) 3005 dev->destructor(dev); 3006 break; 3007 3008 default: 3009 printk(KERN_ERR "network todo '%s' but state %d\n", 3010 dev->name, dev->reg_state); 3011 break; 3012 } 3013 } 3014 3015 out: 3016 mutex_unlock(&net_todo_run_mutex); 3017 } 3018 3019 /** 3020 * alloc_netdev - allocate network device 3021 * @sizeof_priv: size of private data to allocate space for 3022 * @name: device name format string 3023 * @setup: callback to initialize device 3024 * 3025 * Allocates a struct net_device with private data area for driver use 3026 * and performs basic initialization. 3027 */ 3028 struct net_device *alloc_netdev(int sizeof_priv, const char *name, 3029 void (*setup)(struct net_device *)) 3030 { 3031 void *p; 3032 struct net_device *dev; 3033 int alloc_size; 3034 3035 /* ensure 32-byte alignment of both the device and private area */ 3036 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; 3037 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; 3038 3039 p = kmalloc(alloc_size, GFP_KERNEL); 3040 if (!p) { 3041 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); 3042 return NULL; 3043 } 3044 memset(p, 0, alloc_size); 3045 3046 dev = (struct net_device *) 3047 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); 3048 dev->padded = (char *)dev - (char *)p; 3049 3050 if (sizeof_priv) 3051 dev->priv = netdev_priv(dev); 3052 3053 setup(dev); 3054 strcpy(dev->name, name); 3055 return dev; 3056 } 3057 EXPORT_SYMBOL(alloc_netdev); 3058 3059 /** 3060 * free_netdev - free network device 3061 * @dev: device 3062 * 3063 * This function does the last stage of destroying an allocated device 3064 * interface. The reference to the device object is released. 3065 * If this is the last reference then it will be freed. 3066 */ 3067 void free_netdev(struct net_device *dev) 3068 { 3069 #ifdef CONFIG_SYSFS 3070 /* Compatiablity with error handling in drivers */ 3071 if (dev->reg_state == NETREG_UNINITIALIZED) { 3072 kfree((char *)dev - dev->padded); 3073 return; 3074 } 3075 3076 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 3077 dev->reg_state = NETREG_RELEASED; 3078 3079 /* will free via class release */ 3080 class_device_put(&dev->class_dev); 3081 #else 3082 kfree((char *)dev - dev->padded); 3083 #endif 3084 } 3085 3086 /* Synchronize with packet receive processing. */ 3087 void synchronize_net(void) 3088 { 3089 might_sleep(); 3090 synchronize_rcu(); 3091 } 3092 3093 /** 3094 * unregister_netdevice - remove device from the kernel 3095 * @dev: device 3096 * 3097 * This function shuts down a device interface and removes it 3098 * from the kernel tables. On success 0 is returned, on a failure 3099 * a negative errno code is returned. 3100 * 3101 * Callers must hold the rtnl semaphore. You may want 3102 * unregister_netdev() instead of this. 3103 */ 3104 3105 int unregister_netdevice(struct net_device *dev) 3106 { 3107 struct net_device *d, **dp; 3108 3109 BUG_ON(dev_boot_phase); 3110 ASSERT_RTNL(); 3111 3112 /* Some devices call without registering for initialization unwind. */ 3113 if (dev->reg_state == NETREG_UNINITIALIZED) { 3114 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " 3115 "was registered\n", dev->name, dev); 3116 return -ENODEV; 3117 } 3118 3119 BUG_ON(dev->reg_state != NETREG_REGISTERED); 3120 3121 /* If device is running, close it first. */ 3122 if (dev->flags & IFF_UP) 3123 dev_close(dev); 3124 3125 /* And unlink it from device chain. */ 3126 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { 3127 if (d == dev) { 3128 write_lock_bh(&dev_base_lock); 3129 hlist_del(&dev->name_hlist); 3130 hlist_del(&dev->index_hlist); 3131 if (dev_tail == &dev->next) 3132 dev_tail = dp; 3133 *dp = d->next; 3134 write_unlock_bh(&dev_base_lock); 3135 break; 3136 } 3137 } 3138 if (!d) { 3139 printk(KERN_ERR "unregister net_device: '%s' not found\n", 3140 dev->name); 3141 return -ENODEV; 3142 } 3143 3144 dev->reg_state = NETREG_UNREGISTERING; 3145 3146 synchronize_net(); 3147 3148 /* Shutdown queueing discipline. */ 3149 dev_shutdown(dev); 3150 3151 3152 /* Notify protocols, that we are about to destroy 3153 this device. They should clean all the things. 3154 */ 3155 blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); 3156 3157 /* 3158 * Flush the multicast chain 3159 */ 3160 dev_mc_discard(dev); 3161 3162 if (dev->uninit) 3163 dev->uninit(dev); 3164 3165 /* Notifier chain MUST detach us from master device. */ 3166 BUG_TRAP(!dev->master); 3167 3168 free_divert_blk(dev); 3169 3170 /* Finish processing unregister after unlock */ 3171 net_set_todo(dev); 3172 3173 synchronize_net(); 3174 3175 dev_put(dev); 3176 return 0; 3177 } 3178 3179 /** 3180 * unregister_netdev - remove device from the kernel 3181 * @dev: device 3182 * 3183 * This function shuts down a device interface and removes it 3184 * from the kernel tables. On success 0 is returned, on a failure 3185 * a negative errno code is returned. 3186 * 3187 * This is just a wrapper for unregister_netdevice that takes 3188 * the rtnl semaphore. In general you want to use this and not 3189 * unregister_netdevice. 3190 */ 3191 void unregister_netdev(struct net_device *dev) 3192 { 3193 rtnl_lock(); 3194 unregister_netdevice(dev); 3195 rtnl_unlock(); 3196 } 3197 3198 EXPORT_SYMBOL(unregister_netdev); 3199 3200 #ifdef CONFIG_HOTPLUG_CPU 3201 static int dev_cpu_callback(struct notifier_block *nfb, 3202 unsigned long action, 3203 void *ocpu) 3204 { 3205 struct sk_buff **list_skb; 3206 struct net_device **list_net; 3207 struct sk_buff *skb; 3208 unsigned int cpu, oldcpu = (unsigned long)ocpu; 3209 struct softnet_data *sd, *oldsd; 3210 3211 if (action != CPU_DEAD) 3212 return NOTIFY_OK; 3213 3214 local_irq_disable(); 3215 cpu = smp_processor_id(); 3216 sd = &per_cpu(softnet_data, cpu); 3217 oldsd = &per_cpu(softnet_data, oldcpu); 3218 3219 /* Find end of our completion_queue. */ 3220 list_skb = &sd->completion_queue; 3221 while (*list_skb) 3222 list_skb = &(*list_skb)->next; 3223 /* Append completion queue from offline CPU. */ 3224 *list_skb = oldsd->completion_queue; 3225 oldsd->completion_queue = NULL; 3226 3227 /* Find end of our output_queue. */ 3228 list_net = &sd->output_queue; 3229 while (*list_net) 3230 list_net = &(*list_net)->next_sched; 3231 /* Append output queue from offline CPU. */ 3232 *list_net = oldsd->output_queue; 3233 oldsd->output_queue = NULL; 3234 3235 raise_softirq_irqoff(NET_TX_SOFTIRQ); 3236 local_irq_enable(); 3237 3238 /* Process offline CPU's input_pkt_queue */ 3239 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 3240 netif_rx(skb); 3241 3242 return NOTIFY_OK; 3243 } 3244 #endif /* CONFIG_HOTPLUG_CPU */ 3245 3246 3247 /* 3248 * Initialize the DEV module. At boot time this walks the device list and 3249 * unhooks any devices that fail to initialise (normally hardware not 3250 * present) and leaves us with a valid list of present and active devices. 3251 * 3252 */ 3253 3254 /* 3255 * This is called single threaded during boot, so no need 3256 * to take the rtnl semaphore. 3257 */ 3258 static int __init net_dev_init(void) 3259 { 3260 int i, rc = -ENOMEM; 3261 3262 BUG_ON(!dev_boot_phase); 3263 3264 net_random_init(); 3265 3266 if (dev_proc_init()) 3267 goto out; 3268 3269 if (netdev_sysfs_init()) 3270 goto out; 3271 3272 INIT_LIST_HEAD(&ptype_all); 3273 for (i = 0; i < 16; i++) 3274 INIT_LIST_HEAD(&ptype_base[i]); 3275 3276 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) 3277 INIT_HLIST_HEAD(&dev_name_head[i]); 3278 3279 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) 3280 INIT_HLIST_HEAD(&dev_index_head[i]); 3281 3282 /* 3283 * Initialise the packet receive queues. 3284 */ 3285 3286 for_each_cpu(i) { 3287 struct softnet_data *queue; 3288 3289 queue = &per_cpu(softnet_data, i); 3290 skb_queue_head_init(&queue->input_pkt_queue); 3291 queue->completion_queue = NULL; 3292 INIT_LIST_HEAD(&queue->poll_list); 3293 set_bit(__LINK_STATE_START, &queue->backlog_dev.state); 3294 queue->backlog_dev.weight = weight_p; 3295 queue->backlog_dev.poll = process_backlog; 3296 atomic_set(&queue->backlog_dev.refcnt, 1); 3297 } 3298 3299 dev_boot_phase = 0; 3300 3301 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); 3302 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); 3303 3304 hotcpu_notifier(dev_cpu_callback, 0); 3305 dst_init(); 3306 dev_mcast_init(); 3307 rc = 0; 3308 out: 3309 return rc; 3310 } 3311 3312 subsys_initcall(net_dev_init); 3313 3314 EXPORT_SYMBOL(__dev_get_by_index); 3315 EXPORT_SYMBOL(__dev_get_by_name); 3316 EXPORT_SYMBOL(__dev_remove_pack); 3317 EXPORT_SYMBOL(__skb_linearize); 3318 EXPORT_SYMBOL(dev_valid_name); 3319 EXPORT_SYMBOL(dev_add_pack); 3320 EXPORT_SYMBOL(dev_alloc_name); 3321 EXPORT_SYMBOL(dev_close); 3322 EXPORT_SYMBOL(dev_get_by_flags); 3323 EXPORT_SYMBOL(dev_get_by_index); 3324 EXPORT_SYMBOL(dev_get_by_name); 3325 EXPORT_SYMBOL(dev_open); 3326 EXPORT_SYMBOL(dev_queue_xmit); 3327 EXPORT_SYMBOL(dev_remove_pack); 3328 EXPORT_SYMBOL(dev_set_allmulti); 3329 EXPORT_SYMBOL(dev_set_promiscuity); 3330 EXPORT_SYMBOL(dev_change_flags); 3331 EXPORT_SYMBOL(dev_set_mtu); 3332 EXPORT_SYMBOL(dev_set_mac_address); 3333 EXPORT_SYMBOL(free_netdev); 3334 EXPORT_SYMBOL(netdev_boot_setup_check); 3335 EXPORT_SYMBOL(netdev_set_master); 3336 EXPORT_SYMBOL(netdev_state_change); 3337 EXPORT_SYMBOL(netif_receive_skb); 3338 EXPORT_SYMBOL(netif_rx); 3339 EXPORT_SYMBOL(register_gifconf); 3340 EXPORT_SYMBOL(register_netdevice); 3341 EXPORT_SYMBOL(register_netdevice_notifier); 3342 EXPORT_SYMBOL(skb_checksum_help); 3343 EXPORT_SYMBOL(synchronize_net); 3344 EXPORT_SYMBOL(unregister_netdevice); 3345 EXPORT_SYMBOL(unregister_netdevice_notifier); 3346 EXPORT_SYMBOL(net_enable_timestamp); 3347 EXPORT_SYMBOL(net_disable_timestamp); 3348 EXPORT_SYMBOL(dev_get_flags); 3349 3350 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 3351 EXPORT_SYMBOL(br_handle_frame_hook); 3352 EXPORT_SYMBOL(br_fdb_get_hook); 3353 EXPORT_SYMBOL(br_fdb_put_hook); 3354 #endif 3355 3356 #ifdef CONFIG_KMOD 3357 EXPORT_SYMBOL(dev_load); 3358 #endif 3359 3360 EXPORT_PER_CPU_SYMBOL(softnet_data); 3361