1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 31 * 32 * v0.1 - first working version. 33 * v0.2 - changed stats to be calculated by summing slaves stats. 34 * 35 * Changes: 36 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 37 * - fix leaks on failure at bond_init 38 * 39 * 2000/09/30 - Willy Tarreau <willy at meta-x.org> 40 * - added trivial code to release a slave device. 41 * - fixed security bug (CAP_NET_ADMIN not checked) 42 * - implemented MII link monitoring to disable dead links : 43 * All MII capable slaves are checked every <miimon> milliseconds 44 * (100 ms seems good). This value can be changed by passing it to 45 * insmod. A value of zero disables the monitoring (default). 46 * - fixed an infinite loop in bond_xmit_roundrobin() when there's no 47 * good slave. 
48 * - made the code hopefully SMP safe 49 * 50 * 2000/10/03 - Willy Tarreau <willy at meta-x.org> 51 * - optimized slave lists based on relevant suggestions from Thomas Davis 52 * - implemented active-backup method to obtain HA with two switches: 53 * stay as long as possible on the same active interface, while we 54 * also monitor the backup one (MII link status) because we want to know 55 * if we are able to switch at any time. ( pass "mode=1" to insmod ) 56 * - lots of stress testings because we need it to be more robust than the 57 * wires ! :-> 58 * 59 * 2000/10/09 - Willy Tarreau <willy at meta-x.org> 60 * - added up and down delays after link state change. 61 * - optimized the slaves chaining so that when we run forward, we never 62 * repass through the bond itself, but we can find it by searching 63 * backwards. Renders the deletion more difficult, but accelerates the 64 * scan. 65 * - smarter enslaving and releasing. 66 * - finer and more robust SMP locking 67 * 68 * 2000/10/17 - Willy Tarreau <willy at meta-x.org> 69 * - fixed two potential SMP race conditions 70 * 71 * 2000/10/18 - Willy Tarreau <willy at meta-x.org> 72 * - small fixes to the monitoring FSM in case of zero delays 73 * 2000/11/01 - Willy Tarreau <willy at meta-x.org> 74 * - fixed first slave not automatically used in trunk mode. 75 * 2000/11/10 : spelling of "EtherChannel" corrected. 76 * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). 77 * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). 78 * 79 * 2001/1/3 - Chad N. Tindel <ctindel at ieee dot org> 80 * - The bonding driver now simulates MII status monitoring, just like 81 * a normal network device. It will show that the link is down iff 82 * every slave in the bond shows that their links are down. If at least 83 * one slave is up, the bond's MII status will appear as up. 84 * 85 * 2001/2/7 - Chad N. 
Tindel <ctindel at ieee dot org> 86 * - Applications can now query the bond from user space to get 87 * information which may be useful. They do this by calling 88 * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves 89 * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to 90 * get slave specific information (# link failures, etc). See 91 * <linux/if_bonding.h> for more details. The structs of interest 92 * are ifbond and ifslave. 93 * 94 * 2001/4/5 - Chad N. Tindel <ctindel at ieee dot org> 95 * - Ported to 2.4 Kernel 96 * 97 * 2001/5/2 - Jeffrey E. Mast <jeff at mastfamily dot com> 98 * - When a device is detached from a bond, the slave device is no longer 99 * left thinking that is has a master. 100 * 101 * 2001/5/16 - Jeffrey E. Mast <jeff at mastfamily dot com> 102 * - memset did not appropriately initialized the bond rw_locks. Used 103 * rwlock_init to initialize to unlocked state to prevent deadlock when 104 * first attempting a lock 105 * - Called SET_MODULE_OWNER for bond device 106 * 107 * 2001/5/17 - Tim Anderson <tsa at mvista.com> 108 * - 2 paths for releasing for slave release; 1 through ioctl 109 * and 2) through close. Both paths need to release the same way. 110 * - the free slave in bond release is changing slave status before 111 * the free. The netdev_set_master() is intended to change slave state 112 * so it should not be done as part of the release process. 113 * - Simple rule for slave state at release: only the active in A/B and 114 * only one in the trunked case. 115 * 116 * 2001/6/01 - Tim Anderson <tsa at mvista.com> 117 * - Now call dev_close when releasing a slave so it doesn't screw up 118 * out routing table. 119 * 120 * 2001/6/01 - Chad N. Tindel <ctindel at ieee dot org> 121 * - Added /proc support for getting bond and slave information. 122 * Information is in /proc/net/<bond device>/info. 123 * - Changed the locking when calling bond_close to prevent deadlock. 
124 * 125 * 2001/8/05 - Janice Girouard <girouard at us.ibm.com> 126 * - correct problem where refcnt of slave is not incremented in bond_ioctl 127 * so the system hangs when halting. 128 * - correct locking problem when unable to malloc in bond_enslave. 129 * - adding bond_xmit_xor logic. 130 * - adding multiple bond device support. 131 * 132 * 2001/8/13 - Erik Habbinga <erik_habbinga at hp dot com> 133 * - correct locking problem with rtnl_exlock_nowait 134 * 135 * 2001/8/23 - Janice Girouard <girouard at us.ibm.com> 136 * - bzero initial dev_bonds, to correct oops 137 * - convert SIOCDEVPRIVATE to new MII ioctl calls 138 * 139 * 2001/9/13 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> 140 * - Add the BOND_CHANGE_ACTIVE ioctl implementation 141 * 142 * 2001/9/14 - Mark Huth <mhuth at mvista dot com> 143 * - Change MII_LINK_READY to not check for end of auto-negotiation, 144 * but only for an up link. 145 * 146 * 2001/9/20 - Chad N. Tindel <ctindel at ieee dot org> 147 * - Add the device field to bonding_t. Previously the net_device 148 * corresponding to a bond wasn't available from the bonding_t 149 * structure. 150 * 151 * 2001/9/25 - Janice Girouard <girouard at us.ibm.com> 152 * - add arp_monitor for active backup mode 153 * 154 * 2001/10/23 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> 155 * - Various memory leak fixes 156 * 157 * 2001/11/5 - Mark Huth <mark dot huth at mvista dot com> 158 * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under 159 * certain hotswap conditions. 160 * Note: this same change may be required in bond_arp_monitor ??? 161 * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr 162 * - Handle hot swap ethernet interface deregistration events to remove 163 * kernel oops following hot swap of enslaved interface 164 * 165 * 2002/1/2 - Chad N. Tindel <ctindel at ieee dot org> 166 * - Restore original slave flags at release time. 
167 * 168 * 2002/02/18 - Erik Habbinga <erik_habbinga at hp dot com> 169 * - bond_release(): calling kfree on our_slave after call to 170 * bond_restore_slave_flags, not before 171 * - bond_enslave(): saving slave flags into original_flags before 172 * call to netdev_set_master, so the IFF_SLAVE flag doesn't end 173 * up in original_flags 174 * 175 * 2002/04/05 - Mark Smith <mark.smith at comdev dot cc> and 176 * Steve Mead <steve.mead at comdev dot cc> 177 * - Port Gleb Natapov's multicast support patchs from 2.4.12 178 * to 2.4.18 adding support for multicast. 179 * 180 * 2002/06/10 - Tony Cureington <tony.cureington * hp_com> 181 * - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link; 182 * actually changed function to use MIIPHY, then MIIREG, and finally 183 * ETHTOOL to determine the link status 184 * - fixed bad ifr_data pointer assignments in bond_ioctl 185 * - corrected mode 1 being reported as active-backup in bond_get_info; 186 * also added text to distinguish type of load balancing (rr or xor) 187 * - change arp_ip_target module param from "1-12s" (array of 12 ptrs) 188 * to "s" (a single ptr) 189 * 190 * 2002/08/30 - Jay Vosburgh <fubar at us dot ibm dot com> 191 * - Removed acquisition of xmit_lock in set_multicast_list; caused 192 * deadlock on SMP (lock is held by caller). 193 * - Revamped SIOCGMIIPHY, SIOCGMIIREG portion of bond_check_dev_link(). 194 * 195 * 2002/09/18 - Jay Vosburgh <fubar at us dot ibm dot com> 196 * - Fixed up bond_check_dev_link() (and callers): removed some magic 197 * numbers, banished local MII_ defines, wrapped ioctl calls to 198 * prevent EFAULT errors 199 * 200 * 2002/9/30 - Jay Vosburgh <fubar at us dot ibm dot com> 201 * - make sure the ip target matches the arp_target before saving the 202 * hw address. 203 * 204 * 2002/9/30 - Dan Eisner <eisner at 2robots dot com> 205 * - make sure my_ip is set before taking down the link, since 206 * not all switches respond if the source ip is not set. 
207 * 208 * 2002/10/8 - Janice Girouard <girouard at us dot ibm dot com> 209 * - read in the local ip address when enslaving a device 210 * - add primary support 211 * - make sure 2*arp_interval has passed when a new device 212 * is brought on-line before taking it down. 213 * 214 * 2002/09/11 - Philippe De Muyter <phdm at macqel dot be> 215 * - Added bond_xmit_broadcast logic. 216 * - Added bond_mode() support function. 217 * 218 * 2002/10/26 - Laurent Deniel <laurent.deniel at free.fr> 219 * - allow to register multicast addresses only on active slave 220 * (useful in active-backup mode) 221 * - add multicast module parameter 222 * - fix deletion of multicast groups after unloading module 223 * 224 * 2002/11/06 - Kameshwara Rayaprolu <kameshwara.rao * wipro_com> 225 * - Changes to prevent panic from closing the device twice; if we close 226 * the device in bond_release, we must set the original_flags to down 227 * so it won't be closed again by the network layer. 228 * 229 * 2002/11/07 - Tony Cureington <tony.cureington * hp_com> 230 * - Fix arp_target_hw_addr memory leak 231 * - Created activebackup_arp_monitor function to handle arp monitoring 232 * in active backup mode - the bond_arp_monitor had several problems... 
233 * such as allowing slaves to tx arps sequentially without any delay 234 * for a response 235 * - Renamed bond_arp_monitor to loadbalance_arp_monitor and re-wrote 236 * this function to just handle arp monitoring in load-balancing mode; 237 * it is a lot more compact now 238 * - Changes to ensure one and only one slave transmits in active-backup 239 * mode 240 * - Robustesize parameters; warn users about bad combinations of 241 * parameters; also if miimon is specified and a network driver does 242 * not support MII or ETHTOOL, inform the user of this 243 * - Changes to support link_failure_count when in arp monitoring mode 244 * - Fix up/down delay reported in /proc 245 * - Added version; log version; make version available from "modinfo -d" 246 * - Fixed problem in bond_check_dev_link - if the first IOCTL (SIOCGMIIPH) 247 * failed, the ETHTOOL ioctl never got a chance 248 * 249 * 2002/11/16 - Laurent Deniel <laurent.deniel at free.fr> 250 * - fix multicast handling in activebackup_arp_monitor 251 * - remove one unnecessary and confusing curr_active_slave == slave test 252 * in activebackup_arp_monitor 253 * 254 * 2002/11/17 - Laurent Deniel <laurent.deniel at free.fr> 255 * - fix bond_slave_info_query when slave_id = num_slaves 256 * 257 * 2002/11/19 - Janice Girouard <girouard at us dot ibm dot com> 258 * - correct ifr_data reference. Update ifr_data reference 259 * to mii_ioctl_data struct values to avoid confusion. 260 * 261 * 2002/11/22 - Bert Barbe <bert.barbe at oracle dot com> 262 * - Add support for multiple arp_ip_target 263 * 264 * 2002/12/13 - Jay Vosburgh <fubar at us dot ibm dot com> 265 * - Changed to allow text strings for mode and multicast, e.g., 266 * insmod bonding mode=active-backup. The numbers still work. 267 * One change: an invalid choice will cause module load failure, 268 * rather than the previous behavior of just picking one. 
269 * - Minor cleanups; got rid of dup ctype stuff, atoi function 270 * 271 * 2003/02/07 - Jay Vosburgh <fubar at us dot ibm dot com> 272 * - Added use_carrier module parameter that causes miimon to 273 * use netif_carrier_ok() test instead of MII/ETHTOOL ioctls. 274 * - Minor cleanups; consolidated ioctl calls to one function. 275 * 276 * 2003/02/07 - Tony Cureington <tony.cureington * hp_com> 277 * - Fix bond_mii_monitor() logic error that could result in 278 * bonding round-robin mode ignoring links after failover/recovery 279 * 280 * 2003/03/17 - Jay Vosburgh <fubar at us dot ibm dot com> 281 * - kmalloc fix (GFP_KERNEL to GFP_ATOMIC) reported by 282 * Shmulik dot Hen at intel.com. 283 * - Based on discussion on mailing list, changed use of 284 * update_slave_cnt(), created wrapper functions for adding/removing 285 * slaves, changed bond_xmit_xor() to check slave_cnt instead of 286 * checking slave and slave->dev (which only worked by accident). 287 * - Misc code cleanup: get arp_send() prototype from header file, 288 * add max_bonds to bonding.txt. 289 * 290 * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and 291 * Shmulik Hen <shmulik.hen at intel dot com> 292 * - Make sure only bond_attach_slave() and bond_detach_slave() can 293 * manipulate the slave list, including slave_cnt, even when in 294 * bond_release_all(). 295 * - Fixed hang in bond_release() with traffic running: 296 * netdev_set_master() must not be called from within the bond lock. 297 * 298 * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and 299 * Shmulik Hen <shmulik.hen at intel dot com> 300 * - Fixed hang in bond_enslave() with traffic running: 301 * netdev_set_master() must not be called from within the bond lock. 302 * 303 * 2003/03/18 - Amir Noam <amir.noam at intel dot com> 304 * - Added support for getting slave's speed and duplex via ethtool. 305 * Needed for 802.3ad and other future modes. 
 *
 * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
 *              Shmulik Hen <shmulik.hen at intel dot com>
 *      - Enable support of modes that need to use the unique mac address of
 *        each slave.
 *        * bond_enslave(): Moved setting the slave's mac address, and
 *          opening it, from the application to the driver. This breaks
 *          backward compatibility with old versions of ifenslave that open
 *          the slave before enslaving it !!!.
 *        * bond_release(): The driver also takes care of closing the slave
 *          and restoring its original mac address.
 *      - Removed the code that restores all base driver's flags.
 *        Flags are automatically restored once all undo stages are done
 *        properly.
 *      - Block possibility of enslaving before the master is up. This
 *        prevents putting the system in an unstable state.
 *
 * 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
 *              Tsippy Mendelson <tsippy.mendelson at intel dot com> and
 *              Shmulik Hen <shmulik.hen at intel dot com>
 *      - Added support for IEEE 802.3ad Dynamic link aggregation mode.
 *
 * 2003/05/01 - Amir Noam <amir.noam at intel dot com>
 *      - Added ABI version control to restore compatibility between
 *        new/old ifenslave and new/old bonding.
 *
 * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
 *      - Fixed bug in bond_release_all(): save old value of curr_active_slave
 *        before setting it to NULL.
 *      - Changed driver versioning scheme to include version number instead
 *        of release date (that is already in another field). There are 3
 *        fields X.Y.Z where:
 *              X - Major version - big behavior changes
 *              Y - Minor version - addition of features
 *              Z - Extra version - minor changes and bug fixes
 *        The current version is 1.0.0 as a base line.
342 * 343 * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and 344 * Amir Noam <amir.noam at intel dot com> 345 * - Added support for lacp_rate module param. 346 * - Code beautification and style changes (mainly in comments). 347 * new version - 1.0.1 348 * 349 * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> 350 * - Based on discussion on mailing list, changed locking scheme 351 * to use lock/unlock or lock_bh/unlock_bh appropriately instead 352 * of lock_irqsave/unlock_irqrestore. The new scheme helps exposing 353 * hidden bugs and solves system hangs that occurred due to the fact 354 * that holding lock_irqsave doesn't prevent softirqs from running. 355 * This also increases total throughput since interrupts are not 356 * blocked on each transmitted packets or monitor timeout. 357 * new version - 2.0.0 358 * 359 * 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com> 360 * - Added support for Transmit load balancing mode. 361 * - Concentrate all assignments of curr_active_slave to a single point 362 * so specific modes can take actions when the primary adapter is 363 * changed. 364 * - Take the updelay parameter into consideration during bond_enslave 365 * since some adapters loose their link during setting the device. 366 * - Renamed bond_3ad_link_status_changed() to 367 * bond_3ad_handle_link_change() for compatibility with TLB. 368 * new version - 2.1.0 369 * 370 * 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> 371 * - Added support for Adaptive load balancing mode which is 372 * equivalent to Transmit load balancing + Receive load balancing. 373 * new version - 2.2.0 374 * 375 * 2003/05/15 - Jay Vosburgh <fubar at us dot ibm dot com> 376 * - Applied fix to activebackup_arp_monitor posted to bonding-devel 377 * by Tony Cureington <tony.cureington * hp_com>. Fixes ARP 378 * monitor endless failover bug. 
Version to 2.2.10 379 * 380 * 2003/05/20 - Amir Noam <amir.noam at intel dot com> 381 * - Fixed bug in ABI version control - Don't commit to a specific 382 * ABI version if receiving unsupported ioctl commands. 383 * 384 * 2003/05/22 - Jay Vosburgh <fubar at us dot ibm dot com> 385 * - Fix ifenslave -c causing bond to loose existing routes; 386 * added bond_set_mac_address() that doesn't require the 387 * bond to be down. 388 * - In conjunction with fix for ifenslave -c, in 389 * bond_change_active(), changing to the already active slave 390 * is no longer an error (it successfully does nothing). 391 * 392 * 2003/06/30 - Amir Noam <amir.noam at intel dot com> 393 * - Fixed bond_change_active() for ALB/TLB modes. 394 * Version to 2.2.14. 395 * 396 * 2003/07/29 - Amir Noam <amir.noam at intel dot com> 397 * - Fixed ARP monitoring bug. 398 * Version to 2.2.15. 399 * 400 * 2003/07/31 - Willy Tarreau <willy at ods dot org> 401 * - Fixed kernel panic when using ARP monitoring without 402 * setting bond's IP address. 403 * Version to 2.2.16. 404 * 405 * 2003/08/06 - Amir Noam <amir.noam at intel dot com> 406 * - Back port from 2.6: use alloc_netdev(); fix /proc handling; 407 * made stats a part of bond struct so no need to allocate 408 * and free it separately; use standard list operations instead 409 * of pre-allocated array of bonds. 410 * Version to 2.3.0. 
 *
 * 2003/08/07 - Jay Vosburgh <fubar at us dot ibm dot com>,
 *              Amir Noam <amir.noam at intel dot com> and
 *              Shmulik Hen <shmulik.hen at intel dot com>
 *      - Propagating master's settings: Distinguish between modes that
 *        use a primary slave from those that don't, and propagate settings
 *        accordingly; Consolidate change_active operations and add
 *        reselect_active and find_best operations; Decouple promiscuous
 *        handling from the multicast mode setting; Add support for changing
 *        HW address and MTU with proper unwind; Consolidate procfs code,
 *        add CHANGENAME handler; Enhance netdev notification handling.
 *        Version to 2.4.0.
 *
 * 2003/09/15 - Stephen Hemminger <shemminger at osdl dot org>,
 *              Amir Noam <amir.noam at intel dot com>
 *      - Convert /proc to seq_file interface.
 *        Change /proc/net/bondX/info to /proc/net/bonding/bondX.
 *        Set version to 2.4.1.
 *
 * 2003/11/20 - Amir Noam <amir.noam at intel dot com>
 *      - Fix /proc creation/destruction.
 *
 * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
 *      - Massive cleanup - Set version to 2.5.0
 *        Code changes:
 *        o Consolidate format of prints and debug prints.
 *        o Remove bonding_t/slave_t typedefs and consolidate all casts.
 *        o Remove dead code and unnecessary checks.
 *        o Consolidate starting/stopping timers.
 *        o Consolidate handling of primary module param throughout the code.
 *        o Removed multicast module param support - all settings are done
 *          according to mode.
 *        o Slave list iteration - bond is no longer part of the list,
 *          added cyclic list iteration macros.
 *        o Consolidate error handling in all xmit functions.
 *        Style changes:
 *        o Consolidate function naming and declarations.
 *        o Consolidate function params and local variables names.
 *        o Consolidate return values.
 *        o Consolidate curly braces.
 *        o Consolidate conditionals format.
452 * o Change struct member names and types. 453 * o Chomp trailing spaces, remove empty lines, fix indentations. 454 * o Re-organize code according to context. 455 * 456 * 2003/12/30 - Amir Noam <amir.noam at intel dot com> 457 * - Fixed: Cannot remove and re-enslave the original active slave. 458 * - Fixed: Releasing the original active slave causes mac address 459 * duplication. 460 * - Add support for slaves that use ethtool_ops. 461 * Set version to 2.5.3. 462 * 463 * 2004/01/05 - Amir Noam <amir.noam at intel dot com> 464 * - Save bonding parameters per bond instead of using the global values. 465 * Set version to 2.5.4. 466 * 467 * 2004/01/14 - Shmulik Hen <shmulik.hen at intel dot com> 468 * - Enhance VLAN support: 469 * * Add support for VLAN hardware acceleration capable slaves. 470 * * Add capability to tag self generated packets in ALB/TLB modes. 471 * Set version to 2.6.0. 472 * 2004/10/29 - Mitch Williams <mitch.a.williams at intel dot com> 473 * - Fixed bug when unloading module while using 802.3ad. If 474 * spinlock debugging is turned on, this causes a stack dump. 475 * Solution is to move call to dev_remove_pack outside of the 476 * spinlock. 477 * Set version to 2.6.1. 478 * 2005/06/05 - Jay Vosburgh <fubar@us.ibm.com> 479 * - Support for generating gratuitous ARPs in active-backup mode. 480 * Includes support for VLAN tagging all bonding-generated ARPs 481 * as needed. Set version to 2.6.2. 482 * 2005/06/08 - Jason Gabler <jygabler at lbl dot gov> 483 * - alternate hashing policy support for mode 2 484 * * Added kernel parameter "xmit_hash_policy" to allow the selection 485 * of different hashing policies for mode 2. The original mode 2 486 * policy is the default, now found in xmit_hash_policy_layer2(). 487 * * Added xmit_hash_policy_layer34() 488 * - Modified by Jay Vosburgh <fubar@us.ibm.com> to also support mode 4. 489 * Set version to 2.6.3. 
490 */ 491 492 //#define BONDING_DEBUG 1 493 494 #include <linux/config.h> 495 #include <linux/kernel.h> 496 #include <linux/module.h> 497 #include <linux/sched.h> 498 #include <linux/types.h> 499 #include <linux/fcntl.h> 500 #include <linux/interrupt.h> 501 #include <linux/ptrace.h> 502 #include <linux/ioport.h> 503 #include <linux/in.h> 504 #include <net/ip.h> 505 #include <linux/ip.h> 506 #include <linux/tcp.h> 507 #include <linux/udp.h> 508 #include <linux/slab.h> 509 #include <linux/string.h> 510 #include <linux/init.h> 511 #include <linux/timer.h> 512 #include <linux/socket.h> 513 #include <linux/ctype.h> 514 #include <linux/inet.h> 515 #include <linux/bitops.h> 516 #include <asm/system.h> 517 #include <asm/io.h> 518 #include <asm/dma.h> 519 #include <asm/uaccess.h> 520 #include <linux/errno.h> 521 #include <linux/netdevice.h> 522 #include <linux/inetdevice.h> 523 #include <linux/etherdevice.h> 524 #include <linux/skbuff.h> 525 #include <net/sock.h> 526 #include <linux/rtnetlink.h> 527 #include <linux/proc_fs.h> 528 #include <linux/seq_file.h> 529 #include <linux/smp.h> 530 #include <linux/if_ether.h> 531 #include <net/arp.h> 532 #include <linux/mii.h> 533 #include <linux/ethtool.h> 534 #include <linux/if_vlan.h> 535 #include <linux/if_bonding.h> 536 #include <net/route.h> 537 #include "bonding.h" 538 #include "bond_3ad.h" 539 #include "bond_alb.h" 540 541 /*---------------------------- Module parameters ----------------------------*/ 542 543 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 544 #define BOND_LINK_MON_INTERV 0 545 #define BOND_LINK_ARP_INTERV 0 546 547 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 548 static int miimon = BOND_LINK_MON_INTERV; 549 static int updelay = 0; 550 static int downdelay = 0; 551 static int use_carrier = 1; 552 static char *mode = NULL; 553 static char *primary = NULL; 554 static char *lacp_rate = NULL; 555 static char *xmit_hash_policy = NULL; 556 static int arp_interval = BOND_LINK_ARP_INTERV; 557 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 558 559 module_param(max_bonds, int, 0); 560 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 561 module_param(miimon, int, 0); 562 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 563 module_param(updelay, int, 0); 564 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 565 module_param(downdelay, int, 0); 566 MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); 567 module_param(use_carrier, int, 0); 568 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 0 for off, 1 for on (default)"); 569 module_param(mode, charp, 0); 570 MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); 571 module_param(primary, charp, 0); 572 MODULE_PARM_DESC(primary, "Primary network device to use"); 573 module_param(lacp_rate, charp, 0); 574 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)"); 575 module_param(xmit_hash_policy, charp, 0); 576 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method : 0 for layer 2 (default), 1 for layer 3+4"); 577 module_param(arp_interval, int, 0); 578 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 579 module_param_array(arp_ip_target, charp, NULL, 0); 580 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 581 582 /*----------------------------- Global variables ----------------------------*/ 583 
584 static const char *version = 585 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 586 587 static LIST_HEAD(bond_dev_list); 588 589 #ifdef CONFIG_PROC_FS 590 static struct proc_dir_entry *bond_proc_dir = NULL; 591 #endif 592 593 static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; 594 static int arp_ip_count = 0; 595 static int bond_mode = BOND_MODE_ROUNDROBIN; 596 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2; 597 static int lacp_fast = 0; 598 static int app_abi_ver = 0; 599 static int orig_app_abi_ver = -1; /* This is used to save the first ABI version 600 * we receive from the application. Once set, 601 * it won't be changed, and the module will 602 * refuse to enslave/release interfaces if the 603 * command comes from an application using 604 * another ABI version. 605 */ 606 struct bond_parm_tbl { 607 char *modename; 608 int mode; 609 }; 610 611 static struct bond_parm_tbl bond_lacp_tbl[] = { 612 { "slow", AD_LACP_SLOW}, 613 { "fast", AD_LACP_FAST}, 614 { NULL, -1}, 615 }; 616 617 static struct bond_parm_tbl bond_mode_tbl[] = { 618 { "balance-rr", BOND_MODE_ROUNDROBIN}, 619 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 620 { "balance-xor", BOND_MODE_XOR}, 621 { "broadcast", BOND_MODE_BROADCAST}, 622 { "802.3ad", BOND_MODE_8023AD}, 623 { "balance-tlb", BOND_MODE_TLB}, 624 { "balance-alb", BOND_MODE_ALB}, 625 { NULL, -1}, 626 }; 627 628 static struct bond_parm_tbl xmit_hashtype_tbl[] = { 629 { "layer2", BOND_XMIT_POLICY_LAYER2}, 630 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 631 { NULL, -1}, 632 }; 633 634 /*-------------------------- Forward declarations ---------------------------*/ 635 636 static inline void bond_set_mode_ops(struct bonding *bond, int mode); 637 static void bond_send_gratuitous_arp(struct bonding *bond); 638 639 /*---------------------------- General routines -----------------------------*/ 640 641 static const char *bond_mode_name(int mode) 642 { 643 switch (mode) { 644 case BOND_MODE_ROUNDROBIN : 645 return "load balancing 
(round-robin)"; 646 case BOND_MODE_ACTIVEBACKUP : 647 return "fault-tolerance (active-backup)"; 648 case BOND_MODE_XOR : 649 return "load balancing (xor)"; 650 case BOND_MODE_BROADCAST : 651 return "fault-tolerance (broadcast)"; 652 case BOND_MODE_8023AD: 653 return "IEEE 802.3ad Dynamic link aggregation"; 654 case BOND_MODE_TLB: 655 return "transmit load balancing"; 656 case BOND_MODE_ALB: 657 return "adaptive load balancing"; 658 default: 659 return "unknown"; 660 } 661 } 662 663 /*---------------------------------- VLAN -----------------------------------*/ 664 665 /** 666 * bond_add_vlan - add a new vlan id on bond 667 * @bond: bond that got the notification 668 * @vlan_id: the vlan id to add 669 * 670 * Returns -ENOMEM if allocation failed. 671 */ 672 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 673 { 674 struct vlan_entry *vlan; 675 676 dprintk("bond: %s, vlan id %d\n", 677 (bond ? bond->dev->name: "None"), vlan_id); 678 679 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 680 if (!vlan) { 681 return -ENOMEM; 682 } 683 684 INIT_LIST_HEAD(&vlan->vlan_list); 685 vlan->vlan_id = vlan_id; 686 vlan->vlan_ip = 0; 687 688 write_lock_bh(&bond->lock); 689 690 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 691 692 write_unlock_bh(&bond->lock); 693 694 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 695 696 return 0; 697 } 698 699 /** 700 * bond_del_vlan - delete a vlan id from bond 701 * @bond: bond that got the notification 702 * @vlan_id: the vlan id to delete 703 * 704 * returns -ENODEV if @vlan_id was not found in @bond. 
705 */ 706 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 707 { 708 struct vlan_entry *vlan, *next; 709 int res = -ENODEV; 710 711 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 712 713 write_lock_bh(&bond->lock); 714 715 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 716 if (vlan->vlan_id == vlan_id) { 717 list_del(&vlan->vlan_list); 718 719 if ((bond->params.mode == BOND_MODE_TLB) || 720 (bond->params.mode == BOND_MODE_ALB)) { 721 bond_alb_clear_vlan(bond, vlan_id); 722 } 723 724 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 725 bond->dev->name); 726 727 kfree(vlan); 728 729 if (list_empty(&bond->vlan_list) && 730 (bond->slave_cnt == 0)) { 731 /* Last VLAN removed and no slaves, so 732 * restore block on adding VLANs. This will 733 * be removed once new slaves that are not 734 * VLAN challenged will be added. 735 */ 736 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 737 } 738 739 res = 0; 740 goto out; 741 } 742 } 743 744 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 745 bond->dev->name); 746 747 out: 748 write_unlock_bh(&bond->lock); 749 return res; 750 } 751 752 /** 753 * bond_has_challenged_slaves 754 * @bond: the bond we're working on 755 * 756 * Searches the slave list. Returns 1 if a vlan challenged slave 757 * was found, 0 otherwise. 758 * 759 * Assumes bond->lock is held. 760 */ 761 static int bond_has_challenged_slaves(struct bonding *bond) 762 { 763 struct slave *slave; 764 int i; 765 766 bond_for_each_slave(bond, slave, i) { 767 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 768 dprintk("found VLAN challenged slave - %s\n", 769 slave->dev->name); 770 return 1; 771 } 772 } 773 774 dprintk("no VLAN challenged slaves found\n"); 775 return 0; 776 } 777 778 /** 779 * bond_next_vlan - safely skip to the next item in the vlans list. 
780 * @bond: the bond we're working on 781 * @curr: item we're advancing from 782 * 783 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 784 * or @curr->next otherwise (even if it is @curr itself again). 785 * 786 * Caller must hold bond->lock 787 */ 788 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 789 { 790 struct vlan_entry *next, *last; 791 792 if (list_empty(&bond->vlan_list)) { 793 return NULL; 794 } 795 796 if (!curr) { 797 next = list_entry(bond->vlan_list.next, 798 struct vlan_entry, vlan_list); 799 } else { 800 last = list_entry(bond->vlan_list.prev, 801 struct vlan_entry, vlan_list); 802 if (last == curr) { 803 next = list_entry(bond->vlan_list.next, 804 struct vlan_entry, vlan_list); 805 } else { 806 next = list_entry(curr->vlan_list.next, 807 struct vlan_entry, vlan_list); 808 } 809 } 810 811 return next; 812 } 813 814 /** 815 * bond_dev_queue_xmit - Prepare skb for xmit. 816 * 817 * @bond: bond device that got this skb for tx. 818 * @skb: hw accel VLAN tagged skb to transmit 819 * @slave_dev: slave that is supposed to xmit this skbuff 820 * 821 * When the bond gets an skb to transmit that is 822 * already hardware accelerated VLAN tagged, and it 823 * needs to relay this skb to a slave that is not 824 * hw accel capable, the skb needs to be "unaccelerated", 825 * i.e. strip the hwaccel tag and re-insert it as part 826 * of the payload. 827 */ 828 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) 829 { 830 unsigned short vlan_id; 831 832 if (!list_empty(&bond->vlan_list) && 833 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 834 vlan_get_tag(skb, &vlan_id) == 0) { 835 skb->dev = slave_dev; 836 skb = vlan_put_tag(skb, vlan_id); 837 if (!skb) { 838 /* vlan_put_tag() frees the skb in case of error, 839 * so return success here so the calling functions 840 * won't attempt to free is again. 
841 */ 842 return 0; 843 } 844 } else { 845 skb->dev = slave_dev; 846 } 847 848 skb->priority = 1; 849 dev_queue_xmit(skb); 850 851 return 0; 852 } 853 854 /* 855 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 856 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 857 * lock because: 858 * a. This operation is performed in IOCTL context, 859 * b. The operation is protected by the RTNL semaphore in the 8021q code, 860 * c. Holding a lock with BH disabled while directly calling a base driver 861 * entry point is generally a BAD idea. 862 * 863 * The design of synchronization/protection for this operation in the 8021q 864 * module is good for one or more VLAN devices over a single physical device 865 * and cannot be extended for a teaming solution like bonding, so there is a 866 * potential race condition here where a net device from the vlan group might 867 * be referenced (either by a base driver or the 8021q code) while it is being 868 * removed from the system. However, it turns out we're not making matters 869 * worse, and if it works for regular VLAN usage it will work here too. 
870 */ 871 872 /** 873 * bond_vlan_rx_register - Propagates registration to slaves 874 * @bond_dev: bonding net device that got called 875 * @grp: vlan group being registered 876 */ 877 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 878 { 879 struct bonding *bond = bond_dev->priv; 880 struct slave *slave; 881 int i; 882 883 bond->vlgrp = grp; 884 885 bond_for_each_slave(bond, slave, i) { 886 struct net_device *slave_dev = slave->dev; 887 888 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 889 slave_dev->vlan_rx_register) { 890 slave_dev->vlan_rx_register(slave_dev, grp); 891 } 892 } 893 } 894 895 /** 896 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 897 * @bond_dev: bonding net device that got called 898 * @vid: vlan id being added 899 */ 900 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 901 { 902 struct bonding *bond = bond_dev->priv; 903 struct slave *slave; 904 int i, res; 905 906 bond_for_each_slave(bond, slave, i) { 907 struct net_device *slave_dev = slave->dev; 908 909 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 910 slave_dev->vlan_rx_add_vid) { 911 slave_dev->vlan_rx_add_vid(slave_dev, vid); 912 } 913 } 914 915 res = bond_add_vlan(bond, vid); 916 if (res) { 917 printk(KERN_ERR DRV_NAME 918 ": %s: Failed to add vlan id %d\n", 919 bond_dev->name, vid); 920 } 921 } 922 923 /** 924 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 925 * @bond_dev: bonding net device that got called 926 * @vid: vlan id being removed 927 */ 928 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 929 { 930 struct bonding *bond = bond_dev->priv; 931 struct slave *slave; 932 struct net_device *vlan_dev; 933 int i, res; 934 935 bond_for_each_slave(bond, slave, i) { 936 struct net_device *slave_dev = slave->dev; 937 938 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 939 slave_dev->vlan_rx_kill_vid) { 940 /* Save and then restore vlan_dev in the grp 
array, 941 * since the slave's driver might clear it. 942 */ 943 vlan_dev = bond->vlgrp->vlan_devices[vid]; 944 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 945 bond->vlgrp->vlan_devices[vid] = vlan_dev; 946 } 947 } 948 949 res = bond_del_vlan(bond, vid); 950 if (res) { 951 printk(KERN_ERR DRV_NAME 952 ": %s: Failed to remove vlan id %d\n", 953 bond_dev->name, vid); 954 } 955 } 956 957 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 958 { 959 struct vlan_entry *vlan; 960 961 write_lock_bh(&bond->lock); 962 963 if (list_empty(&bond->vlan_list)) { 964 goto out; 965 } 966 967 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 968 slave_dev->vlan_rx_register) { 969 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 970 } 971 972 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 973 !(slave_dev->vlan_rx_add_vid)) { 974 goto out; 975 } 976 977 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 978 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 979 } 980 981 out: 982 write_unlock_bh(&bond->lock); 983 } 984 985 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 986 { 987 struct vlan_entry *vlan; 988 struct net_device *vlan_dev; 989 990 write_lock_bh(&bond->lock); 991 992 if (list_empty(&bond->vlan_list)) { 993 goto out; 994 } 995 996 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 997 !(slave_dev->vlan_rx_kill_vid)) { 998 goto unreg; 999 } 1000 1001 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 1002 /* Save and then restore vlan_dev in the grp array, 1003 * since the slave's driver might clear it. 
1004 */ 1005 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 1006 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 1007 bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev; 1008 } 1009 1010 unreg: 1011 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 1012 slave_dev->vlan_rx_register) { 1013 slave_dev->vlan_rx_register(slave_dev, NULL); 1014 } 1015 1016 out: 1017 write_unlock_bh(&bond->lock); 1018 } 1019 1020 /*------------------------------- Link status -------------------------------*/ 1021 1022 /* 1023 * Get link speed and duplex from the slave's base driver 1024 * using ethtool. If for some reason the call fails or the 1025 * values are invalid, fake speed and duplex to 100/Full 1026 * and return error. 1027 */ 1028 static int bond_update_speed_duplex(struct slave *slave) 1029 { 1030 struct net_device *slave_dev = slave->dev; 1031 static int (* ioctl)(struct net_device *, struct ifreq *, int); 1032 struct ifreq ifr; 1033 struct ethtool_cmd etool; 1034 1035 /* Fake speed and duplex */ 1036 slave->speed = SPEED_100; 1037 slave->duplex = DUPLEX_FULL; 1038 1039 if (slave_dev->ethtool_ops) { 1040 u32 res; 1041 1042 if (!slave_dev->ethtool_ops->get_settings) { 1043 return -1; 1044 } 1045 1046 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 1047 if (res < 0) { 1048 return -1; 1049 } 1050 1051 goto verify; 1052 } 1053 1054 ioctl = slave_dev->do_ioctl; 1055 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 1056 etool.cmd = ETHTOOL_GSET; 1057 ifr.ifr_data = (char*)&etool; 1058 if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) { 1059 return -1; 1060 } 1061 1062 verify: 1063 switch (etool.speed) { 1064 case SPEED_10: 1065 case SPEED_100: 1066 case SPEED_1000: 1067 break; 1068 default: 1069 return -1; 1070 } 1071 1072 switch (etool.duplex) { 1073 case DUPLEX_FULL: 1074 case DUPLEX_HALF: 1075 break; 1076 default: 1077 return -1; 1078 } 1079 1080 slave->speed = etool.speed; 1081 slave->duplex = etool.duplex; 1082 1083 return 0; 1084 } 1085 
1086 /* 1087 * if <dev> supports MII link status reporting, check its link status. 1088 * 1089 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 1090 * depening upon the setting of the use_carrier parameter. 1091 * 1092 * Return either BMSR_LSTATUS, meaning that the link is up (or we 1093 * can't tell and just pretend it is), or 0, meaning that the link is 1094 * down. 1095 * 1096 * If reporting is non-zero, instead of faking link up, return -1 if 1097 * both ETHTOOL and MII ioctls fail (meaning the device does not 1098 * support them). If use_carrier is set, return whatever it says. 1099 * It'd be nice if there was a good way to tell if a driver supports 1100 * netif_carrier, but there really isn't. 1101 */ 1102 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 1103 { 1104 static int (* ioctl)(struct net_device *, struct ifreq *, int); 1105 struct ifreq ifr; 1106 struct mii_ioctl_data *mii; 1107 struct ethtool_value etool; 1108 1109 if (bond->params.use_carrier) { 1110 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; 1111 } 1112 1113 ioctl = slave_dev->do_ioctl; 1114 if (ioctl) { 1115 /* TODO: set pointer to correct ioctl on a per team member */ 1116 /* bases to make this more efficient. that is, once */ 1117 /* we determine the correct ioctl, we will always */ 1118 /* call it and not the others for that team */ 1119 /* member. */ 1120 1121 /* 1122 * We cannot assume that SIOCGMIIPHY will also read a 1123 * register; not all network drivers (e.g., e100) 1124 * support that. 
1125 */ 1126 1127 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 1128 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 1129 mii = if_mii(&ifr); 1130 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 1131 mii->reg_num = MII_BMSR; 1132 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 1133 return (mii->val_out & BMSR_LSTATUS); 1134 } 1135 } 1136 } 1137 1138 /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ 1139 /* for a period of time so we attempt to get link status */ 1140 /* from it last if the above MII ioctls fail... */ 1141 if (slave_dev->ethtool_ops) { 1142 if (slave_dev->ethtool_ops->get_link) { 1143 u32 link; 1144 1145 link = slave_dev->ethtool_ops->get_link(slave_dev); 1146 1147 return link ? BMSR_LSTATUS : 0; 1148 } 1149 } 1150 1151 if (ioctl) { 1152 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 1153 etool.cmd = ETHTOOL_GLINK; 1154 ifr.ifr_data = (char*)&etool; 1155 if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) { 1156 if (etool.data == 1) { 1157 return BMSR_LSTATUS; 1158 } else { 1159 dprintk("SIOCETHTOOL shows link down\n"); 1160 return 0; 1161 } 1162 } 1163 } 1164 1165 /* 1166 * If reporting, report that either there's no dev->do_ioctl, 1167 * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we 1168 * cannot report link status). If not reporting, pretend 1169 * we're ok. 1170 */ 1171 return (reporting ? 
-1 : BMSR_LSTATUS); 1172 } 1173 1174 /*----------------------------- Multicast list ------------------------------*/ 1175 1176 /* 1177 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 1178 */ 1179 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 1180 { 1181 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 1182 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 1183 } 1184 1185 /* 1186 * returns dmi entry if found, NULL otherwise 1187 */ 1188 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 1189 { 1190 struct dev_mc_list *idmi; 1191 1192 for (idmi = mc_list; idmi; idmi = idmi->next) { 1193 if (bond_is_dmi_same(dmi, idmi)) { 1194 return idmi; 1195 } 1196 } 1197 1198 return NULL; 1199 } 1200 1201 /* 1202 * Push the promiscuity flag down to appropriate slaves 1203 */ 1204 static void bond_set_promiscuity(struct bonding *bond, int inc) 1205 { 1206 if (USES_PRIMARY(bond->params.mode)) { 1207 /* write lock already acquired */ 1208 if (bond->curr_active_slave) { 1209 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 1210 } 1211 } else { 1212 struct slave *slave; 1213 int i; 1214 bond_for_each_slave(bond, slave, i) { 1215 dev_set_promiscuity(slave->dev, inc); 1216 } 1217 } 1218 } 1219 1220 /* 1221 * Push the allmulti flag down to all slaves 1222 */ 1223 static void bond_set_allmulti(struct bonding *bond, int inc) 1224 { 1225 if (USES_PRIMARY(bond->params.mode)) { 1226 /* write lock already acquired */ 1227 if (bond->curr_active_slave) { 1228 dev_set_allmulti(bond->curr_active_slave->dev, inc); 1229 } 1230 } else { 1231 struct slave *slave; 1232 int i; 1233 bond_for_each_slave(bond, slave, i) { 1234 dev_set_allmulti(slave->dev, inc); 1235 } 1236 } 1237 } 1238 1239 /* 1240 * Add a Multicast address to slaves 1241 * according to mode 1242 */ 1243 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 1244 { 1245 if 
(USES_PRIMARY(bond->params.mode)) { 1246 /* write lock already acquired */ 1247 if (bond->curr_active_slave) { 1248 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 1249 } 1250 } else { 1251 struct slave *slave; 1252 int i; 1253 bond_for_each_slave(bond, slave, i) { 1254 dev_mc_add(slave->dev, addr, alen, 0); 1255 } 1256 } 1257 } 1258 1259 /* 1260 * Remove a multicast address from slave 1261 * according to mode 1262 */ 1263 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 1264 { 1265 if (USES_PRIMARY(bond->params.mode)) { 1266 /* write lock already acquired */ 1267 if (bond->curr_active_slave) { 1268 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 1269 } 1270 } else { 1271 struct slave *slave; 1272 int i; 1273 bond_for_each_slave(bond, slave, i) { 1274 dev_mc_delete(slave->dev, addr, alen, 0); 1275 } 1276 } 1277 } 1278 1279 /* 1280 * Totally destroys the mc_list in bond 1281 */ 1282 static void bond_mc_list_destroy(struct bonding *bond) 1283 { 1284 struct dev_mc_list *dmi; 1285 1286 dmi = bond->mc_list; 1287 while (dmi) { 1288 bond->mc_list = dmi->next; 1289 kfree(dmi); 1290 dmi = bond->mc_list; 1291 } 1292 } 1293 1294 /* 1295 * Copy all the Multicast addresses from src to the bonding device dst 1296 */ 1297 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, int gpf_flag) 1298 { 1299 struct dev_mc_list *dmi, *new_dmi; 1300 1301 for (dmi = mc_list; dmi; dmi = dmi->next) { 1302 new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag); 1303 1304 if (!new_dmi) { 1305 /* FIXME: Potential memory leak !!! 
*/ 1306 return -ENOMEM; 1307 } 1308 1309 new_dmi->next = bond->mc_list; 1310 bond->mc_list = new_dmi; 1311 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 1312 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 1313 new_dmi->dmi_users = dmi->dmi_users; 1314 new_dmi->dmi_gusers = dmi->dmi_gusers; 1315 } 1316 1317 return 0; 1318 } 1319 1320 /* 1321 * flush all members of flush->mc_list from device dev->mc_list 1322 */ 1323 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 1324 { 1325 struct bonding *bond = bond_dev->priv; 1326 struct dev_mc_list *dmi; 1327 1328 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1329 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1330 } 1331 1332 if (bond->params.mode == BOND_MODE_8023AD) { 1333 /* del lacpdu mc addr from mc list */ 1334 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1335 1336 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1337 } 1338 } 1339 1340 /*--------------------------- Active slave change ---------------------------*/ 1341 1342 /* 1343 * Update the mc list and multicast-related flags for the new and 1344 * old active slaves (if any) according to the multicast mode, and 1345 * promiscuous flags unconditionally. 
1346 */ 1347 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 1348 { 1349 struct dev_mc_list *dmi; 1350 1351 if (!USES_PRIMARY(bond->params.mode)) { 1352 /* nothing to do - mc list is already up-to-date on 1353 * all slaves 1354 */ 1355 return; 1356 } 1357 1358 if (old_active) { 1359 if (bond->dev->flags & IFF_PROMISC) { 1360 dev_set_promiscuity(old_active->dev, -1); 1361 } 1362 1363 if (bond->dev->flags & IFF_ALLMULTI) { 1364 dev_set_allmulti(old_active->dev, -1); 1365 } 1366 1367 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 1368 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1369 } 1370 } 1371 1372 if (new_active) { 1373 if (bond->dev->flags & IFF_PROMISC) { 1374 dev_set_promiscuity(new_active->dev, 1); 1375 } 1376 1377 if (bond->dev->flags & IFF_ALLMULTI) { 1378 dev_set_allmulti(new_active->dev, 1); 1379 } 1380 1381 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 1382 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1383 } 1384 } 1385 } 1386 1387 /** 1388 * find_best_interface - select the best available slave to be the active one 1389 * @bond: our bonding struct 1390 * 1391 * Warning: Caller must hold curr_slave_lock for writing. 
1392 */ 1393 static struct slave *bond_find_best_slave(struct bonding *bond) 1394 { 1395 struct slave *new_active, *old_active; 1396 struct slave *bestslave = NULL; 1397 int mintime = bond->params.updelay; 1398 int i; 1399 1400 new_active = old_active = bond->curr_active_slave; 1401 1402 if (!new_active) { /* there were no active slaves left */ 1403 if (bond->slave_cnt > 0) { /* found one slave */ 1404 new_active = bond->first_slave; 1405 } else { 1406 return NULL; /* still no slave, return NULL */ 1407 } 1408 } 1409 1410 /* first try the primary link; if arping, a link must tx/rx traffic 1411 * before it can be considered the curr_active_slave - also, we would skip 1412 * slaves between the curr_active_slave and primary_slave that may be up 1413 * and able to arp 1414 */ 1415 if ((bond->primary_slave) && 1416 (!bond->params.arp_interval) && 1417 (IS_UP(bond->primary_slave->dev))) { 1418 new_active = bond->primary_slave; 1419 } 1420 1421 /* remember where to stop iterating over the slaves */ 1422 old_active = new_active; 1423 1424 bond_for_each_slave_from(bond, new_active, i, old_active) { 1425 if (IS_UP(new_active->dev)) { 1426 if (new_active->link == BOND_LINK_UP) { 1427 return new_active; 1428 } else if (new_active->link == BOND_LINK_BACK) { 1429 /* link up, but waiting for stabilization */ 1430 if (new_active->delay < mintime) { 1431 mintime = new_active->delay; 1432 bestslave = new_active; 1433 } 1434 } 1435 } 1436 } 1437 1438 return bestslave; 1439 } 1440 1441 /** 1442 * change_active_interface - change the active slave into the specified one 1443 * @bond: our bonding struct 1444 * @new: the new slave to make the active one 1445 * 1446 * Set the new slave to the bond's settings and unset them on the old 1447 * curr_active_slave. 1448 * Setting include flags, mc-list, promiscuity, allmulti, etc. 
1449 * 1450 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1451 * because it is apparently the best available slave we have, even though its 1452 * updelay hasn't timed out yet. 1453 * 1454 * Warning: Caller must hold curr_slave_lock for writing. 1455 */ 1456 static void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1457 { 1458 struct slave *old_active = bond->curr_active_slave; 1459 1460 if (old_active == new_active) { 1461 return; 1462 } 1463 1464 if (new_active) { 1465 if (new_active->link == BOND_LINK_BACK) { 1466 if (USES_PRIMARY(bond->params.mode)) { 1467 printk(KERN_INFO DRV_NAME 1468 ": %s: making interface %s the new " 1469 "active one %d ms earlier.\n", 1470 bond->dev->name, new_active->dev->name, 1471 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1472 } 1473 1474 new_active->delay = 0; 1475 new_active->link = BOND_LINK_UP; 1476 new_active->jiffies = jiffies; 1477 1478 if (bond->params.mode == BOND_MODE_8023AD) { 1479 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1480 } 1481 1482 if ((bond->params.mode == BOND_MODE_TLB) || 1483 (bond->params.mode == BOND_MODE_ALB)) { 1484 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1485 } 1486 } else { 1487 if (USES_PRIMARY(bond->params.mode)) { 1488 printk(KERN_INFO DRV_NAME 1489 ": %s: making interface %s the new " 1490 "active one.\n", 1491 bond->dev->name, new_active->dev->name); 1492 } 1493 } 1494 } 1495 1496 if (USES_PRIMARY(bond->params.mode)) { 1497 bond_mc_swap(bond, new_active, old_active); 1498 } 1499 1500 if ((bond->params.mode == BOND_MODE_TLB) || 1501 (bond->params.mode == BOND_MODE_ALB)) { 1502 bond_alb_handle_active_change(bond, new_active); 1503 } else { 1504 bond->curr_active_slave = new_active; 1505 } 1506 1507 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1508 if (old_active) { 1509 bond_set_slave_inactive_flags(old_active); 1510 } 1511 1512 if (new_active) { 1513 
bond_set_slave_active_flags(new_active); 1514 } 1515 bond_send_gratuitous_arp(bond); 1516 } 1517 } 1518 1519 /** 1520 * bond_select_active_slave - select a new active slave, if needed 1521 * @bond: our bonding struct 1522 * 1523 * This functions shoud be called when one of the following occurs: 1524 * - The old curr_active_slave has been released or lost its link. 1525 * - The primary_slave has got its link back. 1526 * - A slave has got its link back and there's no old curr_active_slave. 1527 * 1528 * Warning: Caller must hold curr_slave_lock for writing. 1529 */ 1530 static void bond_select_active_slave(struct bonding *bond) 1531 { 1532 struct slave *best_slave; 1533 1534 best_slave = bond_find_best_slave(bond); 1535 if (best_slave != bond->curr_active_slave) { 1536 bond_change_active_slave(bond, best_slave); 1537 } 1538 } 1539 1540 /*--------------------------- slave list handling ---------------------------*/ 1541 1542 /* 1543 * This function attaches the slave to the end of list. 1544 * 1545 * bond->lock held for writing by caller. 1546 */ 1547 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1548 { 1549 if (bond->first_slave == NULL) { /* attaching the first slave */ 1550 new_slave->next = new_slave; 1551 new_slave->prev = new_slave; 1552 bond->first_slave = new_slave; 1553 } else { 1554 new_slave->next = bond->first_slave; 1555 new_slave->prev = bond->first_slave->prev; 1556 new_slave->next->prev = new_slave; 1557 new_slave->prev->next = new_slave; 1558 } 1559 1560 bond->slave_cnt++; 1561 } 1562 1563 /* 1564 * This function detaches the slave from the list. 1565 * WARNING: no check is made to verify if the slave effectively 1566 * belongs to <bond>. 1567 * Nothing is freed on return, structures are just unchained. 1568 * If any slave pointer in bond was pointing to <slave>, 1569 * it should be changed by the calling function. 1570 * 1571 * bond->lock held for writing by caller. 
1572 */ 1573 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1574 { 1575 if (slave->next) { 1576 slave->next->prev = slave->prev; 1577 } 1578 1579 if (slave->prev) { 1580 slave->prev->next = slave->next; 1581 } 1582 1583 if (bond->first_slave == slave) { /* slave is the first slave */ 1584 if (bond->slave_cnt > 1) { /* there are more slave */ 1585 bond->first_slave = slave->next; 1586 } else { 1587 bond->first_slave = NULL; /* slave was the last one */ 1588 } 1589 } 1590 1591 slave->next = NULL; 1592 slave->prev = NULL; 1593 bond->slave_cnt--; 1594 } 1595 1596 /*---------------------------------- IOCTL ----------------------------------*/ 1597 1598 static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) 1599 { 1600 dprintk("bond_dev=%p\n", bond_dev); 1601 dprintk("slave_dev=%p\n", slave_dev); 1602 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1603 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1604 return 0; 1605 } 1606 1607 #define BOND_INTERSECT_FEATURES \ 1608 (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) 1609 1610 /* 1611 * Compute the features available to the bonding device by 1612 * intersection of all of the slave devices' BOND_INTERSECT_FEATURES. 1613 * Call this after attaching or detaching a slave to update the 1614 * bond's features. 
1615 */ 1616 static int bond_compute_features(struct bonding *bond) 1617 { 1618 int i; 1619 struct slave *slave; 1620 struct net_device *bond_dev = bond->dev; 1621 int features = bond->bond_features; 1622 1623 bond_for_each_slave(bond, slave, i) { 1624 struct net_device * slave_dev = slave->dev; 1625 if (i == 0) { 1626 features |= BOND_INTERSECT_FEATURES; 1627 } 1628 features &= 1629 ~(~slave_dev->features & BOND_INTERSECT_FEATURES); 1630 } 1631 1632 /* turn off NETIF_F_SG if we need a csum and h/w can't do it */ 1633 if ((features & NETIF_F_SG) && 1634 !(features & (NETIF_F_IP_CSUM | 1635 NETIF_F_NO_CSUM | 1636 NETIF_F_HW_CSUM))) { 1637 features &= ~NETIF_F_SG; 1638 } 1639 1640 bond_dev->features = features; 1641 1642 return 0; 1643 } 1644 1645 /* enslave device <slave> to bond device <master> */ 1646 static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1647 { 1648 struct bonding *bond = bond_dev->priv; 1649 struct slave *new_slave = NULL; 1650 struct dev_mc_list *dmi; 1651 struct sockaddr addr; 1652 int link_reporting; 1653 int old_features = bond_dev->features; 1654 int res = 0; 1655 1656 if (slave_dev->do_ioctl == NULL) { 1657 printk(KERN_WARNING DRV_NAME 1658 ": Warning : no link monitoring support for %s\n", 1659 slave_dev->name); 1660 } 1661 1662 /* bond must be initialized by bond_open() before enslaving */ 1663 if (!(bond_dev->flags & IFF_UP)) { 1664 dprintk("Error, master_dev is not up\n"); 1665 return -EPERM; 1666 } 1667 1668 /* already enslaved */ 1669 if (slave_dev->flags & IFF_SLAVE) { 1670 dprintk("Error, Device was already enslaved\n"); 1671 return -EBUSY; 1672 } 1673 1674 /* vlan challenged mutual exclusion */ 1675 /* no need to lock since we're protected by rtnl_lock */ 1676 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1677 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1678 if (!list_empty(&bond->vlan_list)) { 1679 printk(KERN_ERR DRV_NAME 1680 ": Error: cannot enslave VLAN " 1681 "challenged 
slave %s on VLAN enabled " 1682 "bond %s\n", slave_dev->name, 1683 bond_dev->name); 1684 return -EPERM; 1685 } else { 1686 printk(KERN_WARNING DRV_NAME 1687 ": Warning: enslaved VLAN challenged " 1688 "slave %s. Adding VLANs will be blocked as " 1689 "long as %s is part of bond %s\n", 1690 slave_dev->name, slave_dev->name, 1691 bond_dev->name); 1692 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1693 } 1694 } else { 1695 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1696 if (bond->slave_cnt == 0) { 1697 /* First slave, and it is not VLAN challenged, 1698 * so remove the block of adding VLANs over the bond. 1699 */ 1700 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1701 } 1702 } 1703 1704 if (app_abi_ver >= 1) { 1705 /* The application is using an ABI, which requires the 1706 * slave interface to be closed. 1707 */ 1708 if ((slave_dev->flags & IFF_UP)) { 1709 printk(KERN_ERR DRV_NAME 1710 ": Error: %s is up\n", 1711 slave_dev->name); 1712 res = -EPERM; 1713 goto err_undo_flags; 1714 } 1715 1716 if (slave_dev->set_mac_address == NULL) { 1717 printk(KERN_ERR DRV_NAME 1718 ": Error: The slave device you specified does " 1719 "not support setting the MAC address.\n"); 1720 printk(KERN_ERR 1721 "Your kernel likely does not support slave " 1722 "devices.\n"); 1723 1724 res = -EOPNOTSUPP; 1725 goto err_undo_flags; 1726 } 1727 } else { 1728 /* The application is not using an ABI, which requires the 1729 * slave interface to be open. 
1730 */ 1731 if (!(slave_dev->flags & IFF_UP)) { 1732 printk(KERN_ERR DRV_NAME 1733 ": Error: %s is not running\n", 1734 slave_dev->name); 1735 res = -EINVAL; 1736 goto err_undo_flags; 1737 } 1738 1739 if ((bond->params.mode == BOND_MODE_8023AD) || 1740 (bond->params.mode == BOND_MODE_TLB) || 1741 (bond->params.mode == BOND_MODE_ALB)) { 1742 printk(KERN_ERR DRV_NAME 1743 ": Error: to use %s mode, you must upgrade " 1744 "ifenslave.\n", 1745 bond_mode_name(bond->params.mode)); 1746 res = -EOPNOTSUPP; 1747 goto err_undo_flags; 1748 } 1749 } 1750 1751 new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL); 1752 if (!new_slave) { 1753 res = -ENOMEM; 1754 goto err_undo_flags; 1755 } 1756 1757 memset(new_slave, 0, sizeof(struct slave)); 1758 1759 /* save slave's original flags before calling 1760 * netdev_set_master and dev_open 1761 */ 1762 new_slave->original_flags = slave_dev->flags; 1763 1764 if (app_abi_ver >= 1) { 1765 /* save slave's original ("permanent") mac address for 1766 * modes that needs it, and for restoring it upon release, 1767 * and then set it to the master's address 1768 */ 1769 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1770 1771 /* set slave to master's mac address 1772 * The application already set the master's 1773 * mac address to that of the first slave 1774 */ 1775 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1776 addr.sa_family = slave_dev->type; 1777 res = dev_set_mac_address(slave_dev, &addr); 1778 if (res) { 1779 dprintk("Error %d calling set_mac_address\n", res); 1780 goto err_free; 1781 } 1782 1783 /* open the slave since the application closed it */ 1784 res = dev_open(slave_dev); 1785 if (res) { 1786 dprintk("Openning slave %s failed\n", slave_dev->name); 1787 goto err_restore_mac; 1788 } 1789 } 1790 1791 res = netdev_set_master(slave_dev, bond_dev); 1792 if (res) { 1793 dprintk("Error %d calling netdev_set_master\n", res); 1794 if (app_abi_ver < 1) { 1795 goto err_free; 1796 } else { 1797 goto 
err_close; 1798 } 1799 } 1800 1801 new_slave->dev = slave_dev; 1802 1803 if ((bond->params.mode == BOND_MODE_TLB) || 1804 (bond->params.mode == BOND_MODE_ALB)) { 1805 /* bond_alb_init_slave() must be called before all other stages since 1806 * it might fail and we do not want to have to undo everything 1807 */ 1808 res = bond_alb_init_slave(bond, new_slave); 1809 if (res) { 1810 goto err_unset_master; 1811 } 1812 } 1813 1814 /* If the mode USES_PRIMARY, then the new slave gets the 1815 * master's promisc (and mc) settings only if it becomes the 1816 * curr_active_slave, and that is taken care of later when calling 1817 * bond_change_active() 1818 */ 1819 if (!USES_PRIMARY(bond->params.mode)) { 1820 /* set promiscuity level to new slave */ 1821 if (bond_dev->flags & IFF_PROMISC) { 1822 dev_set_promiscuity(slave_dev, 1); 1823 } 1824 1825 /* set allmulti level to new slave */ 1826 if (bond_dev->flags & IFF_ALLMULTI) { 1827 dev_set_allmulti(slave_dev, 1); 1828 } 1829 1830 /* upload master's mc_list to new slave */ 1831 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1832 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1833 } 1834 } 1835 1836 if (bond->params.mode == BOND_MODE_8023AD) { 1837 /* add lacpdu mc addr to mc list */ 1838 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1839 1840 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1841 } 1842 1843 bond_add_vlans_on_slave(bond, slave_dev); 1844 1845 write_lock_bh(&bond->lock); 1846 1847 bond_attach_slave(bond, new_slave); 1848 1849 new_slave->delay = 0; 1850 new_slave->link_failure_count = 0; 1851 1852 bond_compute_features(bond); 1853 1854 if (bond->params.miimon && !bond->params.use_carrier) { 1855 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1856 1857 if ((link_reporting == -1) && !bond->params.arp_interval) { 1858 /* 1859 * miimon is set but a bonded network driver 1860 * does not support ETHTOOL/MII and 1861 * arp_interval is not set. 
Note: if 1862 * use_carrier is enabled, we will never go 1863 * here (because netif_carrier is always 1864 * supported); thus, we don't need to change 1865 * the messages for netif_carrier. 1866 */ 1867 printk(KERN_WARNING DRV_NAME 1868 ": Warning: MII and ETHTOOL support not " 1869 "available for interface %s, and " 1870 "arp_interval/arp_ip_target module parameters " 1871 "not specified, thus bonding will not detect " 1872 "link failures! see bonding.txt for details.\n", 1873 slave_dev->name); 1874 } else if (link_reporting == -1) { 1875 /* unable get link status using mii/ethtool */ 1876 printk(KERN_WARNING DRV_NAME 1877 ": Warning: can't get link status from " 1878 "interface %s; the network driver associated " 1879 "with this interface does not support MII or " 1880 "ETHTOOL link status reporting, thus miimon " 1881 "has no effect on this interface.\n", 1882 slave_dev->name); 1883 } 1884 } 1885 1886 /* check for initial state */ 1887 if (!bond->params.miimon || 1888 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1889 if (bond->params.updelay) { 1890 dprintk("Initial state of slave_dev is " 1891 "BOND_LINK_BACK\n"); 1892 new_slave->link = BOND_LINK_BACK; 1893 new_slave->delay = bond->params.updelay; 1894 } else { 1895 dprintk("Initial state of slave_dev is " 1896 "BOND_LINK_UP\n"); 1897 new_slave->link = BOND_LINK_UP; 1898 } 1899 new_slave->jiffies = jiffies; 1900 } else { 1901 dprintk("Initial state of slave_dev is " 1902 "BOND_LINK_DOWN\n"); 1903 new_slave->link = BOND_LINK_DOWN; 1904 } 1905 1906 if (bond_update_speed_duplex(new_slave) && 1907 (new_slave->link != BOND_LINK_DOWN)) { 1908 printk(KERN_WARNING DRV_NAME 1909 ": Warning: failed to get speed and duplex from %s, " 1910 "assumed to be 100Mb/sec and Full.\n", 1911 new_slave->dev->name); 1912 1913 if (bond->params.mode == BOND_MODE_8023AD) { 1914 printk(KERN_WARNING 1915 "Operation of 802.3ad mode requires ETHTOOL " 1916 "support in base driver for proper aggregator " 1917 "selection.\n"); 
1918 } 1919 } 1920 1921 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1922 /* if there is a primary slave, remember it */ 1923 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1924 bond->primary_slave = new_slave; 1925 } 1926 } 1927 1928 switch (bond->params.mode) { 1929 case BOND_MODE_ACTIVEBACKUP: 1930 /* if we're in active-backup mode, we need one and only one active 1931 * interface. The backup interfaces will have their NOARP flag set 1932 * because we need them to be completely deaf and not to respond to 1933 * any ARP request on the network to avoid fooling a switch. Thus, 1934 * since we guarantee that curr_active_slave always point to the last 1935 * usable interface, we just have to verify this interface's flag. 1936 */ 1937 if (((!bond->curr_active_slave) || 1938 (bond->curr_active_slave->dev->flags & IFF_NOARP)) && 1939 (new_slave->link != BOND_LINK_DOWN)) { 1940 dprintk("This is the first active slave\n"); 1941 /* first slave or no active slave yet, and this link 1942 is OK, so make this interface the active one */ 1943 bond_change_active_slave(bond, new_slave); 1944 } else { 1945 dprintk("This is just a backup slave\n"); 1946 bond_set_slave_inactive_flags(new_slave); 1947 } 1948 break; 1949 case BOND_MODE_8023AD: 1950 /* in 802.3ad mode, the internal mechanism 1951 * will activate the slaves in the selected 1952 * aggregator 1953 */ 1954 bond_set_slave_inactive_flags(new_slave); 1955 /* if this is the first slave */ 1956 if (bond->slave_cnt == 1) { 1957 SLAVE_AD_INFO(new_slave).id = 1; 1958 /* Initialize AD with the number of times that the AD timer is called in 1 second 1959 * can be called only after the mac address of the bond is set 1960 */ 1961 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1962 bond->params.lacp_fast); 1963 } else { 1964 SLAVE_AD_INFO(new_slave).id = 1965 SLAVE_AD_INFO(new_slave->prev).id + 1; 1966 } 1967 1968 bond_3ad_bind_slave(new_slave); 1969 break; 1970 case BOND_MODE_TLB: 1971 case 
BOND_MODE_ALB: 1972 new_slave->state = BOND_STATE_ACTIVE; 1973 if ((!bond->curr_active_slave) && 1974 (new_slave->link != BOND_LINK_DOWN)) { 1975 /* first slave or no active slave yet, and this link 1976 * is OK, so make this interface the active one 1977 */ 1978 bond_change_active_slave(bond, new_slave); 1979 } 1980 break; 1981 default: 1982 dprintk("This slave is always active in trunk mode\n"); 1983 1984 /* always active in trunk mode */ 1985 new_slave->state = BOND_STATE_ACTIVE; 1986 1987 /* In trunking mode there is little meaning to curr_active_slave 1988 * anyway (it holds no special properties of the bond device), 1989 * so we can change it without calling change_active_interface() 1990 */ 1991 if (!bond->curr_active_slave) { 1992 bond->curr_active_slave = new_slave; 1993 } 1994 break; 1995 } /* switch(bond_mode) */ 1996 1997 write_unlock_bh(&bond->lock); 1998 1999 if (app_abi_ver < 1) { 2000 /* 2001 * !!! This is to support old versions of ifenslave. 2002 * We can remove this in 2.5 because our ifenslave takes 2003 * care of this for us. 2004 * We check to see if the master has a mac address yet. 2005 * If not, we'll give it the mac address of our slave device. 2006 */ 2007 int ndx = 0; 2008 2009 for (ndx = 0; ndx < bond_dev->addr_len; ndx++) { 2010 dprintk("Checking ndx=%d of bond_dev->dev_addr\n", 2011 ndx); 2012 if (bond_dev->dev_addr[ndx] != 0) { 2013 dprintk("Found non-zero byte at ndx=%d\n", 2014 ndx); 2015 break; 2016 } 2017 } 2018 2019 if (ndx == bond_dev->addr_len) { 2020 /* 2021 * We got all the way through the address and it was 2022 * all 0's. 2023 */ 2024 dprintk("%s doesn't have a MAC address yet. \n", 2025 bond_dev->name); 2026 dprintk("Going to give assign it from %s.\n", 2027 slave_dev->name); 2028 bond_sethwaddr(bond_dev, slave_dev); 2029 } 2030 } 2031 2032 printk(KERN_INFO DRV_NAME 2033 ": %s: enslaving %s as a%s interface with a%s link.\n", 2034 bond_dev->name, slave_dev->name, 2035 new_slave->state == BOND_STATE_ACTIVE ? 
"n active" : " backup", 2036 new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); 2037 2038 /* enslave is successful */ 2039 return 0; 2040 2041 /* Undo stages on error */ 2042 err_unset_master: 2043 netdev_set_master(slave_dev, NULL); 2044 2045 err_close: 2046 dev_close(slave_dev); 2047 2048 err_restore_mac: 2049 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 2050 addr.sa_family = slave_dev->type; 2051 dev_set_mac_address(slave_dev, &addr); 2052 2053 err_free: 2054 kfree(new_slave); 2055 2056 err_undo_flags: 2057 bond_dev->features = old_features; 2058 2059 return res; 2060 } 2061 2062 /* 2063 * Try to release the slave device <slave> from the bond device <master> 2064 * It is legal to access curr_active_slave without a lock because all the function 2065 * is write-locked. 2066 * 2067 * The rules for slave state should be: 2068 * for Active/Backup: 2069 * Active stays on all backups go down 2070 * for Bonded connections: 2071 * The first up interface should be left on and all others downed. 
2072 */ 2073 static int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 2074 { 2075 struct bonding *bond = bond_dev->priv; 2076 struct slave *slave, *oldcurrent; 2077 struct sockaddr addr; 2078 int mac_addr_differ; 2079 2080 /* slave is not a slave or master is not master of this slave */ 2081 if (!(slave_dev->flags & IFF_SLAVE) || 2082 (slave_dev->master != bond_dev)) { 2083 printk(KERN_ERR DRV_NAME 2084 ": Error: %s: cannot release %s.\n", 2085 bond_dev->name, slave_dev->name); 2086 return -EINVAL; 2087 } 2088 2089 write_lock_bh(&bond->lock); 2090 2091 slave = bond_get_slave_by_dev(bond, slave_dev); 2092 if (!slave) { 2093 /* not a slave of this bond */ 2094 printk(KERN_INFO DRV_NAME 2095 ": %s: %s not enslaved\n", 2096 bond_dev->name, slave_dev->name); 2097 return -EINVAL; 2098 } 2099 2100 mac_addr_differ = memcmp(bond_dev->dev_addr, 2101 slave->perm_hwaddr, 2102 ETH_ALEN); 2103 if (!mac_addr_differ && (bond->slave_cnt > 1)) { 2104 printk(KERN_WARNING DRV_NAME 2105 ": Warning: the permanent HWaddr of %s " 2106 "- %02X:%02X:%02X:%02X:%02X:%02X - is " 2107 "still in use by %s. Set the HWaddr of " 2108 "%s to a different address to avoid " 2109 "conflicts.\n", 2110 slave_dev->name, 2111 slave->perm_hwaddr[0], 2112 slave->perm_hwaddr[1], 2113 slave->perm_hwaddr[2], 2114 slave->perm_hwaddr[3], 2115 slave->perm_hwaddr[4], 2116 slave->perm_hwaddr[5], 2117 bond_dev->name, 2118 slave_dev->name); 2119 } 2120 2121 /* Inform AD package of unbinding of slave. */ 2122 if (bond->params.mode == BOND_MODE_8023AD) { 2123 /* must be called before the slave is 2124 * detached from the list 2125 */ 2126 bond_3ad_unbind_slave(slave); 2127 } 2128 2129 printk(KERN_INFO DRV_NAME 2130 ": %s: releasing %s interface %s\n", 2131 bond_dev->name, 2132 (slave->state == BOND_STATE_ACTIVE) 2133 ? 
"active" : "backup", 2134 slave_dev->name); 2135 2136 oldcurrent = bond->curr_active_slave; 2137 2138 bond->current_arp_slave = NULL; 2139 2140 /* release the slave from its bond */ 2141 bond_detach_slave(bond, slave); 2142 2143 bond_compute_features(bond); 2144 2145 if (bond->primary_slave == slave) { 2146 bond->primary_slave = NULL; 2147 } 2148 2149 if (oldcurrent == slave) { 2150 bond_change_active_slave(bond, NULL); 2151 } 2152 2153 if ((bond->params.mode == BOND_MODE_TLB) || 2154 (bond->params.mode == BOND_MODE_ALB)) { 2155 /* Must be called only after the slave has been 2156 * detached from the list and the curr_active_slave 2157 * has been cleared (if our_slave == old_current), 2158 * but before a new active slave is selected. 2159 */ 2160 bond_alb_deinit_slave(bond, slave); 2161 } 2162 2163 if (oldcurrent == slave) { 2164 bond_select_active_slave(bond); 2165 2166 if (!bond->curr_active_slave) { 2167 printk(KERN_INFO DRV_NAME 2168 ": %s: now running without any active " 2169 "interface !\n", 2170 bond_dev->name); 2171 } 2172 } 2173 2174 if (bond->slave_cnt == 0) { 2175 /* if the last slave was removed, zero the mac address 2176 * of the master so it will be set by the application 2177 * to the mac address of the first slave 2178 */ 2179 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 2180 2181 if (list_empty(&bond->vlan_list)) { 2182 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 2183 } else { 2184 printk(KERN_WARNING DRV_NAME 2185 ": Warning: clearing HW address of %s while it " 2186 "still has VLANs.\n", 2187 bond_dev->name); 2188 printk(KERN_WARNING DRV_NAME 2189 ": When re-adding slaves, make sure the bond's " 2190 "HW address matches its VLANs'.\n"); 2191 } 2192 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 2193 !bond_has_challenged_slaves(bond)) { 2194 printk(KERN_INFO DRV_NAME 2195 ": last VLAN challenged slave %s " 2196 "left bond %s. 
VLAN blocking is removed\n", 2197 slave_dev->name, bond_dev->name); 2198 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 2199 } 2200 2201 write_unlock_bh(&bond->lock); 2202 2203 bond_del_vlans_from_slave(bond, slave_dev); 2204 2205 /* If the mode USES_PRIMARY, then we should only remove its 2206 * promisc and mc settings if it was the curr_active_slave, but that was 2207 * already taken care of above when we detached the slave 2208 */ 2209 if (!USES_PRIMARY(bond->params.mode)) { 2210 /* unset promiscuity level from slave */ 2211 if (bond_dev->flags & IFF_PROMISC) { 2212 dev_set_promiscuity(slave_dev, -1); 2213 } 2214 2215 /* unset allmulti level from slave */ 2216 if (bond_dev->flags & IFF_ALLMULTI) { 2217 dev_set_allmulti(slave_dev, -1); 2218 } 2219 2220 /* flush master's mc_list from slave */ 2221 bond_mc_list_flush(bond_dev, slave_dev); 2222 } 2223 2224 netdev_set_master(slave_dev, NULL); 2225 2226 /* close slave before restoring its mac address */ 2227 dev_close(slave_dev); 2228 2229 if (app_abi_ver >= 1) { 2230 /* restore original ("permanent") mac address */ 2231 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 2232 addr.sa_family = slave_dev->type; 2233 dev_set_mac_address(slave_dev, &addr); 2234 } 2235 2236 /* restore the original state of the 2237 * IFF_NOARP flag that might have been 2238 * set by bond_set_slave_inactive_flags() 2239 */ 2240 if ((slave->original_flags & IFF_NOARP) == 0) { 2241 slave_dev->flags &= ~IFF_NOARP; 2242 } 2243 2244 kfree(slave); 2245 2246 return 0; /* deletion OK */ 2247 } 2248 2249 /* 2250 * This function releases all slaves. 
2251 */ 2252 static int bond_release_all(struct net_device *bond_dev) 2253 { 2254 struct bonding *bond = bond_dev->priv; 2255 struct slave *slave; 2256 struct net_device *slave_dev; 2257 struct sockaddr addr; 2258 2259 write_lock_bh(&bond->lock); 2260 2261 if (bond->slave_cnt == 0) { 2262 goto out; 2263 } 2264 2265 bond->current_arp_slave = NULL; 2266 bond->primary_slave = NULL; 2267 bond_change_active_slave(bond, NULL); 2268 2269 while ((slave = bond->first_slave) != NULL) { 2270 /* Inform AD package of unbinding of slave 2271 * before slave is detached from the list. 2272 */ 2273 if (bond->params.mode == BOND_MODE_8023AD) { 2274 bond_3ad_unbind_slave(slave); 2275 } 2276 2277 slave_dev = slave->dev; 2278 bond_detach_slave(bond, slave); 2279 2280 if ((bond->params.mode == BOND_MODE_TLB) || 2281 (bond->params.mode == BOND_MODE_ALB)) { 2282 /* must be called only after the slave 2283 * has been detached from the list 2284 */ 2285 bond_alb_deinit_slave(bond, slave); 2286 } 2287 2288 bond_compute_features(bond); 2289 2290 /* now that the slave is detached, unlock and perform 2291 * all the undo steps that should not be called from 2292 * within a lock. 
2293 */ 2294 write_unlock_bh(&bond->lock); 2295 2296 bond_del_vlans_from_slave(bond, slave_dev); 2297 2298 /* If the mode USES_PRIMARY, then we should only remove its 2299 * promisc and mc settings if it was the curr_active_slave, but that was 2300 * already taken care of above when we detached the slave 2301 */ 2302 if (!USES_PRIMARY(bond->params.mode)) { 2303 /* unset promiscuity level from slave */ 2304 if (bond_dev->flags & IFF_PROMISC) { 2305 dev_set_promiscuity(slave_dev, -1); 2306 } 2307 2308 /* unset allmulti level from slave */ 2309 if (bond_dev->flags & IFF_ALLMULTI) { 2310 dev_set_allmulti(slave_dev, -1); 2311 } 2312 2313 /* flush master's mc_list from slave */ 2314 bond_mc_list_flush(bond_dev, slave_dev); 2315 } 2316 2317 netdev_set_master(slave_dev, NULL); 2318 2319 /* close slave before restoring its mac address */ 2320 dev_close(slave_dev); 2321 2322 if (app_abi_ver >= 1) { 2323 /* restore original ("permanent") mac address*/ 2324 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 2325 addr.sa_family = slave_dev->type; 2326 dev_set_mac_address(slave_dev, &addr); 2327 } 2328 2329 /* restore the original state of the IFF_NOARP flag that might have 2330 * been set by bond_set_slave_inactive_flags() 2331 */ 2332 if ((slave->original_flags & IFF_NOARP) == 0) { 2333 slave_dev->flags &= ~IFF_NOARP; 2334 } 2335 2336 kfree(slave); 2337 2338 /* re-acquire the lock before getting the next slave */ 2339 write_lock_bh(&bond->lock); 2340 } 2341 2342 /* zero the mac address of the master so it will be 2343 * set by the application to the mac address of the 2344 * first slave 2345 */ 2346 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 2347 2348 if (list_empty(&bond->vlan_list)) { 2349 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 2350 } else { 2351 printk(KERN_WARNING DRV_NAME 2352 ": Warning: clearing HW address of %s while it " 2353 "still has VLANs.\n", 2354 bond_dev->name); 2355 printk(KERN_WARNING DRV_NAME 2356 ": When re-adding slaves, make sure the 
bond's " 2357 "HW address matches its VLANs'.\n"); 2358 } 2359 2360 printk(KERN_INFO DRV_NAME 2361 ": %s: released all slaves\n", 2362 bond_dev->name); 2363 2364 out: 2365 write_unlock_bh(&bond->lock); 2366 2367 return 0; 2368 } 2369 2370 /* 2371 * This function changes the active slave to slave <slave_dev>. 2372 * It returns -EINVAL in the following cases. 2373 * - <slave_dev> is not found in the list. 2374 * - There is not active slave now. 2375 * - <slave_dev> is already active. 2376 * - The link state of <slave_dev> is not BOND_LINK_UP. 2377 * - <slave_dev> is not running. 2378 * In these cases, this fuction does nothing. 2379 * In the other cases, currnt_slave pointer is changed and 0 is returned. 2380 */ 2381 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 2382 { 2383 struct bonding *bond = bond_dev->priv; 2384 struct slave *old_active = NULL; 2385 struct slave *new_active = NULL; 2386 int res = 0; 2387 2388 if (!USES_PRIMARY(bond->params.mode)) { 2389 return -EINVAL; 2390 } 2391 2392 /* Verify that master_dev is indeed the master of slave_dev */ 2393 if (!(slave_dev->flags & IFF_SLAVE) || 2394 (slave_dev->master != bond_dev)) { 2395 return -EINVAL; 2396 } 2397 2398 write_lock_bh(&bond->lock); 2399 2400 old_active = bond->curr_active_slave; 2401 new_active = bond_get_slave_by_dev(bond, slave_dev); 2402 2403 /* 2404 * Changing to the current active: do nothing; return success. 
2405 */ 2406 if (new_active && (new_active == old_active)) { 2407 write_unlock_bh(&bond->lock); 2408 return 0; 2409 } 2410 2411 if ((new_active) && 2412 (old_active) && 2413 (new_active->link == BOND_LINK_UP) && 2414 IS_UP(new_active->dev)) { 2415 bond_change_active_slave(bond, new_active); 2416 } else { 2417 res = -EINVAL; 2418 } 2419 2420 write_unlock_bh(&bond->lock); 2421 2422 return res; 2423 } 2424 2425 static int bond_ethtool_ioctl(struct net_device *bond_dev, struct ifreq *ifr) 2426 { 2427 struct ethtool_drvinfo info; 2428 void __user *addr = ifr->ifr_data; 2429 uint32_t cmd; 2430 2431 if (get_user(cmd, (uint32_t __user *)addr)) { 2432 return -EFAULT; 2433 } 2434 2435 switch (cmd) { 2436 case ETHTOOL_GDRVINFO: 2437 if (copy_from_user(&info, addr, sizeof(info))) { 2438 return -EFAULT; 2439 } 2440 2441 if (strcmp(info.driver, "ifenslave") == 0) { 2442 int new_abi_ver; 2443 char *endptr; 2444 2445 new_abi_ver = simple_strtoul(info.fw_version, 2446 &endptr, 0); 2447 if (*endptr) { 2448 printk(KERN_ERR DRV_NAME 2449 ": Error: got invalid ABI " 2450 "version from application\n"); 2451 2452 return -EINVAL; 2453 } 2454 2455 if (orig_app_abi_ver == -1) { 2456 orig_app_abi_ver = new_abi_ver; 2457 } 2458 2459 app_abi_ver = new_abi_ver; 2460 } 2461 2462 strncpy(info.driver, DRV_NAME, 32); 2463 strncpy(info.version, DRV_VERSION, 32); 2464 snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); 2465 2466 if (copy_to_user(addr, &info, sizeof(info))) { 2467 return -EFAULT; 2468 } 2469 2470 return 0; 2471 default: 2472 return -EOPNOTSUPP; 2473 } 2474 } 2475 2476 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2477 { 2478 struct bonding *bond = bond_dev->priv; 2479 2480 info->bond_mode = bond->params.mode; 2481 info->miimon = bond->params.miimon; 2482 2483 read_lock_bh(&bond->lock); 2484 info->num_slaves = bond->slave_cnt; 2485 read_unlock_bh(&bond->lock); 2486 2487 return 0; 2488 } 2489 2490 static int bond_slave_info_query(struct net_device 
*bond_dev, struct ifslave *info) 2491 { 2492 struct bonding *bond = bond_dev->priv; 2493 struct slave *slave; 2494 int i, found = 0; 2495 2496 if (info->slave_id < 0) { 2497 return -ENODEV; 2498 } 2499 2500 read_lock_bh(&bond->lock); 2501 2502 bond_for_each_slave(bond, slave, i) { 2503 if (i == (int)info->slave_id) { 2504 found = 1; 2505 break; 2506 } 2507 } 2508 2509 read_unlock_bh(&bond->lock); 2510 2511 if (found) { 2512 strcpy(info->slave_name, slave->dev->name); 2513 info->link = slave->link; 2514 info->state = slave->state; 2515 info->link_failure_count = slave->link_failure_count; 2516 } else { 2517 return -ENODEV; 2518 } 2519 2520 return 0; 2521 } 2522 2523 /*-------------------------------- Monitoring -------------------------------*/ 2524 2525 /* this function is called regularly to monitor each slave's link. */ 2526 static void bond_mii_monitor(struct net_device *bond_dev) 2527 { 2528 struct bonding *bond = bond_dev->priv; 2529 struct slave *slave, *oldcurrent; 2530 int do_failover = 0; 2531 int delta_in_ticks; 2532 int i; 2533 2534 read_lock(&bond->lock); 2535 2536 delta_in_ticks = (bond->params.miimon * HZ) / 1000; 2537 2538 if (bond->kill_timers) { 2539 goto out; 2540 } 2541 2542 if (bond->slave_cnt == 0) { 2543 goto re_arm; 2544 } 2545 2546 /* we will try to read the link status of each of our slaves, and 2547 * set their IFF_RUNNING flag appropriately. For each slave not 2548 * supporting MII status, we won't do anything so that a user-space 2549 * program could monitor the link itself if needed. 
2550 */ 2551 2552 read_lock(&bond->curr_slave_lock); 2553 oldcurrent = bond->curr_active_slave; 2554 read_unlock(&bond->curr_slave_lock); 2555 2556 bond_for_each_slave(bond, slave, i) { 2557 struct net_device *slave_dev = slave->dev; 2558 int link_state; 2559 u16 old_speed = slave->speed; 2560 u8 old_duplex = slave->duplex; 2561 2562 link_state = bond_check_dev_link(bond, slave_dev, 0); 2563 2564 switch (slave->link) { 2565 case BOND_LINK_UP: /* the link was up */ 2566 if (link_state == BMSR_LSTATUS) { 2567 /* link stays up, nothing more to do */ 2568 break; 2569 } else { /* link going down */ 2570 slave->link = BOND_LINK_FAIL; 2571 slave->delay = bond->params.downdelay; 2572 2573 if (slave->link_failure_count < UINT_MAX) { 2574 slave->link_failure_count++; 2575 } 2576 2577 if (bond->params.downdelay) { 2578 printk(KERN_INFO DRV_NAME 2579 ": %s: link status down for %s " 2580 "interface %s, disabling it in " 2581 "%d ms.\n", 2582 bond_dev->name, 2583 IS_UP(slave_dev) 2584 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2585 ? ((slave == oldcurrent) 2586 ? "active " : "backup ") 2587 : "") 2588 : "idle ", 2589 slave_dev->name, 2590 bond->params.downdelay * bond->params.miimon); 2591 } 2592 } 2593 /* no break ! 
fall through the BOND_LINK_FAIL test to 2594 ensure proper action to be taken 2595 */ 2596 case BOND_LINK_FAIL: /* the link has just gone down */ 2597 if (link_state != BMSR_LSTATUS) { 2598 /* link stays down */ 2599 if (slave->delay <= 0) { 2600 /* link down for too long time */ 2601 slave->link = BOND_LINK_DOWN; 2602 2603 /* in active/backup mode, we must 2604 * completely disable this interface 2605 */ 2606 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2607 (bond->params.mode == BOND_MODE_8023AD)) { 2608 bond_set_slave_inactive_flags(slave); 2609 } 2610 2611 printk(KERN_INFO DRV_NAME 2612 ": %s: link status definitely " 2613 "down for interface %s, " 2614 "disabling it\n", 2615 bond_dev->name, 2616 slave_dev->name); 2617 2618 /* notify ad that the link status has changed */ 2619 if (bond->params.mode == BOND_MODE_8023AD) { 2620 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2621 } 2622 2623 if ((bond->params.mode == BOND_MODE_TLB) || 2624 (bond->params.mode == BOND_MODE_ALB)) { 2625 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2626 } 2627 2628 if (slave == oldcurrent) { 2629 do_failover = 1; 2630 } 2631 } else { 2632 slave->delay--; 2633 } 2634 } else { 2635 /* link up again */ 2636 slave->link = BOND_LINK_UP; 2637 slave->jiffies = jiffies; 2638 printk(KERN_INFO DRV_NAME 2639 ": %s: link status up again after %d " 2640 "ms for interface %s.\n", 2641 bond_dev->name, 2642 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2643 slave_dev->name); 2644 } 2645 break; 2646 case BOND_LINK_DOWN: /* the link was down */ 2647 if (link_state != BMSR_LSTATUS) { 2648 /* the link stays down, nothing more to do */ 2649 break; 2650 } else { /* link going up */ 2651 slave->link = BOND_LINK_BACK; 2652 slave->delay = bond->params.updelay; 2653 2654 if (bond->params.updelay) { 2655 /* if updelay == 0, no need to 2656 advertise about a 0 ms delay */ 2657 printk(KERN_INFO DRV_NAME 2658 ": %s: link status up for " 2659 "interface %s, enabling it " 
2660 "in %d ms.\n", 2661 bond_dev->name, 2662 slave_dev->name, 2663 bond->params.updelay * bond->params.miimon); 2664 } 2665 } 2666 /* no break ! fall through the BOND_LINK_BACK state in 2667 case there's something to do. 2668 */ 2669 case BOND_LINK_BACK: /* the link has just come back */ 2670 if (link_state != BMSR_LSTATUS) { 2671 /* link down again */ 2672 slave->link = BOND_LINK_DOWN; 2673 2674 printk(KERN_INFO DRV_NAME 2675 ": %s: link status down again after %d " 2676 "ms for interface %s.\n", 2677 bond_dev->name, 2678 (bond->params.updelay - slave->delay) * bond->params.miimon, 2679 slave_dev->name); 2680 } else { 2681 /* link stays up */ 2682 if (slave->delay == 0) { 2683 /* now the link has been up for long time enough */ 2684 slave->link = BOND_LINK_UP; 2685 slave->jiffies = jiffies; 2686 2687 if (bond->params.mode == BOND_MODE_8023AD) { 2688 /* prevent it from being the active one */ 2689 slave->state = BOND_STATE_BACKUP; 2690 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2691 /* make it immediately active */ 2692 slave->state = BOND_STATE_ACTIVE; 2693 } else if (slave != bond->primary_slave) { 2694 /* prevent it from being the active one */ 2695 slave->state = BOND_STATE_BACKUP; 2696 } 2697 2698 printk(KERN_INFO DRV_NAME 2699 ": %s: link status definitely " 2700 "up for interface %s.\n", 2701 bond_dev->name, 2702 slave_dev->name); 2703 2704 /* notify ad that the link status has changed */ 2705 if (bond->params.mode == BOND_MODE_8023AD) { 2706 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2707 } 2708 2709 if ((bond->params.mode == BOND_MODE_TLB) || 2710 (bond->params.mode == BOND_MODE_ALB)) { 2711 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2712 } 2713 2714 if ((!oldcurrent) || 2715 (slave == bond->primary_slave)) { 2716 do_failover = 1; 2717 } 2718 } else { 2719 slave->delay--; 2720 } 2721 } 2722 break; 2723 default: 2724 /* Should not happen */ 2725 printk(KERN_ERR "bonding: Error: %s Illegal value (link=%d)\n", 2726 
slave->dev->name, slave->link); 2727 goto out; 2728 } /* end of switch (slave->link) */ 2729 2730 bond_update_speed_duplex(slave); 2731 2732 if (bond->params.mode == BOND_MODE_8023AD) { 2733 if (old_speed != slave->speed) { 2734 bond_3ad_adapter_speed_changed(slave); 2735 } 2736 2737 if (old_duplex != slave->duplex) { 2738 bond_3ad_adapter_duplex_changed(slave); 2739 } 2740 } 2741 2742 } /* end of for */ 2743 2744 if (do_failover) { 2745 write_lock(&bond->curr_slave_lock); 2746 2747 bond_select_active_slave(bond); 2748 2749 if (oldcurrent && !bond->curr_active_slave) { 2750 printk(KERN_INFO DRV_NAME 2751 ": %s: now running without any active " 2752 "interface !\n", 2753 bond_dev->name); 2754 } 2755 2756 write_unlock(&bond->curr_slave_lock); 2757 } 2758 2759 re_arm: 2760 if (bond->params.miimon) { 2761 mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); 2762 } 2763 out: 2764 read_unlock(&bond->lock); 2765 } 2766 2767 2768 static u32 bond_glean_dev_ip(struct net_device *dev) 2769 { 2770 struct in_device *idev; 2771 struct in_ifaddr *ifa; 2772 u32 addr = 0; 2773 2774 if (!dev) 2775 return 0; 2776 2777 rcu_read_lock(); 2778 idev = __in_dev_get(dev); 2779 if (!idev) 2780 goto out; 2781 2782 ifa = idev->ifa_list; 2783 if (!ifa) 2784 goto out; 2785 2786 addr = ifa->ifa_local; 2787 out: 2788 rcu_read_unlock(); 2789 return addr; 2790 } 2791 2792 static int bond_has_ip(struct bonding *bond) 2793 { 2794 struct vlan_entry *vlan, *vlan_next; 2795 2796 if (bond->master_ip) 2797 return 1; 2798 2799 if (list_empty(&bond->vlan_list)) 2800 return 0; 2801 2802 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2803 vlan_list) { 2804 if (vlan->vlan_ip) 2805 return 1; 2806 } 2807 2808 return 0; 2809 } 2810 2811 /* 2812 * We go to the (large) trouble of VLAN tagging ARP frames because 2813 * switches in VLAN mode (especially if ports are configured as 2814 * "native" to a VLAN) might not pass non-tagged frames. 
2815 */ 2816 static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id) 2817 { 2818 struct sk_buff *skb; 2819 2820 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2821 slave_dev->name, dest_ip, src_ip, vlan_id); 2822 2823 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2824 NULL, slave_dev->dev_addr, NULL); 2825 2826 if (!skb) { 2827 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2828 return; 2829 } 2830 if (vlan_id) { 2831 skb = vlan_put_tag(skb, vlan_id); 2832 if (!skb) { 2833 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2834 return; 2835 } 2836 } 2837 arp_xmit(skb); 2838 } 2839 2840 2841 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2842 { 2843 int i, vlan_id, rv; 2844 u32 *targets = bond->params.arp_targets; 2845 struct vlan_entry *vlan, *vlan_next; 2846 struct net_device *vlan_dev; 2847 struct flowi fl; 2848 struct rtable *rt; 2849 2850 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { 2851 dprintk("basa: target %x\n", targets[i]); 2852 if (list_empty(&bond->vlan_list)) { 2853 dprintk("basa: empty vlan: arp_send\n"); 2854 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2855 bond->master_ip, 0); 2856 continue; 2857 } 2858 2859 /* 2860 * If VLANs are configured, we do a route lookup to 2861 * determine which VLAN interface would be used, so we 2862 * can tag the ARP with the proper VLAN tag. 
2863 */ 2864 memset(&fl, 0, sizeof(fl)); 2865 fl.fl4_dst = targets[i]; 2866 fl.fl4_tos = RTO_ONLINK; 2867 2868 rv = ip_route_output_key(&rt, &fl); 2869 if (rv) { 2870 if (net_ratelimit()) { 2871 printk(KERN_WARNING DRV_NAME 2872 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2873 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2874 } 2875 continue; 2876 } 2877 2878 /* 2879 * This target is not on a VLAN 2880 */ 2881 if (rt->u.dst.dev == bond->dev) { 2882 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2883 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2884 bond->master_ip, 0); 2885 continue; 2886 } 2887 2888 vlan_id = 0; 2889 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2890 vlan_list) { 2891 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2892 if (vlan_dev == rt->u.dst.dev) { 2893 vlan_id = vlan->vlan_id; 2894 dprintk("basa: vlan match on %s %d\n", 2895 vlan_dev->name, vlan_id); 2896 break; 2897 } 2898 } 2899 2900 if (vlan_id) { 2901 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2902 vlan->vlan_ip, vlan_id); 2903 continue; 2904 } 2905 2906 if (net_ratelimit()) { 2907 printk(KERN_WARNING DRV_NAME 2908 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2909 bond->dev->name, NIPQUAD(fl.fl4_dst), 2910 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2911 } 2912 } 2913 } 2914 2915 /* 2916 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2917 * for each VLAN above us. 2918 */ 2919 static void bond_send_gratuitous_arp(struct bonding *bond) 2920 { 2921 struct slave *slave = bond->curr_active_slave; 2922 struct vlan_entry *vlan; 2923 struct net_device *vlan_dev; 2924 2925 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2926 slave ? 
slave->dev->name : "NULL"); 2927 if (!slave) 2928 return; 2929 2930 if (bond->master_ip) { 2931 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2932 bond->master_ip, 0); 2933 } 2934 2935 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2936 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2937 if (vlan->vlan_ip) { 2938 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2939 vlan->vlan_ip, vlan->vlan_id); 2940 } 2941 } 2942 } 2943 2944 /* 2945 * this function is called regularly to monitor each slave's link 2946 * ensuring that traffic is being sent and received when arp monitoring 2947 * is used in load-balancing mode. if the adapter has been dormant, then an 2948 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2949 * arp monitoring in active backup mode. 2950 */ 2951 static void bond_loadbalance_arp_mon(struct net_device *bond_dev) 2952 { 2953 struct bonding *bond = bond_dev->priv; 2954 struct slave *slave, *oldcurrent; 2955 int do_failover = 0; 2956 int delta_in_ticks; 2957 int i; 2958 2959 read_lock(&bond->lock); 2960 2961 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2962 2963 if (bond->kill_timers) { 2964 goto out; 2965 } 2966 2967 if (bond->slave_cnt == 0) { 2968 goto re_arm; 2969 } 2970 2971 read_lock(&bond->curr_slave_lock); 2972 oldcurrent = bond->curr_active_slave; 2973 read_unlock(&bond->curr_slave_lock); 2974 2975 /* see if any of the previous devices are up now (i.e. they have 2976 * xmt and rcv traffic). the curr_active_slave does not come into 2977 * the picture unless it is null. also, slave->jiffies is not needed 2978 * here because we send an arp on each slave and give a slave as 2979 * long as it needs to get the tx/rx within the delta. 2980 * TODO: what about up/down delay in arp mode? 
it wasn't here before 2981 * so it can wait 2982 */ 2983 bond_for_each_slave(bond, slave, i) { 2984 if (slave->link != BOND_LINK_UP) { 2985 if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) && 2986 ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) { 2987 2988 slave->link = BOND_LINK_UP; 2989 slave->state = BOND_STATE_ACTIVE; 2990 2991 /* primary_slave has no meaning in round-robin 2992 * mode. the window of a slave being up and 2993 * curr_active_slave being null after enslaving 2994 * is closed. 2995 */ 2996 if (!oldcurrent) { 2997 printk(KERN_INFO DRV_NAME 2998 ": %s: link status definitely " 2999 "up for interface %s, ", 3000 bond_dev->name, 3001 slave->dev->name); 3002 do_failover = 1; 3003 } else { 3004 printk(KERN_INFO DRV_NAME 3005 ": %s: interface %s is now up\n", 3006 bond_dev->name, 3007 slave->dev->name); 3008 } 3009 } 3010 } else { 3011 /* slave->link == BOND_LINK_UP */ 3012 3013 /* not all switches will respond to an arp request 3014 * when the source ip is 0, so don't take the link down 3015 * if we don't know our ip yet 3016 */ 3017 if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 3018 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 3019 bond_has_ip(bond))) { 3020 3021 slave->link = BOND_LINK_DOWN; 3022 slave->state = BOND_STATE_BACKUP; 3023 3024 if (slave->link_failure_count < UINT_MAX) { 3025 slave->link_failure_count++; 3026 } 3027 3028 printk(KERN_INFO DRV_NAME 3029 ": %s: interface %s is now down.\n", 3030 bond_dev->name, 3031 slave->dev->name); 3032 3033 if (slave == oldcurrent) { 3034 do_failover = 1; 3035 } 3036 } 3037 } 3038 3039 /* note: if switch is in round-robin mode, all links 3040 * must tx arp to ensure all links rx an arp - otherwise 3041 * links may oscillate or not come up at all; if switch is 3042 * in something like xor mode, there is nothing we can 3043 * do - all replies will be rx'ed on same link causing slaves 3044 * to be unstable during low/no traffic periods 3045 */ 3046 if 
(IS_UP(slave->dev)) { 3047 bond_arp_send_all(bond, slave); 3048 } 3049 } 3050 3051 if (do_failover) { 3052 write_lock(&bond->curr_slave_lock); 3053 3054 bond_select_active_slave(bond); 3055 3056 if (oldcurrent && !bond->curr_active_slave) { 3057 printk(KERN_INFO DRV_NAME 3058 ": %s: now running without any active " 3059 "interface !\n", 3060 bond_dev->name); 3061 } 3062 3063 write_unlock(&bond->curr_slave_lock); 3064 } 3065 3066 re_arm: 3067 if (bond->params.arp_interval) { 3068 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 3069 } 3070 out: 3071 read_unlock(&bond->lock); 3072 } 3073 3074 /* 3075 * When using arp monitoring in active-backup mode, this function is 3076 * called to determine if any backup slaves have went down or a new 3077 * current slave needs to be found. 3078 * The backup slaves never generate traffic, they are considered up by merely 3079 * receiving traffic. If the current slave goes down, each backup slave will 3080 * be given the opportunity to tx/rx an arp before being taken down - this 3081 * prevents all slaves from being taken down due to the current slave not 3082 * sending any traffic for the backups to receive. The arps are not necessarily 3083 * necessary, any tx and rx traffic will keep the current slave up. While any 3084 * rx traffic will keep the backup slaves up, the current slave is responsible 3085 * for generating traffic to keep them up regardless of any other traffic they 3086 * may have received. 
* see loadbalance_arp_monitor for arp monitoring in load balancing mode
 *
 * Runs with bond->lock read-held for the whole pass; curr_slave_lock is
 * taken for writing only around changes of the active slave. The timer
 * re-arms itself unless kill_timers is set or arp_interval is zero.
 */
static void bond_activebackup_arp_mon(struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave;
	int delta_in_ticks;
	int i;

	read_lock(&bond->lock);

	/* arp_interval is in milliseconds; convert to jiffies */
	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;

	if (bond->kill_timers) {
		goto out;
	}

	if (bond->slave_cnt == 0) {
		goto re_arm;
	}

	/* determine if any slave has come up or any backup slave has
	 * gone down
	 * TODO: what about up/down delay in arp mode? it wasn't here before
	 *       so it can wait
	 */
	bond_for_each_slave(bond, slave, i) {
		if (slave->link != BOND_LINK_UP) {
			/* receiving anything within one interval brings a slave up */
			if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {

				slave->link = BOND_LINK_UP;

				write_lock(&bond->curr_slave_lock);

				/* promote it only if there is no active slave
				 * and it has also transmitted recently
				 */
				if ((!bond->curr_active_slave) &&
				    ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) {
					bond_change_active_slave(bond, slave);
					bond->current_arp_slave = NULL;
				} else if (bond->curr_active_slave != slave) {
					/* this slave has just come up but we
					 * already have a current slave; this
					 * can also happen if bond_enslave adds
					 * a new slave that is up while we are
					 * searching for a new slave
					 */
					bond_set_slave_inactive_flags(slave);
					bond->current_arp_slave = NULL;
				}

				if (slave == bond->curr_active_slave) {
					printk(KERN_INFO DRV_NAME
					       ": %s: %s is up and now the "
					       "active interface\n",
					       bond_dev->name,
					       slave->dev->name);
				} else {
					printk(KERN_INFO DRV_NAME
					       ": %s: backup interface %s is "
					       "now up\n",
					       bond_dev->name,
					       slave->dev->name);
				}

				write_unlock(&bond->curr_slave_lock);
			}
		} else {
			read_lock(&bond->curr_slave_lock);

			if ((slave != bond->curr_active_slave) &&
			    (!bond->current_arp_slave) &&
			    (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
			     bond_has_ip(bond))) {
				/* a backup slave has gone down; three times
				 * the delta allows the current slave to be
				 * taken out before the backup slave.
				 * note: a non-null current_arp_slave indicates
				 * the curr_active_slave went down and we are
				 * searching for a new one; under this
				 * condition we only take the curr_active_slave
				 * down - this gives each slave a chance to
				 * tx/rx traffic before being taken out
				 */

				read_unlock(&bond->curr_slave_lock);

				slave->link  = BOND_LINK_DOWN;

				/* saturate rather than wrap the counter */
				if (slave->link_failure_count < UINT_MAX) {
					slave->link_failure_count++;
				}

				bond_set_slave_inactive_flags(slave);

				printk(KERN_INFO DRV_NAME
				       ": %s: backup interface %s is now down\n",
				       bond_dev->name,
				       slave->dev->name);
			} else {
				read_unlock(&bond->curr_slave_lock);
			}
		}
	}

	/* from here on "slave" is the current active slave, if any */
	read_lock(&bond->curr_slave_lock);
	slave = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	if (slave) {
		/* if we have sent traffic in the past 2*arp_intervals but
		 * haven't xmit and rx traffic in that time interval, select
		 * a different slave. slave->jiffies is only updated when
		 * a slave first becomes the curr_active_slave - not necessarily
		 * after every arp; this ensures the slave has a full 2*delta
		 * before being taken out. if a primary is being used, check
		 * if it is up and needs to take over as the curr_active_slave
		 */
		if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
		     (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
		      bond_has_ip(bond))) &&
		    ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {

			slave->link  = BOND_LINK_DOWN;

			if (slave->link_failure_count < UINT_MAX) {
				slave->link_failure_count++;
			}

			printk(KERN_INFO DRV_NAME
			       ": %s: link status down for active interface "
			       "%s, disabling it\n",
			       bond_dev->name,
			       slave->dev->name);

			write_lock(&bond->curr_slave_lock);

			bond_select_active_slave(bond);
			slave = bond->curr_active_slave;

			write_unlock(&bond->curr_slave_lock);

			/* may be NULL, which starts the candidate search below */
			bond->current_arp_slave = slave;

			if (slave) {
				slave->jiffies = jiffies;
			}
		} else if ((bond->primary_slave) &&
			   (bond->primary_slave != slave) &&
			   (bond->primary_slave->link == BOND_LINK_UP)) {
			/* at this point, slave is the curr_active_slave */
			printk(KERN_INFO DRV_NAME
			       ": %s: changing from interface %s to primary "
			       "interface %s\n",
			       bond_dev->name,
			       slave->dev->name,
			       bond->primary_slave->dev->name);

			/* primary is up so switch to it */
			write_lock(&bond->curr_slave_lock);
			bond_change_active_slave(bond, bond->primary_slave);
			write_unlock(&bond->curr_slave_lock);

			slave = bond->primary_slave;
			slave->jiffies = jiffies;
		} else {
			bond->current_arp_slave = NULL;
		}

		/* the current slave must tx an arp to ensure backup slaves
		 * rx traffic
		 */
		if (slave && bond_has_ip(bond)) {
			bond_arp_send_all(bond, slave);
		}
	}

	/* if we don't have a curr_active_slave, search for the next available
	 * backup slave from the current_arp_slave and make it the candidate
	 * for becoming the curr_active_slave
	 */
	if (!slave) {
		if (!bond->current_arp_slave) {
			bond->current_arp_slave = bond->first_slave;
		}

		if (bond->current_arp_slave) {
			bond_set_slave_inactive_flags(bond->current_arp_slave);

			/* search for next candidate */
			bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
				if (IS_UP(slave->dev)) {
					/* probe through this slave and remember it
					 * as the candidate for the next pass
					 */
					slave->link = BOND_LINK_BACK;
					bond_set_slave_active_flags(slave);
					bond_arp_send_all(bond, slave);
					slave->jiffies = jiffies;
					bond->current_arp_slave = slave;
					break;
				}

				/* if the link state is up at this point, we
				 * mark it down - this can happen if we have
				 * simultaneous link failures and
				 * reselect_active_interface doesn't make this
				 * one the current slave so it is still marked
				 * up when it is actually down
				 */
				if (slave->link == BOND_LINK_UP) {
					slave->link  = BOND_LINK_DOWN;
					if (slave->link_failure_count < UINT_MAX) {
						slave->link_failure_count++;
					}

					bond_set_slave_inactive_flags(slave);

					printk(KERN_INFO DRV_NAME
					       ": %s: backup interface %s is "
					       "now down.\n",
					       bond_dev->name,
					       slave->dev->name);
				}
			}
		}
	}

re_arm:
	if (bond->params.arp_interval) {
		mod_timer(&bond->arp_timer, jiffies + delta_in_ticks);
	}
out:
	read_unlock(&bond->lock);
}

/*------------------------------ proc/seq_file-------------------------------*/

#ifdef CONFIG_PROC_FS

/* iterator cookie for the header record that precedes the slaves */
#define SEQ_START_TOKEN ((void *)1)

/*
 * seq_file start: take dev_base_lock and bond->lock for reading, then
 * return the record at *pos. Position 0 is the header token; position N
 * (N >= 1) is the N-th slave; NULL when *pos is past the last slave.
 * The locks stay held until bond_info_seq_stop.
 */
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bonding *bond = seq->private;
	loff_t off = 0;
	struct slave *slave;
	int i;

	/* make sure the bond won't be taken away */
	read_lock(&dev_base_lock);
	read_lock_bh(&bond->lock);

	if (*pos == 0) {
		return SEQ_START_TOKEN;
	}

	bond_for_each_slave(bond, slave, i) {
		if (++off ==
*pos) { 3341 return slave; 3342 } 3343 } 3344 3345 return NULL; 3346 } 3347 3348 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3349 { 3350 struct bonding *bond = seq->private; 3351 struct slave *slave = v; 3352 3353 ++*pos; 3354 if (v == SEQ_START_TOKEN) { 3355 return bond->first_slave; 3356 } 3357 3358 slave = slave->next; 3359 3360 return (slave == bond->first_slave) ? NULL : slave; 3361 } 3362 3363 static void bond_info_seq_stop(struct seq_file *seq, void *v) 3364 { 3365 struct bonding *bond = seq->private; 3366 3367 read_unlock_bh(&bond->lock); 3368 read_unlock(&dev_base_lock); 3369 } 3370 3371 static void bond_info_show_master(struct seq_file *seq) 3372 { 3373 struct bonding *bond = seq->private; 3374 struct slave *curr; 3375 3376 read_lock(&bond->curr_slave_lock); 3377 curr = bond->curr_active_slave; 3378 read_unlock(&bond->curr_slave_lock); 3379 3380 seq_printf(seq, "Bonding Mode: %s\n", 3381 bond_mode_name(bond->params.mode)); 3382 3383 if (USES_PRIMARY(bond->params.mode)) { 3384 seq_printf(seq, "Primary Slave: %s\n", 3385 (bond->params.primary[0]) ? 3386 bond->params.primary : "None"); 3387 3388 seq_printf(seq, "Currently Active Slave: %s\n", 3389 (curr) ? curr->dev->name : "None"); 3390 } 3391 3392 seq_printf(seq, "MII Status: %s\n", (curr) ? "up" : "down"); 3393 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 3394 seq_printf(seq, "Up Delay (ms): %d\n", 3395 bond->params.updelay * bond->params.miimon); 3396 seq_printf(seq, "Down Delay (ms): %d\n", 3397 bond->params.downdelay * bond->params.miimon); 3398 3399 if (bond->params.mode == BOND_MODE_8023AD) { 3400 struct ad_info ad_info; 3401 3402 seq_puts(seq, "\n802.3ad info\n"); 3403 seq_printf(seq, "LACP rate: %s\n", 3404 (bond->params.lacp_fast) ? 
"fast" : "slow"); 3405 3406 if (bond_3ad_get_active_agg_info(bond, &ad_info)) { 3407 seq_printf(seq, "bond %s has no active aggregator\n", 3408 bond->dev->name); 3409 } else { 3410 seq_printf(seq, "Active Aggregator Info:\n"); 3411 3412 seq_printf(seq, "\tAggregator ID: %d\n", 3413 ad_info.aggregator_id); 3414 seq_printf(seq, "\tNumber of ports: %d\n", 3415 ad_info.ports); 3416 seq_printf(seq, "\tActor Key: %d\n", 3417 ad_info.actor_key); 3418 seq_printf(seq, "\tPartner Key: %d\n", 3419 ad_info.partner_key); 3420 seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", 3421 ad_info.partner_system[0], 3422 ad_info.partner_system[1], 3423 ad_info.partner_system[2], 3424 ad_info.partner_system[3], 3425 ad_info.partner_system[4], 3426 ad_info.partner_system[5]); 3427 } 3428 } 3429 } 3430 3431 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) 3432 { 3433 struct bonding *bond = seq->private; 3434 3435 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); 3436 seq_printf(seq, "MII Status: %s\n", 3437 (slave->link == BOND_LINK_UP) ? 
"up" : "down"); 3438 seq_printf(seq, "Link Failure Count: %d\n", 3439 slave->link_failure_count); 3440 3441 if (app_abi_ver >= 1) { 3442 seq_printf(seq, 3443 "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", 3444 slave->perm_hwaddr[0], 3445 slave->perm_hwaddr[1], 3446 slave->perm_hwaddr[2], 3447 slave->perm_hwaddr[3], 3448 slave->perm_hwaddr[4], 3449 slave->perm_hwaddr[5]); 3450 } 3451 3452 if (bond->params.mode == BOND_MODE_8023AD) { 3453 const struct aggregator *agg 3454 = SLAVE_AD_INFO(slave).port.aggregator; 3455 3456 if (agg) { 3457 seq_printf(seq, "Aggregator ID: %d\n", 3458 agg->aggregator_identifier); 3459 } else { 3460 seq_puts(seq, "Aggregator ID: N/A\n"); 3461 } 3462 } 3463 } 3464 3465 static int bond_info_seq_show(struct seq_file *seq, void *v) 3466 { 3467 if (v == SEQ_START_TOKEN) { 3468 seq_printf(seq, "%s\n", version); 3469 bond_info_show_master(seq); 3470 } else { 3471 bond_info_show_slave(seq, v); 3472 } 3473 3474 return 0; 3475 } 3476 3477 static struct seq_operations bond_info_seq_ops = { 3478 .start = bond_info_seq_start, 3479 .next = bond_info_seq_next, 3480 .stop = bond_info_seq_stop, 3481 .show = bond_info_seq_show, 3482 }; 3483 3484 static int bond_info_open(struct inode *inode, struct file *file) 3485 { 3486 struct seq_file *seq; 3487 struct proc_dir_entry *proc; 3488 int res; 3489 3490 res = seq_open(file, &bond_info_seq_ops); 3491 if (!res) { 3492 /* recover the pointer buried in proc_dir_entry data */ 3493 seq = file->private_data; 3494 proc = PDE(inode); 3495 seq->private = proc->data; 3496 } 3497 3498 return res; 3499 } 3500 3501 static struct file_operations bond_info_fops = { 3502 .owner = THIS_MODULE, 3503 .open = bond_info_open, 3504 .read = seq_read, 3505 .llseek = seq_lseek, 3506 .release = seq_release, 3507 }; 3508 3509 static int bond_create_proc_entry(struct bonding *bond) 3510 { 3511 struct net_device *bond_dev = bond->dev; 3512 3513 if (bond_proc_dir) { 3514 bond->proc_entry = create_proc_entry(bond_dev->name, 3515 
S_IRUGO, 3516 bond_proc_dir); 3517 if (bond->proc_entry == NULL) { 3518 printk(KERN_WARNING DRV_NAME 3519 ": Warning: Cannot create /proc/net/%s/%s\n", 3520 DRV_NAME, bond_dev->name); 3521 } else { 3522 bond->proc_entry->data = bond; 3523 bond->proc_entry->proc_fops = &bond_info_fops; 3524 bond->proc_entry->owner = THIS_MODULE; 3525 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); 3526 } 3527 } 3528 3529 return 0; 3530 } 3531 3532 static void bond_remove_proc_entry(struct bonding *bond) 3533 { 3534 if (bond_proc_dir && bond->proc_entry) { 3535 remove_proc_entry(bond->proc_file_name, bond_proc_dir); 3536 memset(bond->proc_file_name, 0, IFNAMSIZ); 3537 bond->proc_entry = NULL; 3538 } 3539 } 3540 3541 /* Create the bonding directory under /proc/net, if doesn't exist yet. 3542 * Caller must hold rtnl_lock. 3543 */ 3544 static void bond_create_proc_dir(void) 3545 { 3546 int len = strlen(DRV_NAME); 3547 3548 for (bond_proc_dir = proc_net->subdir; bond_proc_dir; 3549 bond_proc_dir = bond_proc_dir->next) { 3550 if ((bond_proc_dir->namelen == len) && 3551 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3552 break; 3553 } 3554 } 3555 3556 if (!bond_proc_dir) { 3557 bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); 3558 if (bond_proc_dir) { 3559 bond_proc_dir->owner = THIS_MODULE; 3560 } else { 3561 printk(KERN_WARNING DRV_NAME 3562 ": Warning: cannot create /proc/net/%s\n", 3563 DRV_NAME); 3564 } 3565 } 3566 } 3567 3568 /* Destroy the bonding directory under /proc/net, if empty. 3569 * Caller must hold rtnl_lock. 3570 */ 3571 static void bond_destroy_proc_dir(void) 3572 { 3573 struct proc_dir_entry *de; 3574 3575 if (!bond_proc_dir) { 3576 return; 3577 } 3578 3579 /* verify that the /proc dir is empty */ 3580 for (de = bond_proc_dir->subdir; de; de = de->next) { 3581 /* ignore . and .. 
*/ 3582 if (*(de->name) != '.') { 3583 break; 3584 } 3585 } 3586 3587 if (de) { 3588 if (bond_proc_dir->owner == THIS_MODULE) { 3589 bond_proc_dir->owner = NULL; 3590 } 3591 } else { 3592 remove_proc_entry(DRV_NAME, proc_net); 3593 bond_proc_dir = NULL; 3594 } 3595 } 3596 #endif /* CONFIG_PROC_FS */ 3597 3598 /*-------------------------- netdev event handling --------------------------*/ 3599 3600 /* 3601 * Change device name 3602 */ 3603 static int bond_event_changename(struct bonding *bond) 3604 { 3605 #ifdef CONFIG_PROC_FS 3606 bond_remove_proc_entry(bond); 3607 bond_create_proc_entry(bond); 3608 #endif 3609 3610 return NOTIFY_DONE; 3611 } 3612 3613 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3614 { 3615 struct bonding *event_bond = bond_dev->priv; 3616 3617 switch (event) { 3618 case NETDEV_CHANGENAME: 3619 return bond_event_changename(event_bond); 3620 case NETDEV_UNREGISTER: 3621 /* 3622 * TODO: remove a bond from the list? 3623 */ 3624 break; 3625 default: 3626 break; 3627 } 3628 3629 return NOTIFY_DONE; 3630 } 3631 3632 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3633 { 3634 struct net_device *bond_dev = slave_dev->master; 3635 struct bonding *bond = bond_dev->priv; 3636 3637 switch (event) { 3638 case NETDEV_UNREGISTER: 3639 if (bond_dev) { 3640 bond_release(bond_dev, slave_dev); 3641 } 3642 break; 3643 case NETDEV_CHANGE: 3644 /* 3645 * TODO: is this what we get if somebody 3646 * sets up a hierarchical bond, then rmmod's 3647 * one of the slave bonding devices? 3648 */ 3649 break; 3650 case NETDEV_DOWN: 3651 /* 3652 * ... Or is it this? 3653 */ 3654 break; 3655 case NETDEV_CHANGEMTU: 3656 /* 3657 * TODO: Should slaves be allowed to 3658 * independently alter their MTU? For 3659 * an active-backup bond, slaves need 3660 * not be the same type of device, so 3661 * MTUs may vary. For other modes, 3662 * slaves arguably should have the 3663 * same MTUs. 
To do this, we'd need to 3664 * take over the slave's change_mtu 3665 * function for the duration of their 3666 * servitude. 3667 */ 3668 break; 3669 case NETDEV_CHANGENAME: 3670 /* 3671 * TODO: handle changing the primary's name 3672 */ 3673 break; 3674 case NETDEV_FEAT_CHANGE: 3675 bond_compute_features(bond); 3676 break; 3677 default: 3678 break; 3679 } 3680 3681 return NOTIFY_DONE; 3682 } 3683 3684 /* 3685 * bond_netdev_event: handle netdev notifier chain events. 3686 * 3687 * This function receives events for the netdev chain. The caller (an 3688 * ioctl handler calling notifier_call_chain) holds the necessary 3689 * locks for us to safely manipulate the slave devices (RTNL lock, 3690 * dev_probe_lock). 3691 */ 3692 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3693 { 3694 struct net_device *event_dev = (struct net_device *)ptr; 3695 3696 dprintk("event_dev: %s, event: %lx\n", 3697 (event_dev ? event_dev->name : "None"), 3698 event); 3699 3700 if (event_dev->flags & IFF_MASTER) { 3701 dprintk("IFF_MASTER\n"); 3702 return bond_master_netdev_event(event, event_dev); 3703 } 3704 3705 if (event_dev->flags & IFF_SLAVE) { 3706 dprintk("IFF_SLAVE\n"); 3707 return bond_slave_netdev_event(event, event_dev); 3708 } 3709 3710 return NOTIFY_DONE; 3711 } 3712 3713 /* 3714 * bond_inetaddr_event: handle inetaddr notifier chain events. 3715 * 3716 * We keep track of device IPs primarily to use as source addresses in 3717 * ARP monitor probes (rather than spewing out broadcasts all the time). 3718 * 3719 * We track one IP for the main device (if it has one), plus one per VLAN. 
3720 */ 3721 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3722 { 3723 struct in_ifaddr *ifa = ptr; 3724 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3725 struct bonding *bond, *bond_next; 3726 struct vlan_entry *vlan, *vlan_next; 3727 3728 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3729 if (bond->dev == event_dev) { 3730 switch (event) { 3731 case NETDEV_UP: 3732 bond->master_ip = ifa->ifa_local; 3733 return NOTIFY_OK; 3734 case NETDEV_DOWN: 3735 bond->master_ip = bond_glean_dev_ip(bond->dev); 3736 return NOTIFY_OK; 3737 default: 3738 return NOTIFY_DONE; 3739 } 3740 } 3741 3742 if (list_empty(&bond->vlan_list)) 3743 continue; 3744 3745 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3746 vlan_list) { 3747 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 3748 if (vlan_dev == event_dev) { 3749 switch (event) { 3750 case NETDEV_UP: 3751 vlan->vlan_ip = ifa->ifa_local; 3752 return NOTIFY_OK; 3753 case NETDEV_DOWN: 3754 vlan->vlan_ip = 3755 bond_glean_dev_ip(vlan_dev); 3756 return NOTIFY_OK; 3757 default: 3758 return NOTIFY_DONE; 3759 } 3760 } 3761 } 3762 } 3763 return NOTIFY_DONE; 3764 } 3765 3766 static struct notifier_block bond_netdev_notifier = { 3767 .notifier_call = bond_netdev_event, 3768 }; 3769 3770 static struct notifier_block bond_inetaddr_notifier = { 3771 .notifier_call = bond_inetaddr_event, 3772 }; 3773 3774 /*-------------------------- Packet type handling ---------------------------*/ 3775 3776 /* register to receive lacpdus on a bond */ 3777 static void bond_register_lacpdu(struct bonding *bond) 3778 { 3779 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3780 3781 /* initialize packet type */ 3782 pk_type->type = PKT_TYPE_LACPDU; 3783 pk_type->dev = bond->dev; 3784 pk_type->func = bond_3ad_lacpdu_recv; 3785 3786 dev_add_pack(pk_type); 3787 } 3788 3789 /* unregister to receive lacpdus on a bond */ 3790 static void 
bond_unregister_lacpdu(struct bonding *bond) 3791 { 3792 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3793 } 3794 3795 /*---------------------------- Hashing Policies -----------------------------*/ 3796 3797 /* 3798 * Hash for the the output device based upon layer 3 and layer 4 data. If 3799 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3800 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3801 */ 3802 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3803 struct net_device *bond_dev, int count) 3804 { 3805 struct ethhdr *data = (struct ethhdr *)skb->data; 3806 struct iphdr *iph = skb->nh.iph; 3807 u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl); 3808 int layer4_xor = 0; 3809 3810 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3811 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3812 (iph->protocol == IPPROTO_TCP || 3813 iph->protocol == IPPROTO_UDP)) { 3814 layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1))); 3815 } 3816 return (layer4_xor ^ 3817 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3818 3819 } 3820 3821 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3822 } 3823 3824 /* 3825 * Hash for the output device based upon layer 2 data 3826 */ 3827 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3828 struct net_device *bond_dev, int count) 3829 { 3830 struct ethhdr *data = (struct ethhdr *)skb->data; 3831 3832 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3833 } 3834 3835 /*-------------------------- Device entry points ----------------------------*/ 3836 3837 static int bond_open(struct net_device *bond_dev) 3838 { 3839 struct bonding *bond = bond_dev->priv; 3840 struct timer_list *mii_timer = &bond->mii_timer; 3841 struct timer_list *arp_timer = &bond->arp_timer; 3842 3843 bond->kill_timers = 0; 3844 3845 if ((bond->params.mode == BOND_MODE_TLB) || 3846 (bond->params.mode == BOND_MODE_ALB)) { 3847 struct timer_list *alb_timer = 
&(BOND_ALB_INFO(bond).alb_timer); 3848 3849 /* bond_alb_initialize must be called before the timer 3850 * is started. 3851 */ 3852 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3853 /* something went wrong - fail the open operation */ 3854 return -1; 3855 } 3856 3857 init_timer(alb_timer); 3858 alb_timer->expires = jiffies + 1; 3859 alb_timer->data = (unsigned long)bond; 3860 alb_timer->function = (void *)&bond_alb_monitor; 3861 add_timer(alb_timer); 3862 } 3863 3864 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3865 init_timer(mii_timer); 3866 mii_timer->expires = jiffies + 1; 3867 mii_timer->data = (unsigned long)bond_dev; 3868 mii_timer->function = (void *)&bond_mii_monitor; 3869 add_timer(mii_timer); 3870 } 3871 3872 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3873 init_timer(arp_timer); 3874 arp_timer->expires = jiffies + 1; 3875 arp_timer->data = (unsigned long)bond_dev; 3876 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 3877 arp_timer->function = (void *)&bond_activebackup_arp_mon; 3878 } else { 3879 arp_timer->function = (void *)&bond_loadbalance_arp_mon; 3880 } 3881 add_timer(arp_timer); 3882 } 3883 3884 if (bond->params.mode == BOND_MODE_8023AD) { 3885 struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); 3886 init_timer(ad_timer); 3887 ad_timer->expires = jiffies + 1; 3888 ad_timer->data = (unsigned long)bond; 3889 ad_timer->function = (void *)&bond_3ad_state_machine_handler; 3890 add_timer(ad_timer); 3891 3892 /* register to receive LACPDUs */ 3893 bond_register_lacpdu(bond); 3894 } 3895 3896 return 0; 3897 } 3898 3899 static int bond_close(struct net_device *bond_dev) 3900 { 3901 struct bonding *bond = bond_dev->priv; 3902 3903 if (bond->params.mode == BOND_MODE_8023AD) { 3904 /* Unregister the receive of LACPDUs */ 3905 bond_unregister_lacpdu(bond); 3906 } 3907 3908 write_lock_bh(&bond->lock); 3909 3910 bond_mc_list_destroy(bond); 3911 3912 /* signal timers not to 
re-arm */ 3913 bond->kill_timers = 1; 3914 3915 write_unlock_bh(&bond->lock); 3916 3917 /* del_timer_sync must run without holding the bond->lock 3918 * because a running timer might be trying to hold it too 3919 */ 3920 3921 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3922 del_timer_sync(&bond->mii_timer); 3923 } 3924 3925 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3926 del_timer_sync(&bond->arp_timer); 3927 } 3928 3929 switch (bond->params.mode) { 3930 case BOND_MODE_8023AD: 3931 del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); 3932 break; 3933 case BOND_MODE_TLB: 3934 case BOND_MODE_ALB: 3935 del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); 3936 break; 3937 default: 3938 break; 3939 } 3940 3941 /* Release the bonded slaves */ 3942 bond_release_all(bond_dev); 3943 3944 if ((bond->params.mode == BOND_MODE_TLB) || 3945 (bond->params.mode == BOND_MODE_ALB)) { 3946 /* Must be called only after all 3947 * slaves have been released 3948 */ 3949 bond_alb_deinitialize(bond); 3950 } 3951 3952 return 0; 3953 } 3954 3955 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3956 { 3957 struct bonding *bond = bond_dev->priv; 3958 struct net_device_stats *stats = &(bond->stats), *sstats; 3959 struct slave *slave; 3960 int i; 3961 3962 memset(stats, 0, sizeof(struct net_device_stats)); 3963 3964 read_lock_bh(&bond->lock); 3965 3966 bond_for_each_slave(bond, slave, i) { 3967 sstats = slave->dev->get_stats(slave->dev); 3968 3969 stats->rx_packets += sstats->rx_packets; 3970 stats->rx_bytes += sstats->rx_bytes; 3971 stats->rx_errors += sstats->rx_errors; 3972 stats->rx_dropped += sstats->rx_dropped; 3973 3974 stats->tx_packets += sstats->tx_packets; 3975 stats->tx_bytes += sstats->tx_bytes; 3976 stats->tx_errors += sstats->tx_errors; 3977 stats->tx_dropped += sstats->tx_dropped; 3978 3979 stats->multicast += sstats->multicast; 3980 stats->collisions += sstats->collisions; 3981 3982 stats->rx_length_errors 
+= sstats->rx_length_errors;
		stats->rx_over_errors += sstats->rx_over_errors;
		stats->rx_crc_errors += sstats->rx_crc_errors;
		stats->rx_frame_errors += sstats->rx_frame_errors;
		stats->rx_fifo_errors += sstats->rx_fifo_errors;
		stats->rx_missed_errors += sstats->rx_missed_errors;

		stats->tx_aborted_errors += sstats->tx_aborted_errors;
		stats->tx_carrier_errors += sstats->tx_carrier_errors;
		stats->tx_fifo_errors += sstats->tx_fifo_errors;
		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
		stats->tx_window_errors += sstats->tx_window_errors;
	}

	read_unlock_bh(&bond->lock);

	return stats;
}

/*
 * ioctl entry point for the bond device.
 *
 * The query commands (ethtool, MII, bond info, slave info) are answered
 * directly and do not require privileges. Every other command needs
 * CAP_NET_ADMIN and names a slave device in ifr->ifr_slave, which is then
 * enslaved, released, used as hardware-address source or made active.
 *
 * Returns 0 on success or a negative errno: -EINVAL (no MII data, or an
 * ifenslave ABI version differing from the one already in use), -EFAULT
 * (user copy failed), -EPERM (missing capability), -ENODEV (unknown
 * slave), -EOPNOTSUPP (unknown command), or the handler's own result.
 */
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
{
	struct net_device *slave_dev = NULL;
	struct ifbond k_binfo;
	struct ifbond __user *u_binfo = NULL;
	struct ifslave k_sinfo;
	struct ifslave __user *u_sinfo = NULL;
	struct mii_ioctl_data *mii = NULL;
	/* remembered so a failed call can undo the ABI-version latch below */
	int prev_abi_ver = orig_app_abi_ver;
	int res = 0;

	dprintk("bond_ioctl: master=%s, cmd=%d\n",
		bond_dev->name, cmd);

	switch (cmd) {
	case SIOCETHTOOL:
		return bond_ethtool_ioctl(bond_dev, ifr);
	case SIOCGMIIPHY:
		mii = if_mii(ifr);
		if (!mii) {
			return -EINVAL;
		}
		mii->phy_id = 0;
		/* Fall Through */
	case SIOCGMIIREG:
		/*
		 * We do this again just in case we were called by SIOCGMIIREG
		 * instead of SIOCGMIIPHY.
		 */
		mii = if_mii(ifr);
		if (!mii) {
			return -EINVAL;
		}

		/* only register 1 is simulated: link status is reported
		 * as up exactly when the bond has an active slave
		 */
		if (mii->reg_num == 1) {
			struct bonding *bond = bond_dev->priv;
			mii->val_out = 0;
			read_lock_bh(&bond->lock);
			read_lock(&bond->curr_slave_lock);
			if (bond->curr_active_slave) {
				mii->val_out = BMSR_LSTATUS;
			}
			read_unlock(&bond->curr_slave_lock);
			read_unlock_bh(&bond->lock);
		}

		return 0;
	case BOND_INFO_QUERY_OLD:
	case SIOCBONDINFOQUERY:
		u_binfo = (struct ifbond __user *)ifr->ifr_data;

		if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) {
			return -EFAULT;
		}

		res = bond_info_query(bond_dev, &k_binfo);
		if (res == 0) {
			if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) {
				return -EFAULT;
			}
		}

		return res;
	case BOND_SLAVE_INFO_QUERY_OLD:
	case SIOCBONDSLAVEINFOQUERY:
		u_sinfo = (struct ifslave __user *)ifr->ifr_data;

		if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) {
			return -EFAULT;
		}

		res = bond_slave_info_query(bond_dev, &k_sinfo);
		if (res == 0) {
			if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) {
				return -EFAULT;
			}
		}

		return res;
	default:
		/* Go on */
		break;
	}

	/* everything below modifies the bond and needs privileges */
	if (!capable(CAP_NET_ADMIN)) {
		return -EPERM;
	}

	if (orig_app_abi_ver == -1) {
		/* no orig_app_abi_ver was provided yet, so we'll use the
		 * current one from now on, even if it's 0
		 */
		orig_app_abi_ver = app_abi_ver;

	} else if (orig_app_abi_ver != app_abi_ver) {
		printk(KERN_ERR DRV_NAME
		       ": Error: already using ifenslave ABI version %d; to "
		       "upgrade ifenslave to version %d, you must first "
		       "reload bonding.\n",
		       orig_app_abi_ver, app_abi_ver);
		return -EINVAL;
	}

	/* takes a reference on the device; dropped by dev_put() below */
	slave_dev = dev_get_by_name(ifr->ifr_slave);

	dprintk("slave_dev=%p: \n", slave_dev);

	if (!slave_dev) {
		res = -ENODEV;
	} else {
		dprintk("slave_dev->name=%s: \n", slave_dev->name);
		switch (cmd) {
		case BOND_ENSLAVE_OLD:
		case SIOCBONDENSLAVE:
			res = bond_enslave(bond_dev, slave_dev);
			break;
		case BOND_RELEASE_OLD:
		case SIOCBONDRELEASE:
			res = bond_release(bond_dev, slave_dev);
			break;
		case BOND_SETHWADDR_OLD:
		case SIOCBONDSETHWADDR:
			res = bond_sethwaddr(bond_dev, slave_dev);
			break;
		case BOND_CHANGE_ACTIVE_OLD:
		case SIOCBONDCHANGEACTIVE:
			res = bond_ioctl_change_active(bond_dev, slave_dev);
			break;
		default:
			res = -EOPNOTSUPP;
		}

		dev_put(slave_dev);
	}

	if (res < 0) {
		/* The ioctl failed, so there's no point in changing the
		 * orig_app_abi_ver. We'll restore its value just in case
		 * we've changed it earlier in this function.
		 */
		orig_app_abi_ver = prev_abi_ver;
	}

	return res;
}

/*
 * Propagate the master's promiscuity, allmulti setting and multicast
 * address list to the slaves. bond->flags and bond->mc_list hold the
 * state applied on the previous call, so only the differences are pushed
 * down; both are refreshed before returning. Runs with bond->lock
 * write-held with bottom halves disabled.
 */
static void bond_set_multicast_list(struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct dev_mc_list *dmi;

	write_lock_bh(&bond->lock);

	/*
	 * Do promisc before checking multicast_mode
	 */
	if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) {
		bond_set_promiscuity(bond, 1);
	}

	if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) {
		bond_set_promiscuity(bond, -1);
	}

	/* set allmulti flag to slaves */
	if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) {
		bond_set_allmulti(bond, 1);
	}

	if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) {
		bond_set_allmulti(bond, -1);
	}

	/* remember what has been applied for the next comparison */
	bond->flags = bond_dev->flags;

	/* looking for addresses to add to slaves' mc list */
	for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
		if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) {
			bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
		}
	}

	/* looking for addresses to delete from slaves' list */
	for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
		if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) {
			bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
		}
	}

	/* save master's multicast list */
	bond_mc_list_destroy(bond);
	/* GFP_ATOMIC: a lock is held with bottom halves disabled here */
	bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);

	write_unlock_bh(&bond->lock);
}

/*
 * Change the MTU of all of a master's slaves to match the master
 */
static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave, *stop_at;
	int res = 0;
	int i;

	dprintk("bond=%p, name=%s, new_mtu=%d\n", bond,
		(bond_dev ? bond_dev->name : "None"), new_mtu);

	/* Can't hold bond->lock with bh disabled here since
	 * some base drivers panic. On the other hand we can't
	 * hold bond->lock without bh disabled because we'll
	 * deadlock. The only solution is to rely on the fact
	 * that we're under rtnl_lock here, and the slaves
	 * list won't change. This doesn't solve the problem
	 * of setting the slave's MTU while it is
	 * transmitting, but the assumption is that the base
	 * driver can handle that.
	 *
	 * TODO: figure out a way to safely iterate the slaves
	 * list, but without holding a lock around the actual
	 * call to the base driver.
4223 */ 4224 4225 bond_for_each_slave(bond, slave, i) { 4226 dprintk("s %p s->p %p c_m %p\n", slave, 4227 slave->prev, slave->dev->change_mtu); 4228 res = dev_set_mtu(slave->dev, new_mtu); 4229 4230 if (res) { 4231 /* If we failed to set the slave's mtu to the new value 4232 * we must abort the operation even in ACTIVE_BACKUP 4233 * mode, because if we allow the backup slaves to have 4234 * different mtu values than the active slave we'll 4235 * need to change their mtu when doing a failover. That 4236 * means changing their mtu from timer context, which 4237 * is probably not a good idea. 4238 */ 4239 dprintk("err %d %s\n", res, slave->dev->name); 4240 goto unwind; 4241 } 4242 } 4243 4244 bond_dev->mtu = new_mtu; 4245 4246 return 0; 4247 4248 unwind: 4249 /* unwind from head to the slave that failed */ 4250 stop_at = slave; 4251 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4252 int tmp_res; 4253 4254 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 4255 if (tmp_res) { 4256 dprintk("unwind err %d dev %s\n", tmp_res, 4257 slave->dev->name); 4258 } 4259 } 4260 4261 return res; 4262 } 4263 4264 /* 4265 * Change HW address 4266 * 4267 * Note that many devices must be down to change the HW address, and 4268 * downing the master releases all slaves. We can make bonds full of 4269 * bonding devices to test this, however. 4270 */ 4271 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4272 { 4273 struct bonding *bond = bond_dev->priv; 4274 struct sockaddr *sa = addr, tmp_sa; 4275 struct slave *slave, *stop_at; 4276 int res = 0; 4277 int i; 4278 4279 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 4280 4281 if (!is_valid_ether_addr(sa->sa_data)) { 4282 return -EADDRNOTAVAIL; 4283 } 4284 4285 /* Can't hold bond->lock with bh disabled here since 4286 * some base drivers panic. On the other hand we can't 4287 * hold bond->lock without bh disabled because we'll 4288 * deadlock. 
The only solution is to rely on the fact 4289 * that we're under rtnl_lock here, and the slaves 4290 * list won't change. This doesn't solve the problem 4291 * of setting the slave's hw address while it is 4292 * transmitting, but the assumption is that the base 4293 * driver can handle that. 4294 * 4295 * TODO: figure out a way to safely iterate the slaves 4296 * list, but without holding a lock around the actual 4297 * call to the base driver. 4298 */ 4299 4300 bond_for_each_slave(bond, slave, i) { 4301 dprintk("slave %p %s\n", slave, slave->dev->name); 4302 4303 if (slave->dev->set_mac_address == NULL) { 4304 res = -EOPNOTSUPP; 4305 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 4306 goto unwind; 4307 } 4308 4309 res = dev_set_mac_address(slave->dev, addr); 4310 if (res) { 4311 /* TODO: consider downing the slave 4312 * and retry ? 4313 * User should expect communications 4314 * breakage anyway until ARP finish 4315 * updating, so... 4316 */ 4317 dprintk("err %d %s\n", res, slave->dev->name); 4318 goto unwind; 4319 } 4320 } 4321 4322 /* success */ 4323 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 4324 return 0; 4325 4326 unwind: 4327 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 4328 tmp_sa.sa_family = bond_dev->type; 4329 4330 /* unwind from head to the slave that failed */ 4331 stop_at = slave; 4332 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4333 int tmp_res; 4334 4335 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 4336 if (tmp_res) { 4337 dprintk("unwind err %d dev %s\n", tmp_res, 4338 slave->dev->name); 4339 } 4340 } 4341 4342 return res; 4343 } 4344 4345 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 4346 { 4347 struct bonding *bond = bond_dev->priv; 4348 struct slave *slave, *start_at; 4349 int i; 4350 int res = 1; 4351 4352 read_lock(&bond->lock); 4353 4354 if (!BOND_IS_OK(bond)) { 4355 goto out; 4356 } 4357 4358 read_lock(&bond->curr_slave_lock); 4359 
slave = start_at = bond->curr_active_slave; 4360 read_unlock(&bond->curr_slave_lock); 4361 4362 if (!slave) { 4363 goto out; 4364 } 4365 4366 bond_for_each_slave_from(bond, slave, i, start_at) { 4367 if (IS_UP(slave->dev) && 4368 (slave->link == BOND_LINK_UP) && 4369 (slave->state == BOND_STATE_ACTIVE)) { 4370 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4371 4372 write_lock(&bond->curr_slave_lock); 4373 bond->curr_active_slave = slave->next; 4374 write_unlock(&bond->curr_slave_lock); 4375 4376 break; 4377 } 4378 } 4379 4380 4381 out: 4382 if (res) { 4383 /* no suitable interface, frame not sent */ 4384 dev_kfree_skb(skb); 4385 } 4386 read_unlock(&bond->lock); 4387 return 0; 4388 } 4389 4390 /* 4391 * in active-backup mode, we know that bond->curr_active_slave is always valid if 4392 * the bond has a usable interface. 4393 */ 4394 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 4395 { 4396 struct bonding *bond = bond_dev->priv; 4397 int res = 1; 4398 4399 read_lock(&bond->lock); 4400 read_lock(&bond->curr_slave_lock); 4401 4402 if (!BOND_IS_OK(bond)) { 4403 goto out; 4404 } 4405 4406 if (bond->curr_active_slave) { /* one usable interface */ 4407 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 4408 } 4409 4410 out: 4411 if (res) { 4412 /* no suitable interface, frame not sent */ 4413 dev_kfree_skb(skb); 4414 } 4415 read_unlock(&bond->curr_slave_lock); 4416 read_unlock(&bond->lock); 4417 return 0; 4418 } 4419 4420 /* 4421 * In bond_xmit_xor() , we determine the output device by using a pre- 4422 * determined xmit_hash_policy(), If the selected device is not enabled, 4423 * find the next active slave. 
4424 */ 4425 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 4426 { 4427 struct bonding *bond = bond_dev->priv; 4428 struct slave *slave, *start_at; 4429 int slave_no; 4430 int i; 4431 int res = 1; 4432 4433 read_lock(&bond->lock); 4434 4435 if (!BOND_IS_OK(bond)) { 4436 goto out; 4437 } 4438 4439 slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); 4440 4441 bond_for_each_slave(bond, slave, i) { 4442 slave_no--; 4443 if (slave_no < 0) { 4444 break; 4445 } 4446 } 4447 4448 start_at = slave; 4449 4450 bond_for_each_slave_from(bond, slave, i, start_at) { 4451 if (IS_UP(slave->dev) && 4452 (slave->link == BOND_LINK_UP) && 4453 (slave->state == BOND_STATE_ACTIVE)) { 4454 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4455 break; 4456 } 4457 } 4458 4459 out: 4460 if (res) { 4461 /* no suitable interface, frame not sent */ 4462 dev_kfree_skb(skb); 4463 } 4464 read_unlock(&bond->lock); 4465 return 0; 4466 } 4467 4468 /* 4469 * in broadcast mode, we send everything to all usable interfaces. 
4470 */ 4471 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) 4472 { 4473 struct bonding *bond = bond_dev->priv; 4474 struct slave *slave, *start_at; 4475 struct net_device *tx_dev = NULL; 4476 int i; 4477 int res = 1; 4478 4479 read_lock(&bond->lock); 4480 4481 if (!BOND_IS_OK(bond)) { 4482 goto out; 4483 } 4484 4485 read_lock(&bond->curr_slave_lock); 4486 start_at = bond->curr_active_slave; 4487 read_unlock(&bond->curr_slave_lock); 4488 4489 if (!start_at) { 4490 goto out; 4491 } 4492 4493 bond_for_each_slave_from(bond, slave, i, start_at) { 4494 if (IS_UP(slave->dev) && 4495 (slave->link == BOND_LINK_UP) && 4496 (slave->state == BOND_STATE_ACTIVE)) { 4497 if (tx_dev) { 4498 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 4499 if (!skb2) { 4500 printk(KERN_ERR DRV_NAME 4501 ": Error: bond_xmit_broadcast(): " 4502 "skb_clone() failed\n"); 4503 continue; 4504 } 4505 4506 res = bond_dev_queue_xmit(bond, skb2, tx_dev); 4507 if (res) { 4508 dev_kfree_skb(skb2); 4509 continue; 4510 } 4511 } 4512 tx_dev = slave->dev; 4513 } 4514 } 4515 4516 if (tx_dev) { 4517 res = bond_dev_queue_xmit(bond, skb, tx_dev); 4518 } 4519 4520 out: 4521 if (res) { 4522 /* no suitable interface, frame not sent */ 4523 dev_kfree_skb(skb); 4524 } 4525 /* frame sent to all suitable interfaces */ 4526 read_unlock(&bond->lock); 4527 return 0; 4528 } 4529 4530 /*------------------------- Device initialization ---------------------------*/ 4531 4532 /* 4533 * set bond mode specific net device operations 4534 */ 4535 static inline void bond_set_mode_ops(struct bonding *bond, int mode) 4536 { 4537 struct net_device *bond_dev = bond->dev; 4538 4539 switch (mode) { 4540 case BOND_MODE_ROUNDROBIN: 4541 bond_dev->hard_start_xmit = bond_xmit_roundrobin; 4542 break; 4543 case BOND_MODE_ACTIVEBACKUP: 4544 bond_dev->hard_start_xmit = bond_xmit_activebackup; 4545 break; 4546 case BOND_MODE_XOR: 4547 bond_dev->hard_start_xmit = bond_xmit_xor; 4548 if (bond->params.xmit_policy 
== BOND_XMIT_POLICY_LAYER34) 4549 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4550 else 4551 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4552 break; 4553 case BOND_MODE_BROADCAST: 4554 bond_dev->hard_start_xmit = bond_xmit_broadcast; 4555 break; 4556 case BOND_MODE_8023AD: 4557 bond_dev->hard_start_xmit = bond_3ad_xmit_xor; 4558 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) 4559 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4560 else 4561 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4562 break; 4563 case BOND_MODE_TLB: 4564 case BOND_MODE_ALB: 4565 bond_dev->hard_start_xmit = bond_alb_xmit; 4566 bond_dev->set_mac_address = bond_alb_set_mac_address; 4567 break; 4568 default: 4569 /* Should never happen, mode already checked */ 4570 printk(KERN_ERR DRV_NAME 4571 ": Error: Unknown bonding mode %d\n", 4572 mode); 4573 break; 4574 } 4575 } 4576 4577 static struct ethtool_ops bond_ethtool_ops = { 4578 .get_tx_csum = ethtool_op_get_tx_csum, 4579 .get_sg = ethtool_op_get_sg, 4580 }; 4581 4582 /* 4583 * Does not allocate but creates a /proc entry. 4584 * Allowed to fail. 
4585 */ 4586 static int __init bond_init(struct net_device *bond_dev, struct bond_params *params) 4587 { 4588 struct bonding *bond = bond_dev->priv; 4589 4590 dprintk("Begin bond_init for %s\n", bond_dev->name); 4591 4592 /* initialize rwlocks */ 4593 rwlock_init(&bond->lock); 4594 rwlock_init(&bond->curr_slave_lock); 4595 4596 bond->params = *params; /* copy params struct */ 4597 4598 /* Initialize pointers */ 4599 bond->first_slave = NULL; 4600 bond->curr_active_slave = NULL; 4601 bond->current_arp_slave = NULL; 4602 bond->primary_slave = NULL; 4603 bond->dev = bond_dev; 4604 INIT_LIST_HEAD(&bond->vlan_list); 4605 4606 /* Initialize the device entry points */ 4607 bond_dev->open = bond_open; 4608 bond_dev->stop = bond_close; 4609 bond_dev->get_stats = bond_get_stats; 4610 bond_dev->do_ioctl = bond_do_ioctl; 4611 bond_dev->ethtool_ops = &bond_ethtool_ops; 4612 bond_dev->set_multicast_list = bond_set_multicast_list; 4613 bond_dev->change_mtu = bond_change_mtu; 4614 bond_dev->set_mac_address = bond_set_mac_address; 4615 4616 bond_set_mode_ops(bond, bond->params.mode); 4617 4618 bond_dev->destructor = free_netdev; 4619 4620 /* Initialize the device options */ 4621 bond_dev->tx_queue_len = 0; 4622 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; 4623 4624 /* At first, we block adding VLANs. That's the only way to 4625 * prevent problems that occur when adding VLANs over an 4626 * empty bond. The block will be removed once non-challenged 4627 * slaves are enslaved. 4628 */ 4629 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 4630 4631 /* don't acquire bond device's xmit_lock when 4632 * transmitting */ 4633 bond_dev->features |= NETIF_F_LLTX; 4634 4635 /* By default, we declare the bond to be fully 4636 * VLAN hardware accelerated capable. 
Special 4637 * care is taken in the various xmit functions 4638 * when there are slaves that are not hw accel 4639 * capable 4640 */ 4641 bond_dev->vlan_rx_register = bond_vlan_rx_register; 4642 bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid; 4643 bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid; 4644 bond_dev->features |= (NETIF_F_HW_VLAN_TX | 4645 NETIF_F_HW_VLAN_RX | 4646 NETIF_F_HW_VLAN_FILTER); 4647 4648 bond->bond_features = bond_dev->features; 4649 4650 #ifdef CONFIG_PROC_FS 4651 bond_create_proc_entry(bond); 4652 #endif 4653 4654 list_add_tail(&bond->bond_list, &bond_dev_list); 4655 4656 return 0; 4657 } 4658 4659 /* De-initialize device specific data. 4660 * Caller must hold rtnl_lock. 4661 */ 4662 static inline void bond_deinit(struct net_device *bond_dev) 4663 { 4664 struct bonding *bond = bond_dev->priv; 4665 4666 list_del(&bond->bond_list); 4667 4668 #ifdef CONFIG_PROC_FS 4669 bond_remove_proc_entry(bond); 4670 #endif 4671 } 4672 4673 /* Unregister and free all bond devices. 4674 * Caller must hold rtnl_lock. 4675 */ 4676 static void bond_free_all(void) 4677 { 4678 struct bonding *bond, *nxt; 4679 4680 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4681 struct net_device *bond_dev = bond->dev; 4682 4683 unregister_netdevice(bond_dev); 4684 bond_deinit(bond_dev); 4685 } 4686 4687 #ifdef CONFIG_PROC_FS 4688 bond_destroy_proc_dir(); 4689 #endif 4690 } 4691 4692 /*------------------------- Module initialization ---------------------------*/ 4693 4694 /* 4695 * Convert string input module parms. Accept either the 4696 * number of the mode or its string name. 
4697 */ 4698 static inline int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) 4699 { 4700 int i; 4701 4702 for (i = 0; tbl[i].modename; i++) { 4703 if ((isdigit(*mode_arg) && 4704 tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) || 4705 (strncmp(mode_arg, tbl[i].modename, 4706 strlen(tbl[i].modename)) == 0)) { 4707 return tbl[i].mode; 4708 } 4709 } 4710 4711 return -1; 4712 } 4713 4714 static int bond_check_params(struct bond_params *params) 4715 { 4716 /* 4717 * Convert string parameters. 4718 */ 4719 if (mode) { 4720 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 4721 if (bond_mode == -1) { 4722 printk(KERN_ERR DRV_NAME 4723 ": Error: Invalid bonding mode \"%s\"\n", 4724 mode == NULL ? "NULL" : mode); 4725 return -EINVAL; 4726 } 4727 } 4728 4729 if (xmit_hash_policy) { 4730 if ((bond_mode != BOND_MODE_XOR) && 4731 (bond_mode != BOND_MODE_8023AD)) { 4732 printk(KERN_INFO DRV_NAME 4733 ": xor_mode param is irrelevant in mode %s\n", 4734 bond_mode_name(bond_mode)); 4735 } else { 4736 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 4737 xmit_hashtype_tbl); 4738 if (xmit_hashtype == -1) { 4739 printk(KERN_ERR DRV_NAME 4740 ": Error: Invalid xmit_hash_policy \"%s\"\n", 4741 xmit_hash_policy == NULL ? "NULL" : 4742 xmit_hash_policy); 4743 return -EINVAL; 4744 } 4745 } 4746 } 4747 4748 if (lacp_rate) { 4749 if (bond_mode != BOND_MODE_8023AD) { 4750 printk(KERN_INFO DRV_NAME 4751 ": lacp_rate param is irrelevant in mode %s\n", 4752 bond_mode_name(bond_mode)); 4753 } else { 4754 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4755 if (lacp_fast == -1) { 4756 printk(KERN_ERR DRV_NAME 4757 ": Error: Invalid lacp rate \"%s\"\n", 4758 lacp_rate == NULL ? 
"NULL" : lacp_rate); 4759 return -EINVAL; 4760 } 4761 } 4762 } 4763 4764 if (max_bonds < 1 || max_bonds > INT_MAX) { 4765 printk(KERN_WARNING DRV_NAME 4766 ": Warning: max_bonds (%d) not in range %d-%d, so it " 4767 "was reset to BOND_DEFAULT_MAX_BONDS (%d)", 4768 max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4769 max_bonds = BOND_DEFAULT_MAX_BONDS; 4770 } 4771 4772 if (miimon < 0) { 4773 printk(KERN_WARNING DRV_NAME 4774 ": Warning: miimon module parameter (%d), " 4775 "not in range 0-%d, so it was reset to %d\n", 4776 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4777 miimon = BOND_LINK_MON_INTERV; 4778 } 4779 4780 if (updelay < 0) { 4781 printk(KERN_WARNING DRV_NAME 4782 ": Warning: updelay module parameter (%d), " 4783 "not in range 0-%d, so it was reset to 0\n", 4784 updelay, INT_MAX); 4785 updelay = 0; 4786 } 4787 4788 if (downdelay < 0) { 4789 printk(KERN_WARNING DRV_NAME 4790 ": Warning: downdelay module parameter (%d), " 4791 "not in range 0-%d, so it was reset to 0\n", 4792 downdelay, INT_MAX); 4793 downdelay = 0; 4794 } 4795 4796 if ((use_carrier != 0) && (use_carrier != 1)) { 4797 printk(KERN_WARNING DRV_NAME 4798 ": Warning: use_carrier module parameter (%d), " 4799 "not of valid value (0/1), so it was set to 1\n", 4800 use_carrier); 4801 use_carrier = 1; 4802 } 4803 4804 /* reset values for 802.3ad */ 4805 if (bond_mode == BOND_MODE_8023AD) { 4806 if (!miimon) { 4807 printk(KERN_WARNING DRV_NAME 4808 ": Warning: miimon must be specified, " 4809 "otherwise bonding will not detect link " 4810 "failure, speed and duplex which are " 4811 "essential for 802.3ad operation\n"); 4812 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4813 miimon = 100; 4814 } 4815 } 4816 4817 /* reset values for TLB/ALB */ 4818 if ((bond_mode == BOND_MODE_TLB) || 4819 (bond_mode == BOND_MODE_ALB)) { 4820 if (!miimon) { 4821 printk(KERN_WARNING DRV_NAME 4822 ": Warning: miimon must be specified, " 4823 "otherwise bonding will not detect link " 4824 "failure and link speed which 
are essential " 4825 "for TLB/ALB load balancing\n"); 4826 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4827 miimon = 100; 4828 } 4829 } 4830 4831 if (bond_mode == BOND_MODE_ALB) { 4832 printk(KERN_NOTICE DRV_NAME 4833 ": In ALB mode you might experience client " 4834 "disconnections upon reconnection of a link if the " 4835 "bonding module updelay parameter (%d msec) is " 4836 "incompatible with the forwarding delay time of the " 4837 "switch\n", 4838 updelay); 4839 } 4840 4841 if (!miimon) { 4842 if (updelay || downdelay) { 4843 /* just warn the user the up/down delay will have 4844 * no effect since miimon is zero... 4845 */ 4846 printk(KERN_WARNING DRV_NAME 4847 ": Warning: miimon module parameter not set " 4848 "and updelay (%d) or downdelay (%d) module " 4849 "parameter is set; updelay and downdelay have " 4850 "no effect unless miimon is set\n", 4851 updelay, downdelay); 4852 } 4853 } else { 4854 /* don't allow arp monitoring */ 4855 if (arp_interval) { 4856 printk(KERN_WARNING DRV_NAME 4857 ": Warning: miimon (%d) and arp_interval (%d) " 4858 "can't be used simultaneously, disabling ARP " 4859 "monitoring\n", 4860 miimon, arp_interval); 4861 arp_interval = 0; 4862 } 4863 4864 if ((updelay % miimon) != 0) { 4865 printk(KERN_WARNING DRV_NAME 4866 ": Warning: updelay (%d) is not a multiple " 4867 "of miimon (%d), updelay rounded to %d ms\n", 4868 updelay, miimon, (updelay / miimon) * miimon); 4869 } 4870 4871 updelay /= miimon; 4872 4873 if ((downdelay % miimon) != 0) { 4874 printk(KERN_WARNING DRV_NAME 4875 ": Warning: downdelay (%d) is not a multiple " 4876 "of miimon (%d), downdelay rounded to %d ms\n", 4877 downdelay, miimon, 4878 (downdelay / miimon) * miimon); 4879 } 4880 4881 downdelay /= miimon; 4882 } 4883 4884 if (arp_interval < 0) { 4885 printk(KERN_WARNING DRV_NAME 4886 ": Warning: arp_interval module parameter (%d) " 4887 ", not in range 0-%d, so it was reset to %d\n", 4888 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4889 arp_interval = 
BOND_LINK_ARP_INTERV; 4890 } 4891 4892 for (arp_ip_count = 0; 4893 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4894 arp_ip_count++) { 4895 /* not complete check, but should be good enough to 4896 catch mistakes */ 4897 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4898 printk(KERN_WARNING DRV_NAME 4899 ": Warning: bad arp_ip_target module parameter " 4900 "(%s), ARP monitoring will not be performed\n", 4901 arp_ip_target[arp_ip_count]); 4902 arp_interval = 0; 4903 } else { 4904 u32 ip = in_aton(arp_ip_target[arp_ip_count]); 4905 arp_target[arp_ip_count] = ip; 4906 } 4907 } 4908 4909 if (arp_interval && !arp_ip_count) { 4910 /* don't allow arping if no arp_ip_target given... */ 4911 printk(KERN_WARNING DRV_NAME 4912 ": Warning: arp_interval module parameter (%d) " 4913 "specified without providing an arp_ip_target " 4914 "parameter, arp_interval was reset to 0\n", 4915 arp_interval); 4916 arp_interval = 0; 4917 } 4918 4919 if (miimon) { 4920 printk(KERN_INFO DRV_NAME 4921 ": MII link monitoring set to %d ms\n", 4922 miimon); 4923 } else if (arp_interval) { 4924 int i; 4925 4926 printk(KERN_INFO DRV_NAME 4927 ": ARP monitoring set to %d ms with %d target(s):", 4928 arp_interval, arp_ip_count); 4929 4930 for (i = 0; i < arp_ip_count; i++) 4931 printk (" %s", arp_ip_target[i]); 4932 4933 printk("\n"); 4934 4935 } else { 4936 /* miimon and arp_interval not set, we need one so things 4937 * work as expected, see bonding.txt for details 4938 */ 4939 printk(KERN_WARNING DRV_NAME 4940 ": Warning: either miimon or arp_interval and " 4941 "arp_ip_target module parameters must be specified, " 4942 "otherwise bonding will not detect link failures! 
see " 4943 "bonding.txt for details.\n"); 4944 } 4945 4946 if (primary && !USES_PRIMARY(bond_mode)) { 4947 /* currently, using a primary only makes sense 4948 * in active backup, TLB or ALB modes 4949 */ 4950 printk(KERN_WARNING DRV_NAME 4951 ": Warning: %s primary device specified but has no " 4952 "effect in %s mode\n", 4953 primary, bond_mode_name(bond_mode)); 4954 primary = NULL; 4955 } 4956 4957 /* fill params struct with the proper values */ 4958 params->mode = bond_mode; 4959 params->xmit_policy = xmit_hashtype; 4960 params->miimon = miimon; 4961 params->arp_interval = arp_interval; 4962 params->updelay = updelay; 4963 params->downdelay = downdelay; 4964 params->use_carrier = use_carrier; 4965 params->lacp_fast = lacp_fast; 4966 params->primary[0] = 0; 4967 4968 if (primary) { 4969 strncpy(params->primary, primary, IFNAMSIZ); 4970 params->primary[IFNAMSIZ - 1] = 0; 4971 } 4972 4973 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4974 4975 return 0; 4976 } 4977 4978 static int __init bonding_init(void) 4979 { 4980 struct bond_params params; 4981 int i; 4982 int res; 4983 4984 printk(KERN_INFO "%s", version); 4985 4986 res = bond_check_params(¶ms); 4987 if (res) { 4988 return res; 4989 } 4990 4991 rtnl_lock(); 4992 4993 #ifdef CONFIG_PROC_FS 4994 bond_create_proc_dir(); 4995 #endif 4996 4997 for (i = 0; i < max_bonds; i++) { 4998 struct net_device *bond_dev; 4999 5000 bond_dev = alloc_netdev(sizeof(struct bonding), "", ether_setup); 5001 if (!bond_dev) { 5002 res = -ENOMEM; 5003 goto out_err; 5004 } 5005 5006 res = dev_alloc_name(bond_dev, "bond%d"); 5007 if (res < 0) { 5008 free_netdev(bond_dev); 5009 goto out_err; 5010 } 5011 5012 /* bond_init() must be called after dev_alloc_name() (for the 5013 * /proc files), but before register_netdevice(), because we 5014 * need to set function pointers. 
5015 */ 5016 res = bond_init(bond_dev, ¶ms); 5017 if (res < 0) { 5018 free_netdev(bond_dev); 5019 goto out_err; 5020 } 5021 5022 SET_MODULE_OWNER(bond_dev); 5023 5024 res = register_netdevice(bond_dev); 5025 if (res < 0) { 5026 bond_deinit(bond_dev); 5027 free_netdev(bond_dev); 5028 goto out_err; 5029 } 5030 } 5031 5032 rtnl_unlock(); 5033 register_netdevice_notifier(&bond_netdev_notifier); 5034 register_inetaddr_notifier(&bond_inetaddr_notifier); 5035 5036 return 0; 5037 5038 out_err: 5039 /* free and unregister all bonds that were successfully added */ 5040 bond_free_all(); 5041 5042 rtnl_unlock(); 5043 5044 return res; 5045 } 5046 5047 static void __exit bonding_exit(void) 5048 { 5049 unregister_netdevice_notifier(&bond_netdev_notifier); 5050 unregister_inetaddr_notifier(&bond_inetaddr_notifier); 5051 5052 rtnl_lock(); 5053 bond_free_all(); 5054 rtnl_unlock(); 5055 } 5056 5057 module_init(bonding_init); 5058 module_exit(bonding_exit); 5059 MODULE_LICENSE("GPL"); 5060 MODULE_VERSION(DRV_VERSION); 5061 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 5062 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 5063 MODULE_SUPPORTED_DEVICE("most ethernet devices"); 5064 5065 /* 5066 * Local variables: 5067 * c-indent-level: 8 5068 * c-basic-offset: 8 5069 * tab-width: 8 5070 * End: 5071 */ 5072 5073