1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 31 * 32 */ 33 34 //#define BONDING_DEBUG 1 35 36 #include <linux/config.h> 37 #include <linux/kernel.h> 38 #include <linux/module.h> 39 #include <linux/sched.h> 40 #include <linux/types.h> 41 #include <linux/fcntl.h> 42 #include <linux/interrupt.h> 43 #include <linux/ptrace.h> 44 #include <linux/ioport.h> 45 #include <linux/in.h> 46 #include <net/ip.h> 47 #include <linux/ip.h> 48 #include <linux/tcp.h> 49 #include <linux/udp.h> 50 #include <linux/slab.h> 51 #include <linux/string.h> 52 #include <linux/init.h> 53 #include <linux/timer.h> 54 #include <linux/socket.h> 55 #include <linux/ctype.h> 56 #include <linux/inet.h> 57 #include <linux/bitops.h> 58 #include <asm/system.h> 59 #include <asm/io.h> 60 #include <asm/dma.h> 61 #include <asm/uaccess.h> 62 #include <linux/errno.h> 63 #include <linux/netdevice.h> 64 #include <linux/inetdevice.h> 65 #include <linux/etherdevice.h> 66 #include <linux/skbuff.h> 67 #include <net/sock.h> 68 #include <linux/rtnetlink.h> 69 #include <linux/proc_fs.h> 70 #include <linux/seq_file.h> 71 #include <linux/smp.h> 72 #include <linux/if_ether.h> 73 #include <net/arp.h> 74 #include <linux/mii.h> 75 #include <linux/ethtool.h> 76 #include <linux/if_vlan.h> 77 #include <linux/if_bonding.h> 78 #include <net/route.h> 79 #include "bonding.h" 80 #include "bond_3ad.h" 81 #include "bond_alb.h" 82 83 /*---------------------------- Module parameters ----------------------------*/ 84 85 /* monitor all links that often (in milliseconds). <=0 disables monitoring */ 86 #define BOND_LINK_MON_INTERV 0 87 #define BOND_LINK_ARP_INTERV 0 88 89 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 90 static int miimon = BOND_LINK_MON_INTERV; 91 static int updelay = 0; 92 static int downdelay = 0; 93 static int use_carrier = 1; 94 static char *mode = NULL; 95 static char *primary = NULL; 96 static char *lacp_rate = NULL; 97 static char *xmit_hash_policy = NULL; 98 static int arp_interval = BOND_LINK_ARP_INTERV; 99 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 100 struct bond_params bonding_defaults; 101 102 module_param(max_bonds, int, 0); 103 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 104 module_param(miimon, int, 0); 105 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 106 module_param(updelay, int, 0); 107 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 108 module_param(downdelay, int, 0); 109 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 110 "in milliseconds"); 111 module_param(use_carrier, int, 0); 112 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 113 "0 for off, 1 for on (default)"); 114 module_param(mode, charp, 0); 115 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " 116 "1 for active-backup, 2 for balance-xor, " 117 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 118 "6 for balance-alb"); 119 module_param(primary, charp, 0); 120 MODULE_PARM_DESC(primary, "Primary network device to use"); 121 module_param(lacp_rate, charp, 0); 122 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " 123 "(slow/fast)"); 124 module_param(xmit_hash_policy, charp, 0); 125 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" 126 ", 1 for layer 3+4"); 127 module_param(arp_interval, int, 0); 128 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 129 module_param_array(arp_ip_target, charp, NULL, 0); 130 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 131 132 /*----------------------------- Global variables ----------------------------*/ 133 134 static const char * const version = 135 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 136 137 LIST_HEAD(bond_dev_list); 138 139 #ifdef CONFIG_PROC_FS 140 static struct proc_dir_entry *bond_proc_dir = NULL; 141 #endif 142 143 extern struct rw_semaphore bonding_rwsem; 144 static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; 145 static int arp_ip_count = 0; 146 static int bond_mode = BOND_MODE_ROUNDROBIN; 147 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2; 148 static int lacp_fast = 0; 149 150 151 struct bond_parm_tbl bond_lacp_tbl[] = { 152 { "slow", AD_LACP_SLOW}, 153 { "fast", AD_LACP_FAST}, 154 { NULL, -1}, 155 }; 156 157 struct bond_parm_tbl bond_mode_tbl[] = { 158 { "balance-rr", BOND_MODE_ROUNDROBIN}, 159 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 160 { "balance-xor", BOND_MODE_XOR}, 161 { "broadcast", BOND_MODE_BROADCAST}, 162 { "802.3ad", BOND_MODE_8023AD}, 163 { "balance-tlb", BOND_MODE_TLB}, 164 { "balance-alb", BOND_MODE_ALB}, 165 { NULL, -1}, 166 }; 167 168 struct bond_parm_tbl xmit_hashtype_tbl[] = { 169 { "layer2", BOND_XMIT_POLICY_LAYER2}, 170 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 171 { NULL, -1}, 172 }; 173 174 /*-------------------------- Forward declarations ---------------------------*/ 175 176 static void bond_send_gratuitous_arp(struct bonding *bond); 177 178 /*---------------------------- General routines -----------------------------*/ 179 180 const char *bond_mode_name(int mode) 181 { 182 switch (mode) { 183 case BOND_MODE_ROUNDROBIN : 184 return "load balancing (round-robin)"; 185 case BOND_MODE_ACTIVEBACKUP : 186 return "fault-tolerance (active-backup)"; 187 case BOND_MODE_XOR : 188 return "load balancing (xor)"; 189 case BOND_MODE_BROADCAST : 190 return "fault-tolerance (broadcast)"; 191 case BOND_MODE_8023AD: 192 return "IEEE 802.3ad Dynamic link aggregation"; 193 case BOND_MODE_TLB: 194 return "transmit load balancing"; 195 case BOND_MODE_ALB: 196 return "adaptive load balancing"; 197 default: 198 return "unknown"; 199 } 200 } 201 202 /*---------------------------------- VLAN -----------------------------------*/ 203 204 /** 205 * bond_add_vlan - add a new vlan id on bond 206 * @bond: bond that got the notification 207 * @vlan_id: the vlan id to add 208 * 209 * Returns -ENOMEM if allocation failed. 210 */ 211 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 212 { 213 struct vlan_entry *vlan; 214 215 dprintk("bond: %s, vlan id %d\n", 216 (bond ? bond->dev->name: "None"), vlan_id); 217 218 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 219 if (!vlan) { 220 return -ENOMEM; 221 } 222 223 INIT_LIST_HEAD(&vlan->vlan_list); 224 vlan->vlan_id = vlan_id; 225 vlan->vlan_ip = 0; 226 227 write_lock_bh(&bond->lock); 228 229 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 230 231 write_unlock_bh(&bond->lock); 232 233 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 234 235 return 0; 236 } 237 238 /** 239 * bond_del_vlan - delete a vlan id from bond 240 * @bond: bond that got the notification 241 * @vlan_id: the vlan id to delete 242 * 243 * returns -ENODEV if @vlan_id was not found in @bond. 244 */ 245 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 246 { 247 struct vlan_entry *vlan, *next; 248 int res = -ENODEV; 249 250 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 251 252 write_lock_bh(&bond->lock); 253 254 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 255 if (vlan->vlan_id == vlan_id) { 256 list_del(&vlan->vlan_list); 257 258 if ((bond->params.mode == BOND_MODE_TLB) || 259 (bond->params.mode == BOND_MODE_ALB)) { 260 bond_alb_clear_vlan(bond, vlan_id); 261 } 262 263 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 264 bond->dev->name); 265 266 kfree(vlan); 267 268 if (list_empty(&bond->vlan_list) && 269 (bond->slave_cnt == 0)) { 270 /* Last VLAN removed and no slaves, so 271 * restore block on adding VLANs. This will 272 * be removed once new slaves that are not 273 * VLAN challenged will be added. 274 */ 275 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 276 } 277 278 res = 0; 279 goto out; 280 } 281 } 282 283 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 284 bond->dev->name); 285 286 out: 287 write_unlock_bh(&bond->lock); 288 return res; 289 } 290 291 /** 292 * bond_has_challenged_slaves 293 * @bond: the bond we're working on 294 * 295 * Searches the slave list. Returns 1 if a vlan challenged slave 296 * was found, 0 otherwise. 297 * 298 * Assumes bond->lock is held. 299 */ 300 static int bond_has_challenged_slaves(struct bonding *bond) 301 { 302 struct slave *slave; 303 int i; 304 305 bond_for_each_slave(bond, slave, i) { 306 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 307 dprintk("found VLAN challenged slave - %s\n", 308 slave->dev->name); 309 return 1; 310 } 311 } 312 313 dprintk("no VLAN challenged slaves found\n"); 314 return 0; 315 } 316 317 /** 318 * bond_next_vlan - safely skip to the next item in the vlans list. 319 * @bond: the bond we're working on 320 * @curr: item we're advancing from 321 * 322 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 323 * or @curr->next otherwise (even if it is @curr itself again). 324 * 325 * Caller must hold bond->lock 326 */ 327 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 328 { 329 struct vlan_entry *next, *last; 330 331 if (list_empty(&bond->vlan_list)) { 332 return NULL; 333 } 334 335 if (!curr) { 336 next = list_entry(bond->vlan_list.next, 337 struct vlan_entry, vlan_list); 338 } else { 339 last = list_entry(bond->vlan_list.prev, 340 struct vlan_entry, vlan_list); 341 if (last == curr) { 342 next = list_entry(bond->vlan_list.next, 343 struct vlan_entry, vlan_list); 344 } else { 345 next = list_entry(curr->vlan_list.next, 346 struct vlan_entry, vlan_list); 347 } 348 } 349 350 return next; 351 } 352 353 /** 354 * bond_dev_queue_xmit - Prepare skb for xmit. 355 * 356 * @bond: bond device that got this skb for tx. 357 * @skb: hw accel VLAN tagged skb to transmit 358 * @slave_dev: slave that is supposed to xmit this skbuff 359 * 360 * When the bond gets an skb to transmit that is 361 * already hardware accelerated VLAN tagged, and it 362 * needs to relay this skb to a slave that is not 363 * hw accel capable, the skb needs to be "unaccelerated", 364 * i.e. strip the hwaccel tag and re-insert it as part 365 * of the payload. 366 */ 367 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) 368 { 369 unsigned short vlan_id; 370 371 if (!list_empty(&bond->vlan_list) && 372 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 373 vlan_get_tag(skb, &vlan_id) == 0) { 374 skb->dev = slave_dev; 375 skb = vlan_put_tag(skb, vlan_id); 376 if (!skb) { 377 /* vlan_put_tag() frees the skb in case of error, 378 * so return success here so the calling functions 379 * won't attempt to free is again. 380 */ 381 return 0; 382 } 383 } else { 384 skb->dev = slave_dev; 385 } 386 387 skb->priority = 1; 388 dev_queue_xmit(skb); 389 390 return 0; 391 } 392 393 /* 394 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 395 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 396 * lock because: 397 * a. This operation is performed in IOCTL context, 398 * b. The operation is protected by the RTNL semaphore in the 8021q code, 399 * c. Holding a lock with BH disabled while directly calling a base driver 400 * entry point is generally a BAD idea. 401 * 402 * The design of synchronization/protection for this operation in the 8021q 403 * module is good for one or more VLAN devices over a single physical device 404 * and cannot be extended for a teaming solution like bonding, so there is a 405 * potential race condition here where a net device from the vlan group might 406 * be referenced (either by a base driver or the 8021q code) while it is being 407 * removed from the system. However, it turns out we're not making matters 408 * worse, and if it works for regular VLAN usage it will work here too. 409 */ 410 411 /** 412 * bond_vlan_rx_register - Propagates registration to slaves 413 * @bond_dev: bonding net device that got called 414 * @grp: vlan group being registered 415 */ 416 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 417 { 418 struct bonding *bond = bond_dev->priv; 419 struct slave *slave; 420 int i; 421 422 bond->vlgrp = grp; 423 424 bond_for_each_slave(bond, slave, i) { 425 struct net_device *slave_dev = slave->dev; 426 427 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 428 slave_dev->vlan_rx_register) { 429 slave_dev->vlan_rx_register(slave_dev, grp); 430 } 431 } 432 } 433 434 /** 435 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 436 * @bond_dev: bonding net device that got called 437 * @vid: vlan id being added 438 */ 439 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 440 { 441 struct bonding *bond = bond_dev->priv; 442 struct slave *slave; 443 int i, res; 444 445 bond_for_each_slave(bond, slave, i) { 446 struct net_device *slave_dev = slave->dev; 447 448 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 449 slave_dev->vlan_rx_add_vid) { 450 slave_dev->vlan_rx_add_vid(slave_dev, vid); 451 } 452 } 453 454 res = bond_add_vlan(bond, vid); 455 if (res) { 456 printk(KERN_ERR DRV_NAME 457 ": %s: Error: Failed to add vlan id %d\n", 458 bond_dev->name, vid); 459 } 460 } 461 462 /** 463 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 464 * @bond_dev: bonding net device that got called 465 * @vid: vlan id being removed 466 */ 467 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 468 { 469 struct bonding *bond = bond_dev->priv; 470 struct slave *slave; 471 struct net_device *vlan_dev; 472 int i, res; 473 474 bond_for_each_slave(bond, slave, i) { 475 struct net_device *slave_dev = slave->dev; 476 477 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 478 slave_dev->vlan_rx_kill_vid) { 479 /* Save and then restore vlan_dev in the grp array, 480 * since the slave's driver might clear it. 481 */ 482 vlan_dev = bond->vlgrp->vlan_devices[vid]; 483 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 484 bond->vlgrp->vlan_devices[vid] = vlan_dev; 485 } 486 } 487 488 res = bond_del_vlan(bond, vid); 489 if (res) { 490 printk(KERN_ERR DRV_NAME 491 ": %s: Error: Failed to remove vlan id %d\n", 492 bond_dev->name, vid); 493 } 494 } 495 496 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 497 { 498 struct vlan_entry *vlan; 499 500 write_lock_bh(&bond->lock); 501 502 if (list_empty(&bond->vlan_list)) { 503 goto out; 504 } 505 506 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 507 slave_dev->vlan_rx_register) { 508 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 509 } 510 511 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 512 !(slave_dev->vlan_rx_add_vid)) { 513 goto out; 514 } 515 516 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 517 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 518 } 519 520 out: 521 write_unlock_bh(&bond->lock); 522 } 523 524 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 525 { 526 struct vlan_entry *vlan; 527 struct net_device *vlan_dev; 528 529 write_lock_bh(&bond->lock); 530 531 if (list_empty(&bond->vlan_list)) { 532 goto out; 533 } 534 535 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 536 !(slave_dev->vlan_rx_kill_vid)) { 537 goto unreg; 538 } 539 540 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 541 /* Save and then restore vlan_dev in the grp array, 542 * since the slave's driver might clear it. 543 */ 544 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 545 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 546 bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev; 547 } 548 549 unreg: 550 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 551 slave_dev->vlan_rx_register) { 552 slave_dev->vlan_rx_register(slave_dev, NULL); 553 } 554 555 out: 556 write_unlock_bh(&bond->lock); 557 } 558 559 /*------------------------------- Link status -------------------------------*/ 560 561 /* 562 * Set the carrier state for the master according to the state of its 563 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 564 * do special 802.3ad magic. 565 * 566 * Returns zero if carrier state does not change, nonzero if it does. 567 */ 568 static int bond_set_carrier(struct bonding *bond) 569 { 570 struct slave *slave; 571 int i; 572 573 if (bond->slave_cnt == 0) 574 goto down; 575 576 if (bond->params.mode == BOND_MODE_8023AD) 577 return bond_3ad_set_carrier(bond); 578 579 bond_for_each_slave(bond, slave, i) { 580 if (slave->link == BOND_LINK_UP) { 581 if (!netif_carrier_ok(bond->dev)) { 582 netif_carrier_on(bond->dev); 583 return 1; 584 } 585 return 0; 586 } 587 } 588 589 down: 590 if (netif_carrier_ok(bond->dev)) { 591 netif_carrier_off(bond->dev); 592 return 1; 593 } 594 return 0; 595 } 596 597 /* 598 * Get link speed and duplex from the slave's base driver 599 * using ethtool. If for some reason the call fails or the 600 * values are invalid, fake speed and duplex to 100/Full 601 * and return error. 602 */ 603 static int bond_update_speed_duplex(struct slave *slave) 604 { 605 struct net_device *slave_dev = slave->dev; 606 static int (* ioctl)(struct net_device *, struct ifreq *, int); 607 struct ifreq ifr; 608 struct ethtool_cmd etool; 609 610 /* Fake speed and duplex */ 611 slave->speed = SPEED_100; 612 slave->duplex = DUPLEX_FULL; 613 614 if (slave_dev->ethtool_ops) { 615 int res; 616 617 if (!slave_dev->ethtool_ops->get_settings) { 618 return -1; 619 } 620 621 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 622 if (res < 0) { 623 return -1; 624 } 625 626 goto verify; 627 } 628 629 ioctl = slave_dev->do_ioctl; 630 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 631 etool.cmd = ETHTOOL_GSET; 632 ifr.ifr_data = (char*)&etool; 633 if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) { 634 return -1; 635 } 636 637 verify: 638 switch (etool.speed) { 639 case SPEED_10: 640 case SPEED_100: 641 case SPEED_1000: 642 break; 643 default: 644 return -1; 645 } 646 647 switch (etool.duplex) { 648 case DUPLEX_FULL: 649 case DUPLEX_HALF: 650 break; 651 default: 652 return -1; 653 } 654 655 slave->speed = etool.speed; 656 slave->duplex = etool.duplex; 657 658 return 0; 659 } 660 661 /* 662 * if <dev> supports MII link status reporting, check its link status. 663 * 664 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 665 * depening upon the setting of the use_carrier parameter. 666 * 667 * Return either BMSR_LSTATUS, meaning that the link is up (or we 668 * can't tell and just pretend it is), or 0, meaning that the link is 669 * down. 670 * 671 * If reporting is non-zero, instead of faking link up, return -1 if 672 * both ETHTOOL and MII ioctls fail (meaning the device does not 673 * support them). If use_carrier is set, return whatever it says. 674 * It'd be nice if there was a good way to tell if a driver supports 675 * netif_carrier, but there really isn't. 676 */ 677 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 678 { 679 static int (* ioctl)(struct net_device *, struct ifreq *, int); 680 struct ifreq ifr; 681 struct mii_ioctl_data *mii; 682 struct ethtool_value etool; 683 684 if (bond->params.use_carrier) { 685 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; 686 } 687 688 ioctl = slave_dev->do_ioctl; 689 if (ioctl) { 690 /* TODO: set pointer to correct ioctl on a per team member */ 691 /* bases to make this more efficient. that is, once */ 692 /* we determine the correct ioctl, we will always */ 693 /* call it and not the others for that team */ 694 /* member. */ 695 696 /* 697 * We cannot assume that SIOCGMIIPHY will also read a 698 * register; not all network drivers (e.g., e100) 699 * support that. 700 */ 701 702 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 703 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 704 mii = if_mii(&ifr); 705 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 706 mii->reg_num = MII_BMSR; 707 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 708 return (mii->val_out & BMSR_LSTATUS); 709 } 710 } 711 } 712 713 /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ 714 /* for a period of time so we attempt to get link status */ 715 /* from it last if the above MII ioctls fail... */ 716 if (slave_dev->ethtool_ops) { 717 if (slave_dev->ethtool_ops->get_link) { 718 u32 link; 719 720 link = slave_dev->ethtool_ops->get_link(slave_dev); 721 722 return link ? BMSR_LSTATUS : 0; 723 } 724 } 725 726 if (ioctl) { 727 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 728 etool.cmd = ETHTOOL_GLINK; 729 ifr.ifr_data = (char*)&etool; 730 if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) { 731 if (etool.data == 1) { 732 return BMSR_LSTATUS; 733 } else { 734 dprintk("SIOCETHTOOL shows link down\n"); 735 return 0; 736 } 737 } 738 } 739 740 /* 741 * If reporting, report that either there's no dev->do_ioctl, 742 * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we 743 * cannot report link status). If not reporting, pretend 744 * we're ok. 745 */ 746 return (reporting ? -1 : BMSR_LSTATUS); 747 } 748 749 /*----------------------------- Multicast list ------------------------------*/ 750 751 /* 752 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 753 */ 754 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 755 { 756 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 757 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 758 } 759 760 /* 761 * returns dmi entry if found, NULL otherwise 762 */ 763 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 764 { 765 struct dev_mc_list *idmi; 766 767 for (idmi = mc_list; idmi; idmi = idmi->next) { 768 if (bond_is_dmi_same(dmi, idmi)) { 769 return idmi; 770 } 771 } 772 773 return NULL; 774 } 775 776 /* 777 * Push the promiscuity flag down to appropriate slaves 778 */ 779 static void bond_set_promiscuity(struct bonding *bond, int inc) 780 { 781 if (USES_PRIMARY(bond->params.mode)) { 782 /* write lock already acquired */ 783 if (bond->curr_active_slave) { 784 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 785 } 786 } else { 787 struct slave *slave; 788 int i; 789 bond_for_each_slave(bond, slave, i) { 790 dev_set_promiscuity(slave->dev, inc); 791 } 792 } 793 } 794 795 /* 796 * Push the allmulti flag down to all slaves 797 */ 798 static void bond_set_allmulti(struct bonding *bond, int inc) 799 { 800 if (USES_PRIMARY(bond->params.mode)) { 801 /* write lock already acquired */ 802 if (bond->curr_active_slave) { 803 dev_set_allmulti(bond->curr_active_slave->dev, inc); 804 } 805 } else { 806 struct slave *slave; 807 int i; 808 bond_for_each_slave(bond, slave, i) { 809 dev_set_allmulti(slave->dev, inc); 810 } 811 } 812 } 813 814 /* 815 * Add a Multicast address to slaves 816 * according to mode 817 */ 818 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 819 { 820 if (USES_PRIMARY(bond->params.mode)) { 821 /* write lock already acquired */ 822 if (bond->curr_active_slave) { 823 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 824 } 825 } else { 826 struct slave *slave; 827 int i; 828 bond_for_each_slave(bond, slave, i) { 829 dev_mc_add(slave->dev, addr, alen, 0); 830 } 831 } 832 } 833 834 /* 835 * Remove a multicast address from slave 836 * according to mode 837 */ 838 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 839 { 840 if (USES_PRIMARY(bond->params.mode)) { 841 /* write lock already acquired */ 842 if (bond->curr_active_slave) { 843 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 844 } 845 } else { 846 struct slave *slave; 847 int i; 848 bond_for_each_slave(bond, slave, i) { 849 dev_mc_delete(slave->dev, addr, alen, 0); 850 } 851 } 852 } 853 854 /* 855 * Totally destroys the mc_list in bond 856 */ 857 static void bond_mc_list_destroy(struct bonding *bond) 858 { 859 struct dev_mc_list *dmi; 860 861 dmi = bond->mc_list; 862 while (dmi) { 863 bond->mc_list = dmi->next; 864 kfree(dmi); 865 dmi = bond->mc_list; 866 } 867 } 868 869 /* 870 * Copy all the Multicast addresses from src to the bonding device dst 871 */ 872 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 873 gfp_t gfp_flag) 874 { 875 struct dev_mc_list *dmi, *new_dmi; 876 877 for (dmi = mc_list; dmi; dmi = dmi->next) { 878 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 879 880 if (!new_dmi) { 881 /* FIXME: Potential memory leak !!! */ 882 return -ENOMEM; 883 } 884 885 new_dmi->next = bond->mc_list; 886 bond->mc_list = new_dmi; 887 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 888 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 889 new_dmi->dmi_users = dmi->dmi_users; 890 new_dmi->dmi_gusers = dmi->dmi_gusers; 891 } 892 893 return 0; 894 } 895 896 /* 897 * flush all members of flush->mc_list from device dev->mc_list 898 */ 899 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 900 { 901 struct bonding *bond = bond_dev->priv; 902 struct dev_mc_list *dmi; 903 904 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 905 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 906 } 907 908 if (bond->params.mode == BOND_MODE_8023AD) { 909 /* del lacpdu mc addr from mc list */ 910 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 911 912 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 913 } 914 } 915 916 /*--------------------------- Active slave change ---------------------------*/ 917 918 /* 919 * Update the mc list and multicast-related flags for the new and 920 * old active slaves (if any) according to the multicast mode, and 921 * promiscuous flags unconditionally. 922 */ 923 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 924 { 925 struct dev_mc_list *dmi; 926 927 if (!USES_PRIMARY(bond->params.mode)) { 928 /* nothing to do - mc list is already up-to-date on 929 * all slaves 930 */ 931 return; 932 } 933 934 if (old_active) { 935 if (bond->dev->flags & IFF_PROMISC) { 936 dev_set_promiscuity(old_active->dev, -1); 937 } 938 939 if (bond->dev->flags & IFF_ALLMULTI) { 940 dev_set_allmulti(old_active->dev, -1); 941 } 942 943 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 944 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 945 } 946 } 947 948 if (new_active) { 949 if (bond->dev->flags & IFF_PROMISC) { 950 dev_set_promiscuity(new_active->dev, 1); 951 } 952 953 if (bond->dev->flags & IFF_ALLMULTI) { 954 dev_set_allmulti(new_active->dev, 1); 955 } 956 957 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 958 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 959 } 960 } 961 } 962 963 /** 964 * find_best_interface - select the best available slave to be the active one 965 * @bond: our bonding struct 966 * 967 * Warning: Caller must hold curr_slave_lock for writing. 968 */ 969 static struct slave *bond_find_best_slave(struct bonding *bond) 970 { 971 struct slave *new_active, *old_active; 972 struct slave *bestslave = NULL; 973 int mintime = bond->params.updelay; 974 int i; 975 976 new_active = old_active = bond->curr_active_slave; 977 978 if (!new_active) { /* there were no active slaves left */ 979 if (bond->slave_cnt > 0) { /* found one slave */ 980 new_active = bond->first_slave; 981 } else { 982 return NULL; /* still no slave, return NULL */ 983 } 984 } 985 986 /* first try the primary link; if arping, a link must tx/rx traffic 987 * before it can be considered the curr_active_slave - also, we would skip 988 * slaves between the curr_active_slave and primary_slave that may be up 989 * and able to arp 990 */ 991 if ((bond->primary_slave) && 992 (!bond->params.arp_interval) && 993 (IS_UP(bond->primary_slave->dev))) { 994 new_active = bond->primary_slave; 995 } 996 997 /* remember where to stop iterating over the slaves */ 998 old_active = new_active; 999 1000 bond_for_each_slave_from(bond, new_active, i, old_active) { 1001 if (IS_UP(new_active->dev)) { 1002 if (new_active->link == BOND_LINK_UP) { 1003 return new_active; 1004 } else if (new_active->link == BOND_LINK_BACK) { 1005 /* link up, but waiting for stabilization */ 1006 if (new_active->delay < mintime) { 1007 mintime = new_active->delay; 1008 bestslave = new_active; 1009 } 1010 } 1011 } 1012 } 1013 1014 return bestslave; 1015 } 1016 1017 /** 1018 * change_active_interface - change the active slave into the specified one 1019 * @bond: our bonding struct 1020 * @new: the new slave to make the active one 1021 * 1022 * Set the new slave to the bond's settings and unset them on the old 1023 * curr_active_slave. 1024 * Setting include flags, mc-list, promiscuity, allmulti, etc. 1025 * 1026 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1027 * because it is apparently the best available slave we have, even though its 1028 * updelay hasn't timed out yet. 1029 * 1030 * Warning: Caller must hold curr_slave_lock for writing. 1031 */ 1032 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1033 { 1034 struct slave *old_active = bond->curr_active_slave; 1035 1036 if (old_active == new_active) { 1037 return; 1038 } 1039 1040 if (new_active) { 1041 if (new_active->link == BOND_LINK_BACK) { 1042 if (USES_PRIMARY(bond->params.mode)) { 1043 printk(KERN_INFO DRV_NAME 1044 ": %s: making interface %s the new " 1045 "active one %d ms earlier.\n", 1046 bond->dev->name, new_active->dev->name, 1047 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1048 } 1049 1050 new_active->delay = 0; 1051 new_active->link = BOND_LINK_UP; 1052 new_active->jiffies = jiffies; 1053 1054 if (bond->params.mode == BOND_MODE_8023AD) { 1055 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1056 } 1057 1058 if ((bond->params.mode == BOND_MODE_TLB) || 1059 (bond->params.mode == BOND_MODE_ALB)) { 1060 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1061 } 1062 } else { 1063 if (USES_PRIMARY(bond->params.mode)) { 1064 printk(KERN_INFO DRV_NAME 1065 ": %s: making interface %s the new " 1066 "active one.\n", 1067 bond->dev->name, new_active->dev->name); 1068 } 1069 } 1070 } 1071 1072 if (USES_PRIMARY(bond->params.mode)) { 1073 bond_mc_swap(bond, new_active, old_active); 1074 } 1075 1076 if ((bond->params.mode == BOND_MODE_TLB) || 1077 (bond->params.mode == BOND_MODE_ALB)) { 1078 bond_alb_handle_active_change(bond, new_active); 1079 if (old_active) 1080 bond_set_slave_inactive_flags(old_active); 1081 if (new_active) 1082 bond_set_slave_active_flags(new_active); 1083 } else { 1084 bond->curr_active_slave = new_active; 1085 } 1086 1087 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1088 if (old_active) { 1089 bond_set_slave_inactive_flags(old_active); 1090 } 1091 1092 if (new_active) { 1093 bond_set_slave_active_flags(new_active); 1094 } 1095 bond_send_gratuitous_arp(bond); 1096 } 1097 } 1098 1099 /** 1100 * bond_select_active_slave - select a new active slave, if needed 1101 * @bond: our bonding struct 1102 * 1103 * This functions shoud be called when one of the following occurs: 1104 * - The old curr_active_slave has been released or lost its link. 1105 * - The primary_slave has got its link back. 1106 * - A slave has got its link back and there's no old curr_active_slave. 1107 * 1108 * Warning: Caller must hold curr_slave_lock for writing. 1109 */ 1110 void bond_select_active_slave(struct bonding *bond) 1111 { 1112 struct slave *best_slave; 1113 int rv; 1114 1115 best_slave = bond_find_best_slave(bond); 1116 if (best_slave != bond->curr_active_slave) { 1117 bond_change_active_slave(bond, best_slave); 1118 rv = bond_set_carrier(bond); 1119 if (!rv) 1120 return; 1121 1122 if (netif_carrier_ok(bond->dev)) { 1123 printk(KERN_INFO DRV_NAME 1124 ": %s: first active interface up!\n", 1125 bond->dev->name); 1126 } else { 1127 printk(KERN_INFO DRV_NAME ": %s: " 1128 "now running without any active interface !\n", 1129 bond->dev->name); 1130 } 1131 } 1132 } 1133 1134 /*--------------------------- slave list handling ---------------------------*/ 1135 1136 /* 1137 * This function attaches the slave to the end of list. 1138 * 1139 * bond->lock held for writing by caller. 1140 */ 1141 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1142 { 1143 if (bond->first_slave == NULL) { /* attaching the first slave */ 1144 new_slave->next = new_slave; 1145 new_slave->prev = new_slave; 1146 bond->first_slave = new_slave; 1147 } else { 1148 new_slave->next = bond->first_slave; 1149 new_slave->prev = bond->first_slave->prev; 1150 new_slave->next->prev = new_slave; 1151 new_slave->prev->next = new_slave; 1152 } 1153 1154 bond->slave_cnt++; 1155 } 1156 1157 /* 1158 * This function detaches the slave from the list. 1159 * WARNING: no check is made to verify if the slave effectively 1160 * belongs to <bond>. 1161 * Nothing is freed on return, structures are just unchained. 1162 * If any slave pointer in bond was pointing to <slave>, 1163 * it should be changed by the calling function. 1164 * 1165 * bond->lock held for writing by caller. 1166 */ 1167 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1168 { 1169 if (slave->next) { 1170 slave->next->prev = slave->prev; 1171 } 1172 1173 if (slave->prev) { 1174 slave->prev->next = slave->next; 1175 } 1176 1177 if (bond->first_slave == slave) { /* slave is the first slave */ 1178 if (bond->slave_cnt > 1) { /* there are more slave */ 1179 bond->first_slave = slave->next; 1180 } else { 1181 bond->first_slave = NULL; /* slave was the last one */ 1182 } 1183 } 1184 1185 slave->next = NULL; 1186 slave->prev = NULL; 1187 bond->slave_cnt--; 1188 } 1189 1190 /*---------------------------------- IOCTL ----------------------------------*/ 1191 1192 int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) 1193 { 1194 dprintk("bond_dev=%p\n", bond_dev); 1195 dprintk("slave_dev=%p\n", slave_dev); 1196 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1197 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1198 return 0; 1199 } 1200 1201 #define BOND_INTERSECT_FEATURES \ 1202 (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\ 1203 NETIF_F_TSO|NETIF_F_UFO) 1204 1205 /* 1206 * Compute the common dev->feature set available to all slaves. Some 1207 * feature bits are managed elsewhere, so preserve feature bits set on 1208 * master device that are not part of the examined set. 1209 */ 1210 static int bond_compute_features(struct bonding *bond) 1211 { 1212 unsigned long features = BOND_INTERSECT_FEATURES; 1213 struct slave *slave; 1214 struct net_device *bond_dev = bond->dev; 1215 int i; 1216 1217 bond_for_each_slave(bond, slave, i) 1218 features &= (slave->dev->features & BOND_INTERSECT_FEATURES); 1219 1220 if ((features & NETIF_F_SG) && 1221 !(features & (NETIF_F_IP_CSUM | 1222 NETIF_F_NO_CSUM | 1223 NETIF_F_HW_CSUM))) 1224 features &= ~NETIF_F_SG; 1225 1226 /* 1227 * features will include NETIF_F_TSO (NETIF_F_UFO) iff all 1228 * slave devices support NETIF_F_TSO (NETIF_F_UFO), which 1229 * implies that all slaves also support scatter-gather 1230 * (NETIF_F_SG), which implies that features also includes 1231 * NETIF_F_SG. So no need to check whether we have an 1232 * illegal combination of NETIF_F_{TSO,UFO} and 1233 * !NETIF_F_SG 1234 */ 1235 1236 features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES); 1237 bond_dev->features = features; 1238 1239 return 0; 1240 } 1241 1242 /* enslave device <slave> to bond device <master> */ 1243 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1244 { 1245 struct bonding *bond = bond_dev->priv; 1246 struct slave *new_slave = NULL; 1247 struct dev_mc_list *dmi; 1248 struct sockaddr addr; 1249 int link_reporting; 1250 int old_features = bond_dev->features; 1251 int res = 0; 1252 1253 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1254 slave_dev->do_ioctl == NULL) { 1255 printk(KERN_WARNING DRV_NAME 1256 ": %s: Warning: no link monitoring support for %s\n", 1257 bond_dev->name, slave_dev->name); 1258 } 1259 1260 /* bond must be initialized by bond_open() before enslaving */ 1261 if (!(bond_dev->flags & IFF_UP)) { 1262 dprintk("Error, master_dev is not up\n"); 1263 return -EPERM; 1264 } 1265 1266 /* already enslaved */ 1267 if (slave_dev->flags & IFF_SLAVE) { 1268 dprintk("Error, Device was already enslaved\n"); 1269 return -EBUSY; 1270 } 1271 1272 /* vlan challenged mutual exclusion */ 1273 /* no need to lock since we're protected by rtnl_lock */ 1274 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1275 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1276 if (!list_empty(&bond->vlan_list)) { 1277 printk(KERN_ERR DRV_NAME 1278 ": %s: Error: cannot enslave VLAN " 1279 "challenged slave %s on VLAN enabled " 1280 "bond %s\n", bond_dev->name, slave_dev->name, 1281 bond_dev->name); 1282 return -EPERM; 1283 } else { 1284 printk(KERN_WARNING DRV_NAME 1285 ": %s: Warning: enslaved VLAN challenged " 1286 "slave %s. Adding VLANs will be blocked as " 1287 "long as %s is part of bond %s\n", 1288 bond_dev->name, slave_dev->name, slave_dev->name, 1289 bond_dev->name); 1290 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1291 } 1292 } else { 1293 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1294 if (bond->slave_cnt == 0) { 1295 /* First slave, and it is not VLAN challenged, 1296 * so remove the block of adding VLANs over the bond. 1297 */ 1298 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1299 } 1300 } 1301 1302 /* 1303 * Old ifenslave binaries are no longer supported. These can 1304 * be identified with moderate accurary by the state of the slave: 1305 * the current ifenslave will set the interface down prior to 1306 * enslaving it; the old ifenslave will not. 1307 */ 1308 if ((slave_dev->flags & IFF_UP)) { 1309 printk(KERN_ERR DRV_NAME ": %s is up. " 1310 "This may be due to an out of date ifenslave.\n", 1311 slave_dev->name); 1312 res = -EPERM; 1313 goto err_undo_flags; 1314 } 1315 1316 if (slave_dev->set_mac_address == NULL) { 1317 printk(KERN_ERR DRV_NAME 1318 ": %s: Error: The slave device you specified does " 1319 "not support setting the MAC address. " 1320 "Your kernel likely does not support slave " 1321 "devices.\n", bond_dev->name); 1322 res = -EOPNOTSUPP; 1323 goto err_undo_flags; 1324 } 1325 1326 new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL); 1327 if (!new_slave) { 1328 res = -ENOMEM; 1329 goto err_undo_flags; 1330 } 1331 1332 memset(new_slave, 0, sizeof(struct slave)); 1333 1334 /* save slave's original flags before calling 1335 * netdev_set_master and dev_open 1336 */ 1337 new_slave->original_flags = slave_dev->flags; 1338 1339 /* 1340 * Save slave's original ("permanent") mac address for modes 1341 * that need it, and for restoring it upon release, and then 1342 * set it to the master's address 1343 */ 1344 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1345 1346 /* 1347 * Set slave to master's mac address. The application already 1348 * set the master's mac address to that of the first slave 1349 */ 1350 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1351 addr.sa_family = slave_dev->type; 1352 res = dev_set_mac_address(slave_dev, &addr); 1353 if (res) { 1354 dprintk("Error %d calling set_mac_address\n", res); 1355 goto err_free; 1356 } 1357 1358 /* open the slave since the application closed it */ 1359 res = dev_open(slave_dev); 1360 if (res) { 1361 dprintk("Openning slave %s failed\n", slave_dev->name); 1362 goto err_restore_mac; 1363 } 1364 1365 res = netdev_set_master(slave_dev, bond_dev); 1366 if (res) { 1367 dprintk("Error %d calling netdev_set_master\n", res); 1368 goto err_close; 1369 } 1370 1371 new_slave->dev = slave_dev; 1372 1373 if ((bond->params.mode == BOND_MODE_TLB) || 1374 (bond->params.mode == BOND_MODE_ALB)) { 1375 /* bond_alb_init_slave() must be called before all other stages since 1376 * it might fail and we do not want to have to undo everything 1377 */ 1378 res = bond_alb_init_slave(bond, new_slave); 1379 if (res) { 1380 goto err_unset_master; 1381 } 1382 } 1383 1384 /* If the mode USES_PRIMARY, then the new slave gets the 1385 * master's promisc (and mc) settings only if it becomes the 1386 * curr_active_slave, and that is taken care of later when calling 1387 * bond_change_active() 1388 */ 1389 if (!USES_PRIMARY(bond->params.mode)) { 1390 /* set promiscuity level to new slave */ 1391 if (bond_dev->flags & IFF_PROMISC) { 1392 dev_set_promiscuity(slave_dev, 1); 1393 } 1394 1395 /* set allmulti level to new slave */ 1396 if (bond_dev->flags & IFF_ALLMULTI) { 1397 dev_set_allmulti(slave_dev, 1); 1398 } 1399 1400 /* upload master's mc_list to new slave */ 1401 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1402 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1403 } 1404 } 1405 1406 if (bond->params.mode == BOND_MODE_8023AD) { 1407 /* add lacpdu mc addr to mc list */ 1408 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1409 1410 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1411 } 1412 1413 bond_add_vlans_on_slave(bond, slave_dev); 1414 1415 write_lock_bh(&bond->lock); 1416 1417 bond_attach_slave(bond, new_slave); 1418 1419 new_slave->delay = 0; 1420 new_slave->link_failure_count = 0; 1421 1422 bond_compute_features(bond); 1423 1424 if (bond->params.miimon && !bond->params.use_carrier) { 1425 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1426 1427 if ((link_reporting == -1) && !bond->params.arp_interval) { 1428 /* 1429 * miimon is set but a bonded network driver 1430 * does not support ETHTOOL/MII and 1431 * arp_interval is not set. Note: if 1432 * use_carrier is enabled, we will never go 1433 * here (because netif_carrier is always 1434 * supported); thus, we don't need to change 1435 * the messages for netif_carrier. 1436 */ 1437 printk(KERN_WARNING DRV_NAME 1438 ": %s: Warning: MII and ETHTOOL support not " 1439 "available for interface %s, and " 1440 "arp_interval/arp_ip_target module parameters " 1441 "not specified, thus bonding will not detect " 1442 "link failures! see bonding.txt for details.\n", 1443 bond_dev->name, slave_dev->name); 1444 } else if (link_reporting == -1) { 1445 /* unable get link status using mii/ethtool */ 1446 printk(KERN_WARNING DRV_NAME 1447 ": %s: Warning: can't get link status from " 1448 "interface %s; the network driver associated " 1449 "with this interface does not support MII or " 1450 "ETHTOOL link status reporting, thus miimon " 1451 "has no effect on this interface.\n", 1452 bond_dev->name, slave_dev->name); 1453 } 1454 } 1455 1456 /* check for initial state */ 1457 if (!bond->params.miimon || 1458 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1459 if (bond->params.updelay) { 1460 dprintk("Initial state of slave_dev is " 1461 "BOND_LINK_BACK\n"); 1462 new_slave->link = BOND_LINK_BACK; 1463 new_slave->delay = bond->params.updelay; 1464 } else { 1465 dprintk("Initial state of slave_dev is " 1466 "BOND_LINK_UP\n"); 1467 new_slave->link = BOND_LINK_UP; 1468 } 1469 new_slave->jiffies = jiffies; 1470 } else { 1471 dprintk("Initial state of slave_dev is " 1472 "BOND_LINK_DOWN\n"); 1473 new_slave->link = BOND_LINK_DOWN; 1474 } 1475 1476 if (bond_update_speed_duplex(new_slave) && 1477 (new_slave->link != BOND_LINK_DOWN)) { 1478 printk(KERN_WARNING DRV_NAME 1479 ": %s: Warning: failed to get speed and duplex from %s, " 1480 "assumed to be 100Mb/sec and Full.\n", 1481 bond_dev->name, new_slave->dev->name); 1482 1483 if (bond->params.mode == BOND_MODE_8023AD) { 1484 printk(KERN_WARNING DRV_NAME 1485 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL " 1486 "support in base driver for proper aggregator " 1487 "selection.\n", bond_dev->name); 1488 } 1489 } 1490 1491 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1492 /* if there is a primary slave, remember it */ 1493 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1494 bond->primary_slave = new_slave; 1495 } 1496 } 1497 1498 switch (bond->params.mode) { 1499 case BOND_MODE_ACTIVEBACKUP: 1500 /* if we're in active-backup mode, we need one and 1501 * only one active interface. The backup interfaces 1502 * will have their SLAVE_INACTIVE flag set because we 1503 * need them to be drop all packets. Thus, since we 1504 * guarantee that curr_active_slave always point to 1505 * the last usable interface, we just have to verify 1506 * this interface's flag. 1507 */ 1508 if (((!bond->curr_active_slave) || 1509 (bond->curr_active_slave->dev->priv_flags & IFF_SLAVE_INACTIVE)) && 1510 (new_slave->link != BOND_LINK_DOWN)) { 1511 /* first slave or no active slave yet, and this link 1512 is OK, so make this interface the active one */ 1513 bond_change_active_slave(bond, new_slave); 1514 printk(KERN_INFO DRV_NAME 1515 ": %s: first active interface up!\n", 1516 bond->dev->name); 1517 netif_carrier_on(bond->dev); 1518 1519 } else { 1520 dprintk("This is just a backup slave\n"); 1521 bond_set_slave_inactive_flags(new_slave); 1522 } 1523 break; 1524 case BOND_MODE_8023AD: 1525 /* in 802.3ad mode, the internal mechanism 1526 * will activate the slaves in the selected 1527 * aggregator 1528 */ 1529 bond_set_slave_inactive_flags(new_slave); 1530 /* if this is the first slave */ 1531 if (bond->slave_cnt == 1) { 1532 SLAVE_AD_INFO(new_slave).id = 1; 1533 /* Initialize AD with the number of times that the AD timer is called in 1 second 1534 * can be called only after the mac address of the bond is set 1535 */ 1536 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1537 bond->params.lacp_fast); 1538 } else { 1539 SLAVE_AD_INFO(new_slave).id = 1540 SLAVE_AD_INFO(new_slave->prev).id + 1; 1541 } 1542 1543 bond_3ad_bind_slave(new_slave); 1544 break; 1545 case BOND_MODE_TLB: 1546 case BOND_MODE_ALB: 1547 new_slave->state = BOND_STATE_ACTIVE; 1548 if ((!bond->curr_active_slave) && 1549 (new_slave->link != BOND_LINK_DOWN)) { 1550 /* first slave or no active slave yet, and this link 1551 * is OK, so make this interface the active one 1552 */ 1553 bond_change_active_slave(bond, new_slave); 1554 } else { 1555 bond_set_slave_inactive_flags(new_slave); 1556 } 1557 break; 1558 default: 1559 dprintk("This slave is always active in trunk mode\n"); 1560 1561 /* always active in trunk mode */ 1562 new_slave->state = BOND_STATE_ACTIVE; 1563 1564 /* In trunking mode there is little meaning to curr_active_slave 1565 * anyway (it holds no special properties of the bond device), 1566 * so we can change it without calling change_active_interface() 1567 */ 1568 if (!bond->curr_active_slave) { 1569 bond->curr_active_slave = new_slave; 1570 } 1571 break; 1572 } /* switch(bond_mode) */ 1573 1574 bond_set_carrier(bond); 1575 1576 write_unlock_bh(&bond->lock); 1577 1578 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1579 if (res) 1580 goto err_unset_master; 1581 1582 printk(KERN_INFO DRV_NAME 1583 ": %s: enslaving %s as a%s interface with a%s link.\n", 1584 bond_dev->name, slave_dev->name, 1585 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1586 new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); 1587 1588 /* enslave is successful */ 1589 return 0; 1590 1591 /* Undo stages on error */ 1592 err_unset_master: 1593 netdev_set_master(slave_dev, NULL); 1594 1595 err_close: 1596 dev_close(slave_dev); 1597 1598 err_restore_mac: 1599 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1600 addr.sa_family = slave_dev->type; 1601 dev_set_mac_address(slave_dev, &addr); 1602 1603 err_free: 1604 kfree(new_slave); 1605 1606 err_undo_flags: 1607 bond_dev->features = old_features; 1608 1609 return res; 1610 } 1611 1612 /* 1613 * Try to release the slave device <slave> from the bond device <master> 1614 * It is legal to access curr_active_slave without a lock because all the function 1615 * is write-locked. 1616 * 1617 * The rules for slave state should be: 1618 * for Active/Backup: 1619 * Active stays on all backups go down 1620 * for Bonded connections: 1621 * The first up interface should be left on and all others downed. 1622 */ 1623 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1624 { 1625 struct bonding *bond = bond_dev->priv; 1626 struct slave *slave, *oldcurrent; 1627 struct sockaddr addr; 1628 int mac_addr_differ; 1629 1630 /* slave is not a slave or master is not master of this slave */ 1631 if (!(slave_dev->flags & IFF_SLAVE) || 1632 (slave_dev->master != bond_dev)) { 1633 printk(KERN_ERR DRV_NAME 1634 ": %s: Error: cannot release %s.\n", 1635 bond_dev->name, slave_dev->name); 1636 return -EINVAL; 1637 } 1638 1639 write_lock_bh(&bond->lock); 1640 1641 slave = bond_get_slave_by_dev(bond, slave_dev); 1642 if (!slave) { 1643 /* not a slave of this bond */ 1644 printk(KERN_INFO DRV_NAME 1645 ": %s: %s not enslaved\n", 1646 bond_dev->name, slave_dev->name); 1647 write_unlock_bh(&bond->lock); 1648 return -EINVAL; 1649 } 1650 1651 mac_addr_differ = memcmp(bond_dev->dev_addr, 1652 slave->perm_hwaddr, 1653 ETH_ALEN); 1654 if (!mac_addr_differ && (bond->slave_cnt > 1)) { 1655 printk(KERN_WARNING DRV_NAME 1656 ": %s: Warning: the permanent HWaddr of %s " 1657 "- %02X:%02X:%02X:%02X:%02X:%02X - is " 1658 "still in use by %s. Set the HWaddr of " 1659 "%s to a different address to avoid " 1660 "conflicts.\n", 1661 bond_dev->name, 1662 slave_dev->name, 1663 slave->perm_hwaddr[0], 1664 slave->perm_hwaddr[1], 1665 slave->perm_hwaddr[2], 1666 slave->perm_hwaddr[3], 1667 slave->perm_hwaddr[4], 1668 slave->perm_hwaddr[5], 1669 bond_dev->name, 1670 slave_dev->name); 1671 } 1672 1673 /* Inform AD package of unbinding of slave. */ 1674 if (bond->params.mode == BOND_MODE_8023AD) { 1675 /* must be called before the slave is 1676 * detached from the list 1677 */ 1678 bond_3ad_unbind_slave(slave); 1679 } 1680 1681 printk(KERN_INFO DRV_NAME 1682 ": %s: releasing %s interface %s\n", 1683 bond_dev->name, 1684 (slave->state == BOND_STATE_ACTIVE) 1685 ? "active" : "backup", 1686 slave_dev->name); 1687 1688 oldcurrent = bond->curr_active_slave; 1689 1690 bond->current_arp_slave = NULL; 1691 1692 /* release the slave from its bond */ 1693 bond_detach_slave(bond, slave); 1694 1695 bond_compute_features(bond); 1696 1697 if (bond->primary_slave == slave) { 1698 bond->primary_slave = NULL; 1699 } 1700 1701 if (oldcurrent == slave) { 1702 bond_change_active_slave(bond, NULL); 1703 } 1704 1705 if ((bond->params.mode == BOND_MODE_TLB) || 1706 (bond->params.mode == BOND_MODE_ALB)) { 1707 /* Must be called only after the slave has been 1708 * detached from the list and the curr_active_slave 1709 * has been cleared (if our_slave == old_current), 1710 * but before a new active slave is selected. 1711 */ 1712 bond_alb_deinit_slave(bond, slave); 1713 } 1714 1715 if (oldcurrent == slave) 1716 bond_select_active_slave(bond); 1717 1718 if (bond->slave_cnt == 0) { 1719 bond_set_carrier(bond); 1720 1721 /* if the last slave was removed, zero the mac address 1722 * of the master so it will be set by the application 1723 * to the mac address of the first slave 1724 */ 1725 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1726 1727 if (list_empty(&bond->vlan_list)) { 1728 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1729 } else { 1730 printk(KERN_WARNING DRV_NAME 1731 ": %s: Warning: clearing HW address of %s while it " 1732 "still has VLANs.\n", 1733 bond_dev->name, bond_dev->name); 1734 printk(KERN_WARNING DRV_NAME 1735 ": %s: When re-adding slaves, make sure the bond's " 1736 "HW address matches its VLANs'.\n", 1737 bond_dev->name); 1738 } 1739 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1740 !bond_has_challenged_slaves(bond)) { 1741 printk(KERN_INFO DRV_NAME 1742 ": %s: last VLAN challenged slave %s " 1743 "left bond %s. VLAN blocking is removed\n", 1744 bond_dev->name, slave_dev->name, bond_dev->name); 1745 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1746 } 1747 1748 write_unlock_bh(&bond->lock); 1749 1750 /* must do this from outside any spinlocks */ 1751 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1752 1753 bond_del_vlans_from_slave(bond, slave_dev); 1754 1755 /* If the mode USES_PRIMARY, then we should only remove its 1756 * promisc and mc settings if it was the curr_active_slave, but that was 1757 * already taken care of above when we detached the slave 1758 */ 1759 if (!USES_PRIMARY(bond->params.mode)) { 1760 /* unset promiscuity level from slave */ 1761 if (bond_dev->flags & IFF_PROMISC) { 1762 dev_set_promiscuity(slave_dev, -1); 1763 } 1764 1765 /* unset allmulti level from slave */ 1766 if (bond_dev->flags & IFF_ALLMULTI) { 1767 dev_set_allmulti(slave_dev, -1); 1768 } 1769 1770 /* flush master's mc_list from slave */ 1771 bond_mc_list_flush(bond_dev, slave_dev); 1772 } 1773 1774 netdev_set_master(slave_dev, NULL); 1775 1776 /* close slave before restoring its mac address */ 1777 dev_close(slave_dev); 1778 1779 /* restore original ("permanent") mac address */ 1780 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1781 addr.sa_family = slave_dev->type; 1782 dev_set_mac_address(slave_dev, &addr); 1783 1784 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1785 IFF_SLAVE_INACTIVE); 1786 1787 kfree(slave); 1788 1789 return 0; /* deletion OK */ 1790 } 1791 1792 /* 1793 * This function releases all slaves. 1794 */ 1795 static int bond_release_all(struct net_device *bond_dev) 1796 { 1797 struct bonding *bond = bond_dev->priv; 1798 struct slave *slave; 1799 struct net_device *slave_dev; 1800 struct sockaddr addr; 1801 1802 write_lock_bh(&bond->lock); 1803 1804 netif_carrier_off(bond_dev); 1805 1806 if (bond->slave_cnt == 0) { 1807 goto out; 1808 } 1809 1810 bond->current_arp_slave = NULL; 1811 bond->primary_slave = NULL; 1812 bond_change_active_slave(bond, NULL); 1813 1814 while ((slave = bond->first_slave) != NULL) { 1815 /* Inform AD package of unbinding of slave 1816 * before slave is detached from the list. 1817 */ 1818 if (bond->params.mode == BOND_MODE_8023AD) { 1819 bond_3ad_unbind_slave(slave); 1820 } 1821 1822 slave_dev = slave->dev; 1823 bond_detach_slave(bond, slave); 1824 1825 if ((bond->params.mode == BOND_MODE_TLB) || 1826 (bond->params.mode == BOND_MODE_ALB)) { 1827 /* must be called only after the slave 1828 * has been detached from the list 1829 */ 1830 bond_alb_deinit_slave(bond, slave); 1831 } 1832 1833 bond_compute_features(bond); 1834 1835 /* now that the slave is detached, unlock and perform 1836 * all the undo steps that should not be called from 1837 * within a lock. 1838 */ 1839 write_unlock_bh(&bond->lock); 1840 1841 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1842 bond_del_vlans_from_slave(bond, slave_dev); 1843 1844 /* If the mode USES_PRIMARY, then we should only remove its 1845 * promisc and mc settings if it was the curr_active_slave, but that was 1846 * already taken care of above when we detached the slave 1847 */ 1848 if (!USES_PRIMARY(bond->params.mode)) { 1849 /* unset promiscuity level from slave */ 1850 if (bond_dev->flags & IFF_PROMISC) { 1851 dev_set_promiscuity(slave_dev, -1); 1852 } 1853 1854 /* unset allmulti level from slave */ 1855 if (bond_dev->flags & IFF_ALLMULTI) { 1856 dev_set_allmulti(slave_dev, -1); 1857 } 1858 1859 /* flush master's mc_list from slave */ 1860 bond_mc_list_flush(bond_dev, slave_dev); 1861 } 1862 1863 netdev_set_master(slave_dev, NULL); 1864 1865 /* close slave before restoring its mac address */ 1866 dev_close(slave_dev); 1867 1868 /* restore original ("permanent") mac address*/ 1869 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1870 addr.sa_family = slave_dev->type; 1871 dev_set_mac_address(slave_dev, &addr); 1872 1873 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1874 IFF_SLAVE_INACTIVE); 1875 1876 kfree(slave); 1877 1878 /* re-acquire the lock before getting the next slave */ 1879 write_lock_bh(&bond->lock); 1880 } 1881 1882 /* zero the mac address of the master so it will be 1883 * set by the application to the mac address of the 1884 * first slave 1885 */ 1886 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1887 1888 if (list_empty(&bond->vlan_list)) { 1889 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1890 } else { 1891 printk(KERN_WARNING DRV_NAME 1892 ": %s: Warning: clearing HW address of %s while it " 1893 "still has VLANs.\n", 1894 bond_dev->name, bond_dev->name); 1895 printk(KERN_WARNING DRV_NAME 1896 ": %s: When re-adding slaves, make sure the bond's " 1897 "HW address matches its VLANs'.\n", 1898 bond_dev->name); 1899 } 1900 1901 printk(KERN_INFO DRV_NAME 1902 ": %s: released all slaves\n", 1903 bond_dev->name); 1904 1905 out: 1906 write_unlock_bh(&bond->lock); 1907 1908 return 0; 1909 } 1910 1911 /* 1912 * This function changes the active slave to slave <slave_dev>. 1913 * It returns -EINVAL in the following cases. 1914 * - <slave_dev> is not found in the list. 1915 * - There is not active slave now. 1916 * - <slave_dev> is already active. 1917 * - The link state of <slave_dev> is not BOND_LINK_UP. 1918 * - <slave_dev> is not running. 1919 * In these cases, this fuction does nothing. 1920 * In the other cases, currnt_slave pointer is changed and 0 is returned. 1921 */ 1922 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 1923 { 1924 struct bonding *bond = bond_dev->priv; 1925 struct slave *old_active = NULL; 1926 struct slave *new_active = NULL; 1927 int res = 0; 1928 1929 if (!USES_PRIMARY(bond->params.mode)) { 1930 return -EINVAL; 1931 } 1932 1933 /* Verify that master_dev is indeed the master of slave_dev */ 1934 if (!(slave_dev->flags & IFF_SLAVE) || 1935 (slave_dev->master != bond_dev)) { 1936 return -EINVAL; 1937 } 1938 1939 write_lock_bh(&bond->lock); 1940 1941 old_active = bond->curr_active_slave; 1942 new_active = bond_get_slave_by_dev(bond, slave_dev); 1943 1944 /* 1945 * Changing to the current active: do nothing; return success. 1946 */ 1947 if (new_active && (new_active == old_active)) { 1948 write_unlock_bh(&bond->lock); 1949 return 0; 1950 } 1951 1952 if ((new_active) && 1953 (old_active) && 1954 (new_active->link == BOND_LINK_UP) && 1955 IS_UP(new_active->dev)) { 1956 bond_change_active_slave(bond, new_active); 1957 } else { 1958 res = -EINVAL; 1959 } 1960 1961 write_unlock_bh(&bond->lock); 1962 1963 return res; 1964 } 1965 1966 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 1967 { 1968 struct bonding *bond = bond_dev->priv; 1969 1970 info->bond_mode = bond->params.mode; 1971 info->miimon = bond->params.miimon; 1972 1973 read_lock_bh(&bond->lock); 1974 info->num_slaves = bond->slave_cnt; 1975 read_unlock_bh(&bond->lock); 1976 1977 return 0; 1978 } 1979 1980 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 1981 { 1982 struct bonding *bond = bond_dev->priv; 1983 struct slave *slave; 1984 int i, found = 0; 1985 1986 if (info->slave_id < 0) { 1987 return -ENODEV; 1988 } 1989 1990 read_lock_bh(&bond->lock); 1991 1992 bond_for_each_slave(bond, slave, i) { 1993 if (i == (int)info->slave_id) { 1994 found = 1; 1995 break; 1996 } 1997 } 1998 1999 read_unlock_bh(&bond->lock); 2000 2001 if (found) { 2002 strcpy(info->slave_name, slave->dev->name); 2003 info->link = slave->link; 2004 info->state = slave->state; 2005 info->link_failure_count = slave->link_failure_count; 2006 } else { 2007 return -ENODEV; 2008 } 2009 2010 return 0; 2011 } 2012 2013 /*-------------------------------- Monitoring -------------------------------*/ 2014 2015 /* this function is called regularly to monitor each slave's link. */ 2016 void bond_mii_monitor(struct net_device *bond_dev) 2017 { 2018 struct bonding *bond = bond_dev->priv; 2019 struct slave *slave, *oldcurrent; 2020 int do_failover = 0; 2021 int delta_in_ticks; 2022 int i; 2023 2024 read_lock(&bond->lock); 2025 2026 delta_in_ticks = (bond->params.miimon * HZ) / 1000; 2027 2028 if (bond->kill_timers) { 2029 goto out; 2030 } 2031 2032 if (bond->slave_cnt == 0) { 2033 goto re_arm; 2034 } 2035 2036 /* we will try to read the link status of each of our slaves, and 2037 * set their IFF_RUNNING flag appropriately. For each slave not 2038 * supporting MII status, we won't do anything so that a user-space 2039 * program could monitor the link itself if needed. 2040 */ 2041 2042 read_lock(&bond->curr_slave_lock); 2043 oldcurrent = bond->curr_active_slave; 2044 read_unlock(&bond->curr_slave_lock); 2045 2046 bond_for_each_slave(bond, slave, i) { 2047 struct net_device *slave_dev = slave->dev; 2048 int link_state; 2049 u16 old_speed = slave->speed; 2050 u8 old_duplex = slave->duplex; 2051 2052 link_state = bond_check_dev_link(bond, slave_dev, 0); 2053 2054 switch (slave->link) { 2055 case BOND_LINK_UP: /* the link was up */ 2056 if (link_state == BMSR_LSTATUS) { 2057 /* link stays up, nothing more to do */ 2058 break; 2059 } else { /* link going down */ 2060 slave->link = BOND_LINK_FAIL; 2061 slave->delay = bond->params.downdelay; 2062 2063 if (slave->link_failure_count < UINT_MAX) { 2064 slave->link_failure_count++; 2065 } 2066 2067 if (bond->params.downdelay) { 2068 printk(KERN_INFO DRV_NAME 2069 ": %s: link status down for %s " 2070 "interface %s, disabling it in " 2071 "%d ms.\n", 2072 bond_dev->name, 2073 IS_UP(slave_dev) 2074 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2075 ? ((slave == oldcurrent) 2076 ? "active " : "backup ") 2077 : "") 2078 : "idle ", 2079 slave_dev->name, 2080 bond->params.downdelay * bond->params.miimon); 2081 } 2082 } 2083 /* no break ! fall through the BOND_LINK_FAIL test to 2084 ensure proper action to be taken 2085 */ 2086 case BOND_LINK_FAIL: /* the link has just gone down */ 2087 if (link_state != BMSR_LSTATUS) { 2088 /* link stays down */ 2089 if (slave->delay <= 0) { 2090 /* link down for too long time */ 2091 slave->link = BOND_LINK_DOWN; 2092 2093 /* in active/backup mode, we must 2094 * completely disable this interface 2095 */ 2096 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2097 (bond->params.mode == BOND_MODE_8023AD)) { 2098 bond_set_slave_inactive_flags(slave); 2099 } 2100 2101 printk(KERN_INFO DRV_NAME 2102 ": %s: link status definitely " 2103 "down for interface %s, " 2104 "disabling it\n", 2105 bond_dev->name, 2106 slave_dev->name); 2107 2108 /* notify ad that the link status has changed */ 2109 if (bond->params.mode == BOND_MODE_8023AD) { 2110 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2111 } 2112 2113 if ((bond->params.mode == BOND_MODE_TLB) || 2114 (bond->params.mode == BOND_MODE_ALB)) { 2115 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2116 } 2117 2118 if (slave == oldcurrent) { 2119 do_failover = 1; 2120 } 2121 } else { 2122 slave->delay--; 2123 } 2124 } else { 2125 /* link up again */ 2126 slave->link = BOND_LINK_UP; 2127 slave->jiffies = jiffies; 2128 printk(KERN_INFO DRV_NAME 2129 ": %s: link status up again after %d " 2130 "ms for interface %s.\n", 2131 bond_dev->name, 2132 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2133 slave_dev->name); 2134 } 2135 break; 2136 case BOND_LINK_DOWN: /* the link was down */ 2137 if (link_state != BMSR_LSTATUS) { 2138 /* the link stays down, nothing more to do */ 2139 break; 2140 } else { /* link going up */ 2141 slave->link = BOND_LINK_BACK; 2142 slave->delay = bond->params.updelay; 2143 2144 if (bond->params.updelay) { 2145 /* if updelay == 0, no need to 2146 advertise about a 0 ms delay */ 2147 printk(KERN_INFO DRV_NAME 2148 ": %s: link status up for " 2149 "interface %s, enabling it " 2150 "in %d ms.\n", 2151 bond_dev->name, 2152 slave_dev->name, 2153 bond->params.updelay * bond->params.miimon); 2154 } 2155 } 2156 /* no break ! fall through the BOND_LINK_BACK state in 2157 case there's something to do. 2158 */ 2159 case BOND_LINK_BACK: /* the link has just come back */ 2160 if (link_state != BMSR_LSTATUS) { 2161 /* link down again */ 2162 slave->link = BOND_LINK_DOWN; 2163 2164 printk(KERN_INFO DRV_NAME 2165 ": %s: link status down again after %d " 2166 "ms for interface %s.\n", 2167 bond_dev->name, 2168 (bond->params.updelay - slave->delay) * bond->params.miimon, 2169 slave_dev->name); 2170 } else { 2171 /* link stays up */ 2172 if (slave->delay == 0) { 2173 /* now the link has been up for long time enough */ 2174 slave->link = BOND_LINK_UP; 2175 slave->jiffies = jiffies; 2176 2177 if (bond->params.mode == BOND_MODE_8023AD) { 2178 /* prevent it from being the active one */ 2179 slave->state = BOND_STATE_BACKUP; 2180 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2181 /* make it immediately active */ 2182 slave->state = BOND_STATE_ACTIVE; 2183 } else if (slave != bond->primary_slave) { 2184 /* prevent it from being the active one */ 2185 slave->state = BOND_STATE_BACKUP; 2186 } 2187 2188 printk(KERN_INFO DRV_NAME 2189 ": %s: link status definitely " 2190 "up for interface %s.\n", 2191 bond_dev->name, 2192 slave_dev->name); 2193 2194 /* notify ad that the link status has changed */ 2195 if (bond->params.mode == BOND_MODE_8023AD) { 2196 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2197 } 2198 2199 if ((bond->params.mode == BOND_MODE_TLB) || 2200 (bond->params.mode == BOND_MODE_ALB)) { 2201 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2202 } 2203 2204 if ((!oldcurrent) || 2205 (slave == bond->primary_slave)) { 2206 do_failover = 1; 2207 } 2208 } else { 2209 slave->delay--; 2210 } 2211 } 2212 break; 2213 default: 2214 /* Should not happen */ 2215 printk(KERN_ERR DRV_NAME 2216 ": %s: Error: %s Illegal value (link=%d)\n", 2217 bond_dev->name, 2218 slave->dev->name, 2219 slave->link); 2220 goto out; 2221 } /* end of switch (slave->link) */ 2222 2223 bond_update_speed_duplex(slave); 2224 2225 if (bond->params.mode == BOND_MODE_8023AD) { 2226 if (old_speed != slave->speed) { 2227 bond_3ad_adapter_speed_changed(slave); 2228 } 2229 2230 if (old_duplex != slave->duplex) { 2231 bond_3ad_adapter_duplex_changed(slave); 2232 } 2233 } 2234 2235 } /* end of for */ 2236 2237 if (do_failover) { 2238 write_lock(&bond->curr_slave_lock); 2239 2240 bond_select_active_slave(bond); 2241 2242 write_unlock(&bond->curr_slave_lock); 2243 } else 2244 bond_set_carrier(bond); 2245 2246 re_arm: 2247 if (bond->params.miimon) { 2248 mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); 2249 } 2250 out: 2251 read_unlock(&bond->lock); 2252 } 2253 2254 2255 static u32 bond_glean_dev_ip(struct net_device *dev) 2256 { 2257 struct in_device *idev; 2258 struct in_ifaddr *ifa; 2259 u32 addr = 0; 2260 2261 if (!dev) 2262 return 0; 2263 2264 rcu_read_lock(); 2265 idev = __in_dev_get_rcu(dev); 2266 if (!idev) 2267 goto out; 2268 2269 ifa = idev->ifa_list; 2270 if (!ifa) 2271 goto out; 2272 2273 addr = ifa->ifa_local; 2274 out: 2275 rcu_read_unlock(); 2276 return addr; 2277 } 2278 2279 static int bond_has_ip(struct bonding *bond) 2280 { 2281 struct vlan_entry *vlan, *vlan_next; 2282 2283 if (bond->master_ip) 2284 return 1; 2285 2286 if (list_empty(&bond->vlan_list)) 2287 return 0; 2288 2289 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2290 vlan_list) { 2291 if (vlan->vlan_ip) 2292 return 1; 2293 } 2294 2295 return 0; 2296 } 2297 2298 /* 2299 * We go to the (large) trouble of VLAN tagging ARP frames because 2300 * switches in VLAN mode (especially if ports are configured as 2301 * "native" to a VLAN) might not pass non-tagged frames. 2302 */ 2303 static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id) 2304 { 2305 struct sk_buff *skb; 2306 2307 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2308 slave_dev->name, dest_ip, src_ip, vlan_id); 2309 2310 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2311 NULL, slave_dev->dev_addr, NULL); 2312 2313 if (!skb) { 2314 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2315 return; 2316 } 2317 if (vlan_id) { 2318 skb = vlan_put_tag(skb, vlan_id); 2319 if (!skb) { 2320 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2321 return; 2322 } 2323 } 2324 arp_xmit(skb); 2325 } 2326 2327 2328 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2329 { 2330 int i, vlan_id, rv; 2331 u32 *targets = bond->params.arp_targets; 2332 struct vlan_entry *vlan, *vlan_next; 2333 struct net_device *vlan_dev; 2334 struct flowi fl; 2335 struct rtable *rt; 2336 2337 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2338 if (!targets[i]) 2339 continue; 2340 dprintk("basa: target %x\n", targets[i]); 2341 if (list_empty(&bond->vlan_list)) { 2342 dprintk("basa: empty vlan: arp_send\n"); 2343 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2344 bond->master_ip, 0); 2345 continue; 2346 } 2347 2348 /* 2349 * If VLANs are configured, we do a route lookup to 2350 * determine which VLAN interface would be used, so we 2351 * can tag the ARP with the proper VLAN tag. 2352 */ 2353 memset(&fl, 0, sizeof(fl)); 2354 fl.fl4_dst = targets[i]; 2355 fl.fl4_tos = RTO_ONLINK; 2356 2357 rv = ip_route_output_key(&rt, &fl); 2358 if (rv) { 2359 if (net_ratelimit()) { 2360 printk(KERN_WARNING DRV_NAME 2361 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2362 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2363 } 2364 continue; 2365 } 2366 2367 /* 2368 * This target is not on a VLAN 2369 */ 2370 if (rt->u.dst.dev == bond->dev) { 2371 ip_rt_put(rt); 2372 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2373 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2374 bond->master_ip, 0); 2375 continue; 2376 } 2377 2378 vlan_id = 0; 2379 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2380 vlan_list) { 2381 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2382 if (vlan_dev == rt->u.dst.dev) { 2383 vlan_id = vlan->vlan_id; 2384 dprintk("basa: vlan match on %s %d\n", 2385 vlan_dev->name, vlan_id); 2386 break; 2387 } 2388 } 2389 2390 if (vlan_id) { 2391 ip_rt_put(rt); 2392 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2393 vlan->vlan_ip, vlan_id); 2394 continue; 2395 } 2396 2397 if (net_ratelimit()) { 2398 printk(KERN_WARNING DRV_NAME 2399 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2400 bond->dev->name, NIPQUAD(fl.fl4_dst), 2401 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2402 } 2403 ip_rt_put(rt); 2404 } 2405 } 2406 2407 /* 2408 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2409 * for each VLAN above us. 2410 */ 2411 static void bond_send_gratuitous_arp(struct bonding *bond) 2412 { 2413 struct slave *slave = bond->curr_active_slave; 2414 struct vlan_entry *vlan; 2415 struct net_device *vlan_dev; 2416 2417 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2418 slave ? slave->dev->name : "NULL"); 2419 if (!slave) 2420 return; 2421 2422 if (bond->master_ip) { 2423 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2424 bond->master_ip, 0); 2425 } 2426 2427 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2428 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2429 if (vlan->vlan_ip) { 2430 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2431 vlan->vlan_ip, vlan->vlan_id); 2432 } 2433 } 2434 } 2435 2436 /* 2437 * this function is called regularly to monitor each slave's link 2438 * ensuring that traffic is being sent and received when arp monitoring 2439 * is used in load-balancing mode. if the adapter has been dormant, then an 2440 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2441 * arp monitoring in active backup mode. 2442 */ 2443 void bond_loadbalance_arp_mon(struct net_device *bond_dev) 2444 { 2445 struct bonding *bond = bond_dev->priv; 2446 struct slave *slave, *oldcurrent; 2447 int do_failover = 0; 2448 int delta_in_ticks; 2449 int i; 2450 2451 read_lock(&bond->lock); 2452 2453 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2454 2455 if (bond->kill_timers) { 2456 goto out; 2457 } 2458 2459 if (bond->slave_cnt == 0) { 2460 goto re_arm; 2461 } 2462 2463 read_lock(&bond->curr_slave_lock); 2464 oldcurrent = bond->curr_active_slave; 2465 read_unlock(&bond->curr_slave_lock); 2466 2467 /* see if any of the previous devices are up now (i.e. they have 2468 * xmt and rcv traffic). the curr_active_slave does not come into 2469 * the picture unless it is null. also, slave->jiffies is not needed 2470 * here because we send an arp on each slave and give a slave as 2471 * long as it needs to get the tx/rx within the delta. 2472 * TODO: what about up/down delay in arp mode? it wasn't here before 2473 * so it can wait 2474 */ 2475 bond_for_each_slave(bond, slave, i) { 2476 if (slave->link != BOND_LINK_UP) { 2477 if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) && 2478 ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) { 2479 2480 slave->link = BOND_LINK_UP; 2481 slave->state = BOND_STATE_ACTIVE; 2482 2483 /* primary_slave has no meaning in round-robin 2484 * mode. the window of a slave being up and 2485 * curr_active_slave being null after enslaving 2486 * is closed. 2487 */ 2488 if (!oldcurrent) { 2489 printk(KERN_INFO DRV_NAME 2490 ": %s: link status definitely " 2491 "up for interface %s, ", 2492 bond_dev->name, 2493 slave->dev->name); 2494 do_failover = 1; 2495 } else { 2496 printk(KERN_INFO DRV_NAME 2497 ": %s: interface %s is now up\n", 2498 bond_dev->name, 2499 slave->dev->name); 2500 } 2501 } 2502 } else { 2503 /* slave->link == BOND_LINK_UP */ 2504 2505 /* not all switches will respond to an arp request 2506 * when the source ip is 0, so don't take the link down 2507 * if we don't know our ip yet 2508 */ 2509 if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2510 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 2511 bond_has_ip(bond))) { 2512 2513 slave->link = BOND_LINK_DOWN; 2514 slave->state = BOND_STATE_BACKUP; 2515 2516 if (slave->link_failure_count < UINT_MAX) { 2517 slave->link_failure_count++; 2518 } 2519 2520 printk(KERN_INFO DRV_NAME 2521 ": %s: interface %s is now down.\n", 2522 bond_dev->name, 2523 slave->dev->name); 2524 2525 if (slave == oldcurrent) { 2526 do_failover = 1; 2527 } 2528 } 2529 } 2530 2531 /* note: if switch is in round-robin mode, all links 2532 * must tx arp to ensure all links rx an arp - otherwise 2533 * links may oscillate or not come up at all; if switch is 2534 * in something like xor mode, there is nothing we can 2535 * do - all replies will be rx'ed on same link causing slaves 2536 * to be unstable during low/no traffic periods 2537 */ 2538 if (IS_UP(slave->dev)) { 2539 bond_arp_send_all(bond, slave); 2540 } 2541 } 2542 2543 if (do_failover) { 2544 write_lock(&bond->curr_slave_lock); 2545 2546 bond_select_active_slave(bond); 2547 2548 write_unlock(&bond->curr_slave_lock); 2549 } 2550 2551 re_arm: 2552 if (bond->params.arp_interval) { 2553 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2554 } 2555 out: 2556 read_unlock(&bond->lock); 2557 } 2558 2559 /* 2560 * When using arp monitoring in active-backup mode, this function is 2561 * called to determine if any backup slaves have went down or a new 2562 * current slave needs to be found. 2563 * The backup slaves never generate traffic, they are considered up by merely 2564 * receiving traffic. If the current slave goes down, each backup slave will 2565 * be given the opportunity to tx/rx an arp before being taken down - this 2566 * prevents all slaves from being taken down due to the current slave not 2567 * sending any traffic for the backups to receive. The arps are not necessarily 2568 * necessary, any tx and rx traffic will keep the current slave up. While any 2569 * rx traffic will keep the backup slaves up, the current slave is responsible 2570 * for generating traffic to keep them up regardless of any other traffic they 2571 * may have received. 2572 * see loadbalance_arp_monitor for arp monitoring in load balancing mode 2573 */ 2574 void bond_activebackup_arp_mon(struct net_device *bond_dev) 2575 { 2576 struct bonding *bond = bond_dev->priv; 2577 struct slave *slave; 2578 int delta_in_ticks; 2579 int i; 2580 2581 read_lock(&bond->lock); 2582 2583 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2584 2585 if (bond->kill_timers) { 2586 goto out; 2587 } 2588 2589 if (bond->slave_cnt == 0) { 2590 goto re_arm; 2591 } 2592 2593 /* determine if any slave has come up or any backup slave has 2594 * gone down 2595 * TODO: what about up/down delay in arp mode? it wasn't here before 2596 * so it can wait 2597 */ 2598 bond_for_each_slave(bond, slave, i) { 2599 if (slave->link != BOND_LINK_UP) { 2600 if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) { 2601 2602 slave->link = BOND_LINK_UP; 2603 2604 write_lock(&bond->curr_slave_lock); 2605 2606 if ((!bond->curr_active_slave) && 2607 ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) { 2608 bond_change_active_slave(bond, slave); 2609 bond->current_arp_slave = NULL; 2610 } else if (bond->curr_active_slave != slave) { 2611 /* this slave has just come up but we 2612 * already have a current slave; this 2613 * can also happen if bond_enslave adds 2614 * a new slave that is up while we are 2615 * searching for a new slave 2616 */ 2617 bond_set_slave_inactive_flags(slave); 2618 bond->current_arp_slave = NULL; 2619 } 2620 2621 bond_set_carrier(bond); 2622 2623 if (slave == bond->curr_active_slave) { 2624 printk(KERN_INFO DRV_NAME 2625 ": %s: %s is up and now the " 2626 "active interface\n", 2627 bond_dev->name, 2628 slave->dev->name); 2629 netif_carrier_on(bond->dev); 2630 } else { 2631 printk(KERN_INFO DRV_NAME 2632 ": %s: backup interface %s is " 2633 "now up\n", 2634 bond_dev->name, 2635 slave->dev->name); 2636 } 2637 2638 write_unlock(&bond->curr_slave_lock); 2639 } 2640 } else { 2641 read_lock(&bond->curr_slave_lock); 2642 2643 if ((slave != bond->curr_active_slave) && 2644 (!bond->current_arp_slave) && 2645 (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && 2646 bond_has_ip(bond))) { 2647 /* a backup slave has gone down; three times 2648 * the delta allows the current slave to be 2649 * taken out before the backup slave. 2650 * note: a non-null current_arp_slave indicates 2651 * the curr_active_slave went down and we are 2652 * searching for a new one; under this 2653 * condition we only take the curr_active_slave 2654 * down - this gives each slave a chance to 2655 * tx/rx traffic before being taken out 2656 */ 2657 2658 read_unlock(&bond->curr_slave_lock); 2659 2660 slave->link = BOND_LINK_DOWN; 2661 2662 if (slave->link_failure_count < UINT_MAX) { 2663 slave->link_failure_count++; 2664 } 2665 2666 bond_set_slave_inactive_flags(slave); 2667 2668 printk(KERN_INFO DRV_NAME 2669 ": %s: backup interface %s is now down\n", 2670 bond_dev->name, 2671 slave->dev->name); 2672 } else { 2673 read_unlock(&bond->curr_slave_lock); 2674 } 2675 } 2676 } 2677 2678 read_lock(&bond->curr_slave_lock); 2679 slave = bond->curr_active_slave; 2680 read_unlock(&bond->curr_slave_lock); 2681 2682 if (slave) { 2683 /* if we have sent traffic in the past 2*arp_intervals but 2684 * haven't xmit and rx traffic in that time interval, select 2685 * a different slave. slave->jiffies is only updated when 2686 * a slave first becomes the curr_active_slave - not necessarily 2687 * after every arp; this ensures the slave has a full 2*delta 2688 * before being taken out. if a primary is being used, check 2689 * if it is up and needs to take over as the curr_active_slave 2690 */ 2691 if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2692 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 2693 bond_has_ip(bond))) && 2694 ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { 2695 2696 slave->link = BOND_LINK_DOWN; 2697 2698 if (slave->link_failure_count < UINT_MAX) { 2699 slave->link_failure_count++; 2700 } 2701 2702 printk(KERN_INFO DRV_NAME 2703 ": %s: link status down for active interface " 2704 "%s, disabling it\n", 2705 bond_dev->name, 2706 slave->dev->name); 2707 2708 write_lock(&bond->curr_slave_lock); 2709 2710 bond_select_active_slave(bond); 2711 slave = bond->curr_active_slave; 2712 2713 write_unlock(&bond->curr_slave_lock); 2714 2715 bond->current_arp_slave = slave; 2716 2717 if (slave) { 2718 slave->jiffies = jiffies; 2719 } 2720 } else if ((bond->primary_slave) && 2721 (bond->primary_slave != slave) && 2722 (bond->primary_slave->link == BOND_LINK_UP)) { 2723 /* at this point, slave is the curr_active_slave */ 2724 printk(KERN_INFO DRV_NAME 2725 ": %s: changing from interface %s to primary " 2726 "interface %s\n", 2727 bond_dev->name, 2728 slave->dev->name, 2729 bond->primary_slave->dev->name); 2730 2731 /* primary is up so switch to it */ 2732 write_lock(&bond->curr_slave_lock); 2733 bond_change_active_slave(bond, bond->primary_slave); 2734 write_unlock(&bond->curr_slave_lock); 2735 2736 slave = bond->primary_slave; 2737 slave->jiffies = jiffies; 2738 } else { 2739 bond->current_arp_slave = NULL; 2740 } 2741 2742 /* the current slave must tx an arp to ensure backup slaves 2743 * rx traffic 2744 */ 2745 if (slave && bond_has_ip(bond)) { 2746 bond_arp_send_all(bond, slave); 2747 } 2748 } 2749 2750 /* if we don't have a curr_active_slave, search for the next available 2751 * backup slave from the current_arp_slave and make it the candidate 2752 * for becoming the curr_active_slave 2753 */ 2754 if (!slave) { 2755 if (!bond->current_arp_slave) { 2756 bond->current_arp_slave = bond->first_slave; 2757 } 2758 2759 if (bond->current_arp_slave) { 2760 bond_set_slave_inactive_flags(bond->current_arp_slave); 2761 2762 /* search for next candidate */ 2763 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 2764 if (IS_UP(slave->dev)) { 2765 slave->link = BOND_LINK_BACK; 2766 bond_set_slave_active_flags(slave); 2767 bond_arp_send_all(bond, slave); 2768 slave->jiffies = jiffies; 2769 bond->current_arp_slave = slave; 2770 break; 2771 } 2772 2773 /* if the link state is up at this point, we 2774 * mark it down - this can happen if we have 2775 * simultaneous link failures and 2776 * reselect_active_interface doesn't make this 2777 * one the current slave so it is still marked 2778 * up when it is actually down 2779 */ 2780 if (slave->link == BOND_LINK_UP) { 2781 slave->link = BOND_LINK_DOWN; 2782 if (slave->link_failure_count < UINT_MAX) { 2783 slave->link_failure_count++; 2784 } 2785 2786 bond_set_slave_inactive_flags(slave); 2787 2788 printk(KERN_INFO DRV_NAME 2789 ": %s: backup interface %s is " 2790 "now down.\n", 2791 bond_dev->name, 2792 slave->dev->name); 2793 } 2794 } 2795 } 2796 } 2797 2798 re_arm: 2799 if (bond->params.arp_interval) { 2800 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2801 } 2802 out: 2803 read_unlock(&bond->lock); 2804 } 2805 2806 /*------------------------------ proc/seq_file-------------------------------*/ 2807 2808 #ifdef CONFIG_PROC_FS 2809 2810 #define SEQ_START_TOKEN ((void *)1) 2811 2812 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 2813 { 2814 struct bonding *bond = seq->private; 2815 loff_t off = 0; 2816 struct slave *slave; 2817 int i; 2818 2819 /* make sure the bond won't be taken away */ 2820 read_lock(&dev_base_lock); 2821 read_lock_bh(&bond->lock); 2822 2823 if (*pos == 0) { 2824 return SEQ_START_TOKEN; 2825 } 2826 2827 bond_for_each_slave(bond, slave, i) { 2828 if (++off == *pos) { 2829 return slave; 2830 } 2831 } 2832 2833 return NULL; 2834 } 2835 2836 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2837 { 2838 struct bonding *bond = seq->private; 2839 struct slave *slave = v; 2840 2841 ++*pos; 2842 if (v == SEQ_START_TOKEN) { 2843 return bond->first_slave; 2844 } 2845 2846 slave = slave->next; 2847 2848 return (slave == bond->first_slave) ? NULL : slave; 2849 } 2850 2851 static void bond_info_seq_stop(struct seq_file *seq, void *v) 2852 { 2853 struct bonding *bond = seq->private; 2854 2855 read_unlock_bh(&bond->lock); 2856 read_unlock(&dev_base_lock); 2857 } 2858 2859 static void bond_info_show_master(struct seq_file *seq) 2860 { 2861 struct bonding *bond = seq->private; 2862 struct slave *curr; 2863 int i; 2864 u32 target; 2865 2866 read_lock(&bond->curr_slave_lock); 2867 curr = bond->curr_active_slave; 2868 read_unlock(&bond->curr_slave_lock); 2869 2870 seq_printf(seq, "Bonding Mode: %s\n", 2871 bond_mode_name(bond->params.mode)); 2872 2873 if (bond->params.mode == BOND_MODE_XOR || 2874 bond->params.mode == BOND_MODE_8023AD) { 2875 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 2876 xmit_hashtype_tbl[bond->params.xmit_policy].modename, 2877 bond->params.xmit_policy); 2878 } 2879 2880 if (USES_PRIMARY(bond->params.mode)) { 2881 seq_printf(seq, "Primary Slave: %s\n", 2882 (bond->primary_slave) ? 2883 bond->primary_slave->dev->name : "None"); 2884 2885 seq_printf(seq, "Currently Active Slave: %s\n", 2886 (curr) ? curr->dev->name : "None"); 2887 } 2888 2889 seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? 2890 "up" : "down"); 2891 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 2892 seq_printf(seq, "Up Delay (ms): %d\n", 2893 bond->params.updelay * bond->params.miimon); 2894 seq_printf(seq, "Down Delay (ms): %d\n", 2895 bond->params.downdelay * bond->params.miimon); 2896 2897 2898 /* ARP information */ 2899 if(bond->params.arp_interval > 0) { 2900 int printed=0; 2901 seq_printf(seq, "ARP Polling Interval (ms): %d\n", 2902 bond->params.arp_interval); 2903 2904 seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); 2905 2906 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) { 2907 if (!bond->params.arp_targets[i]) 2908 continue; 2909 if (printed) 2910 seq_printf(seq, ","); 2911 target = ntohl(bond->params.arp_targets[i]); 2912 seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target)); 2913 printed = 1; 2914 } 2915 seq_printf(seq, "\n"); 2916 } 2917 2918 if (bond->params.mode == BOND_MODE_8023AD) { 2919 struct ad_info ad_info; 2920 2921 seq_puts(seq, "\n802.3ad info\n"); 2922 seq_printf(seq, "LACP rate: %s\n", 2923 (bond->params.lacp_fast) ? "fast" : "slow"); 2924 2925 if (bond_3ad_get_active_agg_info(bond, &ad_info)) { 2926 seq_printf(seq, "bond %s has no active aggregator\n", 2927 bond->dev->name); 2928 } else { 2929 seq_printf(seq, "Active Aggregator Info:\n"); 2930 2931 seq_printf(seq, "\tAggregator ID: %d\n", 2932 ad_info.aggregator_id); 2933 seq_printf(seq, "\tNumber of ports: %d\n", 2934 ad_info.ports); 2935 seq_printf(seq, "\tActor Key: %d\n", 2936 ad_info.actor_key); 2937 seq_printf(seq, "\tPartner Key: %d\n", 2938 ad_info.partner_key); 2939 seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", 2940 ad_info.partner_system[0], 2941 ad_info.partner_system[1], 2942 ad_info.partner_system[2], 2943 ad_info.partner_system[3], 2944 ad_info.partner_system[4], 2945 ad_info.partner_system[5]); 2946 } 2947 } 2948 } 2949 2950 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) 2951 { 2952 struct bonding *bond = seq->private; 2953 2954 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); 2955 seq_printf(seq, "MII Status: %s\n", 2956 (slave->link == BOND_LINK_UP) ? "up" : "down"); 2957 seq_printf(seq, "Link Failure Count: %d\n", 2958 slave->link_failure_count); 2959 2960 seq_printf(seq, 2961 "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", 2962 slave->perm_hwaddr[0], slave->perm_hwaddr[1], 2963 slave->perm_hwaddr[2], slave->perm_hwaddr[3], 2964 slave->perm_hwaddr[4], slave->perm_hwaddr[5]); 2965 2966 if (bond->params.mode == BOND_MODE_8023AD) { 2967 const struct aggregator *agg 2968 = SLAVE_AD_INFO(slave).port.aggregator; 2969 2970 if (agg) { 2971 seq_printf(seq, "Aggregator ID: %d\n", 2972 agg->aggregator_identifier); 2973 } else { 2974 seq_puts(seq, "Aggregator ID: N/A\n"); 2975 } 2976 } 2977 } 2978 2979 static int bond_info_seq_show(struct seq_file *seq, void *v) 2980 { 2981 if (v == SEQ_START_TOKEN) { 2982 seq_printf(seq, "%s\n", version); 2983 bond_info_show_master(seq); 2984 } else { 2985 bond_info_show_slave(seq, v); 2986 } 2987 2988 return 0; 2989 } 2990 2991 static struct seq_operations bond_info_seq_ops = { 2992 .start = bond_info_seq_start, 2993 .next = bond_info_seq_next, 2994 .stop = bond_info_seq_stop, 2995 .show = bond_info_seq_show, 2996 }; 2997 2998 static int bond_info_open(struct inode *inode, struct file *file) 2999 { 3000 struct seq_file *seq; 3001 struct proc_dir_entry *proc; 3002 int res; 3003 3004 res = seq_open(file, &bond_info_seq_ops); 3005 if (!res) { 3006 /* recover the pointer buried in proc_dir_entry data */ 3007 seq = file->private_data; 3008 proc = PDE(inode); 3009 seq->private = proc->data; 3010 } 3011 3012 return res; 3013 } 3014 3015 static struct file_operations bond_info_fops = { 3016 .owner = THIS_MODULE, 3017 .open = bond_info_open, 3018 .read = seq_read, 3019 .llseek = seq_lseek, 3020 .release = seq_release, 3021 }; 3022 3023 static int bond_create_proc_entry(struct bonding *bond) 3024 { 3025 struct net_device *bond_dev = bond->dev; 3026 3027 if (bond_proc_dir) { 3028 bond->proc_entry = create_proc_entry(bond_dev->name, 3029 S_IRUGO, 3030 bond_proc_dir); 3031 if (bond->proc_entry == NULL) { 3032 printk(KERN_WARNING DRV_NAME 3033 ": Warning: Cannot create /proc/net/%s/%s\n", 3034 DRV_NAME, bond_dev->name); 3035 } else { 3036 bond->proc_entry->data = bond; 3037 bond->proc_entry->proc_fops = &bond_info_fops; 3038 bond->proc_entry->owner = THIS_MODULE; 3039 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); 3040 } 3041 } 3042 3043 return 0; 3044 } 3045 3046 static void bond_remove_proc_entry(struct bonding *bond) 3047 { 3048 if (bond_proc_dir && bond->proc_entry) { 3049 remove_proc_entry(bond->proc_file_name, bond_proc_dir); 3050 memset(bond->proc_file_name, 0, IFNAMSIZ); 3051 bond->proc_entry = NULL; 3052 } 3053 } 3054 3055 /* Create the bonding directory under /proc/net, if doesn't exist yet. 3056 * Caller must hold rtnl_lock. 3057 */ 3058 static void bond_create_proc_dir(void) 3059 { 3060 int len = strlen(DRV_NAME); 3061 3062 for (bond_proc_dir = proc_net->subdir; bond_proc_dir; 3063 bond_proc_dir = bond_proc_dir->next) { 3064 if ((bond_proc_dir->namelen == len) && 3065 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3066 break; 3067 } 3068 } 3069 3070 if (!bond_proc_dir) { 3071 bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); 3072 if (bond_proc_dir) { 3073 bond_proc_dir->owner = THIS_MODULE; 3074 } else { 3075 printk(KERN_WARNING DRV_NAME 3076 ": Warning: cannot create /proc/net/%s\n", 3077 DRV_NAME); 3078 } 3079 } 3080 } 3081 3082 /* Destroy the bonding directory under /proc/net, if empty. 3083 * Caller must hold rtnl_lock. 3084 */ 3085 static void bond_destroy_proc_dir(void) 3086 { 3087 struct proc_dir_entry *de; 3088 3089 if (!bond_proc_dir) { 3090 return; 3091 } 3092 3093 /* verify that the /proc dir is empty */ 3094 for (de = bond_proc_dir->subdir; de; de = de->next) { 3095 /* ignore . and .. */ 3096 if (*(de->name) != '.') { 3097 break; 3098 } 3099 } 3100 3101 if (de) { 3102 if (bond_proc_dir->owner == THIS_MODULE) { 3103 bond_proc_dir->owner = NULL; 3104 } 3105 } else { 3106 remove_proc_entry(DRV_NAME, proc_net); 3107 bond_proc_dir = NULL; 3108 } 3109 } 3110 #endif /* CONFIG_PROC_FS */ 3111 3112 /*-------------------------- netdev event handling --------------------------*/ 3113 3114 /* 3115 * Change device name 3116 */ 3117 static int bond_event_changename(struct bonding *bond) 3118 { 3119 #ifdef CONFIG_PROC_FS 3120 bond_remove_proc_entry(bond); 3121 bond_create_proc_entry(bond); 3122 #endif 3123 down_write(&(bonding_rwsem)); 3124 bond_destroy_sysfs_entry(bond); 3125 bond_create_sysfs_entry(bond); 3126 up_write(&(bonding_rwsem)); 3127 return NOTIFY_DONE; 3128 } 3129 3130 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3131 { 3132 struct bonding *event_bond = bond_dev->priv; 3133 3134 switch (event) { 3135 case NETDEV_CHANGENAME: 3136 return bond_event_changename(event_bond); 3137 case NETDEV_UNREGISTER: 3138 /* 3139 * TODO: remove a bond from the list? 3140 */ 3141 break; 3142 default: 3143 break; 3144 } 3145 3146 return NOTIFY_DONE; 3147 } 3148 3149 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3150 { 3151 struct net_device *bond_dev = slave_dev->master; 3152 struct bonding *bond = bond_dev->priv; 3153 3154 switch (event) { 3155 case NETDEV_UNREGISTER: 3156 if (bond_dev) { 3157 bond_release(bond_dev, slave_dev); 3158 } 3159 break; 3160 case NETDEV_CHANGE: 3161 /* 3162 * TODO: is this what we get if somebody 3163 * sets up a hierarchical bond, then rmmod's 3164 * one of the slave bonding devices? 3165 */ 3166 break; 3167 case NETDEV_DOWN: 3168 /* 3169 * ... Or is it this? 3170 */ 3171 break; 3172 case NETDEV_CHANGEMTU: 3173 /* 3174 * TODO: Should slaves be allowed to 3175 * independently alter their MTU? For 3176 * an active-backup bond, slaves need 3177 * not be the same type of device, so 3178 * MTUs may vary. For other modes, 3179 * slaves arguably should have the 3180 * same MTUs. To do this, we'd need to 3181 * take over the slave's change_mtu 3182 * function for the duration of their 3183 * servitude. 3184 */ 3185 break; 3186 case NETDEV_CHANGENAME: 3187 /* 3188 * TODO: handle changing the primary's name 3189 */ 3190 break; 3191 case NETDEV_FEAT_CHANGE: 3192 bond_compute_features(bond); 3193 break; 3194 default: 3195 break; 3196 } 3197 3198 return NOTIFY_DONE; 3199 } 3200 3201 /* 3202 * bond_netdev_event: handle netdev notifier chain events. 3203 * 3204 * This function receives events for the netdev chain. The caller (an 3205 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 3206 * locks for us to safely manipulate the slave devices (RTNL lock, 3207 * dev_probe_lock). 3208 */ 3209 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3210 { 3211 struct net_device *event_dev = (struct net_device *)ptr; 3212 3213 dprintk("event_dev: %s, event: %lx\n", 3214 (event_dev ? event_dev->name : "None"), 3215 event); 3216 3217 if (event_dev->flags & IFF_MASTER) { 3218 dprintk("IFF_MASTER\n"); 3219 return bond_master_netdev_event(event, event_dev); 3220 } 3221 3222 if (event_dev->flags & IFF_SLAVE) { 3223 dprintk("IFF_SLAVE\n"); 3224 return bond_slave_netdev_event(event, event_dev); 3225 } 3226 3227 return NOTIFY_DONE; 3228 } 3229 3230 /* 3231 * bond_inetaddr_event: handle inetaddr notifier chain events. 3232 * 3233 * We keep track of device IPs primarily to use as source addresses in 3234 * ARP monitor probes (rather than spewing out broadcasts all the time). 3235 * 3236 * We track one IP for the main device (if it has one), plus one per VLAN. 3237 */ 3238 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3239 { 3240 struct in_ifaddr *ifa = ptr; 3241 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3242 struct bonding *bond, *bond_next; 3243 struct vlan_entry *vlan, *vlan_next; 3244 3245 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3246 if (bond->dev == event_dev) { 3247 switch (event) { 3248 case NETDEV_UP: 3249 bond->master_ip = ifa->ifa_local; 3250 return NOTIFY_OK; 3251 case NETDEV_DOWN: 3252 bond->master_ip = bond_glean_dev_ip(bond->dev); 3253 return NOTIFY_OK; 3254 default: 3255 return NOTIFY_DONE; 3256 } 3257 } 3258 3259 if (list_empty(&bond->vlan_list)) 3260 continue; 3261 3262 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3263 vlan_list) { 3264 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 3265 if (vlan_dev == event_dev) { 3266 switch (event) { 3267 case NETDEV_UP: 3268 vlan->vlan_ip = ifa->ifa_local; 3269 return NOTIFY_OK; 3270 case NETDEV_DOWN: 3271 vlan->vlan_ip = 3272 bond_glean_dev_ip(vlan_dev); 3273 return NOTIFY_OK; 3274 default: 3275 return NOTIFY_DONE; 3276 } 3277 } 3278 } 3279 } 3280 return NOTIFY_DONE; 3281 } 3282 3283 static struct notifier_block bond_netdev_notifier = { 3284 .notifier_call = bond_netdev_event, 3285 }; 3286 3287 static struct notifier_block bond_inetaddr_notifier = { 3288 .notifier_call = bond_inetaddr_event, 3289 }; 3290 3291 /*-------------------------- Packet type handling ---------------------------*/ 3292 3293 /* register to receive lacpdus on a bond */ 3294 static void bond_register_lacpdu(struct bonding *bond) 3295 { 3296 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3297 3298 /* initialize packet type */ 3299 pk_type->type = PKT_TYPE_LACPDU; 3300 pk_type->dev = bond->dev; 3301 pk_type->func = bond_3ad_lacpdu_recv; 3302 3303 dev_add_pack(pk_type); 3304 } 3305 3306 /* unregister to receive lacpdus on a bond */ 3307 static void bond_unregister_lacpdu(struct bonding *bond) 3308 { 3309 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3310 } 3311 3312 /*---------------------------- Hashing Policies -----------------------------*/ 3313 3314 /* 3315 * Hash for the the output device based upon layer 3 and layer 4 data. If 3316 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3317 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3318 */ 3319 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3320 struct net_device *bond_dev, int count) 3321 { 3322 struct ethhdr *data = (struct ethhdr *)skb->data; 3323 struct iphdr *iph = skb->nh.iph; 3324 u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl); 3325 int layer4_xor = 0; 3326 3327 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3328 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3329 (iph->protocol == IPPROTO_TCP || 3330 iph->protocol == IPPROTO_UDP)) { 3331 layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1))); 3332 } 3333 return (layer4_xor ^ 3334 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3335 3336 } 3337 3338 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3339 } 3340 3341 /* 3342 * Hash for the output device based upon layer 2 data 3343 */ 3344 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3345 struct net_device *bond_dev, int count) 3346 { 3347 struct ethhdr *data = (struct ethhdr *)skb->data; 3348 3349 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3350 } 3351 3352 /*-------------------------- Device entry points ----------------------------*/ 3353 3354 static int bond_open(struct net_device *bond_dev) 3355 { 3356 struct bonding *bond = bond_dev->priv; 3357 struct timer_list *mii_timer = &bond->mii_timer; 3358 struct timer_list *arp_timer = &bond->arp_timer; 3359 3360 bond->kill_timers = 0; 3361 3362 if ((bond->params.mode == BOND_MODE_TLB) || 3363 (bond->params.mode == BOND_MODE_ALB)) { 3364 struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); 3365 3366 /* bond_alb_initialize must be called before the timer 3367 * is started. 3368 */ 3369 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3370 /* something went wrong - fail the open operation */ 3371 return -1; 3372 } 3373 3374 init_timer(alb_timer); 3375 alb_timer->expires = jiffies + 1; 3376 alb_timer->data = (unsigned long)bond; 3377 alb_timer->function = (void *)&bond_alb_monitor; 3378 add_timer(alb_timer); 3379 } 3380 3381 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3382 init_timer(mii_timer); 3383 mii_timer->expires = jiffies + 1; 3384 mii_timer->data = (unsigned long)bond_dev; 3385 mii_timer->function = (void *)&bond_mii_monitor; 3386 add_timer(mii_timer); 3387 } 3388 3389 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3390 init_timer(arp_timer); 3391 arp_timer->expires = jiffies + 1; 3392 arp_timer->data = (unsigned long)bond_dev; 3393 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 3394 arp_timer->function = (void *)&bond_activebackup_arp_mon; 3395 } else { 3396 arp_timer->function = (void *)&bond_loadbalance_arp_mon; 3397 } 3398 add_timer(arp_timer); 3399 } 3400 3401 if (bond->params.mode == BOND_MODE_8023AD) { 3402 struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); 3403 init_timer(ad_timer); 3404 ad_timer->expires = jiffies + 1; 3405 ad_timer->data = (unsigned long)bond; 3406 ad_timer->function = (void *)&bond_3ad_state_machine_handler; 3407 add_timer(ad_timer); 3408 3409 /* register to receive LACPDUs */ 3410 bond_register_lacpdu(bond); 3411 } 3412 3413 return 0; 3414 } 3415 3416 static int bond_close(struct net_device *bond_dev) 3417 { 3418 struct bonding *bond = bond_dev->priv; 3419 3420 if (bond->params.mode == BOND_MODE_8023AD) { 3421 /* Unregister the receive of LACPDUs */ 3422 bond_unregister_lacpdu(bond); 3423 } 3424 3425 write_lock_bh(&bond->lock); 3426 3427 bond_mc_list_destroy(bond); 3428 3429 /* signal timers not to re-arm */ 3430 bond->kill_timers = 1; 3431 3432 write_unlock_bh(&bond->lock); 3433 3434 /* del_timer_sync must run without holding the bond->lock 3435 * because a running timer might be trying to hold it too 3436 */ 3437 3438 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3439 del_timer_sync(&bond->mii_timer); 3440 } 3441 3442 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3443 del_timer_sync(&bond->arp_timer); 3444 } 3445 3446 switch (bond->params.mode) { 3447 case BOND_MODE_8023AD: 3448 del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); 3449 break; 3450 case BOND_MODE_TLB: 3451 case BOND_MODE_ALB: 3452 del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); 3453 break; 3454 default: 3455 break; 3456 } 3457 3458 /* Release the bonded slaves */ 3459 bond_release_all(bond_dev); 3460 3461 if ((bond->params.mode == BOND_MODE_TLB) || 3462 (bond->params.mode == BOND_MODE_ALB)) { 3463 /* Must be called only after all 3464 * slaves have been released 3465 */ 3466 bond_alb_deinitialize(bond); 3467 } 3468 3469 return 0; 3470 } 3471 3472 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3473 { 3474 struct bonding *bond = bond_dev->priv; 3475 struct net_device_stats *stats = &(bond->stats), *sstats; 3476 struct slave *slave; 3477 int i; 3478 3479 memset(stats, 0, sizeof(struct net_device_stats)); 3480 3481 read_lock_bh(&bond->lock); 3482 3483 bond_for_each_slave(bond, slave, i) { 3484 sstats = slave->dev->get_stats(slave->dev); 3485 3486 stats->rx_packets += sstats->rx_packets; 3487 stats->rx_bytes += sstats->rx_bytes; 3488 stats->rx_errors += sstats->rx_errors; 3489 stats->rx_dropped += sstats->rx_dropped; 3490 3491 stats->tx_packets += sstats->tx_packets; 3492 stats->tx_bytes += sstats->tx_bytes; 3493 stats->tx_errors += sstats->tx_errors; 3494 stats->tx_dropped += sstats->tx_dropped; 3495 3496 stats->multicast += sstats->multicast; 3497 stats->collisions += sstats->collisions; 3498 3499 stats->rx_length_errors += sstats->rx_length_errors; 3500 stats->rx_over_errors += sstats->rx_over_errors; 3501 stats->rx_crc_errors += sstats->rx_crc_errors; 3502 stats->rx_frame_errors += sstats->rx_frame_errors; 3503 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3504 stats->rx_missed_errors += sstats->rx_missed_errors; 3505 3506 stats->tx_aborted_errors += sstats->tx_aborted_errors; 3507 stats->tx_carrier_errors += sstats->tx_carrier_errors; 3508 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3509 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3510 stats->tx_window_errors += sstats->tx_window_errors; 3511 } 3512 3513 read_unlock_bh(&bond->lock); 3514 3515 return stats; 3516 } 3517 3518 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3519 { 3520 struct net_device *slave_dev = NULL; 3521 struct ifbond k_binfo; 3522 struct ifbond __user *u_binfo = NULL; 3523 struct ifslave k_sinfo; 3524 struct ifslave __user *u_sinfo = NULL; 3525 struct mii_ioctl_data *mii = NULL; 3526 int res = 0; 3527 3528 dprintk("bond_ioctl: master=%s, cmd=%d\n", 3529 bond_dev->name, cmd); 3530 3531 switch (cmd) { 3532 case SIOCGMIIPHY: 3533 mii = if_mii(ifr); 3534 if (!mii) { 3535 return -EINVAL; 3536 } 3537 mii->phy_id = 0; 3538 /* Fall Through */ 3539 case SIOCGMIIREG: 3540 /* 3541 * We do this again just in case we were called by SIOCGMIIREG 3542 * instead of SIOCGMIIPHY. 3543 */ 3544 mii = if_mii(ifr); 3545 if (!mii) { 3546 return -EINVAL; 3547 } 3548 3549 if (mii->reg_num == 1) { 3550 struct bonding *bond = bond_dev->priv; 3551 mii->val_out = 0; 3552 read_lock_bh(&bond->lock); 3553 read_lock(&bond->curr_slave_lock); 3554 if (bond->curr_active_slave) { 3555 mii->val_out = BMSR_LSTATUS; 3556 } 3557 read_unlock(&bond->curr_slave_lock); 3558 read_unlock_bh(&bond->lock); 3559 } 3560 3561 return 0; 3562 case BOND_INFO_QUERY_OLD: 3563 case SIOCBONDINFOQUERY: 3564 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3565 3566 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { 3567 return -EFAULT; 3568 } 3569 3570 res = bond_info_query(bond_dev, &k_binfo); 3571 if (res == 0) { 3572 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { 3573 return -EFAULT; 3574 } 3575 } 3576 3577 return res; 3578 case BOND_SLAVE_INFO_QUERY_OLD: 3579 case SIOCBONDSLAVEINFOQUERY: 3580 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3581 3582 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { 3583 return -EFAULT; 3584 } 3585 3586 res = bond_slave_info_query(bond_dev, &k_sinfo); 3587 if (res == 0) { 3588 if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { 3589 return -EFAULT; 3590 } 3591 } 3592 3593 return res; 3594 default: 3595 /* Go on */ 3596 break; 3597 } 3598 3599 if (!capable(CAP_NET_ADMIN)) { 3600 return -EPERM; 3601 } 3602 3603 down_write(&(bonding_rwsem)); 3604 slave_dev = dev_get_by_name(ifr->ifr_slave); 3605 3606 dprintk("slave_dev=%p: \n", slave_dev); 3607 3608 if (!slave_dev) { 3609 res = -ENODEV; 3610 } else { 3611 dprintk("slave_dev->name=%s: \n", slave_dev->name); 3612 switch (cmd) { 3613 case BOND_ENSLAVE_OLD: 3614 case SIOCBONDENSLAVE: 3615 res = bond_enslave(bond_dev, slave_dev); 3616 break; 3617 case BOND_RELEASE_OLD: 3618 case SIOCBONDRELEASE: 3619 res = bond_release(bond_dev, slave_dev); 3620 break; 3621 case BOND_SETHWADDR_OLD: 3622 case SIOCBONDSETHWADDR: 3623 res = bond_sethwaddr(bond_dev, slave_dev); 3624 break; 3625 case BOND_CHANGE_ACTIVE_OLD: 3626 case SIOCBONDCHANGEACTIVE: 3627 res = bond_ioctl_change_active(bond_dev, slave_dev); 3628 break; 3629 default: 3630 res = -EOPNOTSUPP; 3631 } 3632 3633 dev_put(slave_dev); 3634 } 3635 3636 up_write(&(bonding_rwsem)); 3637 return res; 3638 } 3639 3640 static void bond_set_multicast_list(struct net_device *bond_dev) 3641 { 3642 struct bonding *bond = bond_dev->priv; 3643 struct dev_mc_list *dmi; 3644 3645 write_lock_bh(&bond->lock); 3646 3647 /* 3648 * Do promisc before checking multicast_mode 3649 */ 3650 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { 3651 bond_set_promiscuity(bond, 1); 3652 } 3653 3654 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { 3655 bond_set_promiscuity(bond, -1); 3656 } 3657 3658 /* set allmulti flag to slaves */ 3659 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { 3660 bond_set_allmulti(bond, 1); 3661 } 3662 3663 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { 3664 bond_set_allmulti(bond, -1); 3665 } 3666 3667 bond->flags = bond_dev->flags; 3668 3669 /* looking for addresses to add to slaves' mc list */ 3670 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3671 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { 3672 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3673 } 3674 } 3675 3676 /* looking for addresses to delete from slaves' list */ 3677 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3678 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { 3679 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3680 } 3681 } 3682 3683 /* save master's multicast list */ 3684 bond_mc_list_destroy(bond); 3685 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3686 3687 write_unlock_bh(&bond->lock); 3688 } 3689 3690 /* 3691 * Change the MTU of all of a master's slaves to match the master 3692 */ 3693 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3694 { 3695 struct bonding *bond = bond_dev->priv; 3696 struct slave *slave, *stop_at; 3697 int res = 0; 3698 int i; 3699 3700 dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, 3701 (bond_dev ? bond_dev->name : "None"), new_mtu); 3702 3703 /* Can't hold bond->lock with bh disabled here since 3704 * some base drivers panic. On the other hand we can't 3705 * hold bond->lock without bh disabled because we'll 3706 * deadlock. The only solution is to rely on the fact 3707 * that we're under rtnl_lock here, and the slaves 3708 * list won't change. This doesn't solve the problem 3709 * of setting the slave's MTU while it is 3710 * transmitting, but the assumption is that the base 3711 * driver can handle that. 3712 * 3713 * TODO: figure out a way to safely iterate the slaves 3714 * list, but without holding a lock around the actual 3715 * call to the base driver. 3716 */ 3717 3718 bond_for_each_slave(bond, slave, i) { 3719 dprintk("s %p s->p %p c_m %p\n", slave, 3720 slave->prev, slave->dev->change_mtu); 3721 3722 res = dev_set_mtu(slave->dev, new_mtu); 3723 3724 if (res) { 3725 /* If we failed to set the slave's mtu to the new value 3726 * we must abort the operation even in ACTIVE_BACKUP 3727 * mode, because if we allow the backup slaves to have 3728 * different mtu values than the active slave we'll 3729 * need to change their mtu when doing a failover. That 3730 * means changing their mtu from timer context, which 3731 * is probably not a good idea. 3732 */ 3733 dprintk("err %d %s\n", res, slave->dev->name); 3734 goto unwind; 3735 } 3736 } 3737 3738 bond_dev->mtu = new_mtu; 3739 3740 return 0; 3741 3742 unwind: 3743 /* unwind from head to the slave that failed */ 3744 stop_at = slave; 3745 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 3746 int tmp_res; 3747 3748 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 3749 if (tmp_res) { 3750 dprintk("unwind err %d dev %s\n", tmp_res, 3751 slave->dev->name); 3752 } 3753 } 3754 3755 return res; 3756 } 3757 3758 /* 3759 * Change HW address 3760 * 3761 * Note that many devices must be down to change the HW address, and 3762 * downing the master releases all slaves. We can make bonds full of 3763 * bonding devices to test this, however. 3764 */ 3765 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 3766 { 3767 struct bonding *bond = bond_dev->priv; 3768 struct sockaddr *sa = addr, tmp_sa; 3769 struct slave *slave, *stop_at; 3770 int res = 0; 3771 int i; 3772 3773 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 3774 3775 if (!is_valid_ether_addr(sa->sa_data)) { 3776 return -EADDRNOTAVAIL; 3777 } 3778 3779 /* Can't hold bond->lock with bh disabled here since 3780 * some base drivers panic. On the other hand we can't 3781 * hold bond->lock without bh disabled because we'll 3782 * deadlock. The only solution is to rely on the fact 3783 * that we're under rtnl_lock here, and the slaves 3784 * list won't change. This doesn't solve the problem 3785 * of setting the slave's hw address while it is 3786 * transmitting, but the assumption is that the base 3787 * driver can handle that. 3788 * 3789 * TODO: figure out a way to safely iterate the slaves 3790 * list, but without holding a lock around the actual 3791 * call to the base driver. 3792 */ 3793 3794 bond_for_each_slave(bond, slave, i) { 3795 dprintk("slave %p %s\n", slave, slave->dev->name); 3796 3797 if (slave->dev->set_mac_address == NULL) { 3798 res = -EOPNOTSUPP; 3799 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 3800 goto unwind; 3801 } 3802 3803 res = dev_set_mac_address(slave->dev, addr); 3804 if (res) { 3805 /* TODO: consider downing the slave 3806 * and retry ? 3807 * User should expect communications 3808 * breakage anyway until ARP finish 3809 * updating, so... 3810 */ 3811 dprintk("err %d %s\n", res, slave->dev->name); 3812 goto unwind; 3813 } 3814 } 3815 3816 /* success */ 3817 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 3818 return 0; 3819 3820 unwind: 3821 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 3822 tmp_sa.sa_family = bond_dev->type; 3823 3824 /* unwind from head to the slave that failed */ 3825 stop_at = slave; 3826 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 3827 int tmp_res; 3828 3829 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 3830 if (tmp_res) { 3831 dprintk("unwind err %d dev %s\n", tmp_res, 3832 slave->dev->name); 3833 } 3834 } 3835 3836 return res; 3837 } 3838 3839 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 3840 { 3841 struct bonding *bond = bond_dev->priv; 3842 struct slave *slave, *start_at; 3843 int i; 3844 int res = 1; 3845 3846 read_lock(&bond->lock); 3847 3848 if (!BOND_IS_OK(bond)) { 3849 goto out; 3850 } 3851 3852 read_lock(&bond->curr_slave_lock); 3853 slave = start_at = bond->curr_active_slave; 3854 read_unlock(&bond->curr_slave_lock); 3855 3856 if (!slave) { 3857 goto out; 3858 } 3859 3860 bond_for_each_slave_from(bond, slave, i, start_at) { 3861 if (IS_UP(slave->dev) && 3862 (slave->link == BOND_LINK_UP) && 3863 (slave->state == BOND_STATE_ACTIVE)) { 3864 res = bond_dev_queue_xmit(bond, skb, slave->dev); 3865 3866 write_lock(&bond->curr_slave_lock); 3867 bond->curr_active_slave = slave->next; 3868 write_unlock(&bond->curr_slave_lock); 3869 3870 break; 3871 } 3872 } 3873 3874 3875 out: 3876 if (res) { 3877 /* no suitable interface, frame not sent */ 3878 dev_kfree_skb(skb); 3879 } 3880 read_unlock(&bond->lock); 3881 return 0; 3882 } 3883 3884 static void bond_activebackup_xmit_copy(struct sk_buff *skb, 3885 struct bonding *bond, 3886 struct slave *slave) 3887 { 3888 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); 3889 struct ethhdr *eth_data; 3890 u8 *hwaddr; 3891 int res; 3892 3893 if (!skb2) { 3894 printk(KERN_ERR DRV_NAME ": Error: " 3895 "bond_activebackup_xmit_copy(): skb_copy() failed\n"); 3896 return; 3897 } 3898 3899 skb2->mac.raw = (unsigned char *)skb2->data; 3900 eth_data = eth_hdr(skb2); 3901 3902 /* Pick an appropriate source MAC address 3903 * -- use slave's perm MAC addr, unless used by bond 3904 * -- otherwise, borrow active slave's perm MAC addr 3905 * since that will not be used 3906 */ 3907 hwaddr = slave->perm_hwaddr; 3908 if (!memcmp(eth_data->h_source, hwaddr, ETH_ALEN)) 3909 hwaddr = bond->curr_active_slave->perm_hwaddr; 3910 3911 /* Set source MAC address appropriately */ 3912 memcpy(eth_data->h_source, hwaddr, ETH_ALEN); 3913 3914 res = bond_dev_queue_xmit(bond, skb2, slave->dev); 3915 if (res) 3916 dev_kfree_skb(skb2); 3917 3918 return; 3919 } 3920 3921 /* 3922 * in active-backup mode, we know that bond->curr_active_slave is always valid if 3923 * the bond has a usable interface. 3924 */ 3925 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 3926 { 3927 struct bonding *bond = bond_dev->priv; 3928 int res = 1; 3929 3930 read_lock(&bond->lock); 3931 read_lock(&bond->curr_slave_lock); 3932 3933 if (!BOND_IS_OK(bond)) { 3934 goto out; 3935 } 3936 3937 if (!bond->curr_active_slave) 3938 goto out; 3939 3940 /* Xmit IGMP frames on all slaves to ensure rapid fail-over 3941 for multicast traffic on snooping switches */ 3942 if (skb->protocol == __constant_htons(ETH_P_IP) && 3943 skb->nh.iph->protocol == IPPROTO_IGMP) { 3944 struct slave *slave, *active_slave; 3945 int i; 3946 3947 active_slave = bond->curr_active_slave; 3948 bond_for_each_slave_from_to(bond, slave, i, active_slave->next, 3949 active_slave->prev) 3950 if (IS_UP(slave->dev) && 3951 (slave->link == BOND_LINK_UP)) 3952 bond_activebackup_xmit_copy(skb, bond, slave); 3953 } 3954 3955 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 3956 3957 out: 3958 if (res) { 3959 /* no suitable interface, frame not sent */ 3960 dev_kfree_skb(skb); 3961 } 3962 read_unlock(&bond->curr_slave_lock); 3963 read_unlock(&bond->lock); 3964 return 0; 3965 } 3966 3967 /* 3968 * In bond_xmit_xor() , we determine the output device by using a pre- 3969 * determined xmit_hash_policy(), If the selected device is not enabled, 3970 * find the next active slave. 3971 */ 3972 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 3973 { 3974 struct bonding *bond = bond_dev->priv; 3975 struct slave *slave, *start_at; 3976 int slave_no; 3977 int i; 3978 int res = 1; 3979 3980 read_lock(&bond->lock); 3981 3982 if (!BOND_IS_OK(bond)) { 3983 goto out; 3984 } 3985 3986 slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); 3987 3988 bond_for_each_slave(bond, slave, i) { 3989 slave_no--; 3990 if (slave_no < 0) { 3991 break; 3992 } 3993 } 3994 3995 start_at = slave; 3996 3997 bond_for_each_slave_from(bond, slave, i, start_at) { 3998 if (IS_UP(slave->dev) && 3999 (slave->link == BOND_LINK_UP) && 4000 (slave->state == BOND_STATE_ACTIVE)) { 4001 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4002 break; 4003 } 4004 } 4005 4006 out: 4007 if (res) { 4008 /* no suitable interface, frame not sent */ 4009 dev_kfree_skb(skb); 4010 } 4011 read_unlock(&bond->lock); 4012 return 0; 4013 } 4014 4015 /* 4016 * in broadcast mode, we send everything to all usable interfaces. 4017 */ 4018 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) 4019 { 4020 struct bonding *bond = bond_dev->priv; 4021 struct slave *slave, *start_at; 4022 struct net_device *tx_dev = NULL; 4023 int i; 4024 int res = 1; 4025 4026 read_lock(&bond->lock); 4027 4028 if (!BOND_IS_OK(bond)) { 4029 goto out; 4030 } 4031 4032 read_lock(&bond->curr_slave_lock); 4033 start_at = bond->curr_active_slave; 4034 read_unlock(&bond->curr_slave_lock); 4035 4036 if (!start_at) { 4037 goto out; 4038 } 4039 4040 bond_for_each_slave_from(bond, slave, i, start_at) { 4041 if (IS_UP(slave->dev) && 4042 (slave->link == BOND_LINK_UP) && 4043 (slave->state == BOND_STATE_ACTIVE)) { 4044 if (tx_dev) { 4045 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 4046 if (!skb2) { 4047 printk(KERN_ERR DRV_NAME 4048 ": %s: Error: bond_xmit_broadcast(): " 4049 "skb_clone() failed\n", 4050 bond_dev->name); 4051 continue; 4052 } 4053 4054 res = bond_dev_queue_xmit(bond, skb2, tx_dev); 4055 if (res) { 4056 dev_kfree_skb(skb2); 4057 continue; 4058 } 4059 } 4060 tx_dev = slave->dev; 4061 } 4062 } 4063 4064 if (tx_dev) { 4065 res = bond_dev_queue_xmit(bond, skb, tx_dev); 4066 } 4067 4068 out: 4069 if (res) { 4070 /* no suitable interface, frame not sent */ 4071 dev_kfree_skb(skb); 4072 } 4073 /* frame sent to all suitable interfaces */ 4074 read_unlock(&bond->lock); 4075 return 0; 4076 } 4077 4078 /*------------------------- Device initialization ---------------------------*/ 4079 4080 /* 4081 * set bond mode specific net device operations 4082 */ 4083 void bond_set_mode_ops(struct bonding *bond, int mode) 4084 { 4085 struct net_device *bond_dev = bond->dev; 4086 4087 switch (mode) { 4088 case BOND_MODE_ROUNDROBIN: 4089 bond_dev->hard_start_xmit = bond_xmit_roundrobin; 4090 break; 4091 case BOND_MODE_ACTIVEBACKUP: 4092 bond_dev->hard_start_xmit = bond_xmit_activebackup; 4093 break; 4094 case BOND_MODE_XOR: 4095 bond_dev->hard_start_xmit = bond_xmit_xor; 4096 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) 4097 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4098 else 4099 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4100 break; 4101 case BOND_MODE_BROADCAST: 4102 bond_dev->hard_start_xmit = bond_xmit_broadcast; 4103 break; 4104 case BOND_MODE_8023AD: 4105 bond_set_master_3ad_flags(bond); 4106 bond_dev->hard_start_xmit = bond_3ad_xmit_xor; 4107 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) 4108 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4109 else 4110 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4111 break; 4112 case BOND_MODE_ALB: 4113 bond_set_master_alb_flags(bond); 4114 /* FALLTHRU */ 4115 case BOND_MODE_TLB: 4116 bond_dev->hard_start_xmit = bond_alb_xmit; 4117 bond_dev->set_mac_address = bond_alb_set_mac_address; 4118 break; 4119 default: 4120 /* Should never happen, mode already checked */ 4121 printk(KERN_ERR DRV_NAME 4122 ": %s: Error: Unknown bonding mode %d\n", 4123 bond_dev->name, 4124 mode); 4125 break; 4126 } 4127 } 4128 4129 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, 4130 struct ethtool_drvinfo *drvinfo) 4131 { 4132 strncpy(drvinfo->driver, DRV_NAME, 32); 4133 strncpy(drvinfo->version, DRV_VERSION, 32); 4134 snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); 4135 } 4136 4137 static struct ethtool_ops bond_ethtool_ops = { 4138 .get_tx_csum = ethtool_op_get_tx_csum, 4139 .get_tso = ethtool_op_get_tso, 4140 .get_ufo = ethtool_op_get_ufo, 4141 .get_sg = ethtool_op_get_sg, 4142 .get_drvinfo = bond_ethtool_get_drvinfo, 4143 }; 4144 4145 /* 4146 * Does not allocate but creates a /proc entry. 4147 * Allowed to fail. 4148 */ 4149 static int bond_init(struct net_device *bond_dev, struct bond_params *params) 4150 { 4151 struct bonding *bond = bond_dev->priv; 4152 4153 dprintk("Begin bond_init for %s\n", bond_dev->name); 4154 4155 /* initialize rwlocks */ 4156 rwlock_init(&bond->lock); 4157 rwlock_init(&bond->curr_slave_lock); 4158 4159 bond->params = *params; /* copy params struct */ 4160 4161 /* Initialize pointers */ 4162 bond->first_slave = NULL; 4163 bond->curr_active_slave = NULL; 4164 bond->current_arp_slave = NULL; 4165 bond->primary_slave = NULL; 4166 bond->dev = bond_dev; 4167 INIT_LIST_HEAD(&bond->vlan_list); 4168 4169 /* Initialize the device entry points */ 4170 bond_dev->open = bond_open; 4171 bond_dev->stop = bond_close; 4172 bond_dev->get_stats = bond_get_stats; 4173 bond_dev->do_ioctl = bond_do_ioctl; 4174 bond_dev->ethtool_ops = &bond_ethtool_ops; 4175 bond_dev->set_multicast_list = bond_set_multicast_list; 4176 bond_dev->change_mtu = bond_change_mtu; 4177 bond_dev->set_mac_address = bond_set_mac_address; 4178 4179 bond_set_mode_ops(bond, bond->params.mode); 4180 4181 bond_dev->destructor = free_netdev; 4182 4183 /* Initialize the device options */ 4184 bond_dev->tx_queue_len = 0; 4185 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; 4186 4187 /* At first, we block adding VLANs. That's the only way to 4188 * prevent problems that occur when adding VLANs over an 4189 * empty bond. The block will be removed once non-challenged 4190 * slaves are enslaved. 4191 */ 4192 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 4193 4194 /* don't acquire bond device's xmit_lock when 4195 * transmitting */ 4196 bond_dev->features |= NETIF_F_LLTX; 4197 4198 /* By default, we declare the bond to be fully 4199 * VLAN hardware accelerated capable. Special 4200 * care is taken in the various xmit functions 4201 * when there are slaves that are not hw accel 4202 * capable 4203 */ 4204 bond_dev->vlan_rx_register = bond_vlan_rx_register; 4205 bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid; 4206 bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid; 4207 bond_dev->features |= (NETIF_F_HW_VLAN_TX | 4208 NETIF_F_HW_VLAN_RX | 4209 NETIF_F_HW_VLAN_FILTER); 4210 4211 #ifdef CONFIG_PROC_FS 4212 bond_create_proc_entry(bond); 4213 #endif 4214 4215 list_add_tail(&bond->bond_list, &bond_dev_list); 4216 4217 return 0; 4218 } 4219 4220 /* De-initialize device specific data. 4221 * Caller must hold rtnl_lock. 4222 */ 4223 void bond_deinit(struct net_device *bond_dev) 4224 { 4225 struct bonding *bond = bond_dev->priv; 4226 4227 list_del(&bond->bond_list); 4228 4229 #ifdef CONFIG_PROC_FS 4230 bond_remove_proc_entry(bond); 4231 #endif 4232 } 4233 4234 /* Unregister and free all bond devices. 4235 * Caller must hold rtnl_lock. 4236 */ 4237 static void bond_free_all(void) 4238 { 4239 struct bonding *bond, *nxt; 4240 4241 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4242 struct net_device *bond_dev = bond->dev; 4243 4244 unregister_netdevice(bond_dev); 4245 bond_deinit(bond_dev); 4246 } 4247 4248 #ifdef CONFIG_PROC_FS 4249 bond_destroy_proc_dir(); 4250 #endif 4251 } 4252 4253 /*------------------------- Module initialization ---------------------------*/ 4254 4255 /* 4256 * Convert string input module parms. Accept either the 4257 * number of the mode or its string name. 4258 */ 4259 int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) 4260 { 4261 int i; 4262 4263 for (i = 0; tbl[i].modename; i++) { 4264 if ((isdigit(*mode_arg) && 4265 tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) || 4266 (strncmp(mode_arg, tbl[i].modename, 4267 strlen(tbl[i].modename)) == 0)) { 4268 return tbl[i].mode; 4269 } 4270 } 4271 4272 return -1; 4273 } 4274 4275 static int bond_check_params(struct bond_params *params) 4276 { 4277 /* 4278 * Convert string parameters. 4279 */ 4280 if (mode) { 4281 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 4282 if (bond_mode == -1) { 4283 printk(KERN_ERR DRV_NAME 4284 ": Error: Invalid bonding mode \"%s\"\n", 4285 mode == NULL ? "NULL" : mode); 4286 return -EINVAL; 4287 } 4288 } 4289 4290 if (xmit_hash_policy) { 4291 if ((bond_mode != BOND_MODE_XOR) && 4292 (bond_mode != BOND_MODE_8023AD)) { 4293 printk(KERN_INFO DRV_NAME 4294 ": xor_mode param is irrelevant in mode %s\n", 4295 bond_mode_name(bond_mode)); 4296 } else { 4297 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 4298 xmit_hashtype_tbl); 4299 if (xmit_hashtype == -1) { 4300 printk(KERN_ERR DRV_NAME 4301 ": Error: Invalid xmit_hash_policy \"%s\"\n", 4302 xmit_hash_policy == NULL ? "NULL" : 4303 xmit_hash_policy); 4304 return -EINVAL; 4305 } 4306 } 4307 } 4308 4309 if (lacp_rate) { 4310 if (bond_mode != BOND_MODE_8023AD) { 4311 printk(KERN_INFO DRV_NAME 4312 ": lacp_rate param is irrelevant in mode %s\n", 4313 bond_mode_name(bond_mode)); 4314 } else { 4315 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4316 if (lacp_fast == -1) { 4317 printk(KERN_ERR DRV_NAME 4318 ": Error: Invalid lacp rate \"%s\"\n", 4319 lacp_rate == NULL ? "NULL" : lacp_rate); 4320 return -EINVAL; 4321 } 4322 } 4323 } 4324 4325 if (max_bonds < 1 || max_bonds > INT_MAX) { 4326 printk(KERN_WARNING DRV_NAME 4327 ": Warning: max_bonds (%d) not in range %d-%d, so it " 4328 "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4329 max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4330 max_bonds = BOND_DEFAULT_MAX_BONDS; 4331 } 4332 4333 if (miimon < 0) { 4334 printk(KERN_WARNING DRV_NAME 4335 ": Warning: miimon module parameter (%d), " 4336 "not in range 0-%d, so it was reset to %d\n", 4337 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4338 miimon = BOND_LINK_MON_INTERV; 4339 } 4340 4341 if (updelay < 0) { 4342 printk(KERN_WARNING DRV_NAME 4343 ": Warning: updelay module parameter (%d), " 4344 "not in range 0-%d, so it was reset to 0\n", 4345 updelay, INT_MAX); 4346 updelay = 0; 4347 } 4348 4349 if (downdelay < 0) { 4350 printk(KERN_WARNING DRV_NAME 4351 ": Warning: downdelay module parameter (%d), " 4352 "not in range 0-%d, so it was reset to 0\n", 4353 downdelay, INT_MAX); 4354 downdelay = 0; 4355 } 4356 4357 if ((use_carrier != 0) && (use_carrier != 1)) { 4358 printk(KERN_WARNING DRV_NAME 4359 ": Warning: use_carrier module parameter (%d), " 4360 "not of valid value (0/1), so it was set to 1\n", 4361 use_carrier); 4362 use_carrier = 1; 4363 } 4364 4365 /* reset values for 802.3ad */ 4366 if (bond_mode == BOND_MODE_8023AD) { 4367 if (!miimon) { 4368 printk(KERN_WARNING DRV_NAME 4369 ": Warning: miimon must be specified, " 4370 "otherwise bonding will not detect link " 4371 "failure, speed and duplex which are " 4372 "essential for 802.3ad operation\n"); 4373 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4374 miimon = 100; 4375 } 4376 } 4377 4378 /* reset values for TLB/ALB */ 4379 if ((bond_mode == BOND_MODE_TLB) || 4380 (bond_mode == BOND_MODE_ALB)) { 4381 if (!miimon) { 4382 printk(KERN_WARNING DRV_NAME 4383 ": Warning: miimon must be specified, " 4384 "otherwise bonding will not detect link " 4385 "failure and link speed which are essential " 4386 "for TLB/ALB load balancing\n"); 4387 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4388 miimon = 100; 4389 } 4390 } 4391 4392 if (bond_mode == BOND_MODE_ALB) { 4393 printk(KERN_NOTICE DRV_NAME 4394 ": In ALB mode you might experience client " 4395 "disconnections upon reconnection of a link if the " 4396 "bonding module updelay parameter (%d msec) is " 4397 "incompatible with the forwarding delay time of the " 4398 "switch\n", 4399 updelay); 4400 } 4401 4402 if (!miimon) { 4403 if (updelay || downdelay) { 4404 /* just warn the user the up/down delay will have 4405 * no effect since miimon is zero... 4406 */ 4407 printk(KERN_WARNING DRV_NAME 4408 ": Warning: miimon module parameter not set " 4409 "and updelay (%d) or downdelay (%d) module " 4410 "parameter is set; updelay and downdelay have " 4411 "no effect unless miimon is set\n", 4412 updelay, downdelay); 4413 } 4414 } else { 4415 /* don't allow arp monitoring */ 4416 if (arp_interval) { 4417 printk(KERN_WARNING DRV_NAME 4418 ": Warning: miimon (%d) and arp_interval (%d) " 4419 "can't be used simultaneously, disabling ARP " 4420 "monitoring\n", 4421 miimon, arp_interval); 4422 arp_interval = 0; 4423 } 4424 4425 if ((updelay % miimon) != 0) { 4426 printk(KERN_WARNING DRV_NAME 4427 ": Warning: updelay (%d) is not a multiple " 4428 "of miimon (%d), updelay rounded to %d ms\n", 4429 updelay, miimon, (updelay / miimon) * miimon); 4430 } 4431 4432 updelay /= miimon; 4433 4434 if ((downdelay % miimon) != 0) { 4435 printk(KERN_WARNING DRV_NAME 4436 ": Warning: downdelay (%d) is not a multiple " 4437 "of miimon (%d), downdelay rounded to %d ms\n", 4438 downdelay, miimon, 4439 (downdelay / miimon) * miimon); 4440 } 4441 4442 downdelay /= miimon; 4443 } 4444 4445 if (arp_interval < 0) { 4446 printk(KERN_WARNING DRV_NAME 4447 ": Warning: arp_interval module parameter (%d) " 4448 ", not in range 0-%d, so it was reset to %d\n", 4449 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4450 arp_interval = BOND_LINK_ARP_INTERV; 4451 } 4452 4453 for (arp_ip_count = 0; 4454 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4455 arp_ip_count++) { 4456 /* not complete check, but should be good enough to 4457 catch mistakes */ 4458 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4459 printk(KERN_WARNING DRV_NAME 4460 ": Warning: bad arp_ip_target module parameter " 4461 "(%s), ARP monitoring will not be performed\n", 4462 arp_ip_target[arp_ip_count]); 4463 arp_interval = 0; 4464 } else { 4465 u32 ip = in_aton(arp_ip_target[arp_ip_count]); 4466 arp_target[arp_ip_count] = ip; 4467 } 4468 } 4469 4470 if (arp_interval && !arp_ip_count) { 4471 /* don't allow arping if no arp_ip_target given... */ 4472 printk(KERN_WARNING DRV_NAME 4473 ": Warning: arp_interval module parameter (%d) " 4474 "specified without providing an arp_ip_target " 4475 "parameter, arp_interval was reset to 0\n", 4476 arp_interval); 4477 arp_interval = 0; 4478 } 4479 4480 if (miimon) { 4481 printk(KERN_INFO DRV_NAME 4482 ": MII link monitoring set to %d ms\n", 4483 miimon); 4484 } else if (arp_interval) { 4485 int i; 4486 4487 printk(KERN_INFO DRV_NAME 4488 ": ARP monitoring set to %d ms with %d target(s):", 4489 arp_interval, arp_ip_count); 4490 4491 for (i = 0; i < arp_ip_count; i++) 4492 printk (" %s", arp_ip_target[i]); 4493 4494 printk("\n"); 4495 4496 } else { 4497 /* miimon and arp_interval not set, we need one so things 4498 * work as expected, see bonding.txt for details 4499 */ 4500 printk(KERN_WARNING DRV_NAME 4501 ": Warning: either miimon or arp_interval and " 4502 "arp_ip_target module parameters must be specified, " 4503 "otherwise bonding will not detect link failures! see " 4504 "bonding.txt for details.\n"); 4505 } 4506 4507 if (primary && !USES_PRIMARY(bond_mode)) { 4508 /* currently, using a primary only makes sense 4509 * in active backup, TLB or ALB modes 4510 */ 4511 printk(KERN_WARNING DRV_NAME 4512 ": Warning: %s primary device specified but has no " 4513 "effect in %s mode\n", 4514 primary, bond_mode_name(bond_mode)); 4515 primary = NULL; 4516 } 4517 4518 /* fill params struct with the proper values */ 4519 params->mode = bond_mode; 4520 params->xmit_policy = xmit_hashtype; 4521 params->miimon = miimon; 4522 params->arp_interval = arp_interval; 4523 params->updelay = updelay; 4524 params->downdelay = downdelay; 4525 params->use_carrier = use_carrier; 4526 params->lacp_fast = lacp_fast; 4527 params->primary[0] = 0; 4528 4529 if (primary) { 4530 strncpy(params->primary, primary, IFNAMSIZ); 4531 params->primary[IFNAMSIZ - 1] = 0; 4532 } 4533 4534 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4535 4536 return 0; 4537 } 4538 4539 /* Create a new bond based on the specified name and bonding parameters. 4540 * Caller must NOT hold rtnl_lock; we need to release it here before we 4541 * set up our sysfs entries. 4542 */ 4543 int bond_create(char *name, struct bond_params *params, struct bonding **newbond) 4544 { 4545 struct net_device *bond_dev; 4546 int res; 4547 4548 rtnl_lock(); 4549 bond_dev = alloc_netdev(sizeof(struct bonding), name, ether_setup); 4550 if (!bond_dev) { 4551 printk(KERN_ERR DRV_NAME 4552 ": %s: eek! can't alloc netdev!\n", 4553 name); 4554 res = -ENOMEM; 4555 goto out_rtnl; 4556 } 4557 4558 /* bond_init() must be called after dev_alloc_name() (for the 4559 * /proc files), but before register_netdevice(), because we 4560 * need to set function pointers. 4561 */ 4562 4563 res = bond_init(bond_dev, params); 4564 if (res < 0) { 4565 goto out_netdev; 4566 } 4567 4568 SET_MODULE_OWNER(bond_dev); 4569 4570 res = register_netdevice(bond_dev); 4571 if (res < 0) { 4572 goto out_bond; 4573 } 4574 if (newbond) 4575 *newbond = bond_dev->priv; 4576 4577 netif_carrier_off(bond_dev); 4578 4579 rtnl_unlock(); /* allows sysfs registration of net device */ 4580 res = bond_create_sysfs_entry(bond_dev->priv); 4581 goto done; 4582 out_bond: 4583 bond_deinit(bond_dev); 4584 out_netdev: 4585 free_netdev(bond_dev); 4586 out_rtnl: 4587 rtnl_unlock(); 4588 done: 4589 return res; 4590 } 4591 4592 static int __init bonding_init(void) 4593 { 4594 int i; 4595 int res; 4596 char new_bond_name[8]; /* Enough room for 999 bonds at init. */ 4597 4598 printk(KERN_INFO "%s", version); 4599 4600 res = bond_check_params(&bonding_defaults); 4601 if (res) { 4602 goto out; 4603 } 4604 4605 #ifdef CONFIG_PROC_FS 4606 bond_create_proc_dir(); 4607 #endif 4608 for (i = 0; i < max_bonds; i++) { 4609 sprintf(new_bond_name, "bond%d",i); 4610 res = bond_create(new_bond_name,&bonding_defaults, NULL); 4611 if (res) 4612 goto err; 4613 } 4614 4615 res = bond_create_sysfs(); 4616 if (res) 4617 goto err; 4618 4619 register_netdevice_notifier(&bond_netdev_notifier); 4620 register_inetaddr_notifier(&bond_inetaddr_notifier); 4621 4622 goto out; 4623 err: 4624 rtnl_lock(); 4625 bond_free_all(); 4626 bond_destroy_sysfs(); 4627 rtnl_unlock(); 4628 out: 4629 return res; 4630 4631 } 4632 4633 static void __exit bonding_exit(void) 4634 { 4635 unregister_netdevice_notifier(&bond_netdev_notifier); 4636 unregister_inetaddr_notifier(&bond_inetaddr_notifier); 4637 4638 rtnl_lock(); 4639 bond_free_all(); 4640 bond_destroy_sysfs(); 4641 rtnl_unlock(); 4642 } 4643 4644 module_init(bonding_init); 4645 module_exit(bonding_exit); 4646 MODULE_LICENSE("GPL"); 4647 MODULE_VERSION(DRV_VERSION); 4648 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 4649 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 4650 MODULE_SUPPORTED_DEVICE("most ethernet devices"); 4651 4652 /* 4653 * Local variables: 4654 * c-indent-level: 8 4655 * c-basic-offset: 8 4656 * tab-width: 8 4657 * End: 4658 */ 4659 4660