/*
 * originally based on the dummy device.
 *
 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
 *
 * bonding.c: an Ethernet Bonding driver
 *
 * This is useful to talk to Cisco EtherChannel compatible equipment:
 *	Cisco 5500
 *	Sun Trunking (Solaris)
 *	Alteon AceDirector Trunks
 *	Linux Bonding
 *	and probably many L2 switches ...
 *
 * How it works:
 *    ifconfig bond0 ipaddress netmask up
 *	will set up a network device with an ip address. No mac address
 *	will be assigned at this time. The hw mac address will come from
 *	the first slave bonded to the channel. All slaves will then use
 *	this hw mac address.
 *
 *    ifconfig bond0 down
 *	will release all slaves, marking them as down.
 *
 *    ifenslave bond0 eth0
 *	will attach eth0 to bond0 as a slave. eth0's hw mac address will either
 *	a: be used as the bond's initial mac address, or
 *	b: if bond0 already has a hw mac address, eth0's hw mac address
 *	   will be set from bond0.
 *
 */

//#define BONDING_DEBUG 1

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/bitops.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"

/*---------------------------- Module parameters ----------------------------*/

/* monitor all links at this interval (in milliseconds); <=0 disables monitoring */
#define BOND_LINK_MON_INTERV	0
#define BOND_LINK_ARP_INTERV	0

static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
static int miimon	= BOND_LINK_MON_INTERV;
static int updelay	= 0;
static int downdelay	= 0;
static int use_carrier	= 1;
static char *mode	= NULL;
static char *primary	= NULL;
static char *lacp_rate	= NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
static char *arp_validate = NULL;
struct bond_params bonding_defaults;

module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
			    "in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
			      "0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
		       "1 for active-backup, 2 for balance-xor, "
		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
		       "6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
			    "(slow/fast)");
module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)"
				   ", 1 for layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");

/*----------------------------- Global variables ----------------------------*/

static const char * const version =
	DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";

LIST_HEAD(bond_dev_list);

#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *bond_proc_dir = NULL;
#endif

extern struct rw_semaphore bonding_rwsem;
static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, };
static int arp_ip_count	= 0;
static int bond_mode	= BOND_MODE_ROUNDROBIN;
static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
static int lacp_fast	= 0;


struct bond_parm_tbl bond_lacp_tbl[] = {
{	"slow",		AD_LACP_SLOW},
{	"fast",		AD_LACP_FAST},
{	NULL,		-1},
};

struct bond_parm_tbl bond_mode_tbl[] = {
{	"balance-rr",		BOND_MODE_ROUNDROBIN},
{	"active-backup",	BOND_MODE_ACTIVEBACKUP},
{	"balance-xor",		BOND_MODE_XOR},
{	"broadcast",		BOND_MODE_BROADCAST},
{	"802.3ad",		BOND_MODE_8023AD},
{	"balance-tlb",		BOND_MODE_TLB},
{	"balance-alb",		BOND_MODE_ALB},
{	NULL,			-1},
};

struct bond_parm_tbl xmit_hashtype_tbl[] = {
{	"layer2",	BOND_XMIT_POLICY_LAYER2},
{	"layer3+4",	BOND_XMIT_POLICY_LAYER34},
{	NULL,		-1},
};

struct bond_parm_tbl arp_validate_tbl[] = {
178 { "none", BOND_ARP_VALIDATE_NONE}, 179 { "active", BOND_ARP_VALIDATE_ACTIVE}, 180 { "backup", BOND_ARP_VALIDATE_BACKUP}, 181 { "all", BOND_ARP_VALIDATE_ALL}, 182 { NULL, -1}, 183 }; 184 185 /*-------------------------- Forward declarations ---------------------------*/ 186 187 static void bond_send_gratuitous_arp(struct bonding *bond); 188 189 /*---------------------------- General routines -----------------------------*/ 190 191 static const char *bond_mode_name(int mode) 192 { 193 switch (mode) { 194 case BOND_MODE_ROUNDROBIN : 195 return "load balancing (round-robin)"; 196 case BOND_MODE_ACTIVEBACKUP : 197 return "fault-tolerance (active-backup)"; 198 case BOND_MODE_XOR : 199 return "load balancing (xor)"; 200 case BOND_MODE_BROADCAST : 201 return "fault-tolerance (broadcast)"; 202 case BOND_MODE_8023AD: 203 return "IEEE 802.3ad Dynamic link aggregation"; 204 case BOND_MODE_TLB: 205 return "transmit load balancing"; 206 case BOND_MODE_ALB: 207 return "adaptive load balancing"; 208 default: 209 return "unknown"; 210 } 211 } 212 213 /*---------------------------------- VLAN -----------------------------------*/ 214 215 /** 216 * bond_add_vlan - add a new vlan id on bond 217 * @bond: bond that got the notification 218 * @vlan_id: the vlan id to add 219 * 220 * Returns -ENOMEM if allocation failed. 221 */ 222 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 223 { 224 struct vlan_entry *vlan; 225 226 dprintk("bond: %s, vlan id %d\n", 227 (bond ? bond->dev->name: "None"), vlan_id); 228 229 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 230 if (!vlan) { 231 return -ENOMEM; 232 } 233 234 INIT_LIST_HEAD(&vlan->vlan_list); 235 vlan->vlan_id = vlan_id; 236 vlan->vlan_ip = 0; 237 238 write_lock_bh(&bond->lock); 239 240 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 241 242 write_unlock_bh(&bond->lock); 243 244 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 245 246 return 0; 247 } 248 249 /** 250 * bond_del_vlan - delete a vlan id from bond 251 * @bond: bond that got the notification 252 * @vlan_id: the vlan id to delete 253 * 254 * returns -ENODEV if @vlan_id was not found in @bond. 255 */ 256 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 257 { 258 struct vlan_entry *vlan, *next; 259 int res = -ENODEV; 260 261 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 262 263 write_lock_bh(&bond->lock); 264 265 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 266 if (vlan->vlan_id == vlan_id) { 267 list_del(&vlan->vlan_list); 268 269 if ((bond->params.mode == BOND_MODE_TLB) || 270 (bond->params.mode == BOND_MODE_ALB)) { 271 bond_alb_clear_vlan(bond, vlan_id); 272 } 273 274 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 275 bond->dev->name); 276 277 kfree(vlan); 278 279 if (list_empty(&bond->vlan_list) && 280 (bond->slave_cnt == 0)) { 281 /* Last VLAN removed and no slaves, so 282 * restore block on adding VLANs. This will 283 * be removed once new slaves that are not 284 * VLAN challenged will be added. 285 */ 286 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 287 } 288 289 res = 0; 290 goto out; 291 } 292 } 293 294 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 295 bond->dev->name); 296 297 out: 298 write_unlock_bh(&bond->lock); 299 return res; 300 } 301 302 /** 303 * bond_has_challenged_slaves 304 * @bond: the bond we're working on 305 * 306 * Searches the slave list. Returns 1 if a vlan challenged slave 307 * was found, 0 otherwise. 308 * 309 * Assumes bond->lock is held. 
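 * (Used by bond_release() to decide whether the bond's
 * NETIF_F_VLAN_CHALLENGED flag can be cleared once a challenged
 * slave leaves.)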
310 */ 311 static int bond_has_challenged_slaves(struct bonding *bond) 312 { 313 struct slave *slave; 314 int i; 315 316 bond_for_each_slave(bond, slave, i) { 317 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 318 dprintk("found VLAN challenged slave - %s\n", 319 slave->dev->name); 320 return 1; 321 } 322 } 323 324 dprintk("no VLAN challenged slaves found\n"); 325 return 0; 326 } 327 328 /** 329 * bond_next_vlan - safely skip to the next item in the vlans list. 330 * @bond: the bond we're working on 331 * @curr: item we're advancing from 332 * 333 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 334 * or @curr->next otherwise (even if it is @curr itself again). 335 * 336 * Caller must hold bond->lock 337 */ 338 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 339 { 340 struct vlan_entry *next, *last; 341 342 if (list_empty(&bond->vlan_list)) { 343 return NULL; 344 } 345 346 if (!curr) { 347 next = list_entry(bond->vlan_list.next, 348 struct vlan_entry, vlan_list); 349 } else { 350 last = list_entry(bond->vlan_list.prev, 351 struct vlan_entry, vlan_list); 352 if (last == curr) { 353 next = list_entry(bond->vlan_list.next, 354 struct vlan_entry, vlan_list); 355 } else { 356 next = list_entry(curr->vlan_list.next, 357 struct vlan_entry, vlan_list); 358 } 359 } 360 361 return next; 362 } 363 364 /** 365 * bond_dev_queue_xmit - Prepare skb for xmit. 366 * 367 * @bond: bond device that got this skb for tx. 368 * @skb: hw accel VLAN tagged skb to transmit 369 * @slave_dev: slave that is supposed to xmit this skbuff 370 * 371 * When the bond gets an skb to transmit that is 372 * already hardware accelerated VLAN tagged, and it 373 * needs to relay this skb to a slave that is not 374 * hw accel capable, the skb needs to be "unaccelerated", 375 * i.e. strip the hwaccel tag and re-insert it as part 376 * of the payload. 377 */ 378 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) 379 { 380 unsigned short vlan_id; 381 382 if (!list_empty(&bond->vlan_list) && 383 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 384 vlan_get_tag(skb, &vlan_id) == 0) { 385 skb->dev = slave_dev; 386 skb = vlan_put_tag(skb, vlan_id); 387 if (!skb) { 388 /* vlan_put_tag() frees the skb in case of error, 389 * so return success here so the calling functions 390 * won't attempt to free is again. 391 */ 392 return 0; 393 } 394 } else { 395 skb->dev = slave_dev; 396 } 397 398 skb->priority = 1; 399 dev_queue_xmit(skb); 400 401 return 0; 402 } 403 404 /* 405 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 406 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 407 * lock because: 408 * a. This operation is performed in IOCTL context, 409 * b. The operation is protected by the RTNL semaphore in the 8021q code, 410 * c. Holding a lock with BH disabled while directly calling a base driver 411 * entry point is generally a BAD idea. 412 * 413 * The design of synchronization/protection for this operation in the 8021q 414 * module is good for one or more VLAN devices over a single physical device 415 * and cannot be extended for a teaming solution like bonding, so there is a 416 * potential race condition here where a net device from the vlan group might 417 * be referenced (either by a base driver or the 8021q code) while it is being 418 * removed from the system. 
However, it turns out we're not making matters 419 * worse, and if it works for regular VLAN usage it will work here too. 420 */ 421 422 /** 423 * bond_vlan_rx_register - Propagates registration to slaves 424 * @bond_dev: bonding net device that got called 425 * @grp: vlan group being registered 426 */ 427 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 428 { 429 struct bonding *bond = bond_dev->priv; 430 struct slave *slave; 431 int i; 432 433 bond->vlgrp = grp; 434 435 bond_for_each_slave(bond, slave, i) { 436 struct net_device *slave_dev = slave->dev; 437 438 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 439 slave_dev->vlan_rx_register) { 440 slave_dev->vlan_rx_register(slave_dev, grp); 441 } 442 } 443 } 444 445 /** 446 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 447 * @bond_dev: bonding net device that got called 448 * @vid: vlan id being added 449 */ 450 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 451 { 452 struct bonding *bond = bond_dev->priv; 453 struct slave *slave; 454 int i, res; 455 456 bond_for_each_slave(bond, slave, i) { 457 struct net_device *slave_dev = slave->dev; 458 459 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 460 slave_dev->vlan_rx_add_vid) { 461 slave_dev->vlan_rx_add_vid(slave_dev, vid); 462 } 463 } 464 465 res = bond_add_vlan(bond, vid); 466 if (res) { 467 printk(KERN_ERR DRV_NAME 468 ": %s: Error: Failed to add vlan id %d\n", 469 bond_dev->name, vid); 470 } 471 } 472 473 /** 474 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 475 * @bond_dev: bonding net device that got called 476 * @vid: vlan id being removed 477 */ 478 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 479 { 480 struct bonding *bond = bond_dev->priv; 481 struct slave *slave; 482 struct net_device *vlan_dev; 483 int i, res; 484 485 bond_for_each_slave(bond, slave, i) { 486 struct net_device *slave_dev = slave->dev; 487 488 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 489 slave_dev->vlan_rx_kill_vid) { 490 /* Save and then restore vlan_dev in the grp array, 491 * since the slave's driver might clear it. 
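			 * (The entry in the shared vlan_group is still
			 * needed by the bond and its remaining slaves
			 * after this call.)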
492 */ 493 vlan_dev = vlan_group_get_device(bond->vlgrp, vid); 494 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 495 vlan_group_set_device(bond->vlgrp, vid, vlan_dev); 496 } 497 } 498 499 res = bond_del_vlan(bond, vid); 500 if (res) { 501 printk(KERN_ERR DRV_NAME 502 ": %s: Error: Failed to remove vlan id %d\n", 503 bond_dev->name, vid); 504 } 505 } 506 507 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 508 { 509 struct vlan_entry *vlan; 510 511 write_lock_bh(&bond->lock); 512 513 if (list_empty(&bond->vlan_list)) { 514 goto out; 515 } 516 517 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 518 slave_dev->vlan_rx_register) { 519 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 520 } 521 522 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 523 !(slave_dev->vlan_rx_add_vid)) { 524 goto out; 525 } 526 527 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 528 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 529 } 530 531 out: 532 write_unlock_bh(&bond->lock); 533 } 534 535 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 536 { 537 struct vlan_entry *vlan; 538 struct net_device *vlan_dev; 539 540 write_lock_bh(&bond->lock); 541 542 if (list_empty(&bond->vlan_list)) { 543 goto out; 544 } 545 546 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 547 !(slave_dev->vlan_rx_kill_vid)) { 548 goto unreg; 549 } 550 551 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 552 /* Save and then restore vlan_dev in the grp array, 553 * since the slave's driver might clear it. 554 */ 555 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 556 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 557 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); 558 } 559 560 unreg: 561 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 562 slave_dev->vlan_rx_register) { 563 slave_dev->vlan_rx_register(slave_dev, NULL); 564 } 565 566 out: 567 write_unlock_bh(&bond->lock); 568 } 569 570 /*------------------------------- Link status -------------------------------*/ 571 572 /* 573 * Set the carrier state for the master according to the state of its 574 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 575 * do special 802.3ad magic. 576 * 577 * Returns zero if carrier state does not change, nonzero if it does. 578 */ 579 static int bond_set_carrier(struct bonding *bond) 580 { 581 struct slave *slave; 582 int i; 583 584 if (bond->slave_cnt == 0) 585 goto down; 586 587 if (bond->params.mode == BOND_MODE_8023AD) 588 return bond_3ad_set_carrier(bond); 589 590 bond_for_each_slave(bond, slave, i) { 591 if (slave->link == BOND_LINK_UP) { 592 if (!netif_carrier_ok(bond->dev)) { 593 netif_carrier_on(bond->dev); 594 return 1; 595 } 596 return 0; 597 } 598 } 599 600 down: 601 if (netif_carrier_ok(bond->dev)) { 602 netif_carrier_off(bond->dev); 603 return 1; 604 } 605 return 0; 606 } 607 608 /* 609 * Get link speed and duplex from the slave's base driver 610 * using ethtool. If for some reason the call fails or the 611 * values are invalid, fake speed and duplex to 100/Full 612 * and return error. 
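 * (802.3ad relies on these values for aggregator selection; see the
 * warning printed from bond_enslave() when they cannot be read.)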
613 */ 614 static int bond_update_speed_duplex(struct slave *slave) 615 { 616 struct net_device *slave_dev = slave->dev; 617 struct ethtool_cmd etool; 618 int res; 619 620 /* Fake speed and duplex */ 621 slave->speed = SPEED_100; 622 slave->duplex = DUPLEX_FULL; 623 624 if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) 625 return -1; 626 627 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 628 if (res < 0) 629 return -1; 630 631 switch (etool.speed) { 632 case SPEED_10: 633 case SPEED_100: 634 case SPEED_1000: 635 case SPEED_10000: 636 break; 637 default: 638 return -1; 639 } 640 641 switch (etool.duplex) { 642 case DUPLEX_FULL: 643 case DUPLEX_HALF: 644 break; 645 default: 646 return -1; 647 } 648 649 slave->speed = etool.speed; 650 slave->duplex = etool.duplex; 651 652 return 0; 653 } 654 655 /* 656 * if <dev> supports MII link status reporting, check its link status. 657 * 658 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 659 * depening upon the setting of the use_carrier parameter. 660 * 661 * Return either BMSR_LSTATUS, meaning that the link is up (or we 662 * can't tell and just pretend it is), or 0, meaning that the link is 663 * down. 664 * 665 * If reporting is non-zero, instead of faking link up, return -1 if 666 * both ETHTOOL and MII ioctls fail (meaning the device does not 667 * support them). If use_carrier is set, return whatever it says. 668 * It'd be nice if there was a good way to tell if a driver supports 669 * netif_carrier, but there really isn't. 670 */ 671 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 672 { 673 static int (* ioctl)(struct net_device *, struct ifreq *, int); 674 struct ifreq ifr; 675 struct mii_ioctl_data *mii; 676 677 if (bond->params.use_carrier) { 678 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; 679 } 680 681 ioctl = slave_dev->do_ioctl; 682 if (ioctl) { 683 /* TODO: set pointer to correct ioctl on a per team member */ 684 /* bases to make this more efficient. that is, once */ 685 /* we determine the correct ioctl, we will always */ 686 /* call it and not the others for that team */ 687 /* member. */ 688 689 /* 690 * We cannot assume that SIOCGMIIPHY will also read a 691 * register; not all network drivers (e.g., e100) 692 * support that. 693 */ 694 695 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 696 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 697 mii = if_mii(&ifr); 698 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 699 mii->reg_num = MII_BMSR; 700 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 701 return (mii->val_out & BMSR_LSTATUS); 702 } 703 } 704 } 705 706 /* 707 * Some drivers cache ETHTOOL_GLINK for a period of time so we only 708 * attempt to get link status from it if the above MII ioctls fail. 709 */ 710 if (slave_dev->ethtool_ops) { 711 if (slave_dev->ethtool_ops->get_link) { 712 u32 link; 713 714 link = slave_dev->ethtool_ops->get_link(slave_dev); 715 716 return link ? BMSR_LSTATUS : 0; 717 } 718 } 719 720 /* 721 * If reporting, report that either there's no dev->do_ioctl, 722 * or both SIOCGMIIREG and get_link failed (meaning that we 723 * cannot report link status). If not reporting, pretend 724 * we're ok. 725 */ 726 return (reporting ? 
-1 : BMSR_LSTATUS); 727 } 728 729 /*----------------------------- Multicast list ------------------------------*/ 730 731 /* 732 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 733 */ 734 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 735 { 736 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 737 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 738 } 739 740 /* 741 * returns dmi entry if found, NULL otherwise 742 */ 743 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 744 { 745 struct dev_mc_list *idmi; 746 747 for (idmi = mc_list; idmi; idmi = idmi->next) { 748 if (bond_is_dmi_same(dmi, idmi)) { 749 return idmi; 750 } 751 } 752 753 return NULL; 754 } 755 756 /* 757 * Push the promiscuity flag down to appropriate slaves 758 */ 759 static void bond_set_promiscuity(struct bonding *bond, int inc) 760 { 761 if (USES_PRIMARY(bond->params.mode)) { 762 /* write lock already acquired */ 763 if (bond->curr_active_slave) { 764 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 765 } 766 } else { 767 struct slave *slave; 768 int i; 769 bond_for_each_slave(bond, slave, i) { 770 dev_set_promiscuity(slave->dev, inc); 771 } 772 } 773 } 774 775 /* 776 * Push the allmulti flag down to all slaves 777 */ 778 static void bond_set_allmulti(struct bonding *bond, int inc) 779 { 780 if (USES_PRIMARY(bond->params.mode)) { 781 /* write lock already acquired */ 782 if (bond->curr_active_slave) { 783 dev_set_allmulti(bond->curr_active_slave->dev, inc); 784 } 785 } else { 786 struct slave *slave; 787 int i; 788 bond_for_each_slave(bond, slave, i) { 789 dev_set_allmulti(slave->dev, inc); 790 } 791 } 792 } 793 794 /* 795 * Add a Multicast address to slaves 796 * according to mode 797 */ 798 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 799 { 800 if (USES_PRIMARY(bond->params.mode)) { 801 /* write lock already acquired */ 802 if (bond->curr_active_slave) { 803 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 804 } 805 } else { 806 struct slave *slave; 807 int i; 808 bond_for_each_slave(bond, slave, i) { 809 dev_mc_add(slave->dev, addr, alen, 0); 810 } 811 } 812 } 813 814 /* 815 * Remove a multicast address from slave 816 * according to mode 817 */ 818 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 819 { 820 if (USES_PRIMARY(bond->params.mode)) { 821 /* write lock already acquired */ 822 if (bond->curr_active_slave) { 823 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 824 } 825 } else { 826 struct slave *slave; 827 int i; 828 bond_for_each_slave(bond, slave, i) { 829 dev_mc_delete(slave->dev, addr, alen, 0); 830 } 831 } 832 } 833 834 835 /* 836 * Retrieve the list of registered multicast addresses for the bonding 837 * device and retransmit an IGMP JOIN request to the current active 838 * slave. 
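 * (Called from bond_mc_swap() after a failover so that multicast
 * group membership is re-announced through the new active slave.)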
839 */ 840 static void bond_resend_igmp_join_requests(struct bonding *bond) 841 { 842 struct in_device *in_dev; 843 struct ip_mc_list *im; 844 845 rcu_read_lock(); 846 in_dev = __in_dev_get_rcu(bond->dev); 847 if (in_dev) { 848 for (im = in_dev->mc_list; im; im = im->next) { 849 ip_mc_rejoin_group(im); 850 } 851 } 852 853 rcu_read_unlock(); 854 } 855 856 /* 857 * Totally destroys the mc_list in bond 858 */ 859 static void bond_mc_list_destroy(struct bonding *bond) 860 { 861 struct dev_mc_list *dmi; 862 863 dmi = bond->mc_list; 864 while (dmi) { 865 bond->mc_list = dmi->next; 866 kfree(dmi); 867 dmi = bond->mc_list; 868 } 869 bond->mc_list = NULL; 870 } 871 872 /* 873 * Copy all the Multicast addresses from src to the bonding device dst 874 */ 875 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 876 gfp_t gfp_flag) 877 { 878 struct dev_mc_list *dmi, *new_dmi; 879 880 for (dmi = mc_list; dmi; dmi = dmi->next) { 881 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 882 883 if (!new_dmi) { 884 /* FIXME: Potential memory leak !!! */ 885 return -ENOMEM; 886 } 887 888 new_dmi->next = bond->mc_list; 889 bond->mc_list = new_dmi; 890 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 891 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 892 new_dmi->dmi_users = dmi->dmi_users; 893 new_dmi->dmi_gusers = dmi->dmi_gusers; 894 } 895 896 return 0; 897 } 898 899 /* 900 * flush all members of flush->mc_list from device dev->mc_list 901 */ 902 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 903 { 904 struct bonding *bond = bond_dev->priv; 905 struct dev_mc_list *dmi; 906 907 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 908 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 909 } 910 911 if (bond->params.mode == BOND_MODE_8023AD) { 912 /* del lacpdu mc addr from mc list */ 913 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 914 915 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 916 } 917 } 918 919 /*--------------------------- Active slave change ---------------------------*/ 920 921 /* 922 * Update the mc list and multicast-related flags for the new and 923 * old active slaves (if any) according to the multicast mode, and 924 * promiscuous flags unconditionally. 925 */ 926 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 927 { 928 struct dev_mc_list *dmi; 929 930 if (!USES_PRIMARY(bond->params.mode)) { 931 /* nothing to do - mc list is already up-to-date on 932 * all slaves 933 */ 934 return; 935 } 936 937 if (old_active) { 938 if (bond->dev->flags & IFF_PROMISC) { 939 dev_set_promiscuity(old_active->dev, -1); 940 } 941 942 if (bond->dev->flags & IFF_ALLMULTI) { 943 dev_set_allmulti(old_active->dev, -1); 944 } 945 946 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 947 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 948 } 949 } 950 951 if (new_active) { 952 if (bond->dev->flags & IFF_PROMISC) { 953 dev_set_promiscuity(new_active->dev, 1); 954 } 955 956 if (bond->dev->flags & IFF_ALLMULTI) { 957 dev_set_allmulti(new_active->dev, 1); 958 } 959 960 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 961 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 962 } 963 bond_resend_igmp_join_requests(bond); 964 } 965 } 966 967 /** 968 * find_best_interface - select the best available slave to be the active one 969 * @bond: our bonding struct 970 * 971 * Warning: Caller must hold curr_slave_lock for writing. 
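 * (The primary slave is tried first unless ARP monitoring is in use;
 * otherwise the first slave found with its link up wins, falling back
 * to the BOND_LINK_BACK slave with the least updelay time remaining.)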
972 */ 973 static struct slave *bond_find_best_slave(struct bonding *bond) 974 { 975 struct slave *new_active, *old_active; 976 struct slave *bestslave = NULL; 977 int mintime = bond->params.updelay; 978 int i; 979 980 new_active = old_active = bond->curr_active_slave; 981 982 if (!new_active) { /* there were no active slaves left */ 983 if (bond->slave_cnt > 0) { /* found one slave */ 984 new_active = bond->first_slave; 985 } else { 986 return NULL; /* still no slave, return NULL */ 987 } 988 } 989 990 /* first try the primary link; if arping, a link must tx/rx traffic 991 * before it can be considered the curr_active_slave - also, we would skip 992 * slaves between the curr_active_slave and primary_slave that may be up 993 * and able to arp 994 */ 995 if ((bond->primary_slave) && 996 (!bond->params.arp_interval) && 997 (IS_UP(bond->primary_slave->dev))) { 998 new_active = bond->primary_slave; 999 } 1000 1001 /* remember where to stop iterating over the slaves */ 1002 old_active = new_active; 1003 1004 bond_for_each_slave_from(bond, new_active, i, old_active) { 1005 if (IS_UP(new_active->dev)) { 1006 if (new_active->link == BOND_LINK_UP) { 1007 return new_active; 1008 } else if (new_active->link == BOND_LINK_BACK) { 1009 /* link up, but waiting for stabilization */ 1010 if (new_active->delay < mintime) { 1011 mintime = new_active->delay; 1012 bestslave = new_active; 1013 } 1014 } 1015 } 1016 } 1017 1018 return bestslave; 1019 } 1020 1021 /** 1022 * change_active_interface - change the active slave into the specified one 1023 * @bond: our bonding struct 1024 * @new: the new slave to make the active one 1025 * 1026 * Set the new slave to the bond's settings and unset them on the old 1027 * curr_active_slave. 1028 * Setting include flags, mc-list, promiscuity, allmulti, etc. 1029 * 1030 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1031 * because it is apparently the best available slave we have, even though its 1032 * updelay hasn't timed out yet. 1033 * 1034 * Warning: Caller must hold curr_slave_lock for writing. 
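 * (In active-backup mode this also sends a gratuitous ARP so that
 * peers and switches learn the new active slave.)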
1035 */ 1036 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1037 { 1038 struct slave *old_active = bond->curr_active_slave; 1039 1040 if (old_active == new_active) { 1041 return; 1042 } 1043 1044 if (new_active) { 1045 if (new_active->link == BOND_LINK_BACK) { 1046 if (USES_PRIMARY(bond->params.mode)) { 1047 printk(KERN_INFO DRV_NAME 1048 ": %s: making interface %s the new " 1049 "active one %d ms earlier.\n", 1050 bond->dev->name, new_active->dev->name, 1051 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1052 } 1053 1054 new_active->delay = 0; 1055 new_active->link = BOND_LINK_UP; 1056 new_active->jiffies = jiffies; 1057 1058 if (bond->params.mode == BOND_MODE_8023AD) { 1059 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1060 } 1061 1062 if ((bond->params.mode == BOND_MODE_TLB) || 1063 (bond->params.mode == BOND_MODE_ALB)) { 1064 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1065 } 1066 } else { 1067 if (USES_PRIMARY(bond->params.mode)) { 1068 printk(KERN_INFO DRV_NAME 1069 ": %s: making interface %s the new " 1070 "active one.\n", 1071 bond->dev->name, new_active->dev->name); 1072 } 1073 } 1074 } 1075 1076 if (USES_PRIMARY(bond->params.mode)) { 1077 bond_mc_swap(bond, new_active, old_active); 1078 } 1079 1080 if ((bond->params.mode == BOND_MODE_TLB) || 1081 (bond->params.mode == BOND_MODE_ALB)) { 1082 bond_alb_handle_active_change(bond, new_active); 1083 if (old_active) 1084 bond_set_slave_inactive_flags(old_active); 1085 if (new_active) 1086 bond_set_slave_active_flags(new_active); 1087 } else { 1088 bond->curr_active_slave = new_active; 1089 } 1090 1091 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1092 if (old_active) { 1093 bond_set_slave_inactive_flags(old_active); 1094 } 1095 1096 if (new_active) { 1097 bond_set_slave_active_flags(new_active); 1098 } 1099 bond_send_gratuitous_arp(bond); 1100 } 1101 } 1102 1103 /** 1104 * bond_select_active_slave - select a new active slave, if needed 1105 * @bond: our bonding struct 1106 * 1107 * This functions shoud be called when one of the following occurs: 1108 * - The old curr_active_slave has been released or lost its link. 1109 * - The primary_slave has got its link back. 1110 * - A slave has got its link back and there's no old curr_active_slave. 1111 * 1112 * Warning: Caller must hold curr_slave_lock for writing. 1113 */ 1114 void bond_select_active_slave(struct bonding *bond) 1115 { 1116 struct slave *best_slave; 1117 int rv; 1118 1119 best_slave = bond_find_best_slave(bond); 1120 if (best_slave != bond->curr_active_slave) { 1121 bond_change_active_slave(bond, best_slave); 1122 rv = bond_set_carrier(bond); 1123 if (!rv) 1124 return; 1125 1126 if (netif_carrier_ok(bond->dev)) { 1127 printk(KERN_INFO DRV_NAME 1128 ": %s: first active interface up!\n", 1129 bond->dev->name); 1130 } else { 1131 printk(KERN_INFO DRV_NAME ": %s: " 1132 "now running without any active interface !\n", 1133 bond->dev->name); 1134 } 1135 } 1136 } 1137 1138 /*--------------------------- slave list handling ---------------------------*/ 1139 1140 /* 1141 * This function attaches the slave to the end of list. 1142 * 1143 * bond->lock held for writing by caller. 
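 * (The slaves form a circular doubly-linked list through
 * slave->next/slave->prev, rooted at bond->first_slave.)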
1144 */ 1145 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1146 { 1147 if (bond->first_slave == NULL) { /* attaching the first slave */ 1148 new_slave->next = new_slave; 1149 new_slave->prev = new_slave; 1150 bond->first_slave = new_slave; 1151 } else { 1152 new_slave->next = bond->first_slave; 1153 new_slave->prev = bond->first_slave->prev; 1154 new_slave->next->prev = new_slave; 1155 new_slave->prev->next = new_slave; 1156 } 1157 1158 bond->slave_cnt++; 1159 } 1160 1161 /* 1162 * This function detaches the slave from the list. 1163 * WARNING: no check is made to verify if the slave effectively 1164 * belongs to <bond>. 1165 * Nothing is freed on return, structures are just unchained. 1166 * If any slave pointer in bond was pointing to <slave>, 1167 * it should be changed by the calling function. 1168 * 1169 * bond->lock held for writing by caller. 1170 */ 1171 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1172 { 1173 if (slave->next) { 1174 slave->next->prev = slave->prev; 1175 } 1176 1177 if (slave->prev) { 1178 slave->prev->next = slave->next; 1179 } 1180 1181 if (bond->first_slave == slave) { /* slave is the first slave */ 1182 if (bond->slave_cnt > 1) { /* there are more slave */ 1183 bond->first_slave = slave->next; 1184 } else { 1185 bond->first_slave = NULL; /* slave was the last one */ 1186 } 1187 } 1188 1189 slave->next = NULL; 1190 slave->prev = NULL; 1191 bond->slave_cnt--; 1192 } 1193 1194 /*---------------------------------- IOCTL ----------------------------------*/ 1195 1196 static int bond_sethwaddr(struct net_device *bond_dev, 1197 struct net_device *slave_dev) 1198 { 1199 dprintk("bond_dev=%p\n", bond_dev); 1200 dprintk("slave_dev=%p\n", slave_dev); 1201 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1202 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1203 return 0; 1204 } 1205 1206 #define BOND_VLAN_FEATURES \ 1207 (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ 1208 NETIF_F_HW_VLAN_FILTER) 1209 1210 /* 1211 * Compute the common dev->feature set available to all slaves. Some 1212 * feature bits are managed elsewhere, so preserve those feature bits 1213 * on the master device. 
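 * (Only the BOND_VLAN_FEATURES bits defined above are preserved; the
 * rest is recomputed from the slaves via netdev_compute_features().)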
1214 */ 1215 static int bond_compute_features(struct bonding *bond) 1216 { 1217 struct slave *slave; 1218 struct net_device *bond_dev = bond->dev; 1219 unsigned long features = bond_dev->features; 1220 unsigned short max_hard_header_len = ETH_HLEN; 1221 int i; 1222 1223 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); 1224 features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 1225 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; 1226 1227 bond_for_each_slave(bond, slave, i) { 1228 features = netdev_compute_features(features, 1229 slave->dev->features); 1230 if (slave->dev->hard_header_len > max_hard_header_len) 1231 max_hard_header_len = slave->dev->hard_header_len; 1232 } 1233 1234 features |= (bond_dev->features & BOND_VLAN_FEATURES); 1235 bond_dev->features = features; 1236 bond_dev->hard_header_len = max_hard_header_len; 1237 1238 return 0; 1239 } 1240 1241 /* enslave device <slave> to bond device <master> */ 1242 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1243 { 1244 struct bonding *bond = bond_dev->priv; 1245 struct slave *new_slave = NULL; 1246 struct dev_mc_list *dmi; 1247 struct sockaddr addr; 1248 int link_reporting; 1249 int old_features = bond_dev->features; 1250 int res = 0; 1251 1252 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1253 slave_dev->do_ioctl == NULL) { 1254 printk(KERN_WARNING DRV_NAME 1255 ": %s: Warning: no link monitoring support for %s\n", 1256 bond_dev->name, slave_dev->name); 1257 } 1258 1259 /* bond must be initialized by bond_open() before enslaving */ 1260 if (!(bond_dev->flags & IFF_UP)) { 1261 dprintk("Error, master_dev is not up\n"); 1262 return -EPERM; 1263 } 1264 1265 /* already enslaved */ 1266 if (slave_dev->flags & IFF_SLAVE) { 1267 dprintk("Error, Device was already enslaved\n"); 1268 return -EBUSY; 1269 } 1270 1271 /* vlan challenged mutual exclusion */ 1272 /* no need to lock since we're protected by rtnl_lock */ 1273 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1274 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1275 if (!list_empty(&bond->vlan_list)) { 1276 printk(KERN_ERR DRV_NAME 1277 ": %s: Error: cannot enslave VLAN " 1278 "challenged slave %s on VLAN enabled " 1279 "bond %s\n", bond_dev->name, slave_dev->name, 1280 bond_dev->name); 1281 return -EPERM; 1282 } else { 1283 printk(KERN_WARNING DRV_NAME 1284 ": %s: Warning: enslaved VLAN challenged " 1285 "slave %s. Adding VLANs will be blocked as " 1286 "long as %s is part of bond %s\n", 1287 bond_dev->name, slave_dev->name, slave_dev->name, 1288 bond_dev->name); 1289 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1290 } 1291 } else { 1292 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1293 if (bond->slave_cnt == 0) { 1294 /* First slave, and it is not VLAN challenged, 1295 * so remove the block of adding VLANs over the bond. 1296 */ 1297 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1298 } 1299 } 1300 1301 /* 1302 * Old ifenslave binaries are no longer supported. These can 1303 * be identified with moderate accurary by the state of the slave: 1304 * the current ifenslave will set the interface down prior to 1305 * enslaving it; the old ifenslave will not. 1306 */ 1307 if ((slave_dev->flags & IFF_UP)) { 1308 printk(KERN_ERR DRV_NAME ": %s is up. 
" 1309 "This may be due to an out of date ifenslave.\n", 1310 slave_dev->name); 1311 res = -EPERM; 1312 goto err_undo_flags; 1313 } 1314 1315 if (slave_dev->set_mac_address == NULL) { 1316 printk(KERN_ERR DRV_NAME 1317 ": %s: Error: The slave device you specified does " 1318 "not support setting the MAC address. " 1319 "Your kernel likely does not support slave " 1320 "devices.\n", bond_dev->name); 1321 res = -EOPNOTSUPP; 1322 goto err_undo_flags; 1323 } 1324 1325 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); 1326 if (!new_slave) { 1327 res = -ENOMEM; 1328 goto err_undo_flags; 1329 } 1330 1331 /* save slave's original flags before calling 1332 * netdev_set_master and dev_open 1333 */ 1334 new_slave->original_flags = slave_dev->flags; 1335 1336 /* 1337 * Save slave's original ("permanent") mac address for modes 1338 * that need it, and for restoring it upon release, and then 1339 * set it to the master's address 1340 */ 1341 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1342 1343 /* 1344 * Set slave to master's mac address. The application already 1345 * set the master's mac address to that of the first slave 1346 */ 1347 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1348 addr.sa_family = slave_dev->type; 1349 res = dev_set_mac_address(slave_dev, &addr); 1350 if (res) { 1351 dprintk("Error %d calling set_mac_address\n", res); 1352 goto err_free; 1353 } 1354 1355 res = netdev_set_master(slave_dev, bond_dev); 1356 if (res) { 1357 dprintk("Error %d calling netdev_set_master\n", res); 1358 goto err_close; 1359 } 1360 /* open the slave since the application closed it */ 1361 res = dev_open(slave_dev); 1362 if (res) { 1363 dprintk("Openning slave %s failed\n", slave_dev->name); 1364 goto err_restore_mac; 1365 } 1366 1367 new_slave->dev = slave_dev; 1368 slave_dev->priv_flags |= IFF_BONDING; 1369 1370 if ((bond->params.mode == BOND_MODE_TLB) || 1371 (bond->params.mode == BOND_MODE_ALB)) { 1372 /* bond_alb_init_slave() must be called before all other stages since 1373 * it might fail and we do not want to have to undo everything 1374 */ 1375 res = bond_alb_init_slave(bond, new_slave); 1376 if (res) { 1377 goto err_unset_master; 1378 } 1379 } 1380 1381 /* If the mode USES_PRIMARY, then the new slave gets the 1382 * master's promisc (and mc) settings only if it becomes the 1383 * curr_active_slave, and that is taken care of later when calling 1384 * bond_change_active() 1385 */ 1386 if (!USES_PRIMARY(bond->params.mode)) { 1387 /* set promiscuity level to new slave */ 1388 if (bond_dev->flags & IFF_PROMISC) { 1389 dev_set_promiscuity(slave_dev, 1); 1390 } 1391 1392 /* set allmulti level to new slave */ 1393 if (bond_dev->flags & IFF_ALLMULTI) { 1394 dev_set_allmulti(slave_dev, 1); 1395 } 1396 1397 /* upload master's mc_list to new slave */ 1398 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1399 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1400 } 1401 } 1402 1403 if (bond->params.mode == BOND_MODE_8023AD) { 1404 /* add lacpdu mc addr to mc list */ 1405 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1406 1407 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1408 } 1409 1410 bond_add_vlans_on_slave(bond, slave_dev); 1411 1412 write_lock_bh(&bond->lock); 1413 1414 bond_attach_slave(bond, new_slave); 1415 1416 new_slave->delay = 0; 1417 new_slave->link_failure_count = 0; 1418 1419 bond_compute_features(bond); 1420 1421 new_slave->last_arp_rx = jiffies; 1422 1423 if (bond->params.miimon && !bond->params.use_carrier) { 1424 
link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1425 1426 if ((link_reporting == -1) && !bond->params.arp_interval) { 1427 /* 1428 * miimon is set but a bonded network driver 1429 * does not support ETHTOOL/MII and 1430 * arp_interval is not set. Note: if 1431 * use_carrier is enabled, we will never go 1432 * here (because netif_carrier is always 1433 * supported); thus, we don't need to change 1434 * the messages for netif_carrier. 1435 */ 1436 printk(KERN_WARNING DRV_NAME 1437 ": %s: Warning: MII and ETHTOOL support not " 1438 "available for interface %s, and " 1439 "arp_interval/arp_ip_target module parameters " 1440 "not specified, thus bonding will not detect " 1441 "link failures! see bonding.txt for details.\n", 1442 bond_dev->name, slave_dev->name); 1443 } else if (link_reporting == -1) { 1444 /* unable get link status using mii/ethtool */ 1445 printk(KERN_WARNING DRV_NAME 1446 ": %s: Warning: can't get link status from " 1447 "interface %s; the network driver associated " 1448 "with this interface does not support MII or " 1449 "ETHTOOL link status reporting, thus miimon " 1450 "has no effect on this interface.\n", 1451 bond_dev->name, slave_dev->name); 1452 } 1453 } 1454 1455 /* check for initial state */ 1456 if (!bond->params.miimon || 1457 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1458 if (bond->params.updelay) { 1459 dprintk("Initial state of slave_dev is " 1460 "BOND_LINK_BACK\n"); 1461 new_slave->link = BOND_LINK_BACK; 1462 new_slave->delay = bond->params.updelay; 1463 } else { 1464 dprintk("Initial state of slave_dev is " 1465 "BOND_LINK_UP\n"); 1466 new_slave->link = BOND_LINK_UP; 1467 } 1468 new_slave->jiffies = jiffies; 1469 } else { 1470 dprintk("Initial state of slave_dev is " 1471 "BOND_LINK_DOWN\n"); 1472 new_slave->link = BOND_LINK_DOWN; 1473 } 1474 1475 if (bond_update_speed_duplex(new_slave) && 1476 (new_slave->link != BOND_LINK_DOWN)) { 1477 printk(KERN_WARNING DRV_NAME 1478 ": %s: Warning: failed to get speed and duplex from %s, " 1479 "assumed to be 100Mb/sec and Full.\n", 1480 bond_dev->name, new_slave->dev->name); 1481 1482 if (bond->params.mode == BOND_MODE_8023AD) { 1483 printk(KERN_WARNING DRV_NAME 1484 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL " 1485 "support in base driver for proper aggregator " 1486 "selection.\n", bond_dev->name); 1487 } 1488 } 1489 1490 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1491 /* if there is a primary slave, remember it */ 1492 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1493 bond->primary_slave = new_slave; 1494 } 1495 } 1496 1497 switch (bond->params.mode) { 1498 case BOND_MODE_ACTIVEBACKUP: 1499 bond_set_slave_inactive_flags(new_slave); 1500 bond_select_active_slave(bond); 1501 break; 1502 case BOND_MODE_8023AD: 1503 /* in 802.3ad mode, the internal mechanism 1504 * will activate the slaves in the selected 1505 * aggregator 1506 */ 1507 bond_set_slave_inactive_flags(new_slave); 1508 /* if this is the first slave */ 1509 if (bond->slave_cnt == 1) { 1510 SLAVE_AD_INFO(new_slave).id = 1; 1511 /* Initialize AD with the number of times that the AD timer is called in 1 second 1512 * can be called only after the mac address of the bond is set 1513 */ 1514 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1515 bond->params.lacp_fast); 1516 } else { 1517 SLAVE_AD_INFO(new_slave).id = 1518 SLAVE_AD_INFO(new_slave->prev).id + 1; 1519 } 1520 1521 bond_3ad_bind_slave(new_slave); 1522 break; 1523 case BOND_MODE_TLB: 1524 case BOND_MODE_ALB: 1525 
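		/* Make this slave the active one only if there is no
		 * current active slave and its link is not down; otherwise
		 * mark it inactive for now.
		 */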
new_slave->state = BOND_STATE_ACTIVE; 1526 if ((!bond->curr_active_slave) && 1527 (new_slave->link != BOND_LINK_DOWN)) { 1528 /* first slave or no active slave yet, and this link 1529 * is OK, so make this interface the active one 1530 */ 1531 bond_change_active_slave(bond, new_slave); 1532 } else { 1533 bond_set_slave_inactive_flags(new_slave); 1534 } 1535 break; 1536 default: 1537 dprintk("This slave is always active in trunk mode\n"); 1538 1539 /* always active in trunk mode */ 1540 new_slave->state = BOND_STATE_ACTIVE; 1541 1542 /* In trunking mode there is little meaning to curr_active_slave 1543 * anyway (it holds no special properties of the bond device), 1544 * so we can change it without calling change_active_interface() 1545 */ 1546 if (!bond->curr_active_slave) { 1547 bond->curr_active_slave = new_slave; 1548 } 1549 break; 1550 } /* switch(bond_mode) */ 1551 1552 bond_set_carrier(bond); 1553 1554 write_unlock_bh(&bond->lock); 1555 1556 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1557 if (res) 1558 goto err_unset_master; 1559 1560 printk(KERN_INFO DRV_NAME 1561 ": %s: enslaving %s as a%s interface with a%s link.\n", 1562 bond_dev->name, slave_dev->name, 1563 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1564 new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); 1565 1566 /* enslave is successful */ 1567 return 0; 1568 1569 /* Undo stages on error */ 1570 err_unset_master: 1571 netdev_set_master(slave_dev, NULL); 1572 1573 err_close: 1574 dev_close(slave_dev); 1575 1576 err_restore_mac: 1577 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1578 addr.sa_family = slave_dev->type; 1579 dev_set_mac_address(slave_dev, &addr); 1580 1581 err_free: 1582 kfree(new_slave); 1583 1584 err_undo_flags: 1585 bond_dev->features = old_features; 1586 1587 return res; 1588 } 1589 1590 /* 1591 * Try to release the slave device <slave> from the bond device <master> 1592 * It is legal to access curr_active_slave without a lock because all the function 1593 * is write-locked. 1594 * 1595 * The rules for slave state should be: 1596 * for Active/Backup: 1597 * Active stays on all backups go down 1598 * for Bonded connections: 1599 * The first up interface should be left on and all others downed. 1600 */ 1601 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1602 { 1603 struct bonding *bond = bond_dev->priv; 1604 struct slave *slave, *oldcurrent; 1605 struct sockaddr addr; 1606 int mac_addr_differ; 1607 DECLARE_MAC_BUF(mac); 1608 1609 /* slave is not a slave or master is not master of this slave */ 1610 if (!(slave_dev->flags & IFF_SLAVE) || 1611 (slave_dev->master != bond_dev)) { 1612 printk(KERN_ERR DRV_NAME 1613 ": %s: Error: cannot release %s.\n", 1614 bond_dev->name, slave_dev->name); 1615 return -EINVAL; 1616 } 1617 1618 write_lock_bh(&bond->lock); 1619 1620 slave = bond_get_slave_by_dev(bond, slave_dev); 1621 if (!slave) { 1622 /* not a slave of this bond */ 1623 printk(KERN_INFO DRV_NAME 1624 ": %s: %s not enslaved\n", 1625 bond_dev->name, slave_dev->name); 1626 write_unlock_bh(&bond->lock); 1627 return -EINVAL; 1628 } 1629 1630 mac_addr_differ = memcmp(bond_dev->dev_addr, 1631 slave->perm_hwaddr, 1632 ETH_ALEN); 1633 if (!mac_addr_differ && (bond->slave_cnt > 1)) { 1634 printk(KERN_WARNING DRV_NAME 1635 ": %s: Warning: the permanent HWaddr of %s - " 1636 "%s - is still in use by %s. 
" 1637 "Set the HWaddr of %s to a different address " 1638 "to avoid conflicts.\n", 1639 bond_dev->name, 1640 slave_dev->name, 1641 print_mac(mac, slave->perm_hwaddr), 1642 bond_dev->name, 1643 slave_dev->name); 1644 } 1645 1646 /* Inform AD package of unbinding of slave. */ 1647 if (bond->params.mode == BOND_MODE_8023AD) { 1648 /* must be called before the slave is 1649 * detached from the list 1650 */ 1651 bond_3ad_unbind_slave(slave); 1652 } 1653 1654 printk(KERN_INFO DRV_NAME 1655 ": %s: releasing %s interface %s\n", 1656 bond_dev->name, 1657 (slave->state == BOND_STATE_ACTIVE) 1658 ? "active" : "backup", 1659 slave_dev->name); 1660 1661 oldcurrent = bond->curr_active_slave; 1662 1663 bond->current_arp_slave = NULL; 1664 1665 /* release the slave from its bond */ 1666 bond_detach_slave(bond, slave); 1667 1668 bond_compute_features(bond); 1669 1670 if (bond->primary_slave == slave) { 1671 bond->primary_slave = NULL; 1672 } 1673 1674 if (oldcurrent == slave) { 1675 bond_change_active_slave(bond, NULL); 1676 } 1677 1678 if ((bond->params.mode == BOND_MODE_TLB) || 1679 (bond->params.mode == BOND_MODE_ALB)) { 1680 /* Must be called only after the slave has been 1681 * detached from the list and the curr_active_slave 1682 * has been cleared (if our_slave == old_current), 1683 * but before a new active slave is selected. 1684 */ 1685 bond_alb_deinit_slave(bond, slave); 1686 } 1687 1688 if (oldcurrent == slave) 1689 bond_select_active_slave(bond); 1690 1691 if (bond->slave_cnt == 0) { 1692 bond_set_carrier(bond); 1693 1694 /* if the last slave was removed, zero the mac address 1695 * of the master so it will be set by the application 1696 * to the mac address of the first slave 1697 */ 1698 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1699 1700 if (list_empty(&bond->vlan_list)) { 1701 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1702 } else { 1703 printk(KERN_WARNING DRV_NAME 1704 ": %s: Warning: clearing HW address of %s while it " 1705 "still has VLANs.\n", 1706 bond_dev->name, bond_dev->name); 1707 printk(KERN_WARNING DRV_NAME 1708 ": %s: When re-adding slaves, make sure the bond's " 1709 "HW address matches its VLANs'.\n", 1710 bond_dev->name); 1711 } 1712 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1713 !bond_has_challenged_slaves(bond)) { 1714 printk(KERN_INFO DRV_NAME 1715 ": %s: last VLAN challenged slave %s " 1716 "left bond %s. 
VLAN blocking is removed\n", 1717 bond_dev->name, slave_dev->name, bond_dev->name); 1718 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1719 } 1720 1721 write_unlock_bh(&bond->lock); 1722 1723 /* must do this from outside any spinlocks */ 1724 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1725 1726 bond_del_vlans_from_slave(bond, slave_dev); 1727 1728 /* If the mode USES_PRIMARY, then we should only remove its 1729 * promisc and mc settings if it was the curr_active_slave, but that was 1730 * already taken care of above when we detached the slave 1731 */ 1732 if (!USES_PRIMARY(bond->params.mode)) { 1733 /* unset promiscuity level from slave */ 1734 if (bond_dev->flags & IFF_PROMISC) { 1735 dev_set_promiscuity(slave_dev, -1); 1736 } 1737 1738 /* unset allmulti level from slave */ 1739 if (bond_dev->flags & IFF_ALLMULTI) { 1740 dev_set_allmulti(slave_dev, -1); 1741 } 1742 1743 /* flush master's mc_list from slave */ 1744 bond_mc_list_flush(bond_dev, slave_dev); 1745 } 1746 1747 netdev_set_master(slave_dev, NULL); 1748 1749 /* close slave before restoring its mac address */ 1750 dev_close(slave_dev); 1751 1752 /* restore original ("permanent") mac address */ 1753 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1754 addr.sa_family = slave_dev->type; 1755 dev_set_mac_address(slave_dev, &addr); 1756 1757 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1758 IFF_SLAVE_INACTIVE | IFF_BONDING | 1759 IFF_SLAVE_NEEDARP); 1760 1761 kfree(slave); 1762 1763 return 0; /* deletion OK */ 1764 } 1765 1766 /* 1767 * This function releases all slaves. 1768 */ 1769 static int bond_release_all(struct net_device *bond_dev) 1770 { 1771 struct bonding *bond = bond_dev->priv; 1772 struct slave *slave; 1773 struct net_device *slave_dev; 1774 struct sockaddr addr; 1775 1776 write_lock_bh(&bond->lock); 1777 1778 netif_carrier_off(bond_dev); 1779 1780 if (bond->slave_cnt == 0) { 1781 goto out; 1782 } 1783 1784 bond->current_arp_slave = NULL; 1785 bond->primary_slave = NULL; 1786 bond_change_active_slave(bond, NULL); 1787 1788 while ((slave = bond->first_slave) != NULL) { 1789 /* Inform AD package of unbinding of slave 1790 * before slave is detached from the list. 1791 */ 1792 if (bond->params.mode == BOND_MODE_8023AD) { 1793 bond_3ad_unbind_slave(slave); 1794 } 1795 1796 slave_dev = slave->dev; 1797 bond_detach_slave(bond, slave); 1798 1799 if ((bond->params.mode == BOND_MODE_TLB) || 1800 (bond->params.mode == BOND_MODE_ALB)) { 1801 /* must be called only after the slave 1802 * has been detached from the list 1803 */ 1804 bond_alb_deinit_slave(bond, slave); 1805 } 1806 1807 bond_compute_features(bond); 1808 1809 /* now that the slave is detached, unlock and perform 1810 * all the undo steps that should not be called from 1811 * within a lock. 
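		 * (bond->lock is re-acquired at the bottom of the loop
		 * before the next slave is examined.)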
1812 */ 1813 write_unlock_bh(&bond->lock); 1814 1815 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1816 bond_del_vlans_from_slave(bond, slave_dev); 1817 1818 /* If the mode USES_PRIMARY, then we should only remove its 1819 * promisc and mc settings if it was the curr_active_slave, but that was 1820 * already taken care of above when we detached the slave 1821 */ 1822 if (!USES_PRIMARY(bond->params.mode)) { 1823 /* unset promiscuity level from slave */ 1824 if (bond_dev->flags & IFF_PROMISC) { 1825 dev_set_promiscuity(slave_dev, -1); 1826 } 1827 1828 /* unset allmulti level from slave */ 1829 if (bond_dev->flags & IFF_ALLMULTI) { 1830 dev_set_allmulti(slave_dev, -1); 1831 } 1832 1833 /* flush master's mc_list from slave */ 1834 bond_mc_list_flush(bond_dev, slave_dev); 1835 } 1836 1837 netdev_set_master(slave_dev, NULL); 1838 1839 /* close slave before restoring its mac address */ 1840 dev_close(slave_dev); 1841 1842 /* restore original ("permanent") mac address*/ 1843 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1844 addr.sa_family = slave_dev->type; 1845 dev_set_mac_address(slave_dev, &addr); 1846 1847 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1848 IFF_SLAVE_INACTIVE); 1849 1850 kfree(slave); 1851 1852 /* re-acquire the lock before getting the next slave */ 1853 write_lock_bh(&bond->lock); 1854 } 1855 1856 /* zero the mac address of the master so it will be 1857 * set by the application to the mac address of the 1858 * first slave 1859 */ 1860 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1861 1862 if (list_empty(&bond->vlan_list)) { 1863 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1864 } else { 1865 printk(KERN_WARNING DRV_NAME 1866 ": %s: Warning: clearing HW address of %s while it " 1867 "still has VLANs.\n", 1868 bond_dev->name, bond_dev->name); 1869 printk(KERN_WARNING DRV_NAME 1870 ": %s: When re-adding slaves, make sure the bond's " 1871 "HW address matches its VLANs'.\n", 1872 bond_dev->name); 1873 } 1874 1875 printk(KERN_INFO DRV_NAME 1876 ": %s: released all slaves\n", 1877 bond_dev->name); 1878 1879 out: 1880 write_unlock_bh(&bond->lock); 1881 1882 return 0; 1883 } 1884 1885 /* 1886 * This function changes the active slave to slave <slave_dev>. 1887 * It returns -EINVAL in the following cases. 1888 * - <slave_dev> is not found in the list. 1889 * - There is not active slave now. 1890 * - <slave_dev> is already active. 1891 * - The link state of <slave_dev> is not BOND_LINK_UP. 1892 * - <slave_dev> is not running. 1893 * In these cases, this fuction does nothing. 1894 * In the other cases, currnt_slave pointer is changed and 0 is returned. 1895 */ 1896 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 1897 { 1898 struct bonding *bond = bond_dev->priv; 1899 struct slave *old_active = NULL; 1900 struct slave *new_active = NULL; 1901 int res = 0; 1902 1903 if (!USES_PRIMARY(bond->params.mode)) { 1904 return -EINVAL; 1905 } 1906 1907 /* Verify that master_dev is indeed the master of slave_dev */ 1908 if (!(slave_dev->flags & IFF_SLAVE) || 1909 (slave_dev->master != bond_dev)) { 1910 return -EINVAL; 1911 } 1912 1913 write_lock_bh(&bond->lock); 1914 1915 old_active = bond->curr_active_slave; 1916 new_active = bond_get_slave_by_dev(bond, slave_dev); 1917 1918 /* 1919 * Changing to the current active: do nothing; return success. 
1920 */ 1921 if (new_active && (new_active == old_active)) { 1922 write_unlock_bh(&bond->lock); 1923 return 0; 1924 } 1925 1926 if ((new_active) && 1927 (old_active) && 1928 (new_active->link == BOND_LINK_UP) && 1929 IS_UP(new_active->dev)) { 1930 bond_change_active_slave(bond, new_active); 1931 } else { 1932 res = -EINVAL; 1933 } 1934 1935 write_unlock_bh(&bond->lock); 1936 1937 return res; 1938 } 1939 1940 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 1941 { 1942 struct bonding *bond = bond_dev->priv; 1943 1944 info->bond_mode = bond->params.mode; 1945 info->miimon = bond->params.miimon; 1946 1947 read_lock_bh(&bond->lock); 1948 info->num_slaves = bond->slave_cnt; 1949 read_unlock_bh(&bond->lock); 1950 1951 return 0; 1952 } 1953 1954 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 1955 { 1956 struct bonding *bond = bond_dev->priv; 1957 struct slave *slave; 1958 int i, found = 0; 1959 1960 if (info->slave_id < 0) { 1961 return -ENODEV; 1962 } 1963 1964 read_lock_bh(&bond->lock); 1965 1966 bond_for_each_slave(bond, slave, i) { 1967 if (i == (int)info->slave_id) { 1968 found = 1; 1969 break; 1970 } 1971 } 1972 1973 read_unlock_bh(&bond->lock); 1974 1975 if (found) { 1976 strcpy(info->slave_name, slave->dev->name); 1977 info->link = slave->link; 1978 info->state = slave->state; 1979 info->link_failure_count = slave->link_failure_count; 1980 } else { 1981 return -ENODEV; 1982 } 1983 1984 return 0; 1985 } 1986 1987 /*-------------------------------- Monitoring -------------------------------*/ 1988 1989 /* this function is called regularly to monitor each slave's link. */ 1990 void bond_mii_monitor(struct net_device *bond_dev) 1991 { 1992 struct bonding *bond = bond_dev->priv; 1993 struct slave *slave, *oldcurrent; 1994 int do_failover = 0; 1995 int delta_in_ticks; 1996 int i; 1997 1998 read_lock(&bond->lock); 1999 2000 delta_in_ticks = (bond->params.miimon * HZ) / 1000; 2001 2002 if (bond->kill_timers) { 2003 goto out; 2004 } 2005 2006 if (bond->slave_cnt == 0) { 2007 goto re_arm; 2008 } 2009 2010 /* we will try to read the link status of each of our slaves, and 2011 * set their IFF_RUNNING flag appropriately. For each slave not 2012 * supporting MII status, we won't do anything so that a user-space 2013 * program could monitor the link itself if needed. 2014 */ 2015 2016 read_lock(&bond->curr_slave_lock); 2017 oldcurrent = bond->curr_active_slave; 2018 read_unlock(&bond->curr_slave_lock); 2019 2020 bond_for_each_slave(bond, slave, i) { 2021 struct net_device *slave_dev = slave->dev; 2022 int link_state; 2023 u16 old_speed = slave->speed; 2024 u8 old_duplex = slave->duplex; 2025 2026 link_state = bond_check_dev_link(bond, slave_dev, 0); 2027 2028 switch (slave->link) { 2029 case BOND_LINK_UP: /* the link was up */ 2030 if (link_state == BMSR_LSTATUS) { 2031 /* link stays up, nothing more to do */ 2032 break; 2033 } else { /* link going down */ 2034 slave->link = BOND_LINK_FAIL; 2035 slave->delay = bond->params.downdelay; 2036 2037 if (slave->link_failure_count < UINT_MAX) { 2038 slave->link_failure_count++; 2039 } 2040 2041 if (bond->params.downdelay) { 2042 printk(KERN_INFO DRV_NAME 2043 ": %s: link status down for %s " 2044 "interface %s, disabling it in " 2045 "%d ms.\n", 2046 bond_dev->name, 2047 IS_UP(slave_dev) 2048 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2049 ? ((slave == oldcurrent) 2050 ? 
"active " : "backup ") 2051 : "") 2052 : "idle ", 2053 slave_dev->name, 2054 bond->params.downdelay * bond->params.miimon); 2055 } 2056 } 2057 /* no break ! fall through the BOND_LINK_FAIL test to 2058 ensure proper action to be taken 2059 */ 2060 case BOND_LINK_FAIL: /* the link has just gone down */ 2061 if (link_state != BMSR_LSTATUS) { 2062 /* link stays down */ 2063 if (slave->delay <= 0) { 2064 /* link down for too long time */ 2065 slave->link = BOND_LINK_DOWN; 2066 2067 /* in active/backup mode, we must 2068 * completely disable this interface 2069 */ 2070 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2071 (bond->params.mode == BOND_MODE_8023AD)) { 2072 bond_set_slave_inactive_flags(slave); 2073 } 2074 2075 printk(KERN_INFO DRV_NAME 2076 ": %s: link status definitely " 2077 "down for interface %s, " 2078 "disabling it\n", 2079 bond_dev->name, 2080 slave_dev->name); 2081 2082 /* notify ad that the link status has changed */ 2083 if (bond->params.mode == BOND_MODE_8023AD) { 2084 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2085 } 2086 2087 if ((bond->params.mode == BOND_MODE_TLB) || 2088 (bond->params.mode == BOND_MODE_ALB)) { 2089 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2090 } 2091 2092 if (slave == oldcurrent) { 2093 do_failover = 1; 2094 } 2095 } else { 2096 slave->delay--; 2097 } 2098 } else { 2099 /* link up again */ 2100 slave->link = BOND_LINK_UP; 2101 slave->jiffies = jiffies; 2102 printk(KERN_INFO DRV_NAME 2103 ": %s: link status up again after %d " 2104 "ms for interface %s.\n", 2105 bond_dev->name, 2106 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2107 slave_dev->name); 2108 } 2109 break; 2110 case BOND_LINK_DOWN: /* the link was down */ 2111 if (link_state != BMSR_LSTATUS) { 2112 /* the link stays down, nothing more to do */ 2113 break; 2114 } else { /* link going up */ 2115 slave->link = BOND_LINK_BACK; 2116 slave->delay = bond->params.updelay; 2117 2118 if (bond->params.updelay) { 2119 /* if updelay == 0, no need to 2120 advertise about a 0 ms delay */ 2121 printk(KERN_INFO DRV_NAME 2122 ": %s: link status up for " 2123 "interface %s, enabling it " 2124 "in %d ms.\n", 2125 bond_dev->name, 2126 slave_dev->name, 2127 bond->params.updelay * bond->params.miimon); 2128 } 2129 } 2130 /* no break ! fall through the BOND_LINK_BACK state in 2131 case there's something to do. 
2132 */ 2133 case BOND_LINK_BACK: /* the link has just come back */ 2134 if (link_state != BMSR_LSTATUS) { 2135 /* link down again */ 2136 slave->link = BOND_LINK_DOWN; 2137 2138 printk(KERN_INFO DRV_NAME 2139 ": %s: link status down again after %d " 2140 "ms for interface %s.\n", 2141 bond_dev->name, 2142 (bond->params.updelay - slave->delay) * bond->params.miimon, 2143 slave_dev->name); 2144 } else { 2145 /* link stays up */ 2146 if (slave->delay == 0) { 2147 /* now the link has been up for long time enough */ 2148 slave->link = BOND_LINK_UP; 2149 slave->jiffies = jiffies; 2150 2151 if (bond->params.mode == BOND_MODE_8023AD) { 2152 /* prevent it from being the active one */ 2153 slave->state = BOND_STATE_BACKUP; 2154 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2155 /* make it immediately active */ 2156 slave->state = BOND_STATE_ACTIVE; 2157 } else if (slave != bond->primary_slave) { 2158 /* prevent it from being the active one */ 2159 slave->state = BOND_STATE_BACKUP; 2160 } 2161 2162 printk(KERN_INFO DRV_NAME 2163 ": %s: link status definitely " 2164 "up for interface %s.\n", 2165 bond_dev->name, 2166 slave_dev->name); 2167 2168 /* notify ad that the link status has changed */ 2169 if (bond->params.mode == BOND_MODE_8023AD) { 2170 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2171 } 2172 2173 if ((bond->params.mode == BOND_MODE_TLB) || 2174 (bond->params.mode == BOND_MODE_ALB)) { 2175 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2176 } 2177 2178 if ((!oldcurrent) || 2179 (slave == bond->primary_slave)) { 2180 do_failover = 1; 2181 } 2182 } else { 2183 slave->delay--; 2184 } 2185 } 2186 break; 2187 default: 2188 /* Should not happen */ 2189 printk(KERN_ERR DRV_NAME 2190 ": %s: Error: %s Illegal value (link=%d)\n", 2191 bond_dev->name, 2192 slave->dev->name, 2193 slave->link); 2194 goto out; 2195 } /* end of switch (slave->link) */ 2196 2197 bond_update_speed_duplex(slave); 2198 2199 if (bond->params.mode == BOND_MODE_8023AD) { 2200 if (old_speed != slave->speed) { 2201 bond_3ad_adapter_speed_changed(slave); 2202 } 2203 2204 if (old_duplex != slave->duplex) { 2205 bond_3ad_adapter_duplex_changed(slave); 2206 } 2207 } 2208 2209 } /* end of for */ 2210 2211 if (do_failover) { 2212 write_lock(&bond->curr_slave_lock); 2213 2214 bond_select_active_slave(bond); 2215 2216 write_unlock(&bond->curr_slave_lock); 2217 } else 2218 bond_set_carrier(bond); 2219 2220 re_arm: 2221 if (bond->params.miimon) { 2222 mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); 2223 } 2224 out: 2225 read_unlock(&bond->lock); 2226 } 2227 2228 2229 static __be32 bond_glean_dev_ip(struct net_device *dev) 2230 { 2231 struct in_device *idev; 2232 struct in_ifaddr *ifa; 2233 __be32 addr = 0; 2234 2235 if (!dev) 2236 return 0; 2237 2238 rcu_read_lock(); 2239 idev = __in_dev_get_rcu(dev); 2240 if (!idev) 2241 goto out; 2242 2243 ifa = idev->ifa_list; 2244 if (!ifa) 2245 goto out; 2246 2247 addr = ifa->ifa_local; 2248 out: 2249 rcu_read_unlock(); 2250 return addr; 2251 } 2252 2253 static int bond_has_ip(struct bonding *bond) 2254 { 2255 struct vlan_entry *vlan, *vlan_next; 2256 2257 if (bond->master_ip) 2258 return 1; 2259 2260 if (list_empty(&bond->vlan_list)) 2261 return 0; 2262 2263 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2264 vlan_list) { 2265 if (vlan->vlan_ip) 2266 return 1; 2267 } 2268 2269 return 0; 2270 } 2271 2272 static int bond_has_this_ip(struct bonding *bond, __be32 ip) 2273 { 2274 struct vlan_entry *vlan, *vlan_next; 2275 2276 if (ip == bond->master_ip) 2277 
return 1; 2278 2279 if (list_empty(&bond->vlan_list)) 2280 return 0; 2281 2282 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2283 vlan_list) { 2284 if (ip == vlan->vlan_ip) 2285 return 1; 2286 } 2287 2288 return 0; 2289 } 2290 2291 /* 2292 * We go to the (large) trouble of VLAN tagging ARP frames because 2293 * switches in VLAN mode (especially if ports are configured as 2294 * "native" to a VLAN) might not pass non-tagged frames. 2295 */ 2296 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) 2297 { 2298 struct sk_buff *skb; 2299 2300 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2301 slave_dev->name, dest_ip, src_ip, vlan_id); 2302 2303 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2304 NULL, slave_dev->dev_addr, NULL); 2305 2306 if (!skb) { 2307 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2308 return; 2309 } 2310 if (vlan_id) { 2311 skb = vlan_put_tag(skb, vlan_id); 2312 if (!skb) { 2313 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2314 return; 2315 } 2316 } 2317 arp_xmit(skb); 2318 } 2319 2320 2321 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2322 { 2323 int i, vlan_id, rv; 2324 __be32 *targets = bond->params.arp_targets; 2325 struct vlan_entry *vlan, *vlan_next; 2326 struct net_device *vlan_dev; 2327 struct flowi fl; 2328 struct rtable *rt; 2329 2330 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2331 if (!targets[i]) 2332 continue; 2333 dprintk("basa: target %x\n", targets[i]); 2334 if (list_empty(&bond->vlan_list)) { 2335 dprintk("basa: empty vlan: arp_send\n"); 2336 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2337 bond->master_ip, 0); 2338 continue; 2339 } 2340 2341 /* 2342 * If VLANs are configured, we do a route lookup to 2343 * determine which VLAN interface would be used, so we 2344 * can tag the ARP with the proper VLAN tag. 2345 */ 2346 memset(&fl, 0, sizeof(fl)); 2347 fl.fl4_dst = targets[i]; 2348 fl.fl4_tos = RTO_ONLINK; 2349 2350 rv = ip_route_output_key(&rt, &fl); 2351 if (rv) { 2352 if (net_ratelimit()) { 2353 printk(KERN_WARNING DRV_NAME 2354 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2355 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2356 } 2357 continue; 2358 } 2359 2360 /* 2361 * This target is not on a VLAN 2362 */ 2363 if (rt->u.dst.dev == bond->dev) { 2364 ip_rt_put(rt); 2365 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2366 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2367 bond->master_ip, 0); 2368 continue; 2369 } 2370 2371 vlan_id = 0; 2372 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2373 vlan_list) { 2374 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2375 if (vlan_dev == rt->u.dst.dev) { 2376 vlan_id = vlan->vlan_id; 2377 dprintk("basa: vlan match on %s %d\n", 2378 vlan_dev->name, vlan_id); 2379 break; 2380 } 2381 } 2382 2383 if (vlan_id) { 2384 ip_rt_put(rt); 2385 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2386 vlan->vlan_ip, vlan_id); 2387 continue; 2388 } 2389 2390 if (net_ratelimit()) { 2391 printk(KERN_WARNING DRV_NAME 2392 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2393 bond->dev->name, NIPQUAD(fl.fl4_dst), 2394 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2395 } 2396 ip_rt_put(rt); 2397 } 2398 } 2399 2400 /* 2401 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2402 * for each VLAN above us. 
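* This is typically done right after a failover, so that peers and the
* switches' forwarding tables learn that the bond's addresses are now
* reachable through the new active slave.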
2403 */ 2404 static void bond_send_gratuitous_arp(struct bonding *bond) 2405 { 2406 struct slave *slave = bond->curr_active_slave; 2407 struct vlan_entry *vlan; 2408 struct net_device *vlan_dev; 2409 2410 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2411 slave ? slave->dev->name : "NULL"); 2412 if (!slave) 2413 return; 2414 2415 if (bond->master_ip) { 2416 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2417 bond->master_ip, 0); 2418 } 2419 2420 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2421 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2422 if (vlan->vlan_ip) { 2423 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2424 vlan->vlan_ip, vlan->vlan_id); 2425 } 2426 } 2427 } 2428 2429 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 2430 { 2431 int i; 2432 __be32 *targets = bond->params.arp_targets; 2433 2434 targets = bond->params.arp_targets; 2435 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { 2436 dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] " 2437 "%u.%u.%u.%u bhti(tip) %d\n", 2438 NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]), 2439 bond_has_this_ip(bond, tip)); 2440 if (sip == targets[i]) { 2441 if (bond_has_this_ip(bond, tip)) 2442 slave->last_arp_rx = jiffies; 2443 return; 2444 } 2445 } 2446 } 2447 2448 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 2449 { 2450 struct arphdr *arp; 2451 struct slave *slave; 2452 struct bonding *bond; 2453 unsigned char *arp_ptr; 2454 __be32 sip, tip; 2455 2456 if (dev->nd_net != &init_net) 2457 goto out; 2458 2459 if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) 2460 goto out; 2461 2462 bond = dev->priv; 2463 read_lock(&bond->lock); 2464 2465 dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", 2466 bond->dev->name, skb->dev ? skb->dev->name : "NULL", 2467 orig_dev ? orig_dev->name : "NULL"); 2468 2469 slave = bond_get_slave_by_dev(bond, orig_dev); 2470 if (!slave || !slave_do_arp_validate(bond, slave)) 2471 goto out_unlock; 2472 2473 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 2474 if (!pskb_may_pull(skb, (sizeof(struct arphdr) + 2475 (2 * dev->addr_len) + 2476 (2 * sizeof(u32))))) 2477 goto out_unlock; 2478 2479 arp = arp_hdr(skb); 2480 if (arp->ar_hln != dev->addr_len || 2481 skb->pkt_type == PACKET_OTHERHOST || 2482 skb->pkt_type == PACKET_LOOPBACK || 2483 arp->ar_hrd != htons(ARPHRD_ETHER) || 2484 arp->ar_pro != htons(ETH_P_IP) || 2485 arp->ar_pln != 4) 2486 goto out_unlock; 2487 2488 arp_ptr = (unsigned char *)(arp + 1); 2489 arp_ptr += dev->addr_len; 2490 memcpy(&sip, arp_ptr, 4); 2491 arp_ptr += 4 + dev->addr_len; 2492 memcpy(&tip, arp_ptr, 4); 2493 2494 dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u" 2495 " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name, 2496 slave->state, bond->params.arp_validate, 2497 slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip)); 2498 2499 /* 2500 * Backup slaves won't see the ARP reply, but do come through 2501 * here for each ARP probe (so we swap the sip/tip to validate 2502 * the probe). In a "redundant switch, common router" type of 2503 * configuration, the ARP probe will (hopefully) travel from 2504 * the active, through one switch, the router, then the other 2505 * switch before reaching the backup. 
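* (hence the swapped sip/tip arguments used for backup slaves just below)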
2506 */ 2507 if (slave->state == BOND_STATE_ACTIVE) 2508 bond_validate_arp(bond, slave, sip, tip); 2509 else 2510 bond_validate_arp(bond, slave, tip, sip); 2511 2512 out_unlock: 2513 read_unlock(&bond->lock); 2514 out: 2515 dev_kfree_skb(skb); 2516 return NET_RX_SUCCESS; 2517 } 2518 2519 /* 2520 * this function is called regularly to monitor each slave's link 2521 * ensuring that traffic is being sent and received when arp monitoring 2522 * is used in load-balancing mode. if the adapter has been dormant, then an 2523 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2524 * arp monitoring in active backup mode. 2525 */ 2526 void bond_loadbalance_arp_mon(struct net_device *bond_dev) 2527 { 2528 struct bonding *bond = bond_dev->priv; 2529 struct slave *slave, *oldcurrent; 2530 int do_failover = 0; 2531 int delta_in_ticks; 2532 int i; 2533 2534 read_lock(&bond->lock); 2535 2536 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2537 2538 if (bond->kill_timers) { 2539 goto out; 2540 } 2541 2542 if (bond->slave_cnt == 0) { 2543 goto re_arm; 2544 } 2545 2546 read_lock(&bond->curr_slave_lock); 2547 oldcurrent = bond->curr_active_slave; 2548 read_unlock(&bond->curr_slave_lock); 2549 2550 /* see if any of the previous devices are up now (i.e. they have 2551 * xmt and rcv traffic). the curr_active_slave does not come into 2552 * the picture unless it is null. also, slave->jiffies is not needed 2553 * here because we send an arp on each slave and give a slave as 2554 * long as it needs to get the tx/rx within the delta. 2555 * TODO: what about up/down delay in arp mode? it wasn't here before 2556 * so it can wait 2557 */ 2558 bond_for_each_slave(bond, slave, i) { 2559 if (slave->link != BOND_LINK_UP) { 2560 if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) && 2561 ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) { 2562 2563 slave->link = BOND_LINK_UP; 2564 slave->state = BOND_STATE_ACTIVE; 2565 2566 /* primary_slave has no meaning in round-robin 2567 * mode. the window of a slave being up and 2568 * curr_active_slave being null after enslaving 2569 * is closed. 
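* When there is no current slave we simply flag do_failover and let
* bond_select_active_slave() pick one further down.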
2570 */ 2571 if (!oldcurrent) { 2572 printk(KERN_INFO DRV_NAME 2573 ": %s: link status definitely " 2574 "up for interface %s, ", 2575 bond_dev->name, 2576 slave->dev->name); 2577 do_failover = 1; 2578 } else { 2579 printk(KERN_INFO DRV_NAME 2580 ": %s: interface %s is now up\n", 2581 bond_dev->name, 2582 slave->dev->name); 2583 } 2584 } 2585 } else { 2586 /* slave->link == BOND_LINK_UP */ 2587 2588 /* not all switches will respond to an arp request 2589 * when the source ip is 0, so don't take the link down 2590 * if we don't know our ip yet 2591 */ 2592 if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2593 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 2594 bond_has_ip(bond))) { 2595 2596 slave->link = BOND_LINK_DOWN; 2597 slave->state = BOND_STATE_BACKUP; 2598 2599 if (slave->link_failure_count < UINT_MAX) { 2600 slave->link_failure_count++; 2601 } 2602 2603 printk(KERN_INFO DRV_NAME 2604 ": %s: interface %s is now down.\n", 2605 bond_dev->name, 2606 slave->dev->name); 2607 2608 if (slave == oldcurrent) { 2609 do_failover = 1; 2610 } 2611 } 2612 } 2613 2614 /* note: if switch is in round-robin mode, all links 2615 * must tx arp to ensure all links rx an arp - otherwise 2616 * links may oscillate or not come up at all; if switch is 2617 * in something like xor mode, there is nothing we can 2618 * do - all replies will be rx'ed on same link causing slaves 2619 * to be unstable during low/no traffic periods 2620 */ 2621 if (IS_UP(slave->dev)) { 2622 bond_arp_send_all(bond, slave); 2623 } 2624 } 2625 2626 if (do_failover) { 2627 write_lock(&bond->curr_slave_lock); 2628 2629 bond_select_active_slave(bond); 2630 2631 write_unlock(&bond->curr_slave_lock); 2632 } 2633 2634 re_arm: 2635 if (bond->params.arp_interval) { 2636 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2637 } 2638 out: 2639 read_unlock(&bond->lock); 2640 } 2641 2642 /* 2643 * When using arp monitoring in active-backup mode, this function is 2644 * called to determine if any backup slaves have gone down or a new 2645 * current slave needs to be found. 2646 * The backup slaves never generate traffic; they are considered up merely 2647 * by receiving traffic. If the current slave goes down, each backup slave will 2648 * be given the opportunity to tx/rx an arp before being taken down - this 2649 * prevents all slaves from being taken down due to the current slave not 2650 * sending any traffic for the backups to receive. The arps are not strictly 2651 * necessary; any tx and rx traffic will keep the current slave up. While any 2652 * rx traffic will keep the backup slaves up, the current slave is responsible 2653 * for generating traffic to keep them up regardless of any other traffic they 2654 * may have received. 2655 * see bond_loadbalance_arp_mon for arp monitoring in load balancing mode 2656 */ 2657 void bond_activebackup_arp_mon(struct net_device *bond_dev) 2658 { 2659 struct bonding *bond = bond_dev->priv; 2660 struct slave *slave; 2661 int delta_in_ticks; 2662 int i; 2663 2664 read_lock(&bond->lock); 2665 2666 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2667 2668 if (bond->kill_timers) { 2669 goto out; 2670 } 2671 2672 if (bond->slave_cnt == 0) { 2673 goto re_arm; 2674 } 2675 2676 /* determine if any slave has come up or any backup slave has 2677 * gone down 2678 * TODO: what about up/down delay in arp mode?
it wasn't here before 2679 * so it can wait 2680 */ 2681 bond_for_each_slave(bond, slave, i) { 2682 if (slave->link != BOND_LINK_UP) { 2683 if ((jiffies - slave_last_rx(bond, slave)) <= 2684 delta_in_ticks) { 2685 2686 slave->link = BOND_LINK_UP; 2687 2688 write_lock(&bond->curr_slave_lock); 2689 2690 if ((!bond->curr_active_slave) && 2691 ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) { 2692 bond_change_active_slave(bond, slave); 2693 bond->current_arp_slave = NULL; 2694 } else if (bond->curr_active_slave != slave) { 2695 /* this slave has just come up but we 2696 * already have a current slave; this 2697 * can also happen if bond_enslave adds 2698 * a new slave that is up while we are 2699 * searching for a new slave 2700 */ 2701 bond_set_slave_inactive_flags(slave); 2702 bond->current_arp_slave = NULL; 2703 } 2704 2705 bond_set_carrier(bond); 2706 2707 if (slave == bond->curr_active_slave) { 2708 printk(KERN_INFO DRV_NAME 2709 ": %s: %s is up and now the " 2710 "active interface\n", 2711 bond_dev->name, 2712 slave->dev->name); 2713 netif_carrier_on(bond->dev); 2714 } else { 2715 printk(KERN_INFO DRV_NAME 2716 ": %s: backup interface %s is " 2717 "now up\n", 2718 bond_dev->name, 2719 slave->dev->name); 2720 } 2721 2722 write_unlock(&bond->curr_slave_lock); 2723 } 2724 } else { 2725 read_lock(&bond->curr_slave_lock); 2726 2727 if ((slave != bond->curr_active_slave) && 2728 (!bond->current_arp_slave) && 2729 (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) && 2730 bond_has_ip(bond))) { 2731 /* a backup slave has gone down; three times 2732 * the delta allows the current slave to be 2733 * taken out before the backup slave. 2734 * note: a non-null current_arp_slave indicates 2735 * the curr_active_slave went down and we are 2736 * searching for a new one; under this 2737 * condition we only take the curr_active_slave 2738 * down - this gives each slave a chance to 2739 * tx/rx traffic before being taken out 2740 */ 2741 2742 read_unlock(&bond->curr_slave_lock); 2743 2744 slave->link = BOND_LINK_DOWN; 2745 2746 if (slave->link_failure_count < UINT_MAX) { 2747 slave->link_failure_count++; 2748 } 2749 2750 bond_set_slave_inactive_flags(slave); 2751 2752 printk(KERN_INFO DRV_NAME 2753 ": %s: backup interface %s is now down\n", 2754 bond_dev->name, 2755 slave->dev->name); 2756 } else { 2757 read_unlock(&bond->curr_slave_lock); 2758 } 2759 } 2760 } 2761 2762 read_lock(&bond->curr_slave_lock); 2763 slave = bond->curr_active_slave; 2764 read_unlock(&bond->curr_slave_lock); 2765 2766 if (slave) { 2767 /* if we have sent traffic in the past 2*arp_intervals but 2768 * haven't xmit and rx traffic in that time interval, select 2769 * a different slave. slave->jiffies is only updated when 2770 * a slave first becomes the curr_active_slave - not necessarily 2771 * after every arp; this ensures the slave has a full 2*delta 2772 * before being taken out. 
if a primary is being used, check 2773 * if it is up and needs to take over as the curr_active_slave 2774 */ 2775 if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2776 (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) && 2777 bond_has_ip(bond))) && 2778 ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { 2779 2780 slave->link = BOND_LINK_DOWN; 2781 2782 if (slave->link_failure_count < UINT_MAX) { 2783 slave->link_failure_count++; 2784 } 2785 2786 printk(KERN_INFO DRV_NAME 2787 ": %s: link status down for active interface " 2788 "%s, disabling it\n", 2789 bond_dev->name, 2790 slave->dev->name); 2791 2792 write_lock(&bond->curr_slave_lock); 2793 2794 bond_select_active_slave(bond); 2795 slave = bond->curr_active_slave; 2796 2797 write_unlock(&bond->curr_slave_lock); 2798 2799 bond->current_arp_slave = slave; 2800 2801 if (slave) { 2802 slave->jiffies = jiffies; 2803 } 2804 } else if ((bond->primary_slave) && 2805 (bond->primary_slave != slave) && 2806 (bond->primary_slave->link == BOND_LINK_UP)) { 2807 /* at this point, slave is the curr_active_slave */ 2808 printk(KERN_INFO DRV_NAME 2809 ": %s: changing from interface %s to primary " 2810 "interface %s\n", 2811 bond_dev->name, 2812 slave->dev->name, 2813 bond->primary_slave->dev->name); 2814 2815 /* primary is up so switch to it */ 2816 write_lock(&bond->curr_slave_lock); 2817 bond_change_active_slave(bond, bond->primary_slave); 2818 write_unlock(&bond->curr_slave_lock); 2819 2820 slave = bond->primary_slave; 2821 slave->jiffies = jiffies; 2822 } else { 2823 bond->current_arp_slave = NULL; 2824 } 2825 2826 /* the current slave must tx an arp to ensure backup slaves 2827 * rx traffic 2828 */ 2829 if (slave && bond_has_ip(bond)) { 2830 bond_arp_send_all(bond, slave); 2831 } 2832 } 2833 2834 /* if we don't have a curr_active_slave, search for the next available 2835 * backup slave from the current_arp_slave and make it the candidate 2836 * for becoming the curr_active_slave 2837 */ 2838 if (!slave) { 2839 if (!bond->current_arp_slave) { 2840 bond->current_arp_slave = bond->first_slave; 2841 } 2842 2843 if (bond->current_arp_slave) { 2844 bond_set_slave_inactive_flags(bond->current_arp_slave); 2845 2846 /* search for next candidate */ 2847 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 2848 if (IS_UP(slave->dev)) { 2849 slave->link = BOND_LINK_BACK; 2850 bond_set_slave_active_flags(slave); 2851 bond_arp_send_all(bond, slave); 2852 slave->jiffies = jiffies; 2853 bond->current_arp_slave = slave; 2854 break; 2855 } 2856 2857 /* if the link state is up at this point, we 2858 * mark it down - this can happen if we have 2859 * simultaneous link failures and 2860 * reselect_active_interface doesn't make this 2861 * one the current slave so it is still marked 2862 * up when it is actually down 2863 */ 2864 if (slave->link == BOND_LINK_UP) { 2865 slave->link = BOND_LINK_DOWN; 2866 if (slave->link_failure_count < UINT_MAX) { 2867 slave->link_failure_count++; 2868 } 2869 2870 bond_set_slave_inactive_flags(slave); 2871 2872 printk(KERN_INFO DRV_NAME 2873 ": %s: backup interface %s is " 2874 "now down.\n", 2875 bond_dev->name, 2876 slave->dev->name); 2877 } 2878 } 2879 } 2880 } 2881 2882 re_arm: 2883 if (bond->params.arp_interval) { 2884 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2885 } 2886 out: 2887 read_unlock(&bond->lock); 2888 } 2889 2890 /*------------------------------ proc/seq_file-------------------------------*/ 2891 2892 #ifdef CONFIG_PROC_FS 2893 2894 #define SEQ_START_TOKEN 
((void *)1) 2895 2896 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 2897 { 2898 struct bonding *bond = seq->private; 2899 loff_t off = 0; 2900 struct slave *slave; 2901 int i; 2902 2903 /* make sure the bond won't be taken away */ 2904 read_lock(&dev_base_lock); 2905 read_lock_bh(&bond->lock); 2906 2907 if (*pos == 0) { 2908 return SEQ_START_TOKEN; 2909 } 2910 2911 bond_for_each_slave(bond, slave, i) { 2912 if (++off == *pos) { 2913 return slave; 2914 } 2915 } 2916 2917 return NULL; 2918 } 2919 2920 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2921 { 2922 struct bonding *bond = seq->private; 2923 struct slave *slave = v; 2924 2925 ++*pos; 2926 if (v == SEQ_START_TOKEN) { 2927 return bond->first_slave; 2928 } 2929 2930 slave = slave->next; 2931 2932 return (slave == bond->first_slave) ? NULL : slave; 2933 } 2934 2935 static void bond_info_seq_stop(struct seq_file *seq, void *v) 2936 { 2937 struct bonding *bond = seq->private; 2938 2939 read_unlock_bh(&bond->lock); 2940 read_unlock(&dev_base_lock); 2941 } 2942 2943 static void bond_info_show_master(struct seq_file *seq) 2944 { 2945 struct bonding *bond = seq->private; 2946 struct slave *curr; 2947 int i; 2948 u32 target; 2949 2950 read_lock(&bond->curr_slave_lock); 2951 curr = bond->curr_active_slave; 2952 read_unlock(&bond->curr_slave_lock); 2953 2954 seq_printf(seq, "Bonding Mode: %s\n", 2955 bond_mode_name(bond->params.mode)); 2956 2957 if (bond->params.mode == BOND_MODE_XOR || 2958 bond->params.mode == BOND_MODE_8023AD) { 2959 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 2960 xmit_hashtype_tbl[bond->params.xmit_policy].modename, 2961 bond->params.xmit_policy); 2962 } 2963 2964 if (USES_PRIMARY(bond->params.mode)) { 2965 seq_printf(seq, "Primary Slave: %s\n", 2966 (bond->primary_slave) ? 2967 bond->primary_slave->dev->name : "None"); 2968 2969 seq_printf(seq, "Currently Active Slave: %s\n", 2970 (curr) ? curr->dev->name : "None"); 2971 } 2972 2973 seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? 2974 "up" : "down"); 2975 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 2976 seq_printf(seq, "Up Delay (ms): %d\n", 2977 bond->params.updelay * bond->params.miimon); 2978 seq_printf(seq, "Down Delay (ms): %d\n", 2979 bond->params.downdelay * bond->params.miimon); 2980 2981 2982 /* ARP information */ 2983 if(bond->params.arp_interval > 0) { 2984 int printed=0; 2985 seq_printf(seq, "ARP Polling Interval (ms): %d\n", 2986 bond->params.arp_interval); 2987 2988 seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); 2989 2990 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) { 2991 if (!bond->params.arp_targets[i]) 2992 continue; 2993 if (printed) 2994 seq_printf(seq, ","); 2995 target = ntohl(bond->params.arp_targets[i]); 2996 seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target)); 2997 printed = 1; 2998 } 2999 seq_printf(seq, "\n"); 3000 } 3001 3002 if (bond->params.mode == BOND_MODE_8023AD) { 3003 struct ad_info ad_info; 3004 DECLARE_MAC_BUF(mac); 3005 3006 seq_puts(seq, "\n802.3ad info\n"); 3007 seq_printf(seq, "LACP rate: %s\n", 3008 (bond->params.lacp_fast) ? 
"fast" : "slow"); 3009 3010 if (bond_3ad_get_active_agg_info(bond, &ad_info)) { 3011 seq_printf(seq, "bond %s has no active aggregator\n", 3012 bond->dev->name); 3013 } else { 3014 seq_printf(seq, "Active Aggregator Info:\n"); 3015 3016 seq_printf(seq, "\tAggregator ID: %d\n", 3017 ad_info.aggregator_id); 3018 seq_printf(seq, "\tNumber of ports: %d\n", 3019 ad_info.ports); 3020 seq_printf(seq, "\tActor Key: %d\n", 3021 ad_info.actor_key); 3022 seq_printf(seq, "\tPartner Key: %d\n", 3023 ad_info.partner_key); 3024 seq_printf(seq, "\tPartner Mac Address: %s\n", 3025 print_mac(mac, ad_info.partner_system)); 3026 } 3027 } 3028 } 3029 3030 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) 3031 { 3032 struct bonding *bond = seq->private; 3033 DECLARE_MAC_BUF(mac); 3034 3035 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); 3036 seq_printf(seq, "MII Status: %s\n", 3037 (slave->link == BOND_LINK_UP) ? "up" : "down"); 3038 seq_printf(seq, "Link Failure Count: %u\n", 3039 slave->link_failure_count); 3040 3041 seq_printf(seq, 3042 "Permanent HW addr: %s\n", 3043 print_mac(mac, slave->perm_hwaddr)); 3044 3045 if (bond->params.mode == BOND_MODE_8023AD) { 3046 const struct aggregator *agg 3047 = SLAVE_AD_INFO(slave).port.aggregator; 3048 3049 if (agg) { 3050 seq_printf(seq, "Aggregator ID: %d\n", 3051 agg->aggregator_identifier); 3052 } else { 3053 seq_puts(seq, "Aggregator ID: N/A\n"); 3054 } 3055 } 3056 } 3057 3058 static int bond_info_seq_show(struct seq_file *seq, void *v) 3059 { 3060 if (v == SEQ_START_TOKEN) { 3061 seq_printf(seq, "%s\n", version); 3062 bond_info_show_master(seq); 3063 } else { 3064 bond_info_show_slave(seq, v); 3065 } 3066 3067 return 0; 3068 } 3069 3070 static struct seq_operations bond_info_seq_ops = { 3071 .start = bond_info_seq_start, 3072 .next = bond_info_seq_next, 3073 .stop = bond_info_seq_stop, 3074 .show = bond_info_seq_show, 3075 }; 3076 3077 static int bond_info_open(struct inode *inode, struct file *file) 3078 { 3079 struct seq_file *seq; 3080 struct proc_dir_entry *proc; 3081 int res; 3082 3083 res = seq_open(file, &bond_info_seq_ops); 3084 if (!res) { 3085 /* recover the pointer buried in proc_dir_entry data */ 3086 seq = file->private_data; 3087 proc = PDE(inode); 3088 seq->private = proc->data; 3089 } 3090 3091 return res; 3092 } 3093 3094 static const struct file_operations bond_info_fops = { 3095 .owner = THIS_MODULE, 3096 .open = bond_info_open, 3097 .read = seq_read, 3098 .llseek = seq_lseek, 3099 .release = seq_release, 3100 }; 3101 3102 static int bond_create_proc_entry(struct bonding *bond) 3103 { 3104 struct net_device *bond_dev = bond->dev; 3105 3106 if (bond_proc_dir) { 3107 bond->proc_entry = create_proc_entry(bond_dev->name, 3108 S_IRUGO, 3109 bond_proc_dir); 3110 if (bond->proc_entry == NULL) { 3111 printk(KERN_WARNING DRV_NAME 3112 ": Warning: Cannot create /proc/net/%s/%s\n", 3113 DRV_NAME, bond_dev->name); 3114 } else { 3115 bond->proc_entry->data = bond; 3116 bond->proc_entry->proc_fops = &bond_info_fops; 3117 bond->proc_entry->owner = THIS_MODULE; 3118 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); 3119 } 3120 } 3121 3122 return 0; 3123 } 3124 3125 static void bond_remove_proc_entry(struct bonding *bond) 3126 { 3127 if (bond_proc_dir && bond->proc_entry) { 3128 remove_proc_entry(bond->proc_file_name, bond_proc_dir); 3129 memset(bond->proc_file_name, 0, IFNAMSIZ); 3130 bond->proc_entry = NULL; 3131 } 3132 } 3133 3134 /* Create the bonding directory under /proc/net, if doesn't exist yet. 
3135 * Caller must hold rtnl_lock. 3136 */ 3137 static void bond_create_proc_dir(void) 3138 { 3139 int len = strlen(DRV_NAME); 3140 3141 for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; 3142 bond_proc_dir = bond_proc_dir->next) { 3143 if ((bond_proc_dir->namelen == len) && 3144 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3145 break; 3146 } 3147 } 3148 3149 if (!bond_proc_dir) { 3150 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); 3151 if (bond_proc_dir) { 3152 bond_proc_dir->owner = THIS_MODULE; 3153 } else { 3154 printk(KERN_WARNING DRV_NAME 3155 ": Warning: cannot create /proc/net/%s\n", 3156 DRV_NAME); 3157 } 3158 } 3159 } 3160 3161 /* Destroy the bonding directory under /proc/net, if empty. 3162 * Caller must hold rtnl_lock. 3163 */ 3164 static void bond_destroy_proc_dir(void) 3165 { 3166 struct proc_dir_entry *de; 3167 3168 if (!bond_proc_dir) { 3169 return; 3170 } 3171 3172 /* verify that the /proc dir is empty */ 3173 for (de = bond_proc_dir->subdir; de; de = de->next) { 3174 /* ignore . and .. */ 3175 if (*(de->name) != '.') { 3176 break; 3177 } 3178 } 3179 3180 if (de) { 3181 if (bond_proc_dir->owner == THIS_MODULE) { 3182 bond_proc_dir->owner = NULL; 3183 } 3184 } else { 3185 remove_proc_entry(DRV_NAME, init_net.proc_net); 3186 bond_proc_dir = NULL; 3187 } 3188 } 3189 #endif /* CONFIG_PROC_FS */ 3190 3191 /*-------------------------- netdev event handling --------------------------*/ 3192 3193 /* 3194 * Change device name 3195 */ 3196 static int bond_event_changename(struct bonding *bond) 3197 { 3198 #ifdef CONFIG_PROC_FS 3199 bond_remove_proc_entry(bond); 3200 bond_create_proc_entry(bond); 3201 #endif 3202 down_write(&(bonding_rwsem)); 3203 bond_destroy_sysfs_entry(bond); 3204 bond_create_sysfs_entry(bond); 3205 up_write(&(bonding_rwsem)); 3206 return NOTIFY_DONE; 3207 } 3208 3209 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3210 { 3211 struct bonding *event_bond = bond_dev->priv; 3212 3213 switch (event) { 3214 case NETDEV_CHANGENAME: 3215 return bond_event_changename(event_bond); 3216 case NETDEV_UNREGISTER: 3217 /* 3218 * TODO: remove a bond from the list? 3219 */ 3220 break; 3221 default: 3222 break; 3223 } 3224 3225 return NOTIFY_DONE; 3226 } 3227 3228 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3229 { 3230 struct net_device *bond_dev = slave_dev->master; 3231 struct bonding *bond = bond_dev->priv; 3232 3233 switch (event) { 3234 case NETDEV_UNREGISTER: 3235 if (bond_dev) { 3236 bond_release(bond_dev, slave_dev); 3237 } 3238 break; 3239 case NETDEV_CHANGE: 3240 /* 3241 * TODO: is this what we get if somebody 3242 * sets up a hierarchical bond, then rmmod's 3243 * one of the slave bonding devices? 3244 */ 3245 break; 3246 case NETDEV_DOWN: 3247 /* 3248 * ... Or is it this? 3249 */ 3250 break; 3251 case NETDEV_CHANGEMTU: 3252 /* 3253 * TODO: Should slaves be allowed to 3254 * independently alter their MTU? For 3255 * an active-backup bond, slaves need 3256 * not be the same type of device, so 3257 * MTUs may vary. For other modes, 3258 * slaves arguably should have the 3259 * same MTUs. To do this, we'd need to 3260 * take over the slave's change_mtu 3261 * function for the duration of their 3262 * servitude. 
3263 */ 3264 break; 3265 case NETDEV_CHANGENAME: 3266 /* 3267 * TODO: handle changing the primary's name 3268 */ 3269 break; 3270 case NETDEV_FEAT_CHANGE: 3271 bond_compute_features(bond); 3272 break; 3273 default: 3274 break; 3275 } 3276 3277 return NOTIFY_DONE; 3278 } 3279 3280 /* 3281 * bond_netdev_event: handle netdev notifier chain events. 3282 * 3283 * This function receives events for the netdev chain. The caller (an 3284 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 3285 * locks for us to safely manipulate the slave devices (RTNL lock, 3286 * dev_probe_lock). 3287 */ 3288 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3289 { 3290 struct net_device *event_dev = (struct net_device *)ptr; 3291 3292 if (event_dev->nd_net != &init_net) 3293 return NOTIFY_DONE; 3294 3295 dprintk("event_dev: %s, event: %lx\n", 3296 (event_dev ? event_dev->name : "None"), 3297 event); 3298 3299 if (!(event_dev->priv_flags & IFF_BONDING)) 3300 return NOTIFY_DONE; 3301 3302 if (event_dev->flags & IFF_MASTER) { 3303 dprintk("IFF_MASTER\n"); 3304 return bond_master_netdev_event(event, event_dev); 3305 } 3306 3307 if (event_dev->flags & IFF_SLAVE) { 3308 dprintk("IFF_SLAVE\n"); 3309 return bond_slave_netdev_event(event, event_dev); 3310 } 3311 3312 return NOTIFY_DONE; 3313 } 3314 3315 /* 3316 * bond_inetaddr_event: handle inetaddr notifier chain events. 3317 * 3318 * We keep track of device IPs primarily to use as source addresses in 3319 * ARP monitor probes (rather than spewing out broadcasts all the time). 3320 * 3321 * We track one IP for the main device (if it has one), plus one per VLAN. 3322 */ 3323 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3324 { 3325 struct in_ifaddr *ifa = ptr; 3326 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3327 struct bonding *bond, *bond_next; 3328 struct vlan_entry *vlan, *vlan_next; 3329 3330 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3331 if (bond->dev == event_dev) { 3332 switch (event) { 3333 case NETDEV_UP: 3334 bond->master_ip = ifa->ifa_local; 3335 return NOTIFY_OK; 3336 case NETDEV_DOWN: 3337 bond->master_ip = bond_glean_dev_ip(bond->dev); 3338 return NOTIFY_OK; 3339 default: 3340 return NOTIFY_DONE; 3341 } 3342 } 3343 3344 if (list_empty(&bond->vlan_list)) 3345 continue; 3346 3347 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3348 vlan_list) { 3349 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 3350 if (vlan_dev == event_dev) { 3351 switch (event) { 3352 case NETDEV_UP: 3353 vlan->vlan_ip = ifa->ifa_local; 3354 return NOTIFY_OK; 3355 case NETDEV_DOWN: 3356 vlan->vlan_ip = 3357 bond_glean_dev_ip(vlan_dev); 3358 return NOTIFY_OK; 3359 default: 3360 return NOTIFY_DONE; 3361 } 3362 } 3363 } 3364 } 3365 return NOTIFY_DONE; 3366 } 3367 3368 static struct notifier_block bond_netdev_notifier = { 3369 .notifier_call = bond_netdev_event, 3370 }; 3371 3372 static struct notifier_block bond_inetaddr_notifier = { 3373 .notifier_call = bond_inetaddr_event, 3374 }; 3375 3376 /*-------------------------- Packet type handling ---------------------------*/ 3377 3378 /* register to receive lacpdus on a bond */ 3379 static void bond_register_lacpdu(struct bonding *bond) 3380 { 3381 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3382 3383 /* initialize packet type */ 3384 pk_type->type = PKT_TYPE_LACPDU; 3385 pk_type->dev = bond->dev; 3386 pk_type->func = bond_3ad_lacpdu_recv; 3387 
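/* register the handler so LACPDUs received on the bond device are
* handed to the 802.3ad state machine via bond_3ad_lacpdu_recv */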
3388 dev_add_pack(pk_type); 3389 } 3390 3391 /* unregister to receive lacpdus on a bond */ 3392 static void bond_unregister_lacpdu(struct bonding *bond) 3393 { 3394 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3395 } 3396 3397 void bond_register_arp(struct bonding *bond) 3398 { 3399 struct packet_type *pt = &bond->arp_mon_pt; 3400 3401 if (pt->type) 3402 return; 3403 3404 pt->type = htons(ETH_P_ARP); 3405 pt->dev = bond->dev; 3406 pt->func = bond_arp_rcv; 3407 dev_add_pack(pt); 3408 } 3409 3410 void bond_unregister_arp(struct bonding *bond) 3411 { 3412 struct packet_type *pt = &bond->arp_mon_pt; 3413 3414 dev_remove_pack(pt); 3415 pt->type = 0; 3416 } 3417 3418 /*---------------------------- Hashing Policies -----------------------------*/ 3419 3420 /* 3421 * Hash for the output device based upon layer 3 and layer 4 data. If 3422 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3423 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3424 */ 3425 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3426 struct net_device *bond_dev, int count) 3427 { 3428 struct ethhdr *data = (struct ethhdr *)skb->data; 3429 struct iphdr *iph = ip_hdr(skb); 3430 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); 3431 int layer4_xor = 0; 3432 3433 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3434 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3435 (iph->protocol == IPPROTO_TCP || 3436 iph->protocol == IPPROTO_UDP)) { 3437 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); 3438 } 3439 return (layer4_xor ^ 3440 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3441 3442 } 3443 3444 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3445 } 3446 3447 /* 3448 * Hash for the output device based upon layer 2 data 3449 */ 3450 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3451 struct net_device *bond_dev, int count) 3452 { 3453 struct ethhdr *data = (struct ethhdr *)skb->data; 3454 3455 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3456 } 3457 3458 /*-------------------------- Device entry points ----------------------------*/ 3459 3460 static int bond_open(struct net_device *bond_dev) 3461 { 3462 struct bonding *bond = bond_dev->priv; 3463 struct timer_list *mii_timer = &bond->mii_timer; 3464 struct timer_list *arp_timer = &bond->arp_timer; 3465 3466 bond->kill_timers = 0; 3467 3468 if ((bond->params.mode == BOND_MODE_TLB) || 3469 (bond->params.mode == BOND_MODE_ALB)) { 3470 struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); 3471 3472 /* bond_alb_initialize must be called before the timer 3473 * is started. 3474 */ 3475 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3476 /* something went wrong - fail the open operation */ 3477 return -1; 3478 } 3479 3480 init_timer(alb_timer); 3481 alb_timer->expires = jiffies + 1; 3482 alb_timer->data = (unsigned long)bond; 3483 alb_timer->function = (void *)&bond_alb_monitor; 3484 add_timer(alb_timer); 3485 } 3486 3487 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3488 init_timer(mii_timer); 3489 mii_timer->expires = jiffies + 1; 3490 mii_timer->data = (unsigned long)bond_dev; 3491 mii_timer->function = (void *)&bond_mii_monitor; 3492 add_timer(mii_timer); 3493 } 3494 3495 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ 3496 init_timer(arp_timer); 3497 arp_timer->expires = jiffies + 1; 3498 arp_timer->data = (unsigned long)bond_dev; 3499 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 3500 arp_timer->function = (void *)&bond_activebackup_arp_mon; 3501 } else { 3502 arp_timer->function = (void *)&bond_loadbalance_arp_mon; 3503 } 3504 if (bond->params.arp_validate) 3505 bond_register_arp(bond); 3506 3507 add_timer(arp_timer); 3508 } 3509 3510 if (bond->params.mode == BOND_MODE_8023AD) { 3511 struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); 3512 init_timer(ad_timer); 3513 ad_timer->expires = jiffies + 1; 3514 ad_timer->data = (unsigned long)bond; 3515 ad_timer->function = (void *)&bond_3ad_state_machine_handler; 3516 add_timer(ad_timer); 3517 3518 /* register to receive LACPDUs */ 3519 bond_register_lacpdu(bond); 3520 } 3521 3522 return 0; 3523 } 3524 3525 static int bond_close(struct net_device *bond_dev) 3526 { 3527 struct bonding *bond = bond_dev->priv; 3528 3529 if (bond->params.mode == BOND_MODE_8023AD) { 3530 /* Unregister the receive of LACPDUs */ 3531 bond_unregister_lacpdu(bond); 3532 } 3533 3534 if (bond->params.arp_validate) 3535 bond_unregister_arp(bond); 3536 3537 write_lock_bh(&bond->lock); 3538 3539 3540 /* signal timers not to re-arm */ 3541 bond->kill_timers = 1; 3542 3543 write_unlock_bh(&bond->lock); 3544 3545 /* del_timer_sync must run without holding the bond->lock 3546 * because a running timer might be trying to hold it too 3547 */ 3548 3549 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3550 del_timer_sync(&bond->mii_timer); 3551 } 3552 3553 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3554 del_timer_sync(&bond->arp_timer); 3555 } 3556 3557 switch (bond->params.mode) { 3558 case BOND_MODE_8023AD: 3559 del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); 3560 break; 3561 case BOND_MODE_TLB: 3562 case BOND_MODE_ALB: 3563 del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); 3564 break; 3565 default: 3566 break; 3567 } 3568 3569 3570 if ((bond->params.mode == BOND_MODE_TLB) || 3571 (bond->params.mode == BOND_MODE_ALB)) { 3572 /* Must be called only after all 3573 * slaves have been released 3574 */ 3575 bond_alb_deinitialize(bond); 3576 } 3577 3578 return 0; 3579 } 3580 3581 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3582 { 3583 struct bonding *bond = bond_dev->priv; 3584 struct net_device_stats *stats = &(bond->stats), *sstats; 3585 struct slave *slave; 3586 int i; 3587 3588 memset(stats, 0, sizeof(struct net_device_stats)); 3589 3590 read_lock_bh(&bond->lock); 3591 3592 bond_for_each_slave(bond, slave, i) { 3593 sstats = slave->dev->get_stats(slave->dev); 3594 stats->rx_packets += sstats->rx_packets; 3595 stats->rx_bytes += sstats->rx_bytes; 3596 stats->rx_errors += sstats->rx_errors; 3597 stats->rx_dropped += sstats->rx_dropped; 3598 3599 stats->tx_packets += sstats->tx_packets; 3600 stats->tx_bytes += sstats->tx_bytes; 3601 stats->tx_errors += sstats->tx_errors; 3602 stats->tx_dropped += sstats->tx_dropped; 3603 3604 stats->multicast += sstats->multicast; 3605 stats->collisions += sstats->collisions; 3606 3607 stats->rx_length_errors += sstats->rx_length_errors; 3608 stats->rx_over_errors += sstats->rx_over_errors; 3609 stats->rx_crc_errors += sstats->rx_crc_errors; 3610 stats->rx_frame_errors += sstats->rx_frame_errors; 3611 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3612 stats->rx_missed_errors += sstats->rx_missed_errors; 3613 3614 stats->tx_aborted_errors += 
sstats->tx_aborted_errors; 3615 stats->tx_carrier_errors += sstats->tx_carrier_errors; 3616 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3617 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3618 stats->tx_window_errors += sstats->tx_window_errors; 3619 } 3620 3621 read_unlock_bh(&bond->lock); 3622 3623 return stats; 3624 } 3625 3626 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3627 { 3628 struct net_device *slave_dev = NULL; 3629 struct ifbond k_binfo; 3630 struct ifbond __user *u_binfo = NULL; 3631 struct ifslave k_sinfo; 3632 struct ifslave __user *u_sinfo = NULL; 3633 struct mii_ioctl_data *mii = NULL; 3634 int res = 0; 3635 3636 dprintk("bond_ioctl: master=%s, cmd=%d\n", 3637 bond_dev->name, cmd); 3638 3639 switch (cmd) { 3640 case SIOCGMIIPHY: 3641 mii = if_mii(ifr); 3642 if (!mii) { 3643 return -EINVAL; 3644 } 3645 mii->phy_id = 0; 3646 /* Fall Through */ 3647 case SIOCGMIIREG: 3648 /* 3649 * We do this again just in case we were called by SIOCGMIIREG 3650 * instead of SIOCGMIIPHY. 3651 */ 3652 mii = if_mii(ifr); 3653 if (!mii) { 3654 return -EINVAL; 3655 } 3656 3657 if (mii->reg_num == 1) { 3658 struct bonding *bond = bond_dev->priv; 3659 mii->val_out = 0; 3660 read_lock_bh(&bond->lock); 3661 read_lock(&bond->curr_slave_lock); 3662 if (netif_carrier_ok(bond->dev)) { 3663 mii->val_out = BMSR_LSTATUS; 3664 } 3665 read_unlock(&bond->curr_slave_lock); 3666 read_unlock_bh(&bond->lock); 3667 } 3668 3669 return 0; 3670 case BOND_INFO_QUERY_OLD: 3671 case SIOCBONDINFOQUERY: 3672 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3673 3674 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { 3675 return -EFAULT; 3676 } 3677 3678 res = bond_info_query(bond_dev, &k_binfo); 3679 if (res == 0) { 3680 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { 3681 return -EFAULT; 3682 } 3683 } 3684 3685 return res; 3686 case BOND_SLAVE_INFO_QUERY_OLD: 3687 case SIOCBONDSLAVEINFOQUERY: 3688 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3689 3690 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { 3691 return -EFAULT; 3692 } 3693 3694 res = bond_slave_info_query(bond_dev, &k_sinfo); 3695 if (res == 0) { 3696 if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { 3697 return -EFAULT; 3698 } 3699 } 3700 3701 return res; 3702 default: 3703 /* Go on */ 3704 break; 3705 } 3706 3707 if (!capable(CAP_NET_ADMIN)) { 3708 return -EPERM; 3709 } 3710 3711 down_write(&(bonding_rwsem)); 3712 slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); 3713 3714 dprintk("slave_dev=%p: \n", slave_dev); 3715 3716 if (!slave_dev) { 3717 res = -ENODEV; 3718 } else { 3719 dprintk("slave_dev->name=%s: \n", slave_dev->name); 3720 switch (cmd) { 3721 case BOND_ENSLAVE_OLD: 3722 case SIOCBONDENSLAVE: 3723 res = bond_enslave(bond_dev, slave_dev); 3724 break; 3725 case BOND_RELEASE_OLD: 3726 case SIOCBONDRELEASE: 3727 res = bond_release(bond_dev, slave_dev); 3728 break; 3729 case BOND_SETHWADDR_OLD: 3730 case SIOCBONDSETHWADDR: 3731 res = bond_sethwaddr(bond_dev, slave_dev); 3732 break; 3733 case BOND_CHANGE_ACTIVE_OLD: 3734 case SIOCBONDCHANGEACTIVE: 3735 res = bond_ioctl_change_active(bond_dev, slave_dev); 3736 break; 3737 default: 3738 res = -EOPNOTSUPP; 3739 } 3740 3741 dev_put(slave_dev); 3742 } 3743 3744 up_write(&(bonding_rwsem)); 3745 return res; 3746 } 3747 3748 static void bond_set_multicast_list(struct net_device *bond_dev) 3749 { 3750 struct bonding *bond = bond_dev->priv; 3751 struct dev_mc_list *dmi; 3752 3753 write_lock_bh(&bond->lock); 3754 3755 /* 3756 
* Do promisc before checking multicast_mode 3757 */ 3758 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { 3759 bond_set_promiscuity(bond, 1); 3760 } 3761 3762 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { 3763 bond_set_promiscuity(bond, -1); 3764 } 3765 3766 /* set allmulti flag to slaves */ 3767 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { 3768 bond_set_allmulti(bond, 1); 3769 } 3770 3771 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { 3772 bond_set_allmulti(bond, -1); 3773 } 3774 3775 bond->flags = bond_dev->flags; 3776 3777 /* looking for addresses to add to slaves' mc list */ 3778 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3779 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { 3780 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3781 } 3782 } 3783 3784 /* looking for addresses to delete from slaves' list */ 3785 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3786 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { 3787 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3788 } 3789 } 3790 3791 /* save master's multicast list */ 3792 bond_mc_list_destroy(bond); 3793 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3794 3795 write_unlock_bh(&bond->lock); 3796 } 3797 3798 /* 3799 * Change the MTU of all of a master's slaves to match the master 3800 */ 3801 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3802 { 3803 struct bonding *bond = bond_dev->priv; 3804 struct slave *slave, *stop_at; 3805 int res = 0; 3806 int i; 3807 3808 dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, 3809 (bond_dev ? bond_dev->name : "None"), new_mtu); 3810 3811 /* Can't hold bond->lock with bh disabled here since 3812 * some base drivers panic. On the other hand we can't 3813 * hold bond->lock without bh disabled because we'll 3814 * deadlock. The only solution is to rely on the fact 3815 * that we're under rtnl_lock here, and the slaves 3816 * list won't change. This doesn't solve the problem 3817 * of setting the slave's MTU while it is 3818 * transmitting, but the assumption is that the base 3819 * driver can handle that. 3820 * 3821 * TODO: figure out a way to safely iterate the slaves 3822 * list, but without holding a lock around the actual 3823 * call to the base driver. 3824 */ 3825 3826 bond_for_each_slave(bond, slave, i) { 3827 dprintk("s %p s->p %p c_m %p\n", slave, 3828 slave->prev, slave->dev->change_mtu); 3829 3830 res = dev_set_mtu(slave->dev, new_mtu); 3831 3832 if (res) { 3833 /* If we failed to set the slave's mtu to the new value 3834 * we must abort the operation even in ACTIVE_BACKUP 3835 * mode, because if we allow the backup slaves to have 3836 * different mtu values than the active slave we'll 3837 * need to change their mtu when doing a failover. That 3838 * means changing their mtu from timer context, which 3839 * is probably not a good idea. 
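* Instead, on any failure we unwind every slave already changed back to
* the bond's current (old) MTU below.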
3840 */ 3841 dprintk("err %d %s\n", res, slave->dev->name); 3842 goto unwind; 3843 } 3844 } 3845 3846 bond_dev->mtu = new_mtu; 3847 3848 return 0; 3849 3850 unwind: 3851 /* unwind from head to the slave that failed */ 3852 stop_at = slave; 3853 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 3854 int tmp_res; 3855 3856 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 3857 if (tmp_res) { 3858 dprintk("unwind err %d dev %s\n", tmp_res, 3859 slave->dev->name); 3860 } 3861 } 3862 3863 return res; 3864 } 3865 3866 /* 3867 * Change HW address 3868 * 3869 * Note that many devices must be down to change the HW address, and 3870 * downing the master releases all slaves. We can make bonds full of 3871 * bonding devices to test this, however. 3872 */ 3873 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 3874 { 3875 struct bonding *bond = bond_dev->priv; 3876 struct sockaddr *sa = addr, tmp_sa; 3877 struct slave *slave, *stop_at; 3878 int res = 0; 3879 int i; 3880 3881 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 3882 3883 if (!is_valid_ether_addr(sa->sa_data)) { 3884 return -EADDRNOTAVAIL; 3885 } 3886 3887 /* Can't hold bond->lock with bh disabled here since 3888 * some base drivers panic. On the other hand we can't 3889 * hold bond->lock without bh disabled because we'll 3890 * deadlock. The only solution is to rely on the fact 3891 * that we're under rtnl_lock here, and the slaves 3892 * list won't change. This doesn't solve the problem 3893 * of setting the slave's hw address while it is 3894 * transmitting, but the assumption is that the base 3895 * driver can handle that. 3896 * 3897 * TODO: figure out a way to safely iterate the slaves 3898 * list, but without holding a lock around the actual 3899 * call to the base driver. 3900 */ 3901 3902 bond_for_each_slave(bond, slave, i) { 3903 dprintk("slave %p %s\n", slave, slave->dev->name); 3904 3905 if (slave->dev->set_mac_address == NULL) { 3906 res = -EOPNOTSUPP; 3907 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 3908 goto unwind; 3909 } 3910 3911 res = dev_set_mac_address(slave->dev, addr); 3912 if (res) { 3913 /* TODO: consider downing the slave 3914 * and retry ? 3915 * User should expect communications 3916 * breakage anyway until ARP finish 3917 * updating, so... 
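* for now we just unwind the slaves already changed back to the bond's
* current address (see the unwind label below).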
			 */
			dprintk("err %d %s\n", res, slave->dev->name);
			goto unwind;
		}
	}

	/* success */
	memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
	return 0;

unwind:
	memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
	tmp_sa.sa_family = bond_dev->type;

	/* unwind from head to the slave that failed */
	stop_at = slave;
	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
		int tmp_res;

		tmp_res = dev_set_mac_address(slave->dev, &tmp_sa);
		if (tmp_res) {
			dprintk("unwind err %d dev %s\n", tmp_res,
				slave->dev->name);
		}
	}

	return res;
}

static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave, *start_at;
	int i;
	int res = 1;

	read_lock(&bond->lock);

	if (!BOND_IS_OK(bond)) {
		goto out;
	}

	read_lock(&bond->curr_slave_lock);
	slave = start_at = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	if (!slave) {
		goto out;
	}

	bond_for_each_slave_from(bond, slave, i, start_at) {
		if (IS_UP(slave->dev) &&
		    (slave->link == BOND_LINK_UP) &&
		    (slave->state == BOND_STATE_ACTIVE)) {
			res = bond_dev_queue_xmit(bond, skb, slave->dev);

			write_lock(&bond->curr_slave_lock);
			bond->curr_active_slave = slave->next;
			write_unlock(&bond->curr_slave_lock);

			break;
		}
	}

out:
	if (res) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
	}
	read_unlock(&bond->lock);
	return 0;
}

/*
 * in active-backup mode, we know that bond->curr_active_slave is always valid if
 * the bond has a usable interface.
 */
static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	int res = 1;

	read_lock(&bond->lock);
	read_lock(&bond->curr_slave_lock);

	if (!BOND_IS_OK(bond)) {
		goto out;
	}

	if (!bond->curr_active_slave)
		goto out;

	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);

out:
	if (res) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
	}
	read_unlock(&bond->curr_slave_lock);
	read_unlock(&bond->lock);
	return 0;
}

/*
 * In bond_xmit_xor(), we determine the output device by using a
 * pre-determined xmit_hash_policy(). If the selected device is not
 * enabled, find the next active slave.
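 *
 * With the default layer2 policy, for example, the hash is derived from
 * the source and destination MAC addresses, so all traffic between a
 * given pair of hosts is carried by the same slave.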
 */
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave, *start_at;
	int slave_no;
	int i;
	int res = 1;

	read_lock(&bond->lock);

	if (!BOND_IS_OK(bond)) {
		goto out;
	}

	slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt);

	bond_for_each_slave(bond, slave, i) {
		slave_no--;
		if (slave_no < 0) {
			break;
		}
	}

	start_at = slave;

	bond_for_each_slave_from(bond, slave, i, start_at) {
		if (IS_UP(slave->dev) &&
		    (slave->link == BOND_LINK_UP) &&
		    (slave->state == BOND_STATE_ACTIVE)) {
			res = bond_dev_queue_xmit(bond, skb, slave->dev);
			break;
		}
	}

out:
	if (res) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
	}
	read_unlock(&bond->lock);
	return 0;
}

/*
 * in broadcast mode, we send everything to all usable interfaces.
 */
static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave, *start_at;
	struct net_device *tx_dev = NULL;
	int i;
	int res = 1;

	read_lock(&bond->lock);

	if (!BOND_IS_OK(bond)) {
		goto out;
	}

	read_lock(&bond->curr_slave_lock);
	start_at = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	if (!start_at) {
		goto out;
	}

	bond_for_each_slave_from(bond, slave, i, start_at) {
		if (IS_UP(slave->dev) &&
		    (slave->link == BOND_LINK_UP) &&
		    (slave->state == BOND_STATE_ACTIVE)) {
			if (tx_dev) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (!skb2) {
					printk(KERN_ERR DRV_NAME
					       ": %s: Error: bond_xmit_broadcast(): "
					       "skb_clone() failed\n",
					       bond_dev->name);
					continue;
				}

				res = bond_dev_queue_xmit(bond, skb2, tx_dev);
				if (res) {
					dev_kfree_skb(skb2);
					continue;
				}
			}
			tx_dev = slave->dev;
		}
	}

	if (tx_dev) {
		res = bond_dev_queue_xmit(bond, skb, tx_dev);
	}

out:
	if (res) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
	}
	/* frame sent to all suitable interfaces */
	read_unlock(&bond->lock);
	return 0;
}

/*------------------------- Device initialization ---------------------------*/

/*
 * set bond mode specific net device operations
 */
void bond_set_mode_ops(struct bonding *bond, int mode)
{
	struct net_device *bond_dev = bond->dev;

	switch (mode) {
	case BOND_MODE_ROUNDROBIN:
		bond_dev->hard_start_xmit = bond_xmit_roundrobin;
		break;
	case BOND_MODE_ACTIVEBACKUP:
		bond_dev->hard_start_xmit = bond_xmit_activebackup;
		break;
	case BOND_MODE_XOR:
		bond_dev->hard_start_xmit = bond_xmit_xor;
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
		else
			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
		break;
	case BOND_MODE_BROADCAST:
		bond_dev->hard_start_xmit = bond_xmit_broadcast;
		break;
	case BOND_MODE_8023AD:
		bond_set_master_3ad_flags(bond);
		bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
		else
			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
		break;
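	/* ALB is TLB plus receive load balancing (rlb); both modes share
	 * the bond_alb_xmit() transmit path set up below.
	 */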
	case BOND_MODE_ALB:
		bond_set_master_alb_flags(bond);
		/* FALLTHRU */
	case BOND_MODE_TLB:
		bond_dev->hard_start_xmit = bond_alb_xmit;
		bond_dev->set_mac_address = bond_alb_set_mac_address;
		break;
	default:
		/* Should never happen, mode already checked */
		printk(KERN_ERR DRV_NAME
		       ": %s: Error: Unknown bonding mode %d\n",
		       bond_dev->name,
		       mode);
		break;
	}
}

static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
				     struct ethtool_drvinfo *drvinfo)
{
	strncpy(drvinfo->driver, DRV_NAME, 32);
	strncpy(drvinfo->version, DRV_VERSION, 32);
	snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
}

static const struct ethtool_ops bond_ethtool_ops = {
	.get_drvinfo = bond_ethtool_get_drvinfo,
};

/*
 * Does not allocate but creates a /proc entry.
 * Allowed to fail.
 */
static int bond_init(struct net_device *bond_dev, struct bond_params *params)
{
	struct bonding *bond = bond_dev->priv;

	dprintk("Begin bond_init for %s\n", bond_dev->name);

	/* initialize rwlocks */
	rwlock_init(&bond->lock);
	rwlock_init(&bond->curr_slave_lock);

	bond->params = *params; /* copy params struct */

	/* Initialize pointers */
	bond->first_slave = NULL;
	bond->curr_active_slave = NULL;
	bond->current_arp_slave = NULL;
	bond->primary_slave = NULL;
	bond->dev = bond_dev;
	INIT_LIST_HEAD(&bond->vlan_list);

	/* Initialize the device entry points */
	bond_dev->open = bond_open;
	bond_dev->stop = bond_close;
	bond_dev->get_stats = bond_get_stats;
	bond_dev->do_ioctl = bond_do_ioctl;
	bond_dev->ethtool_ops = &bond_ethtool_ops;
	bond_dev->set_multicast_list = bond_set_multicast_list;
	bond_dev->change_mtu = bond_change_mtu;
	bond_dev->set_mac_address = bond_set_mac_address;

	bond_set_mode_ops(bond, bond->params.mode);

	bond_dev->destructor = free_netdev;

	/* Initialize the device options */
	bond_dev->tx_queue_len = 0;
	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
	bond_dev->priv_flags |= IFF_BONDING;

	/* At first, we block adding VLANs. That's the only way to
	 * prevent problems that occur when adding VLANs over an
	 * empty bond. The block will be removed once non-challenged
	 * slaves are enslaved.
	 */
	bond_dev->features |= NETIF_F_VLAN_CHALLENGED;

	/* don't acquire bond device's netif_tx_lock when
	 * transmitting */
	bond_dev->features |= NETIF_F_LLTX;

	/* By default, we declare the bond to be fully
	 * VLAN hardware accelerated capable. Special
	 * care is taken in the various xmit functions
	 * when there are slaves that are not hw accel
	 * capable
	 */
	bond_dev->vlan_rx_register = bond_vlan_rx_register;
	bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid;
	bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid;
	bond_dev->features |= (NETIF_F_HW_VLAN_TX |
			       NETIF_F_HW_VLAN_RX |
			       NETIF_F_HW_VLAN_FILTER);

#ifdef CONFIG_PROC_FS
	bond_create_proc_entry(bond);
#endif

	list_add_tail(&bond->bond_list, &bond_dev_list);

	return 0;
}

/* De-initialize device specific data.
 * Caller must hold rtnl_lock.
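 * (the bond is removed from bond_dev_list here, and that list is only
 * safe to modify under rtnl)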
 */
void bond_deinit(struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;

	list_del(&bond->bond_list);

#ifdef CONFIG_PROC_FS
	bond_remove_proc_entry(bond);
#endif
}

/* Unregister and free all bond devices.
 * Caller must hold rtnl_lock.
 */
static void bond_free_all(void)
{
	struct bonding *bond, *nxt;

	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
		struct net_device *bond_dev = bond->dev;

		bond_mc_list_destroy(bond);
		/* Release the bonded slaves */
		bond_release_all(bond_dev);
		bond_deinit(bond_dev);
		unregister_netdevice(bond_dev);
	}

#ifdef CONFIG_PROC_FS
	bond_destroy_proc_dir();
#endif
}

/*------------------------- Module initialization ---------------------------*/

/*
 * Convert string input module parms. Accept either the
 * number of the mode or its string name.
 */
int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
{
	int i;

	for (i = 0; tbl[i].modename; i++) {
		if ((isdigit(*mode_arg) &&
		     tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) ||
		    (strncmp(mode_arg, tbl[i].modename,
			     strlen(tbl[i].modename)) == 0)) {
			return tbl[i].mode;
		}
	}

	return -1;
}

static int bond_check_params(struct bond_params *params)
{
	int arp_validate_value;

	/*
	 * Convert string parameters.
	 */
	if (mode) {
		bond_mode = bond_parse_parm(mode, bond_mode_tbl);
		if (bond_mode == -1) {
			printk(KERN_ERR DRV_NAME
			       ": Error: Invalid bonding mode \"%s\"\n",
			       mode == NULL ? "NULL" : mode);
			return -EINVAL;
		}
	}

	if (xmit_hash_policy) {
		if ((bond_mode != BOND_MODE_XOR) &&
		    (bond_mode != BOND_MODE_8023AD)) {
			printk(KERN_INFO DRV_NAME
			       ": xor_mode param is irrelevant in mode %s\n",
			       bond_mode_name(bond_mode));
		} else {
			xmit_hashtype = bond_parse_parm(xmit_hash_policy,
							xmit_hashtype_tbl);
			if (xmit_hashtype == -1) {
				printk(KERN_ERR DRV_NAME
				       ": Error: Invalid xmit_hash_policy \"%s\"\n",
				       xmit_hash_policy == NULL ? "NULL" :
				       xmit_hash_policy);
				return -EINVAL;
			}
		}
	}

	if (lacp_rate) {
		if (bond_mode != BOND_MODE_8023AD) {
			printk(KERN_INFO DRV_NAME
			       ": lacp_rate param is irrelevant in mode %s\n",
			       bond_mode_name(bond_mode));
		} else {
			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
			if (lacp_fast == -1) {
				printk(KERN_ERR DRV_NAME
				       ": Error: Invalid lacp rate \"%s\"\n",
				       lacp_rate == NULL ?
"NULL" : lacp_rate); 4379 return -EINVAL; 4380 } 4381 } 4382 } 4383 4384 if (max_bonds < 1 || max_bonds > INT_MAX) { 4385 printk(KERN_WARNING DRV_NAME 4386 ": Warning: max_bonds (%d) not in range %d-%d, so it " 4387 "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4388 max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4389 max_bonds = BOND_DEFAULT_MAX_BONDS; 4390 } 4391 4392 if (miimon < 0) { 4393 printk(KERN_WARNING DRV_NAME 4394 ": Warning: miimon module parameter (%d), " 4395 "not in range 0-%d, so it was reset to %d\n", 4396 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4397 miimon = BOND_LINK_MON_INTERV; 4398 } 4399 4400 if (updelay < 0) { 4401 printk(KERN_WARNING DRV_NAME 4402 ": Warning: updelay module parameter (%d), " 4403 "not in range 0-%d, so it was reset to 0\n", 4404 updelay, INT_MAX); 4405 updelay = 0; 4406 } 4407 4408 if (downdelay < 0) { 4409 printk(KERN_WARNING DRV_NAME 4410 ": Warning: downdelay module parameter (%d), " 4411 "not in range 0-%d, so it was reset to 0\n", 4412 downdelay, INT_MAX); 4413 downdelay = 0; 4414 } 4415 4416 if ((use_carrier != 0) && (use_carrier != 1)) { 4417 printk(KERN_WARNING DRV_NAME 4418 ": Warning: use_carrier module parameter (%d), " 4419 "not of valid value (0/1), so it was set to 1\n", 4420 use_carrier); 4421 use_carrier = 1; 4422 } 4423 4424 /* reset values for 802.3ad */ 4425 if (bond_mode == BOND_MODE_8023AD) { 4426 if (!miimon) { 4427 printk(KERN_WARNING DRV_NAME 4428 ": Warning: miimon must be specified, " 4429 "otherwise bonding will not detect link " 4430 "failure, speed and duplex which are " 4431 "essential for 802.3ad operation\n"); 4432 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4433 miimon = 100; 4434 } 4435 } 4436 4437 /* reset values for TLB/ALB */ 4438 if ((bond_mode == BOND_MODE_TLB) || 4439 (bond_mode == BOND_MODE_ALB)) { 4440 if (!miimon) { 4441 printk(KERN_WARNING DRV_NAME 4442 ": Warning: miimon must be specified, " 4443 "otherwise bonding will not detect link " 4444 "failure and link speed which are essential " 4445 "for TLB/ALB load balancing\n"); 4446 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4447 miimon = 100; 4448 } 4449 } 4450 4451 if (bond_mode == BOND_MODE_ALB) { 4452 printk(KERN_NOTICE DRV_NAME 4453 ": In ALB mode you might experience client " 4454 "disconnections upon reconnection of a link if the " 4455 "bonding module updelay parameter (%d msec) is " 4456 "incompatible with the forwarding delay time of the " 4457 "switch\n", 4458 updelay); 4459 } 4460 4461 if (!miimon) { 4462 if (updelay || downdelay) { 4463 /* just warn the user the up/down delay will have 4464 * no effect since miimon is zero... 
			 */
			printk(KERN_WARNING DRV_NAME
			       ": Warning: miimon module parameter not set "
			       "and updelay (%d) or downdelay (%d) module "
			       "parameter is set; updelay and downdelay have "
			       "no effect unless miimon is set\n",
			       updelay, downdelay);
		}
	} else {
		/* don't allow arp monitoring */
		if (arp_interval) {
			printk(KERN_WARNING DRV_NAME
			       ": Warning: miimon (%d) and arp_interval (%d) "
			       "can't be used simultaneously, disabling ARP "
			       "monitoring\n",
			       miimon, arp_interval);
			arp_interval = 0;
		}

		if ((updelay % miimon) != 0) {
			printk(KERN_WARNING DRV_NAME
			       ": Warning: updelay (%d) is not a multiple "
			       "of miimon (%d), updelay rounded to %d ms\n",
			       updelay, miimon, (updelay / miimon) * miimon);
		}

		updelay /= miimon;

		if ((downdelay % miimon) != 0) {
			printk(KERN_WARNING DRV_NAME
			       ": Warning: downdelay (%d) is not a multiple "
			       "of miimon (%d), downdelay rounded to %d ms\n",
			       downdelay, miimon,
			       (downdelay / miimon) * miimon);
		}

		downdelay /= miimon;
	}

	if (arp_interval < 0) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: arp_interval module parameter (%d), "
		       "not in range 0-%d, so it was reset to %d\n",
		       arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
		arp_interval = BOND_LINK_ARP_INTERV;
	}

	for (arp_ip_count = 0;
	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count];
	     arp_ip_count++) {
		/* not a complete check, but should be good enough to
		   catch mistakes */
		if (!isdigit(arp_ip_target[arp_ip_count][0])) {
			printk(KERN_WARNING DRV_NAME
			       ": Warning: bad arp_ip_target module parameter "
			       "(%s), ARP monitoring will not be performed\n",
			       arp_ip_target[arp_ip_count]);
			arp_interval = 0;
		} else {
			__be32 ip = in_aton(arp_ip_target[arp_ip_count]);
			arp_target[arp_ip_count] = ip;
		}
	}

	if (arp_interval && !arp_ip_count) {
		/* don't allow arping if no arp_ip_target given... */
		printk(KERN_WARNING DRV_NAME
		       ": Warning: arp_interval module parameter (%d) "
		       "specified without providing an arp_ip_target "
		       "parameter, arp_interval was reset to 0\n",
		       arp_interval);
		arp_interval = 0;
	}

	if (arp_validate) {
		if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
			printk(KERN_ERR DRV_NAME
			       ": arp_validate only supported in active-backup mode\n");
			return -EINVAL;
		}
		if (!arp_interval) {
			printk(KERN_ERR DRV_NAME
			       ": arp_validate requires arp_interval\n");
			return -EINVAL;
		}

		arp_validate_value = bond_parse_parm(arp_validate,
						     arp_validate_tbl);
		if (arp_validate_value == -1) {
			printk(KERN_ERR DRV_NAME
			       ": Error: invalid arp_validate \"%s\"\n",
			       arp_validate == NULL ?
"NULL" : arp_validate); 4557 return -EINVAL; 4558 } 4559 } else 4560 arp_validate_value = 0; 4561 4562 if (miimon) { 4563 printk(KERN_INFO DRV_NAME 4564 ": MII link monitoring set to %d ms\n", 4565 miimon); 4566 } else if (arp_interval) { 4567 int i; 4568 4569 printk(KERN_INFO DRV_NAME 4570 ": ARP monitoring set to %d ms, validate %s, with %d target(s):", 4571 arp_interval, 4572 arp_validate_tbl[arp_validate_value].modename, 4573 arp_ip_count); 4574 4575 for (i = 0; i < arp_ip_count; i++) 4576 printk (" %s", arp_ip_target[i]); 4577 4578 printk("\n"); 4579 4580 } else { 4581 /* miimon and arp_interval not set, we need one so things 4582 * work as expected, see bonding.txt for details 4583 */ 4584 printk(KERN_WARNING DRV_NAME 4585 ": Warning: either miimon or arp_interval and " 4586 "arp_ip_target module parameters must be specified, " 4587 "otherwise bonding will not detect link failures! see " 4588 "bonding.txt for details.\n"); 4589 } 4590 4591 if (primary && !USES_PRIMARY(bond_mode)) { 4592 /* currently, using a primary only makes sense 4593 * in active backup, TLB or ALB modes 4594 */ 4595 printk(KERN_WARNING DRV_NAME 4596 ": Warning: %s primary device specified but has no " 4597 "effect in %s mode\n", 4598 primary, bond_mode_name(bond_mode)); 4599 primary = NULL; 4600 } 4601 4602 /* fill params struct with the proper values */ 4603 params->mode = bond_mode; 4604 params->xmit_policy = xmit_hashtype; 4605 params->miimon = miimon; 4606 params->arp_interval = arp_interval; 4607 params->arp_validate = arp_validate_value; 4608 params->updelay = updelay; 4609 params->downdelay = downdelay; 4610 params->use_carrier = use_carrier; 4611 params->lacp_fast = lacp_fast; 4612 params->primary[0] = 0; 4613 4614 if (primary) { 4615 strncpy(params->primary, primary, IFNAMSIZ); 4616 params->primary[IFNAMSIZ - 1] = 0; 4617 } 4618 4619 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4620 4621 return 0; 4622 } 4623 4624 static struct lock_class_key bonding_netdev_xmit_lock_key; 4625 4626 /* Create a new bond based on the specified name and bonding parameters. 4627 * If name is NULL, obtain a suitable "bond%d" name for us. 4628 * Caller must NOT hold rtnl_lock; we need to release it here before we 4629 * set up our sysfs entries. 4630 */ 4631 int bond_create(char *name, struct bond_params *params, struct bonding **newbond) 4632 { 4633 struct net_device *bond_dev; 4634 int res; 4635 4636 rtnl_lock(); 4637 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", 4638 ether_setup); 4639 if (!bond_dev) { 4640 printk(KERN_ERR DRV_NAME 4641 ": %s: eek! can't alloc netdev!\n", 4642 name); 4643 res = -ENOMEM; 4644 goto out_rtnl; 4645 } 4646 4647 if (!name) { 4648 res = dev_alloc_name(bond_dev, "bond%d"); 4649 if (res < 0) 4650 goto out_netdev; 4651 } 4652 4653 /* bond_init() must be called after dev_alloc_name() (for the 4654 * /proc files), but before register_netdevice(), because we 4655 * need to set function pointers. 
	 */

	res = bond_init(bond_dev, params);
	if (res < 0) {
		goto out_netdev;
	}

	res = register_netdevice(bond_dev);
	if (res < 0) {
		goto out_bond;
	}

	lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key);

	if (newbond)
		*newbond = bond_dev->priv;

	netif_carrier_off(bond_dev);

	rtnl_unlock(); /* allows sysfs registration of net device */
	res = bond_create_sysfs_entry(bond_dev->priv);
	if (res < 0) {
		rtnl_lock();
		goto out_bond;
	}

	return 0;

out_bond:
	bond_deinit(bond_dev);
out_netdev:
	free_netdev(bond_dev);
out_rtnl:
	rtnl_unlock();
	return res;
}

static int __init bonding_init(void)
{
	int i;
	int res;

	printk(KERN_INFO "%s", version);

	res = bond_check_params(&bonding_defaults);
	if (res) {
		goto out;
	}

#ifdef CONFIG_PROC_FS
	bond_create_proc_dir();
#endif
	for (i = 0; i < max_bonds; i++) {
		res = bond_create(NULL, &bonding_defaults, NULL);
		if (res)
			goto err;
	}

	res = bond_create_sysfs();
	if (res)
		goto err;

	register_netdevice_notifier(&bond_netdev_notifier);
	register_inetaddr_notifier(&bond_inetaddr_notifier);

	goto out;
err:
	rtnl_lock();
	bond_free_all();
	bond_destroy_sysfs();
	rtnl_unlock();
out:
	return res;
}

static void __exit bonding_exit(void)
{
	unregister_netdevice_notifier(&bond_netdev_notifier);
	unregister_inetaddr_notifier(&bond_inetaddr_notifier);

	rtnl_lock();
	bond_free_all();
	bond_destroy_sysfs();
	rtnl_unlock();
}

module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
MODULE_SUPPORTED_DEVICE("most ethernet devices");

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */
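/*
 * Illustrative examples only (values are hypothetical, not part of the
 * driver): parameter combinations that satisfy the checks performed in
 * bond_check_params() above.
 *
 * An 802.3ad setup (miimon is required; lacp_rate and xmit_hash_policy
 * are only meaningful here or in balance-xor):
 *
 *	modprobe bonding mode=802.3ad miimon=100 lacp_rate=fast \
 *		xmit_hash_policy=layer3+4
 *
 * An ARP-monitored active-backup setup (arp_validate requires both
 * active-backup mode and a non-zero arp_interval, plus at least one
 * arp_ip_target):
 *
 *	modprobe bonding mode=active-backup arp_interval=500 \
 *		arp_ip_target=192.168.0.1 arp_validate=all
 */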