1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 
31 * 32 */ 33 34 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 35 36 #include <linux/kernel.h> 37 #include <linux/module.h> 38 #include <linux/types.h> 39 #include <linux/fcntl.h> 40 #include <linux/interrupt.h> 41 #include <linux/ptrace.h> 42 #include <linux/ioport.h> 43 #include <linux/in.h> 44 #include <net/ip.h> 45 #include <linux/ip.h> 46 #include <linux/tcp.h> 47 #include <linux/udp.h> 48 #include <linux/slab.h> 49 #include <linux/string.h> 50 #include <linux/init.h> 51 #include <linux/timer.h> 52 #include <linux/socket.h> 53 #include <linux/ctype.h> 54 #include <linux/inet.h> 55 #include <linux/bitops.h> 56 #include <linux/io.h> 57 #include <asm/system.h> 58 #include <asm/dma.h> 59 #include <linux/uaccess.h> 60 #include <linux/errno.h> 61 #include <linux/netdevice.h> 62 #include <linux/inetdevice.h> 63 #include <linux/igmp.h> 64 #include <linux/etherdevice.h> 65 #include <linux/skbuff.h> 66 #include <net/sock.h> 67 #include <linux/rtnetlink.h> 68 #include <linux/proc_fs.h> 69 #include <linux/seq_file.h> 70 #include <linux/smp.h> 71 #include <linux/if_ether.h> 72 #include <net/arp.h> 73 #include <linux/mii.h> 74 #include <linux/ethtool.h> 75 #include <linux/if_vlan.h> 76 #include <linux/if_bonding.h> 77 #include <linux/jiffies.h> 78 #include <net/route.h> 79 #include <net/net_namespace.h> 80 #include <net/netns/generic.h> 81 #include "bonding.h" 82 #include "bond_3ad.h" 83 #include "bond_alb.h" 84 85 /*---------------------------- Module parameters ----------------------------*/ 86 87 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 88 #define BOND_LINK_MON_INTERV 0 89 #define BOND_LINK_ARP_INTERV 0 90 91 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 92 static int num_grat_arp = 1; 93 static int num_unsol_na = 1; 94 static int miimon = BOND_LINK_MON_INTERV; 95 static int updelay; 96 static int downdelay; 97 static int use_carrier = 1; 98 static char *mode; 99 static char *primary; 100 static char *primary_reselect; 101 static char *lacp_rate; 102 static char *ad_select; 103 static char *xmit_hash_policy; 104 static int arp_interval = BOND_LINK_ARP_INTERV; 105 static char *arp_ip_target[BOND_MAX_ARP_TARGETS]; 106 static char *arp_validate; 107 static char *fail_over_mac; 108 static struct bond_params bonding_defaults; 109 110 module_param(max_bonds, int, 0); 111 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 112 module_param(num_grat_arp, int, 0644); 113 MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); 114 module_param(num_unsol_na, int, 0644); 115 MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event"); 116 module_param(miimon, int, 0); 117 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 118 module_param(updelay, int, 0); 119 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 120 module_param(downdelay, int, 0); 121 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 122 "in milliseconds"); 123 module_param(use_carrier, int, 0); 124 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 125 "0 for off, 1 for on (default)"); 126 module_param(mode, charp, 0); 127 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " 128 "1 for active-backup, 2 for balance-xor, " 129 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 130 "6 for balance-alb"); 131 module_param(primary, charp, 0); 132 MODULE_PARM_DESC(primary, "Primary network device to use"); 133 
module_param(primary_reselect, charp, 0); 134 MODULE_PARM_DESC(primary_reselect, "Reselect primary slave " 135 "once it comes up; " 136 "0 for always (default), " 137 "1 for only if speed of primary is " 138 "better, " 139 "2 for only on active slave " 140 "failure"); 141 module_param(lacp_rate, charp, 0); 142 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " 143 "(slow/fast)"); 144 module_param(ad_select, charp, 0); 145 MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)"); 146 module_param(xmit_hash_policy, charp, 0); 147 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" 148 ", 1 for layer 3+4"); 149 module_param(arp_interval, int, 0); 150 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 151 module_param_array(arp_ip_target, charp, NULL, 0); 152 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 153 module_param(arp_validate, charp, 0); 154 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); 155 module_param(fail_over_mac, charp, 0); 156 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 
none (default), active or follow"); 157 158 /*----------------------------- Global variables ----------------------------*/ 159 160 static const char * const version = 161 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 162 163 int bond_net_id __read_mostly; 164 165 static __be32 arp_target[BOND_MAX_ARP_TARGETS]; 166 static int arp_ip_count; 167 static int bond_mode = BOND_MODE_ROUNDROBIN; 168 static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; 169 static int lacp_fast; 170 171 172 const struct bond_parm_tbl bond_lacp_tbl[] = { 173 { "slow", AD_LACP_SLOW}, 174 { "fast", AD_LACP_FAST}, 175 { NULL, -1}, 176 }; 177 178 const struct bond_parm_tbl bond_mode_tbl[] = { 179 { "balance-rr", BOND_MODE_ROUNDROBIN}, 180 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 181 { "balance-xor", BOND_MODE_XOR}, 182 { "broadcast", BOND_MODE_BROADCAST}, 183 { "802.3ad", BOND_MODE_8023AD}, 184 { "balance-tlb", BOND_MODE_TLB}, 185 { "balance-alb", BOND_MODE_ALB}, 186 { NULL, -1}, 187 }; 188 189 const struct bond_parm_tbl xmit_hashtype_tbl[] = { 190 { "layer2", BOND_XMIT_POLICY_LAYER2}, 191 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 192 { "layer2+3", BOND_XMIT_POLICY_LAYER23}, 193 { NULL, -1}, 194 }; 195 196 const struct bond_parm_tbl arp_validate_tbl[] = { 197 { "none", BOND_ARP_VALIDATE_NONE}, 198 { "active", BOND_ARP_VALIDATE_ACTIVE}, 199 { "backup", BOND_ARP_VALIDATE_BACKUP}, 200 { "all", BOND_ARP_VALIDATE_ALL}, 201 { NULL, -1}, 202 }; 203 204 const struct bond_parm_tbl fail_over_mac_tbl[] = { 205 { "none", BOND_FOM_NONE}, 206 { "active", BOND_FOM_ACTIVE}, 207 { "follow", BOND_FOM_FOLLOW}, 208 { NULL, -1}, 209 }; 210 211 const struct bond_parm_tbl pri_reselect_tbl[] = { 212 { "always", BOND_PRI_RESELECT_ALWAYS}, 213 { "better", BOND_PRI_RESELECT_BETTER}, 214 { "failure", BOND_PRI_RESELECT_FAILURE}, 215 { NULL, -1}, 216 }; 217 218 struct bond_parm_tbl ad_select_tbl[] = { 219 { "stable", BOND_AD_STABLE}, 220 { "bandwidth", BOND_AD_BANDWIDTH}, 221 { "count", BOND_AD_COUNT}, 222 { 
NULL, -1}, 223 }; 224 225 /*-------------------------- Forward declarations ---------------------------*/ 226 227 static void bond_send_gratuitous_arp(struct bonding *bond); 228 static int bond_init(struct net_device *bond_dev); 229 static void bond_uninit(struct net_device *bond_dev); 230 231 /*---------------------------- General routines -----------------------------*/ 232 233 static const char *bond_mode_name(int mode) 234 { 235 static const char *names[] = { 236 [BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)", 237 [BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)", 238 [BOND_MODE_XOR] = "load balancing (xor)", 239 [BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)", 240 [BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation", 241 [BOND_MODE_TLB] = "transmit load balancing", 242 [BOND_MODE_ALB] = "adaptive load balancing", 243 }; 244 245 if (mode < 0 || mode > BOND_MODE_ALB) 246 return "unknown"; 247 248 return names[mode]; 249 } 250 251 /*---------------------------------- VLAN -----------------------------------*/ 252 253 /** 254 * bond_add_vlan - add a new vlan id on bond 255 * @bond: bond that got the notification 256 * @vlan_id: the vlan id to add 257 * 258 * Returns -ENOMEM if allocation failed. 259 */ 260 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 261 { 262 struct vlan_entry *vlan; 263 264 pr_debug("bond: %s, vlan id %d\n", 265 (bond ? 
bond->dev->name : "None"), vlan_id); 266 267 vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL); 268 if (!vlan) 269 return -ENOMEM; 270 271 INIT_LIST_HEAD(&vlan->vlan_list); 272 vlan->vlan_id = vlan_id; 273 274 write_lock_bh(&bond->lock); 275 276 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 277 278 write_unlock_bh(&bond->lock); 279 280 pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 281 282 return 0; 283 } 284 285 /** 286 * bond_del_vlan - delete a vlan id from bond 287 * @bond: bond that got the notification 288 * @vlan_id: the vlan id to delete 289 * 290 * returns -ENODEV if @vlan_id was not found in @bond. 291 */ 292 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 293 { 294 struct vlan_entry *vlan; 295 int res = -ENODEV; 296 297 pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 298 299 write_lock_bh(&bond->lock); 300 301 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 302 if (vlan->vlan_id == vlan_id) { 303 list_del(&vlan->vlan_list); 304 305 if (bond_is_lb(bond)) 306 bond_alb_clear_vlan(bond, vlan_id); 307 308 pr_debug("removed VLAN ID %d from bond %s\n", 309 vlan_id, bond->dev->name); 310 311 kfree(vlan); 312 313 if (list_empty(&bond->vlan_list) && 314 (bond->slave_cnt == 0)) { 315 /* Last VLAN removed and no slaves, so 316 * restore block on adding VLANs. This will 317 * be removed once new slaves that are not 318 * VLAN challenged will be added. 319 */ 320 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 321 } 322 323 res = 0; 324 goto out; 325 } 326 } 327 328 pr_debug("couldn't find VLAN ID %d in bond %s\n", 329 vlan_id, bond->dev->name); 330 331 out: 332 write_unlock_bh(&bond->lock); 333 return res; 334 } 335 336 /** 337 * bond_has_challenged_slaves 338 * @bond: the bond we're working on 339 * 340 * Searches the slave list. Returns 1 if a vlan challenged slave 341 * was found, 0 otherwise. 342 * 343 * Assumes bond->lock is held. 
344 */ 345 static int bond_has_challenged_slaves(struct bonding *bond) 346 { 347 struct slave *slave; 348 int i; 349 350 bond_for_each_slave(bond, slave, i) { 351 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 352 pr_debug("found VLAN challenged slave - %s\n", 353 slave->dev->name); 354 return 1; 355 } 356 } 357 358 pr_debug("no VLAN challenged slaves found\n"); 359 return 0; 360 } 361 362 /** 363 * bond_next_vlan - safely skip to the next item in the vlans list. 364 * @bond: the bond we're working on 365 * @curr: item we're advancing from 366 * 367 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 368 * or @curr->next otherwise (even if it is @curr itself again). 369 * 370 * Caller must hold bond->lock 371 */ 372 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 373 { 374 struct vlan_entry *next, *last; 375 376 if (list_empty(&bond->vlan_list)) 377 return NULL; 378 379 if (!curr) { 380 next = list_entry(bond->vlan_list.next, 381 struct vlan_entry, vlan_list); 382 } else { 383 last = list_entry(bond->vlan_list.prev, 384 struct vlan_entry, vlan_list); 385 if (last == curr) { 386 next = list_entry(bond->vlan_list.next, 387 struct vlan_entry, vlan_list); 388 } else { 389 next = list_entry(curr->vlan_list.next, 390 struct vlan_entry, vlan_list); 391 } 392 } 393 394 return next; 395 } 396 397 /** 398 * bond_dev_queue_xmit - Prepare skb for xmit. 399 * 400 * @bond: bond device that got this skb for tx. 401 * @skb: hw accel VLAN tagged skb to transmit 402 * @slave_dev: slave that is supposed to xmit this skbuff 403 * 404 * When the bond gets an skb to transmit that is 405 * already hardware accelerated VLAN tagged, and it 406 * needs to relay this skb to a slave that is not 407 * hw accel capable, the skb needs to be "unaccelerated", 408 * i.e. strip the hwaccel tag and re-insert it as part 409 * of the payload. 
410 */ 411 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, 412 struct net_device *slave_dev) 413 { 414 unsigned short uninitialized_var(vlan_id); 415 416 if (!list_empty(&bond->vlan_list) && 417 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 418 vlan_get_tag(skb, &vlan_id) == 0) { 419 skb->dev = slave_dev; 420 skb = vlan_put_tag(skb, vlan_id); 421 if (!skb) { 422 /* vlan_put_tag() frees the skb in case of error, 423 * so return success here so the calling functions 424 * won't attempt to free is again. 425 */ 426 return 0; 427 } 428 } else { 429 skb->dev = slave_dev; 430 } 431 432 skb->priority = 1; 433 dev_queue_xmit(skb); 434 435 return 0; 436 } 437 438 /* 439 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 440 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 441 * lock because: 442 * a. This operation is performed in IOCTL context, 443 * b. The operation is protected by the RTNL semaphore in the 8021q code, 444 * c. Holding a lock with BH disabled while directly calling a base driver 445 * entry point is generally a BAD idea. 446 * 447 * The design of synchronization/protection for this operation in the 8021q 448 * module is good for one or more VLAN devices over a single physical device 449 * and cannot be extended for a teaming solution like bonding, so there is a 450 * potential race condition here where a net device from the vlan group might 451 * be referenced (either by a base driver or the 8021q code) while it is being 452 * removed from the system. However, it turns out we're not making matters 453 * worse, and if it works for regular VLAN usage it will work here too. 
454 */ 455 456 /** 457 * bond_vlan_rx_register - Propagates registration to slaves 458 * @bond_dev: bonding net device that got called 459 * @grp: vlan group being registered 460 */ 461 static void bond_vlan_rx_register(struct net_device *bond_dev, 462 struct vlan_group *grp) 463 { 464 struct bonding *bond = netdev_priv(bond_dev); 465 struct slave *slave; 466 int i; 467 468 bond->vlgrp = grp; 469 470 bond_for_each_slave(bond, slave, i) { 471 struct net_device *slave_dev = slave->dev; 472 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 473 474 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 475 slave_ops->ndo_vlan_rx_register) { 476 slave_ops->ndo_vlan_rx_register(slave_dev, grp); 477 } 478 } 479 } 480 481 /** 482 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 483 * @bond_dev: bonding net device that got called 484 * @vid: vlan id being added 485 */ 486 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 487 { 488 struct bonding *bond = netdev_priv(bond_dev); 489 struct slave *slave; 490 int i, res; 491 492 bond_for_each_slave(bond, slave, i) { 493 struct net_device *slave_dev = slave->dev; 494 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 495 496 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 497 slave_ops->ndo_vlan_rx_add_vid) { 498 slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid); 499 } 500 } 501 502 res = bond_add_vlan(bond, vid); 503 if (res) { 504 pr_err("%s: Error: Failed to add vlan id %d\n", 505 bond_dev->name, vid); 506 } 507 } 508 509 /** 510 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 511 * @bond_dev: bonding net device that got called 512 * @vid: vlan id being removed 513 */ 514 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 515 { 516 struct bonding *bond = netdev_priv(bond_dev); 517 struct slave *slave; 518 struct net_device *vlan_dev; 519 int i, res; 520 521 bond_for_each_slave(bond, slave, i) { 522 struct net_device *slave_dev 
= slave->dev; 523 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 524 525 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 526 slave_ops->ndo_vlan_rx_kill_vid) { 527 /* Save and then restore vlan_dev in the grp array, 528 * since the slave's driver might clear it. 529 */ 530 vlan_dev = vlan_group_get_device(bond->vlgrp, vid); 531 slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid); 532 vlan_group_set_device(bond->vlgrp, vid, vlan_dev); 533 } 534 } 535 536 res = bond_del_vlan(bond, vid); 537 if (res) { 538 pr_err("%s: Error: Failed to remove vlan id %d\n", 539 bond_dev->name, vid); 540 } 541 } 542 543 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 544 { 545 struct vlan_entry *vlan; 546 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 547 548 write_lock_bh(&bond->lock); 549 550 if (list_empty(&bond->vlan_list)) 551 goto out; 552 553 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 554 slave_ops->ndo_vlan_rx_register) 555 slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp); 556 557 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 558 !(slave_ops->ndo_vlan_rx_add_vid)) 559 goto out; 560 561 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) 562 slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id); 563 564 out: 565 write_unlock_bh(&bond->lock); 566 } 567 568 static void bond_del_vlans_from_slave(struct bonding *bond, 569 struct net_device *slave_dev) 570 { 571 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 572 struct vlan_entry *vlan; 573 struct net_device *vlan_dev; 574 575 write_lock_bh(&bond->lock); 576 577 if (list_empty(&bond->vlan_list)) 578 goto out; 579 580 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 581 !(slave_ops->ndo_vlan_rx_kill_vid)) 582 goto unreg; 583 584 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 585 /* Save and then restore vlan_dev in the grp array, 586 * since the slave's driver might clear it. 
587 */ 588 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 589 slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 590 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); 591 } 592 593 unreg: 594 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 595 slave_ops->ndo_vlan_rx_register) 596 slave_ops->ndo_vlan_rx_register(slave_dev, NULL); 597 598 out: 599 write_unlock_bh(&bond->lock); 600 } 601 602 /*------------------------------- Link status -------------------------------*/ 603 604 /* 605 * Set the carrier state for the master according to the state of its 606 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 607 * do special 802.3ad magic. 608 * 609 * Returns zero if carrier state does not change, nonzero if it does. 610 */ 611 static int bond_set_carrier(struct bonding *bond) 612 { 613 struct slave *slave; 614 int i; 615 616 if (bond->slave_cnt == 0) 617 goto down; 618 619 if (bond->params.mode == BOND_MODE_8023AD) 620 return bond_3ad_set_carrier(bond); 621 622 bond_for_each_slave(bond, slave, i) { 623 if (slave->link == BOND_LINK_UP) { 624 if (!netif_carrier_ok(bond->dev)) { 625 netif_carrier_on(bond->dev); 626 return 1; 627 } 628 return 0; 629 } 630 } 631 632 down: 633 if (netif_carrier_ok(bond->dev)) { 634 netif_carrier_off(bond->dev); 635 return 1; 636 } 637 return 0; 638 } 639 640 /* 641 * Get link speed and duplex from the slave's base driver 642 * using ethtool. If for some reason the call fails or the 643 * values are invalid, fake speed and duplex to 100/Full 644 * and return error. 
645 */ 646 static int bond_update_speed_duplex(struct slave *slave) 647 { 648 struct net_device *slave_dev = slave->dev; 649 struct ethtool_cmd etool; 650 int res; 651 652 /* Fake speed and duplex */ 653 slave->speed = SPEED_100; 654 slave->duplex = DUPLEX_FULL; 655 656 if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) 657 return -1; 658 659 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 660 if (res < 0) 661 return -1; 662 663 switch (etool.speed) { 664 case SPEED_10: 665 case SPEED_100: 666 case SPEED_1000: 667 case SPEED_10000: 668 break; 669 default: 670 return -1; 671 } 672 673 switch (etool.duplex) { 674 case DUPLEX_FULL: 675 case DUPLEX_HALF: 676 break; 677 default: 678 return -1; 679 } 680 681 slave->speed = etool.speed; 682 slave->duplex = etool.duplex; 683 684 return 0; 685 } 686 687 /* 688 * if <dev> supports MII link status reporting, check its link status. 689 * 690 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 691 * depending upon the setting of the use_carrier parameter. 692 * 693 * Return either BMSR_LSTATUS, meaning that the link is up (or we 694 * can't tell and just pretend it is), or 0, meaning that the link is 695 * down. 696 * 697 * If reporting is non-zero, instead of faking link up, return -1 if 698 * both ETHTOOL and MII ioctls fail (meaning the device does not 699 * support them). If use_carrier is set, return whatever it says. 700 * It'd be nice if there was a good way to tell if a driver supports 701 * netif_carrier, but there really isn't. 702 */ 703 static int bond_check_dev_link(struct bonding *bond, 704 struct net_device *slave_dev, int reporting) 705 { 706 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 707 int (*ioctl)(struct net_device *, struct ifreq *, int); 708 struct ifreq ifr; 709 struct mii_ioctl_data *mii; 710 711 if (!reporting && !netif_running(slave_dev)) 712 return 0; 713 714 if (bond->params.use_carrier) 715 return netif_carrier_ok(slave_dev) ? 
BMSR_LSTATUS : 0; 716 717 /* Try to get link status using Ethtool first. */ 718 if (slave_dev->ethtool_ops) { 719 if (slave_dev->ethtool_ops->get_link) { 720 u32 link; 721 722 link = slave_dev->ethtool_ops->get_link(slave_dev); 723 724 return link ? BMSR_LSTATUS : 0; 725 } 726 } 727 728 /* Ethtool can't be used, fallback to MII ioctls. */ 729 ioctl = slave_ops->ndo_do_ioctl; 730 if (ioctl) { 731 /* TODO: set pointer to correct ioctl on a per team member */ 732 /* bases to make this more efficient. that is, once */ 733 /* we determine the correct ioctl, we will always */ 734 /* call it and not the others for that team */ 735 /* member. */ 736 737 /* 738 * We cannot assume that SIOCGMIIPHY will also read a 739 * register; not all network drivers (e.g., e100) 740 * support that. 741 */ 742 743 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 744 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 745 mii = if_mii(&ifr); 746 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 747 mii->reg_num = MII_BMSR; 748 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) 749 return mii->val_out & BMSR_LSTATUS; 750 } 751 } 752 753 /* 754 * If reporting, report that either there's no dev->do_ioctl, 755 * or both SIOCGMIIREG and get_link failed (meaning that we 756 * cannot report link status). If not reporting, pretend 757 * we're ok. 758 */ 759 return reporting ? 
-1 : BMSR_LSTATUS; 760 } 761 762 /*----------------------------- Multicast list ------------------------------*/ 763 764 /* 765 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 766 */ 767 static inline int bond_is_dmi_same(const struct dev_mc_list *dmi1, 768 const struct dev_mc_list *dmi2) 769 { 770 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 771 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 772 } 773 774 /* 775 * returns dmi entry if found, NULL otherwise 776 */ 777 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, 778 struct dev_mc_list *mc_list) 779 { 780 struct dev_mc_list *idmi; 781 782 for (idmi = mc_list; idmi; idmi = idmi->next) { 783 if (bond_is_dmi_same(dmi, idmi)) 784 return idmi; 785 } 786 787 return NULL; 788 } 789 790 /* 791 * Push the promiscuity flag down to appropriate slaves 792 */ 793 static int bond_set_promiscuity(struct bonding *bond, int inc) 794 { 795 int err = 0; 796 if (USES_PRIMARY(bond->params.mode)) { 797 /* write lock already acquired */ 798 if (bond->curr_active_slave) { 799 err = dev_set_promiscuity(bond->curr_active_slave->dev, 800 inc); 801 } 802 } else { 803 struct slave *slave; 804 int i; 805 bond_for_each_slave(bond, slave, i) { 806 err = dev_set_promiscuity(slave->dev, inc); 807 if (err) 808 return err; 809 } 810 } 811 return err; 812 } 813 814 /* 815 * Push the allmulti flag down to all slaves 816 */ 817 static int bond_set_allmulti(struct bonding *bond, int inc) 818 { 819 int err = 0; 820 if (USES_PRIMARY(bond->params.mode)) { 821 /* write lock already acquired */ 822 if (bond->curr_active_slave) { 823 err = dev_set_allmulti(bond->curr_active_slave->dev, 824 inc); 825 } 826 } else { 827 struct slave *slave; 828 int i; 829 bond_for_each_slave(bond, slave, i) { 830 err = dev_set_allmulti(slave->dev, inc); 831 if (err) 832 return err; 833 } 834 } 835 return err; 836 } 837 838 /* 839 * Add a Multicast address to slaves 840 * according to mode 841 */ 842 static void 
bond_mc_add(struct bonding *bond, void *addr, int alen) 843 { 844 if (USES_PRIMARY(bond->params.mode)) { 845 /* write lock already acquired */ 846 if (bond->curr_active_slave) 847 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 848 } else { 849 struct slave *slave; 850 int i; 851 852 bond_for_each_slave(bond, slave, i) 853 dev_mc_add(slave->dev, addr, alen, 0); 854 } 855 } 856 857 /* 858 * Remove a multicast address from slave 859 * according to mode 860 */ 861 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 862 { 863 if (USES_PRIMARY(bond->params.mode)) { 864 /* write lock already acquired */ 865 if (bond->curr_active_slave) 866 dev_mc_delete(bond->curr_active_slave->dev, addr, 867 alen, 0); 868 } else { 869 struct slave *slave; 870 int i; 871 bond_for_each_slave(bond, slave, i) { 872 dev_mc_delete(slave->dev, addr, alen, 0); 873 } 874 } 875 } 876 877 878 /* 879 * Retrieve the list of registered multicast addresses for the bonding 880 * device and retransmit an IGMP JOIN request to the current active 881 * slave. 
882 */ 883 static void bond_resend_igmp_join_requests(struct bonding *bond) 884 { 885 struct in_device *in_dev; 886 struct ip_mc_list *im; 887 888 rcu_read_lock(); 889 in_dev = __in_dev_get_rcu(bond->dev); 890 if (in_dev) { 891 for (im = in_dev->mc_list; im; im = im->next) 892 ip_mc_rejoin_group(im); 893 } 894 895 rcu_read_unlock(); 896 } 897 898 /* 899 * Totally destroys the mc_list in bond 900 */ 901 static void bond_mc_list_destroy(struct bonding *bond) 902 { 903 struct dev_mc_list *dmi; 904 905 dmi = bond->mc_list; 906 while (dmi) { 907 bond->mc_list = dmi->next; 908 kfree(dmi); 909 dmi = bond->mc_list; 910 } 911 912 bond->mc_list = NULL; 913 } 914 915 /* 916 * Copy all the Multicast addresses from src to the bonding device dst 917 */ 918 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 919 gfp_t gfp_flag) 920 { 921 struct dev_mc_list *dmi, *new_dmi; 922 923 for (dmi = mc_list; dmi; dmi = dmi->next) { 924 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 925 926 if (!new_dmi) { 927 /* FIXME: Potential memory leak !!! 
*/ 928 return -ENOMEM; 929 } 930 931 new_dmi->next = bond->mc_list; 932 bond->mc_list = new_dmi; 933 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 934 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 935 new_dmi->dmi_users = dmi->dmi_users; 936 new_dmi->dmi_gusers = dmi->dmi_gusers; 937 } 938 939 return 0; 940 } 941 942 /* 943 * flush all members of flush->mc_list from device dev->mc_list 944 */ 945 static void bond_mc_list_flush(struct net_device *bond_dev, 946 struct net_device *slave_dev) 947 { 948 struct bonding *bond = netdev_priv(bond_dev); 949 struct dev_mc_list *dmi; 950 951 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) 952 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 953 954 if (bond->params.mode == BOND_MODE_8023AD) { 955 /* del lacpdu mc addr from mc list */ 956 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 957 958 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 959 } 960 } 961 962 /*--------------------------- Active slave change ---------------------------*/ 963 964 /* 965 * Update the mc list and multicast-related flags for the new and 966 * old active slaves (if any) according to the multicast mode, and 967 * promiscuous flags unconditionally. 968 */ 969 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, 970 struct slave *old_active) 971 { 972 struct dev_mc_list *dmi; 973 974 if (!USES_PRIMARY(bond->params.mode)) 975 /* nothing to do - mc list is already up-to-date on 976 * all slaves 977 */ 978 return; 979 980 if (old_active) { 981 if (bond->dev->flags & IFF_PROMISC) 982 dev_set_promiscuity(old_active->dev, -1); 983 984 if (bond->dev->flags & IFF_ALLMULTI) 985 dev_set_allmulti(old_active->dev, -1); 986 987 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) 988 dev_mc_delete(old_active->dev, dmi->dmi_addr, 989 dmi->dmi_addrlen, 0); 990 } 991 992 if (new_active) { 993 /* FIXME: Signal errors upstream. 
*/ 994 if (bond->dev->flags & IFF_PROMISC) 995 dev_set_promiscuity(new_active->dev, 1); 996 997 if (bond->dev->flags & IFF_ALLMULTI) 998 dev_set_allmulti(new_active->dev, 1); 999 1000 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) 1001 dev_mc_add(new_active->dev, dmi->dmi_addr, 1002 dmi->dmi_addrlen, 0); 1003 bond_resend_igmp_join_requests(bond); 1004 } 1005 } 1006 1007 /* 1008 * bond_do_fail_over_mac 1009 * 1010 * Perform special MAC address swapping for fail_over_mac settings 1011 * 1012 * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. 1013 */ 1014 static void bond_do_fail_over_mac(struct bonding *bond, 1015 struct slave *new_active, 1016 struct slave *old_active) 1017 __releases(&bond->curr_slave_lock) 1018 __releases(&bond->lock) 1019 __acquires(&bond->lock) 1020 __acquires(&bond->curr_slave_lock) 1021 { 1022 u8 tmp_mac[ETH_ALEN]; 1023 struct sockaddr saddr; 1024 int rv; 1025 1026 switch (bond->params.fail_over_mac) { 1027 case BOND_FOM_ACTIVE: 1028 if (new_active) 1029 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, 1030 new_active->dev->addr_len); 1031 break; 1032 case BOND_FOM_FOLLOW: 1033 /* 1034 * if new_active && old_active, swap them 1035 * if just old_active, do nothing (going to no active slave) 1036 * if just new_active, set new_active to bond's MAC 1037 */ 1038 if (!new_active) 1039 return; 1040 1041 write_unlock_bh(&bond->curr_slave_lock); 1042 read_unlock(&bond->lock); 1043 1044 if (old_active) { 1045 memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); 1046 memcpy(saddr.sa_data, old_active->dev->dev_addr, 1047 ETH_ALEN); 1048 saddr.sa_family = new_active->dev->type; 1049 } else { 1050 memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); 1051 saddr.sa_family = bond->dev->type; 1052 } 1053 1054 rv = dev_set_mac_address(new_active->dev, &saddr); 1055 if (rv) { 1056 pr_err("%s: Error %d setting MAC of slave %s\n", 1057 bond->dev->name, -rv, new_active->dev->name); 1058 goto out; 1059 } 1060 1061 if (!old_active) 
1062 goto out; 1063 1064 memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); 1065 saddr.sa_family = old_active->dev->type; 1066 1067 rv = dev_set_mac_address(old_active->dev, &saddr); 1068 if (rv) 1069 pr_err("%s: Error %d setting MAC of slave %s\n", 1070 bond->dev->name, -rv, new_active->dev->name); 1071 out: 1072 read_lock(&bond->lock); 1073 write_lock_bh(&bond->curr_slave_lock); 1074 break; 1075 default: 1076 pr_err("%s: bond_do_fail_over_mac impossible: bad policy %d\n", 1077 bond->dev->name, bond->params.fail_over_mac); 1078 break; 1079 } 1080 1081 } 1082 1083 static bool bond_should_change_active(struct bonding *bond) 1084 { 1085 struct slave *prim = bond->primary_slave; 1086 struct slave *curr = bond->curr_active_slave; 1087 1088 if (!prim || !curr || curr->link != BOND_LINK_UP) 1089 return true; 1090 if (bond->force_primary) { 1091 bond->force_primary = false; 1092 return true; 1093 } 1094 if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && 1095 (prim->speed < curr->speed || 1096 (prim->speed == curr->speed && prim->duplex <= curr->duplex))) 1097 return false; 1098 if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) 1099 return false; 1100 return true; 1101 } 1102 1103 /** 1104 * find_best_interface - select the best available slave to be the active one 1105 * @bond: our bonding struct 1106 * 1107 * Warning: Caller must hold curr_slave_lock for writing. 
1108 */ 1109 static struct slave *bond_find_best_slave(struct bonding *bond) 1110 { 1111 struct slave *new_active, *old_active; 1112 struct slave *bestslave = NULL; 1113 int mintime = bond->params.updelay; 1114 int i; 1115 1116 new_active = bond->curr_active_slave; 1117 1118 if (!new_active) { /* there were no active slaves left */ 1119 if (bond->slave_cnt > 0) /* found one slave */ 1120 new_active = bond->first_slave; 1121 else 1122 return NULL; /* still no slave, return NULL */ 1123 } 1124 1125 if ((bond->primary_slave) && 1126 bond->primary_slave->link == BOND_LINK_UP && 1127 bond_should_change_active(bond)) { 1128 new_active = bond->primary_slave; 1129 } 1130 1131 /* remember where to stop iterating over the slaves */ 1132 old_active = new_active; 1133 1134 bond_for_each_slave_from(bond, new_active, i, old_active) { 1135 if (new_active->link == BOND_LINK_UP) { 1136 return new_active; 1137 } else if (new_active->link == BOND_LINK_BACK && 1138 IS_UP(new_active->dev)) { 1139 /* link up, but waiting for stabilization */ 1140 if (new_active->delay < mintime) { 1141 mintime = new_active->delay; 1142 bestslave = new_active; 1143 } 1144 } 1145 } 1146 1147 return bestslave; 1148 } 1149 1150 /** 1151 * change_active_interface - change the active slave into the specified one 1152 * @bond: our bonding struct 1153 * @new: the new slave to make the active one 1154 * 1155 * Set the new slave to the bond's settings and unset them on the old 1156 * curr_active_slave. 1157 * Setting include flags, mc-list, promiscuity, allmulti, etc. 1158 * 1159 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1160 * because it is apparently the best available slave we have, even though its 1161 * updelay hasn't timed out yet. 1162 * 1163 * If new_active is not NULL, caller must hold bond->lock for read and 1164 * curr_slave_lock for write_bh. 
1165 */ 1166 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1167 { 1168 struct slave *old_active = bond->curr_active_slave; 1169 1170 if (old_active == new_active) 1171 return; 1172 1173 if (new_active) { 1174 new_active->jiffies = jiffies; 1175 1176 if (new_active->link == BOND_LINK_BACK) { 1177 if (USES_PRIMARY(bond->params.mode)) { 1178 pr_info("%s: making interface %s the new active one %d ms earlier.\n", 1179 bond->dev->name, new_active->dev->name, 1180 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1181 } 1182 1183 new_active->delay = 0; 1184 new_active->link = BOND_LINK_UP; 1185 1186 if (bond->params.mode == BOND_MODE_8023AD) 1187 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1188 1189 if (bond_is_lb(bond)) 1190 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1191 } else { 1192 if (USES_PRIMARY(bond->params.mode)) { 1193 pr_info("%s: making interface %s the new active one.\n", 1194 bond->dev->name, new_active->dev->name); 1195 } 1196 } 1197 } 1198 1199 if (USES_PRIMARY(bond->params.mode)) 1200 bond_mc_swap(bond, new_active, old_active); 1201 1202 if (bond_is_lb(bond)) { 1203 bond_alb_handle_active_change(bond, new_active); 1204 if (old_active) 1205 bond_set_slave_inactive_flags(old_active); 1206 if (new_active) 1207 bond_set_slave_active_flags(new_active); 1208 } else { 1209 bond->curr_active_slave = new_active; 1210 } 1211 1212 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1213 if (old_active) 1214 bond_set_slave_inactive_flags(old_active); 1215 1216 if (new_active) { 1217 bond_set_slave_active_flags(new_active); 1218 1219 if (bond->params.fail_over_mac) 1220 bond_do_fail_over_mac(bond, new_active, 1221 old_active); 1222 1223 bond->send_grat_arp = bond->params.num_grat_arp; 1224 bond_send_gratuitous_arp(bond); 1225 1226 bond->send_unsol_na = bond->params.num_unsol_na; 1227 bond_send_unsolicited_na(bond); 1228 1229 write_unlock_bh(&bond->curr_slave_lock); 1230 
read_unlock(&bond->lock); 1231 1232 netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER); 1233 1234 read_lock(&bond->lock); 1235 write_lock_bh(&bond->curr_slave_lock); 1236 } 1237 } 1238 1239 /* resend IGMP joins since all were sent on curr_active_slave */ 1240 if (bond->params.mode == BOND_MODE_ROUNDROBIN) { 1241 bond_resend_igmp_join_requests(bond); 1242 } 1243 } 1244 1245 /** 1246 * bond_select_active_slave - select a new active slave, if needed 1247 * @bond: our bonding struct 1248 * 1249 * This functions should be called when one of the following occurs: 1250 * - The old curr_active_slave has been released or lost its link. 1251 * - The primary_slave has got its link back. 1252 * - A slave has got its link back and there's no old curr_active_slave. 1253 * 1254 * Caller must hold bond->lock for read and curr_slave_lock for write_bh. 1255 */ 1256 void bond_select_active_slave(struct bonding *bond) 1257 { 1258 struct slave *best_slave; 1259 int rv; 1260 1261 best_slave = bond_find_best_slave(bond); 1262 if (best_slave != bond->curr_active_slave) { 1263 bond_change_active_slave(bond, best_slave); 1264 rv = bond_set_carrier(bond); 1265 if (!rv) 1266 return; 1267 1268 if (netif_carrier_ok(bond->dev)) { 1269 pr_info("%s: first active interface up!\n", 1270 bond->dev->name); 1271 } else { 1272 pr_info("%s: now running without any active interface !\n", 1273 bond->dev->name); 1274 } 1275 } 1276 } 1277 1278 /*--------------------------- slave list handling ---------------------------*/ 1279 1280 /* 1281 * This function attaches the slave to the end of list. 1282 * 1283 * bond->lock held for writing by caller. 
1284 */ 1285 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1286 { 1287 if (bond->first_slave == NULL) { /* attaching the first slave */ 1288 new_slave->next = new_slave; 1289 new_slave->prev = new_slave; 1290 bond->first_slave = new_slave; 1291 } else { 1292 new_slave->next = bond->first_slave; 1293 new_slave->prev = bond->first_slave->prev; 1294 new_slave->next->prev = new_slave; 1295 new_slave->prev->next = new_slave; 1296 } 1297 1298 bond->slave_cnt++; 1299 } 1300 1301 /* 1302 * This function detaches the slave from the list. 1303 * WARNING: no check is made to verify if the slave effectively 1304 * belongs to <bond>. 1305 * Nothing is freed on return, structures are just unchained. 1306 * If any slave pointer in bond was pointing to <slave>, 1307 * it should be changed by the calling function. 1308 * 1309 * bond->lock held for writing by caller. 1310 */ 1311 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1312 { 1313 if (slave->next) 1314 slave->next->prev = slave->prev; 1315 1316 if (slave->prev) 1317 slave->prev->next = slave->next; 1318 1319 if (bond->first_slave == slave) { /* slave is the first slave */ 1320 if (bond->slave_cnt > 1) { /* there are more slave */ 1321 bond->first_slave = slave->next; 1322 } else { 1323 bond->first_slave = NULL; /* slave was the last one */ 1324 } 1325 } 1326 1327 slave->next = NULL; 1328 slave->prev = NULL; 1329 bond->slave_cnt--; 1330 } 1331 1332 /*---------------------------------- IOCTL ----------------------------------*/ 1333 1334 static int bond_sethwaddr(struct net_device *bond_dev, 1335 struct net_device *slave_dev) 1336 { 1337 pr_debug("bond_dev=%p\n", bond_dev); 1338 pr_debug("slave_dev=%p\n", slave_dev); 1339 pr_debug("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1340 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1341 return 0; 1342 } 1343 1344 #define BOND_VLAN_FEATURES \ 1345 (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | 
NETIF_F_HW_VLAN_TX | \ 1346 NETIF_F_HW_VLAN_FILTER) 1347 1348 /* 1349 * Compute the common dev->feature set available to all slaves. Some 1350 * feature bits are managed elsewhere, so preserve those feature bits 1351 * on the master device. 1352 */ 1353 static int bond_compute_features(struct bonding *bond) 1354 { 1355 struct slave *slave; 1356 struct net_device *bond_dev = bond->dev; 1357 unsigned long features = bond_dev->features; 1358 unsigned long vlan_features = 0; 1359 unsigned short max_hard_header_len = max((u16)ETH_HLEN, 1360 bond_dev->hard_header_len); 1361 int i; 1362 1363 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); 1364 features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; 1365 1366 if (!bond->first_slave) 1367 goto done; 1368 1369 features &= ~NETIF_F_ONE_FOR_ALL; 1370 1371 vlan_features = bond->first_slave->dev->vlan_features; 1372 bond_for_each_slave(bond, slave, i) { 1373 features = netdev_increment_features(features, 1374 slave->dev->features, 1375 NETIF_F_ONE_FOR_ALL); 1376 vlan_features = netdev_increment_features(vlan_features, 1377 slave->dev->vlan_features, 1378 NETIF_F_ONE_FOR_ALL); 1379 if (slave->dev->hard_header_len > max_hard_header_len) 1380 max_hard_header_len = slave->dev->hard_header_len; 1381 } 1382 1383 done: 1384 features |= (bond_dev->features & BOND_VLAN_FEATURES); 1385 bond_dev->features = netdev_fix_features(features, NULL); 1386 bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL); 1387 bond_dev->hard_header_len = max_hard_header_len; 1388 1389 return 0; 1390 } 1391 1392 static void bond_setup_by_slave(struct net_device *bond_dev, 1393 struct net_device *slave_dev) 1394 { 1395 struct bonding *bond = netdev_priv(bond_dev); 1396 1397 bond_dev->header_ops = slave_dev->header_ops; 1398 1399 bond_dev->type = slave_dev->type; 1400 bond_dev->hard_header_len = slave_dev->hard_header_len; 1401 bond_dev->addr_len = slave_dev->addr_len; 1402 1403 memcpy(bond_dev->broadcast, slave_dev->broadcast, 1404 
slave_dev->addr_len); 1405 bond->setup_by_slave = 1; 1406 } 1407 1408 /* enslave device <slave> to bond device <master> */ 1409 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1410 { 1411 struct bonding *bond = netdev_priv(bond_dev); 1412 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 1413 struct slave *new_slave = NULL; 1414 struct dev_mc_list *dmi; 1415 struct sockaddr addr; 1416 int link_reporting; 1417 int old_features = bond_dev->features; 1418 int res = 0; 1419 1420 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1421 slave_ops->ndo_do_ioctl == NULL) { 1422 pr_warning("%s: Warning: no link monitoring support for %s\n", 1423 bond_dev->name, slave_dev->name); 1424 } 1425 1426 /* bond must be initialized by bond_open() before enslaving */ 1427 if (!(bond_dev->flags & IFF_UP)) { 1428 pr_warning("%s: master_dev is not up in bond_enslave\n", 1429 bond_dev->name); 1430 } 1431 1432 /* already enslaved */ 1433 if (slave_dev->flags & IFF_SLAVE) { 1434 pr_debug("Error, Device was already enslaved\n"); 1435 return -EBUSY; 1436 } 1437 1438 /* vlan challenged mutual exclusion */ 1439 /* no need to lock since we're protected by rtnl_lock */ 1440 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1441 pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1442 if (!list_empty(&bond->vlan_list)) { 1443 pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n", 1444 bond_dev->name, slave_dev->name, bond_dev->name); 1445 return -EPERM; 1446 } else { 1447 pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", 1448 bond_dev->name, slave_dev->name, 1449 slave_dev->name, bond_dev->name); 1450 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1451 } 1452 } else { 1453 pr_debug("%s: ! 
NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1454 if (bond->slave_cnt == 0) { 1455 /* First slave, and it is not VLAN challenged, 1456 * so remove the block of adding VLANs over the bond. 1457 */ 1458 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1459 } 1460 } 1461 1462 /* 1463 * Old ifenslave binaries are no longer supported. These can 1464 * be identified with moderate accuracy by the state of the slave: 1465 * the current ifenslave will set the interface down prior to 1466 * enslaving it; the old ifenslave will not. 1467 */ 1468 if ((slave_dev->flags & IFF_UP)) { 1469 pr_err("%s is up. This may be due to an out of date ifenslave.\n", 1470 slave_dev->name); 1471 res = -EPERM; 1472 goto err_undo_flags; 1473 } 1474 1475 /* set bonding device ether type by slave - bonding netdevices are 1476 * created with ether_setup, so when the slave type is not ARPHRD_ETHER 1477 * there is a need to override some of the type dependent attribs/funcs. 1478 * 1479 * bond ether type mutual exclusion - don't allow slaves of dissimilar 1480 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond 1481 */ 1482 if (bond->slave_cnt == 0) { 1483 if (bond_dev->type != slave_dev->type) { 1484 pr_debug("%s: change device type from %d to %d\n", 1485 bond_dev->name, 1486 bond_dev->type, slave_dev->type); 1487 1488 netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE); 1489 1490 if (slave_dev->type != ARPHRD_ETHER) 1491 bond_setup_by_slave(bond_dev, slave_dev); 1492 else 1493 ether_setup(bond_dev); 1494 1495 netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE); 1496 } 1497 } else if (bond_dev->type != slave_dev->type) { 1498 pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", 1499 slave_dev->name, 1500 slave_dev->type, bond_dev->type); 1501 res = -EINVAL; 1502 goto err_undo_flags; 1503 } 1504 1505 if (slave_ops->ndo_set_mac_address == NULL) { 1506 if (bond->slave_cnt == 0) { 1507 pr_warning("%s: Warning: The first slave device 
specified does not support setting the MAC address. Setting fail_over_mac to active.", 1508 bond_dev->name); 1509 bond->params.fail_over_mac = BOND_FOM_ACTIVE; 1510 } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { 1511 pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n", 1512 bond_dev->name); 1513 res = -EOPNOTSUPP; 1514 goto err_undo_flags; 1515 } 1516 } 1517 1518 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); 1519 if (!new_slave) { 1520 res = -ENOMEM; 1521 goto err_undo_flags; 1522 } 1523 1524 /* save slave's original flags before calling 1525 * netdev_set_master and dev_open 1526 */ 1527 new_slave->original_flags = slave_dev->flags; 1528 1529 /* 1530 * Save slave's original ("permanent") mac address for modes 1531 * that need it, and for restoring it upon release, and then 1532 * set it to the master's address 1533 */ 1534 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1535 1536 if (!bond->params.fail_over_mac) { 1537 /* 1538 * Set slave to master's mac address. 
The application already 1539 * set the master's mac address to that of the first slave 1540 */ 1541 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1542 addr.sa_family = slave_dev->type; 1543 res = dev_set_mac_address(slave_dev, &addr); 1544 if (res) { 1545 pr_debug("Error %d calling set_mac_address\n", res); 1546 goto err_free; 1547 } 1548 } 1549 1550 res = netdev_set_master(slave_dev, bond_dev); 1551 if (res) { 1552 pr_debug("Error %d calling netdev_set_master\n", res); 1553 goto err_restore_mac; 1554 } 1555 /* open the slave since the application closed it */ 1556 res = dev_open(slave_dev); 1557 if (res) { 1558 pr_debug("Opening slave %s failed\n", slave_dev->name); 1559 goto err_unset_master; 1560 } 1561 1562 new_slave->dev = slave_dev; 1563 slave_dev->priv_flags |= IFF_BONDING; 1564 1565 if (bond_is_lb(bond)) { 1566 /* bond_alb_init_slave() must be called before all other stages since 1567 * it might fail and we do not want to have to undo everything 1568 */ 1569 res = bond_alb_init_slave(bond, new_slave); 1570 if (res) 1571 goto err_close; 1572 } 1573 1574 /* If the mode USES_PRIMARY, then the new slave gets the 1575 * master's promisc (and mc) settings only if it becomes the 1576 * curr_active_slave, and that is taken care of later when calling 1577 * bond_change_active() 1578 */ 1579 if (!USES_PRIMARY(bond->params.mode)) { 1580 /* set promiscuity level to new slave */ 1581 if (bond_dev->flags & IFF_PROMISC) { 1582 res = dev_set_promiscuity(slave_dev, 1); 1583 if (res) 1584 goto err_close; 1585 } 1586 1587 /* set allmulti level to new slave */ 1588 if (bond_dev->flags & IFF_ALLMULTI) { 1589 res = dev_set_allmulti(slave_dev, 1); 1590 if (res) 1591 goto err_close; 1592 } 1593 1594 netif_addr_lock_bh(bond_dev); 1595 /* upload master's mc_list to new slave */ 1596 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) 1597 dev_mc_add(slave_dev, dmi->dmi_addr, 1598 dmi->dmi_addrlen, 0); 1599 netif_addr_unlock_bh(bond_dev); 1600 } 1601 1602 if 
(bond->params.mode == BOND_MODE_8023AD) { 1603 /* add lacpdu mc addr to mc list */ 1604 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1605 1606 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1607 } 1608 1609 bond_add_vlans_on_slave(bond, slave_dev); 1610 1611 write_lock_bh(&bond->lock); 1612 1613 bond_attach_slave(bond, new_slave); 1614 1615 new_slave->delay = 0; 1616 new_slave->link_failure_count = 0; 1617 1618 bond_compute_features(bond); 1619 1620 write_unlock_bh(&bond->lock); 1621 1622 read_lock(&bond->lock); 1623 1624 new_slave->last_arp_rx = jiffies; 1625 1626 if (bond->params.miimon && !bond->params.use_carrier) { 1627 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1628 1629 if ((link_reporting == -1) && !bond->params.arp_interval) { 1630 /* 1631 * miimon is set but a bonded network driver 1632 * does not support ETHTOOL/MII and 1633 * arp_interval is not set. Note: if 1634 * use_carrier is enabled, we will never go 1635 * here (because netif_carrier is always 1636 * supported); thus, we don't need to change 1637 * the messages for netif_carrier. 1638 */ 1639 pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! 
see bonding.txt for details.\n", 1640 bond_dev->name, slave_dev->name); 1641 } else if (link_reporting == -1) { 1642 /* unable get link status using mii/ethtool */ 1643 pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n", 1644 bond_dev->name, slave_dev->name); 1645 } 1646 } 1647 1648 /* check for initial state */ 1649 if (!bond->params.miimon || 1650 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1651 if (bond->params.updelay) { 1652 pr_debug("Initial state of slave_dev is BOND_LINK_BACK\n"); 1653 new_slave->link = BOND_LINK_BACK; 1654 new_slave->delay = bond->params.updelay; 1655 } else { 1656 pr_debug("Initial state of slave_dev is BOND_LINK_UP\n"); 1657 new_slave->link = BOND_LINK_UP; 1658 } 1659 new_slave->jiffies = jiffies; 1660 } else { 1661 pr_debug("Initial state of slave_dev is BOND_LINK_DOWN\n"); 1662 new_slave->link = BOND_LINK_DOWN; 1663 } 1664 1665 if (bond_update_speed_duplex(new_slave) && 1666 (new_slave->link != BOND_LINK_DOWN)) { 1667 pr_warning("%s: Warning: failed to get speed and duplex from %s, assumed to be 100Mb/sec and Full.\n", 1668 bond_dev->name, new_slave->dev->name); 1669 1670 if (bond->params.mode == BOND_MODE_8023AD) { 1671 pr_warning("%s: Warning: Operation of 802.3ad mode requires ETHTOOL support in base driver for proper aggregator selection.\n", 1672 bond_dev->name); 1673 } 1674 } 1675 1676 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1677 /* if there is a primary slave, remember it */ 1678 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1679 bond->primary_slave = new_slave; 1680 bond->force_primary = true; 1681 } 1682 } 1683 1684 write_lock_bh(&bond->curr_slave_lock); 1685 1686 switch (bond->params.mode) { 1687 case BOND_MODE_ACTIVEBACKUP: 1688 bond_set_slave_inactive_flags(new_slave); 1689 
bond_select_active_slave(bond); 1690 break; 1691 case BOND_MODE_8023AD: 1692 /* in 802.3ad mode, the internal mechanism 1693 * will activate the slaves in the selected 1694 * aggregator 1695 */ 1696 bond_set_slave_inactive_flags(new_slave); 1697 /* if this is the first slave */ 1698 if (bond->slave_cnt == 1) { 1699 SLAVE_AD_INFO(new_slave).id = 1; 1700 /* Initialize AD with the number of times that the AD timer is called in 1 second 1701 * can be called only after the mac address of the bond is set 1702 */ 1703 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1704 bond->params.lacp_fast); 1705 } else { 1706 SLAVE_AD_INFO(new_slave).id = 1707 SLAVE_AD_INFO(new_slave->prev).id + 1; 1708 } 1709 1710 bond_3ad_bind_slave(new_slave); 1711 break; 1712 case BOND_MODE_TLB: 1713 case BOND_MODE_ALB: 1714 new_slave->state = BOND_STATE_ACTIVE; 1715 bond_set_slave_inactive_flags(new_slave); 1716 bond_select_active_slave(bond); 1717 break; 1718 default: 1719 pr_debug("This slave is always active in trunk mode\n"); 1720 1721 /* always active in trunk mode */ 1722 new_slave->state = BOND_STATE_ACTIVE; 1723 1724 /* In trunking mode there is little meaning to curr_active_slave 1725 * anyway (it holds no special properties of the bond device), 1726 * so we can change it without calling change_active_interface() 1727 */ 1728 if (!bond->curr_active_slave) 1729 bond->curr_active_slave = new_slave; 1730 1731 break; 1732 } /* switch(bond_mode) */ 1733 1734 write_unlock_bh(&bond->curr_slave_lock); 1735 1736 bond_set_carrier(bond); 1737 1738 read_unlock(&bond->lock); 1739 1740 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1741 if (res) 1742 goto err_close; 1743 1744 pr_info("%s: enslaving %s as a%s interface with a%s link.\n", 1745 bond_dev->name, slave_dev->name, 1746 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1747 new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); 1748 1749 /* enslave is successful */ 1750 return 0; 1751 1752 /* Undo stages on error */ 1753 err_close: 1754 dev_close(slave_dev); 1755 1756 err_unset_master: 1757 netdev_set_master(slave_dev, NULL); 1758 1759 err_restore_mac: 1760 if (!bond->params.fail_over_mac) { 1761 /* XXX TODO - fom follow mode needs to change master's 1762 * MAC if this slave's MAC is in use by the bond, or at 1763 * least print a warning. 1764 */ 1765 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1766 addr.sa_family = slave_dev->type; 1767 dev_set_mac_address(slave_dev, &addr); 1768 } 1769 1770 err_free: 1771 kfree(new_slave); 1772 1773 err_undo_flags: 1774 bond_dev->features = old_features; 1775 1776 return res; 1777 } 1778 1779 /* 1780 * Try to release the slave device <slave> from the bond device <master> 1781 * It is legal to access curr_active_slave without a lock because all the function 1782 * is write-locked. 1783 * 1784 * The rules for slave state should be: 1785 * for Active/Backup: 1786 * Active stays on all backups go down 1787 * for Bonded connections: 1788 * The first up interface should be left on and all others downed. 
1789 */ 1790 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1791 { 1792 struct bonding *bond = netdev_priv(bond_dev); 1793 struct slave *slave, *oldcurrent; 1794 struct sockaddr addr; 1795 1796 /* slave is not a slave or master is not master of this slave */ 1797 if (!(slave_dev->flags & IFF_SLAVE) || 1798 (slave_dev->master != bond_dev)) { 1799 pr_err("%s: Error: cannot release %s.\n", 1800 bond_dev->name, slave_dev->name); 1801 return -EINVAL; 1802 } 1803 1804 write_lock_bh(&bond->lock); 1805 1806 slave = bond_get_slave_by_dev(bond, slave_dev); 1807 if (!slave) { 1808 /* not a slave of this bond */ 1809 pr_info("%s: %s not enslaved\n", 1810 bond_dev->name, slave_dev->name); 1811 write_unlock_bh(&bond->lock); 1812 return -EINVAL; 1813 } 1814 1815 if (!bond->params.fail_over_mac) { 1816 if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) && 1817 bond->slave_cnt > 1) 1818 pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", 1819 bond_dev->name, slave_dev->name, 1820 slave->perm_hwaddr, 1821 bond_dev->name, slave_dev->name); 1822 } 1823 1824 /* Inform AD package of unbinding of slave. */ 1825 if (bond->params.mode == BOND_MODE_8023AD) { 1826 /* must be called before the slave is 1827 * detached from the list 1828 */ 1829 bond_3ad_unbind_slave(slave); 1830 } 1831 1832 pr_info("%s: releasing %s interface %s\n", 1833 bond_dev->name, 1834 (slave->state == BOND_STATE_ACTIVE) ? 
"active" : "backup", 1835 slave_dev->name); 1836 1837 oldcurrent = bond->curr_active_slave; 1838 1839 bond->current_arp_slave = NULL; 1840 1841 /* release the slave from its bond */ 1842 bond_detach_slave(bond, slave); 1843 1844 bond_compute_features(bond); 1845 1846 if (bond->primary_slave == slave) 1847 bond->primary_slave = NULL; 1848 1849 if (oldcurrent == slave) 1850 bond_change_active_slave(bond, NULL); 1851 1852 if (bond_is_lb(bond)) { 1853 /* Must be called only after the slave has been 1854 * detached from the list and the curr_active_slave 1855 * has been cleared (if our_slave == old_current), 1856 * but before a new active slave is selected. 1857 */ 1858 write_unlock_bh(&bond->lock); 1859 bond_alb_deinit_slave(bond, slave); 1860 write_lock_bh(&bond->lock); 1861 } 1862 1863 if (oldcurrent == slave) { 1864 /* 1865 * Note that we hold RTNL over this sequence, so there 1866 * is no concern that another slave add/remove event 1867 * will interfere. 1868 */ 1869 write_unlock_bh(&bond->lock); 1870 read_lock(&bond->lock); 1871 write_lock_bh(&bond->curr_slave_lock); 1872 1873 bond_select_active_slave(bond); 1874 1875 write_unlock_bh(&bond->curr_slave_lock); 1876 read_unlock(&bond->lock); 1877 write_lock_bh(&bond->lock); 1878 } 1879 1880 if (bond->slave_cnt == 0) { 1881 bond_set_carrier(bond); 1882 1883 /* if the last slave was removed, zero the mac address 1884 * of the master so it will be set by the application 1885 * to the mac address of the first slave 1886 */ 1887 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1888 1889 if (list_empty(&bond->vlan_list)) { 1890 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1891 } else { 1892 pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", 1893 bond_dev->name, bond_dev->name); 1894 pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", 1895 bond_dev->name); 1896 } 1897 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1898 
!bond_has_challenged_slaves(bond)) { 1899 pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", 1900 bond_dev->name, slave_dev->name, bond_dev->name); 1901 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1902 } 1903 1904 write_unlock_bh(&bond->lock); 1905 1906 /* must do this from outside any spinlocks */ 1907 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1908 1909 bond_del_vlans_from_slave(bond, slave_dev); 1910 1911 /* If the mode USES_PRIMARY, then we should only remove its 1912 * promisc and mc settings if it was the curr_active_slave, but that was 1913 * already taken care of above when we detached the slave 1914 */ 1915 if (!USES_PRIMARY(bond->params.mode)) { 1916 /* unset promiscuity level from slave */ 1917 if (bond_dev->flags & IFF_PROMISC) 1918 dev_set_promiscuity(slave_dev, -1); 1919 1920 /* unset allmulti level from slave */ 1921 if (bond_dev->flags & IFF_ALLMULTI) 1922 dev_set_allmulti(slave_dev, -1); 1923 1924 /* flush master's mc_list from slave */ 1925 netif_addr_lock_bh(bond_dev); 1926 bond_mc_list_flush(bond_dev, slave_dev); 1927 netif_addr_unlock_bh(bond_dev); 1928 } 1929 1930 netdev_set_master(slave_dev, NULL); 1931 1932 /* close slave before restoring its mac address */ 1933 dev_close(slave_dev); 1934 1935 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { 1936 /* restore original ("permanent") mac address */ 1937 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1938 addr.sa_family = slave_dev->type; 1939 dev_set_mac_address(slave_dev, &addr); 1940 } 1941 1942 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1943 IFF_SLAVE_INACTIVE | IFF_BONDING | 1944 IFF_SLAVE_NEEDARP); 1945 1946 kfree(slave); 1947 1948 return 0; /* deletion OK */ 1949 } 1950 1951 /* 1952 * First release a slave and than destroy the bond if no more slaves are left. 1953 * Must be under rtnl_lock when this function is called. 
1954 */ 1955 int bond_release_and_destroy(struct net_device *bond_dev, 1956 struct net_device *slave_dev) 1957 { 1958 struct bonding *bond = netdev_priv(bond_dev); 1959 int ret; 1960 1961 ret = bond_release(bond_dev, slave_dev); 1962 if ((ret == 0) && (bond->slave_cnt == 0)) { 1963 pr_info("%s: destroying bond %s.\n", 1964 bond_dev->name, bond_dev->name); 1965 unregister_netdevice(bond_dev); 1966 } 1967 return ret; 1968 } 1969 1970 /* 1971 * This function releases all slaves. 1972 */ 1973 static int bond_release_all(struct net_device *bond_dev) 1974 { 1975 struct bonding *bond = netdev_priv(bond_dev); 1976 struct slave *slave; 1977 struct net_device *slave_dev; 1978 struct sockaddr addr; 1979 1980 write_lock_bh(&bond->lock); 1981 1982 netif_carrier_off(bond_dev); 1983 1984 if (bond->slave_cnt == 0) 1985 goto out; 1986 1987 bond->current_arp_slave = NULL; 1988 bond->primary_slave = NULL; 1989 bond_change_active_slave(bond, NULL); 1990 1991 while ((slave = bond->first_slave) != NULL) { 1992 /* Inform AD package of unbinding of slave 1993 * before slave is detached from the list. 1994 */ 1995 if (bond->params.mode == BOND_MODE_8023AD) 1996 bond_3ad_unbind_slave(slave); 1997 1998 slave_dev = slave->dev; 1999 bond_detach_slave(bond, slave); 2000 2001 /* now that the slave is detached, unlock and perform 2002 * all the undo steps that should not be called from 2003 * within a lock. 
2004 */ 2005 write_unlock_bh(&bond->lock); 2006 2007 if (bond_is_lb(bond)) { 2008 /* must be called only after the slave 2009 * has been detached from the list 2010 */ 2011 bond_alb_deinit_slave(bond, slave); 2012 } 2013 2014 bond_compute_features(bond); 2015 2016 bond_destroy_slave_symlinks(bond_dev, slave_dev); 2017 bond_del_vlans_from_slave(bond, slave_dev); 2018 2019 /* If the mode USES_PRIMARY, then we should only remove its 2020 * promisc and mc settings if it was the curr_active_slave, but that was 2021 * already taken care of above when we detached the slave 2022 */ 2023 if (!USES_PRIMARY(bond->params.mode)) { 2024 /* unset promiscuity level from slave */ 2025 if (bond_dev->flags & IFF_PROMISC) 2026 dev_set_promiscuity(slave_dev, -1); 2027 2028 /* unset allmulti level from slave */ 2029 if (bond_dev->flags & IFF_ALLMULTI) 2030 dev_set_allmulti(slave_dev, -1); 2031 2032 /* flush master's mc_list from slave */ 2033 netif_addr_lock_bh(bond_dev); 2034 bond_mc_list_flush(bond_dev, slave_dev); 2035 netif_addr_unlock_bh(bond_dev); 2036 } 2037 2038 netdev_set_master(slave_dev, NULL); 2039 2040 /* close slave before restoring its mac address */ 2041 dev_close(slave_dev); 2042 2043 if (!bond->params.fail_over_mac) { 2044 /* restore original ("permanent") mac address*/ 2045 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 2046 addr.sa_family = slave_dev->type; 2047 dev_set_mac_address(slave_dev, &addr); 2048 } 2049 2050 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 2051 IFF_SLAVE_INACTIVE); 2052 2053 kfree(slave); 2054 2055 /* re-acquire the lock before getting the next slave */ 2056 write_lock_bh(&bond->lock); 2057 } 2058 2059 /* zero the mac address of the master so it will be 2060 * set by the application to the mac address of the 2061 * first slave 2062 */ 2063 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 2064 2065 if (list_empty(&bond->vlan_list)) 2066 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 2067 else { 2068 pr_warning("%s: 
Warning: clearing HW address of %s while it still has VLANs.\n", 2069 bond_dev->name, bond_dev->name); 2070 pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", 2071 bond_dev->name); 2072 } 2073 2074 pr_info("%s: released all slaves\n", bond_dev->name); 2075 2076 out: 2077 write_unlock_bh(&bond->lock); 2078 2079 return 0; 2080 } 2081 2082 /* 2083 * This function changes the active slave to slave <slave_dev>. 2084 * It returns -EINVAL in the following cases. 2085 * - <slave_dev> is not found in the list. 2086 * - There is not active slave now. 2087 * - <slave_dev> is already active. 2088 * - The link state of <slave_dev> is not BOND_LINK_UP. 2089 * - <slave_dev> is not running. 2090 * In these cases, this function does nothing. 2091 * In the other cases, current_slave pointer is changed and 0 is returned. 2092 */ 2093 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 2094 { 2095 struct bonding *bond = netdev_priv(bond_dev); 2096 struct slave *old_active = NULL; 2097 struct slave *new_active = NULL; 2098 int res = 0; 2099 2100 if (!USES_PRIMARY(bond->params.mode)) 2101 return -EINVAL; 2102 2103 /* Verify that master_dev is indeed the master of slave_dev */ 2104 if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev)) 2105 return -EINVAL; 2106 2107 read_lock(&bond->lock); 2108 2109 read_lock(&bond->curr_slave_lock); 2110 old_active = bond->curr_active_slave; 2111 read_unlock(&bond->curr_slave_lock); 2112 2113 new_active = bond_get_slave_by_dev(bond, slave_dev); 2114 2115 /* 2116 * Changing to the current active: do nothing; return success. 
2117 */ 2118 if (new_active && (new_active == old_active)) { 2119 read_unlock(&bond->lock); 2120 return 0; 2121 } 2122 2123 if ((new_active) && 2124 (old_active) && 2125 (new_active->link == BOND_LINK_UP) && 2126 IS_UP(new_active->dev)) { 2127 write_lock_bh(&bond->curr_slave_lock); 2128 bond_change_active_slave(bond, new_active); 2129 write_unlock_bh(&bond->curr_slave_lock); 2130 } else 2131 res = -EINVAL; 2132 2133 read_unlock(&bond->lock); 2134 2135 return res; 2136 } 2137 2138 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2139 { 2140 struct bonding *bond = netdev_priv(bond_dev); 2141 2142 info->bond_mode = bond->params.mode; 2143 info->miimon = bond->params.miimon; 2144 2145 read_lock(&bond->lock); 2146 info->num_slaves = bond->slave_cnt; 2147 read_unlock(&bond->lock); 2148 2149 return 0; 2150 } 2151 2152 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 2153 { 2154 struct bonding *bond = netdev_priv(bond_dev); 2155 struct slave *slave; 2156 int i, res = -ENODEV; 2157 2158 read_lock(&bond->lock); 2159 2160 bond_for_each_slave(bond, slave, i) { 2161 if (i == (int)info->slave_id) { 2162 res = 0; 2163 strcpy(info->slave_name, slave->dev->name); 2164 info->link = slave->link; 2165 info->state = slave->state; 2166 info->link_failure_count = slave->link_failure_count; 2167 break; 2168 } 2169 } 2170 2171 read_unlock(&bond->lock); 2172 2173 return res; 2174 } 2175 2176 /*-------------------------------- Monitoring -------------------------------*/ 2177 2178 2179 static int bond_miimon_inspect(struct bonding *bond) 2180 { 2181 struct slave *slave; 2182 int i, link_state, commit = 0; 2183 bool ignore_updelay; 2184 2185 ignore_updelay = !bond->curr_active_slave ? 
true : false; 2186 2187 bond_for_each_slave(bond, slave, i) { 2188 slave->new_link = BOND_LINK_NOCHANGE; 2189 2190 link_state = bond_check_dev_link(bond, slave->dev, 0); 2191 2192 switch (slave->link) { 2193 case BOND_LINK_UP: 2194 if (link_state) 2195 continue; 2196 2197 slave->link = BOND_LINK_FAIL; 2198 slave->delay = bond->params.downdelay; 2199 if (slave->delay) { 2200 pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n", 2201 bond->dev->name, 2202 (bond->params.mode == 2203 BOND_MODE_ACTIVEBACKUP) ? 2204 ((slave->state == BOND_STATE_ACTIVE) ? 2205 "active " : "backup ") : "", 2206 slave->dev->name, 2207 bond->params.downdelay * bond->params.miimon); 2208 } 2209 /*FALLTHRU*/ 2210 case BOND_LINK_FAIL: 2211 if (link_state) { 2212 /* 2213 * recovered before downdelay expired 2214 */ 2215 slave->link = BOND_LINK_UP; 2216 slave->jiffies = jiffies; 2217 pr_info("%s: link status up again after %d ms for interface %s.\n", 2218 bond->dev->name, 2219 (bond->params.downdelay - slave->delay) * 2220 bond->params.miimon, 2221 slave->dev->name); 2222 continue; 2223 } 2224 2225 if (slave->delay <= 0) { 2226 slave->new_link = BOND_LINK_DOWN; 2227 commit++; 2228 continue; 2229 } 2230 2231 slave->delay--; 2232 break; 2233 2234 case BOND_LINK_DOWN: 2235 if (!link_state) 2236 continue; 2237 2238 slave->link = BOND_LINK_BACK; 2239 slave->delay = bond->params.updelay; 2240 2241 if (slave->delay) { 2242 pr_info("%s: link status up for interface %s, enabling it in %d ms.\n", 2243 bond->dev->name, slave->dev->name, 2244 ignore_updelay ? 
0 : 2245 bond->params.updelay * 2246 bond->params.miimon); 2247 } 2248 /*FALLTHRU*/ 2249 case BOND_LINK_BACK: 2250 if (!link_state) { 2251 slave->link = BOND_LINK_DOWN; 2252 pr_info("%s: link status down again after %d ms for interface %s.\n", 2253 bond->dev->name, 2254 (bond->params.updelay - slave->delay) * 2255 bond->params.miimon, 2256 slave->dev->name); 2257 2258 continue; 2259 } 2260 2261 if (ignore_updelay) 2262 slave->delay = 0; 2263 2264 if (slave->delay <= 0) { 2265 slave->new_link = BOND_LINK_UP; 2266 commit++; 2267 ignore_updelay = false; 2268 continue; 2269 } 2270 2271 slave->delay--; 2272 break; 2273 } 2274 } 2275 2276 return commit; 2277 } 2278 2279 static void bond_miimon_commit(struct bonding *bond) 2280 { 2281 struct slave *slave; 2282 int i; 2283 2284 bond_for_each_slave(bond, slave, i) { 2285 switch (slave->new_link) { 2286 case BOND_LINK_NOCHANGE: 2287 continue; 2288 2289 case BOND_LINK_UP: 2290 slave->link = BOND_LINK_UP; 2291 slave->jiffies = jiffies; 2292 2293 if (bond->params.mode == BOND_MODE_8023AD) { 2294 /* prevent it from being the active one */ 2295 slave->state = BOND_STATE_BACKUP; 2296 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2297 /* make it immediately active */ 2298 slave->state = BOND_STATE_ACTIVE; 2299 } else if (slave != bond->primary_slave) { 2300 /* prevent it from being the active one */ 2301 slave->state = BOND_STATE_BACKUP; 2302 } 2303 2304 pr_info("%s: link status definitely up for interface %s.\n", 2305 bond->dev->name, slave->dev->name); 2306 2307 /* notify ad that the link status has changed */ 2308 if (bond->params.mode == BOND_MODE_8023AD) 2309 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2310 2311 if (bond_is_lb(bond)) 2312 bond_alb_handle_link_change(bond, slave, 2313 BOND_LINK_UP); 2314 2315 if (!bond->curr_active_slave || 2316 (slave == bond->primary_slave)) 2317 goto do_failover; 2318 2319 continue; 2320 2321 case BOND_LINK_DOWN: 2322 if (slave->link_failure_count < UINT_MAX) 2323 
slave->link_failure_count++; 2324 2325 slave->link = BOND_LINK_DOWN; 2326 2327 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP || 2328 bond->params.mode == BOND_MODE_8023AD) 2329 bond_set_slave_inactive_flags(slave); 2330 2331 pr_info("%s: link status definitely down for interface %s, disabling it\n", 2332 bond->dev->name, slave->dev->name); 2333 2334 if (bond->params.mode == BOND_MODE_8023AD) 2335 bond_3ad_handle_link_change(slave, 2336 BOND_LINK_DOWN); 2337 2338 if (bond_is_lb(bond)) 2339 bond_alb_handle_link_change(bond, slave, 2340 BOND_LINK_DOWN); 2341 2342 if (slave == bond->curr_active_slave) 2343 goto do_failover; 2344 2345 continue; 2346 2347 default: 2348 pr_err("%s: invalid new link %d on slave %s\n", 2349 bond->dev->name, slave->new_link, 2350 slave->dev->name); 2351 slave->new_link = BOND_LINK_NOCHANGE; 2352 2353 continue; 2354 } 2355 2356 do_failover: 2357 ASSERT_RTNL(); 2358 write_lock_bh(&bond->curr_slave_lock); 2359 bond_select_active_slave(bond); 2360 write_unlock_bh(&bond->curr_slave_lock); 2361 } 2362 2363 bond_set_carrier(bond); 2364 } 2365 2366 /* 2367 * bond_mii_monitor 2368 * 2369 * Really a wrapper that splits the mii monitor into two phases: an 2370 * inspection, then (if inspection indicates something needs to be done) 2371 * an acquisition of appropriate locks followed by a commit phase to 2372 * implement whatever link state changes are indicated. 
2373 */ 2374 void bond_mii_monitor(struct work_struct *work) 2375 { 2376 struct bonding *bond = container_of(work, struct bonding, 2377 mii_work.work); 2378 2379 read_lock(&bond->lock); 2380 if (bond->kill_timers) 2381 goto out; 2382 2383 if (bond->slave_cnt == 0) 2384 goto re_arm; 2385 2386 if (bond->send_grat_arp) { 2387 read_lock(&bond->curr_slave_lock); 2388 bond_send_gratuitous_arp(bond); 2389 read_unlock(&bond->curr_slave_lock); 2390 } 2391 2392 if (bond->send_unsol_na) { 2393 read_lock(&bond->curr_slave_lock); 2394 bond_send_unsolicited_na(bond); 2395 read_unlock(&bond->curr_slave_lock); 2396 } 2397 2398 if (bond_miimon_inspect(bond)) { 2399 read_unlock(&bond->lock); 2400 rtnl_lock(); 2401 read_lock(&bond->lock); 2402 2403 bond_miimon_commit(bond); 2404 2405 read_unlock(&bond->lock); 2406 rtnl_unlock(); /* might sleep, hold no other locks */ 2407 read_lock(&bond->lock); 2408 } 2409 2410 re_arm: 2411 if (bond->params.miimon) 2412 queue_delayed_work(bond->wq, &bond->mii_work, 2413 msecs_to_jiffies(bond->params.miimon)); 2414 out: 2415 read_unlock(&bond->lock); 2416 } 2417 2418 static __be32 bond_glean_dev_ip(struct net_device *dev) 2419 { 2420 struct in_device *idev; 2421 struct in_ifaddr *ifa; 2422 __be32 addr = 0; 2423 2424 if (!dev) 2425 return 0; 2426 2427 rcu_read_lock(); 2428 idev = __in_dev_get_rcu(dev); 2429 if (!idev) 2430 goto out; 2431 2432 ifa = idev->ifa_list; 2433 if (!ifa) 2434 goto out; 2435 2436 addr = ifa->ifa_local; 2437 out: 2438 rcu_read_unlock(); 2439 return addr; 2440 } 2441 2442 static int bond_has_this_ip(struct bonding *bond, __be32 ip) 2443 { 2444 struct vlan_entry *vlan; 2445 2446 if (ip == bond->master_ip) 2447 return 1; 2448 2449 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2450 if (ip == vlan->vlan_ip) 2451 return 1; 2452 } 2453 2454 return 0; 2455 } 2456 2457 /* 2458 * We go to the (large) trouble of VLAN tagging ARP frames because 2459 * switches in VLAN mode (especially if ports are configured as 2460 * "native" to 
a VLAN) might not pass non-tagged frames. 2461 */ 2462 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) 2463 { 2464 struct sk_buff *skb; 2465 2466 pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2467 slave_dev->name, dest_ip, src_ip, vlan_id); 2468 2469 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2470 NULL, slave_dev->dev_addr, NULL); 2471 2472 if (!skb) { 2473 pr_err("ARP packet allocation failed\n"); 2474 return; 2475 } 2476 if (vlan_id) { 2477 skb = vlan_put_tag(skb, vlan_id); 2478 if (!skb) { 2479 pr_err("failed to insert VLAN tag\n"); 2480 return; 2481 } 2482 } 2483 arp_xmit(skb); 2484 } 2485 2486 2487 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2488 { 2489 int i, vlan_id, rv; 2490 __be32 *targets = bond->params.arp_targets; 2491 struct vlan_entry *vlan; 2492 struct net_device *vlan_dev; 2493 struct flowi fl; 2494 struct rtable *rt; 2495 2496 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2497 if (!targets[i]) 2498 break; 2499 pr_debug("basa: target %x\n", targets[i]); 2500 if (list_empty(&bond->vlan_list)) { 2501 pr_debug("basa: empty vlan: arp_send\n"); 2502 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2503 bond->master_ip, 0); 2504 continue; 2505 } 2506 2507 /* 2508 * If VLANs are configured, we do a route lookup to 2509 * determine which VLAN interface would be used, so we 2510 * can tag the ARP with the proper VLAN tag. 
2511 */ 2512 memset(&fl, 0, sizeof(fl)); 2513 fl.fl4_dst = targets[i]; 2514 fl.fl4_tos = RTO_ONLINK; 2515 2516 rv = ip_route_output_key(dev_net(bond->dev), &rt, &fl); 2517 if (rv) { 2518 if (net_ratelimit()) { 2519 pr_warning("%s: no route to arp_ip_target %pI4\n", 2520 bond->dev->name, &fl.fl4_dst); 2521 } 2522 continue; 2523 } 2524 2525 /* 2526 * This target is not on a VLAN 2527 */ 2528 if (rt->u.dst.dev == bond->dev) { 2529 ip_rt_put(rt); 2530 pr_debug("basa: rtdev == bond->dev: arp_send\n"); 2531 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2532 bond->master_ip, 0); 2533 continue; 2534 } 2535 2536 vlan_id = 0; 2537 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2538 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2539 if (vlan_dev == rt->u.dst.dev) { 2540 vlan_id = vlan->vlan_id; 2541 pr_debug("basa: vlan match on %s %d\n", 2542 vlan_dev->name, vlan_id); 2543 break; 2544 } 2545 } 2546 2547 if (vlan_id) { 2548 ip_rt_put(rt); 2549 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2550 vlan->vlan_ip, vlan_id); 2551 continue; 2552 } 2553 2554 if (net_ratelimit()) { 2555 pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", 2556 bond->dev->name, &fl.fl4_dst, 2557 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2558 } 2559 ip_rt_put(rt); 2560 } 2561 } 2562 2563 /* 2564 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2565 * for each VLAN above us. 2566 * 2567 * Caller must hold curr_slave_lock for read or better 2568 */ 2569 static void bond_send_gratuitous_arp(struct bonding *bond) 2570 { 2571 struct slave *slave = bond->curr_active_slave; 2572 struct vlan_entry *vlan; 2573 struct net_device *vlan_dev; 2574 2575 pr_debug("bond_send_grat_arp: bond %s slave %s\n", 2576 bond->dev->name, slave ? 
slave->dev->name : "NULL"); 2577 2578 if (!slave || !bond->send_grat_arp || 2579 test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) 2580 return; 2581 2582 bond->send_grat_arp--; 2583 2584 if (bond->master_ip) { 2585 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2586 bond->master_ip, 0); 2587 } 2588 2589 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2590 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2591 if (vlan->vlan_ip) { 2592 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2593 vlan->vlan_ip, vlan->vlan_id); 2594 } 2595 } 2596 } 2597 2598 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 2599 { 2600 int i; 2601 __be32 *targets = bond->params.arp_targets; 2602 2603 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { 2604 pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n", 2605 &sip, &tip, i, &targets[i], 2606 bond_has_this_ip(bond, tip)); 2607 if (sip == targets[i]) { 2608 if (bond_has_this_ip(bond, tip)) 2609 slave->last_arp_rx = jiffies; 2610 return; 2611 } 2612 } 2613 } 2614 2615 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 2616 { 2617 struct arphdr *arp; 2618 struct slave *slave; 2619 struct bonding *bond; 2620 unsigned char *arp_ptr; 2621 __be32 sip, tip; 2622 2623 if (dev->priv_flags & IFF_802_1Q_VLAN) { 2624 /* 2625 * When using VLANS and bonding, dev and oriv_dev may be 2626 * incorrect if the physical interface supports VLAN 2627 * acceleration. With this change ARP validation now 2628 * works for hosts only reachable on the VLAN interface. 
2629 */ 2630 dev = vlan_dev_real_dev(dev); 2631 orig_dev = dev_get_by_index_rcu(dev_net(skb->dev),skb->skb_iif); 2632 } 2633 2634 if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) 2635 goto out; 2636 2637 bond = netdev_priv(dev); 2638 read_lock(&bond->lock); 2639 2640 pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", 2641 bond->dev->name, skb->dev ? skb->dev->name : "NULL", 2642 orig_dev ? orig_dev->name : "NULL"); 2643 2644 slave = bond_get_slave_by_dev(bond, orig_dev); 2645 if (!slave || !slave_do_arp_validate(bond, slave)) 2646 goto out_unlock; 2647 2648 if (!pskb_may_pull(skb, arp_hdr_len(dev))) 2649 goto out_unlock; 2650 2651 arp = arp_hdr(skb); 2652 if (arp->ar_hln != dev->addr_len || 2653 skb->pkt_type == PACKET_OTHERHOST || 2654 skb->pkt_type == PACKET_LOOPBACK || 2655 arp->ar_hrd != htons(ARPHRD_ETHER) || 2656 arp->ar_pro != htons(ETH_P_IP) || 2657 arp->ar_pln != 4) 2658 goto out_unlock; 2659 2660 arp_ptr = (unsigned char *)(arp + 1); 2661 arp_ptr += dev->addr_len; 2662 memcpy(&sip, arp_ptr, 4); 2663 arp_ptr += 4 + dev->addr_len; 2664 memcpy(&tip, arp_ptr, 4); 2665 2666 pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n", 2667 bond->dev->name, slave->dev->name, slave->state, 2668 bond->params.arp_validate, slave_do_arp_validate(bond, slave), 2669 &sip, &tip); 2670 2671 /* 2672 * Backup slaves won't see the ARP reply, but do come through 2673 * here for each ARP probe (so we swap the sip/tip to validate 2674 * the probe). In a "redundant switch, common router" type of 2675 * configuration, the ARP probe will (hopefully) travel from 2676 * the active, through one switch, the router, then the other 2677 * switch before reaching the backup. 
2678 */ 2679 if (slave->state == BOND_STATE_ACTIVE) 2680 bond_validate_arp(bond, slave, sip, tip); 2681 else 2682 bond_validate_arp(bond, slave, tip, sip); 2683 2684 out_unlock: 2685 read_unlock(&bond->lock); 2686 out: 2687 dev_kfree_skb(skb); 2688 return NET_RX_SUCCESS; 2689 } 2690 2691 /* 2692 * this function is called regularly to monitor each slave's link 2693 * ensuring that traffic is being sent and received when arp monitoring 2694 * is used in load-balancing mode. if the adapter has been dormant, then an 2695 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2696 * arp monitoring in active backup mode. 2697 */ 2698 void bond_loadbalance_arp_mon(struct work_struct *work) 2699 { 2700 struct bonding *bond = container_of(work, struct bonding, 2701 arp_work.work); 2702 struct slave *slave, *oldcurrent; 2703 int do_failover = 0; 2704 int delta_in_ticks; 2705 int i; 2706 2707 read_lock(&bond->lock); 2708 2709 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2710 2711 if (bond->kill_timers) 2712 goto out; 2713 2714 if (bond->slave_cnt == 0) 2715 goto re_arm; 2716 2717 read_lock(&bond->curr_slave_lock); 2718 oldcurrent = bond->curr_active_slave; 2719 read_unlock(&bond->curr_slave_lock); 2720 2721 /* see if any of the previous devices are up now (i.e. they have 2722 * xmt and rcv traffic). the curr_active_slave does not come into 2723 * the picture unless it is null. also, slave->jiffies is not needed 2724 * here because we send an arp on each slave and give a slave as 2725 * long as it needs to get the tx/rx within the delta. 2726 * TODO: what about up/down delay in arp mode? 
it wasn't here before 2727 * so it can wait 2728 */ 2729 bond_for_each_slave(bond, slave, i) { 2730 if (slave->link != BOND_LINK_UP) { 2731 if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) && 2732 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { 2733 2734 slave->link = BOND_LINK_UP; 2735 slave->state = BOND_STATE_ACTIVE; 2736 2737 /* primary_slave has no meaning in round-robin 2738 * mode. the window of a slave being up and 2739 * curr_active_slave being null after enslaving 2740 * is closed. 2741 */ 2742 if (!oldcurrent) { 2743 pr_info("%s: link status definitely up for interface %s, ", 2744 bond->dev->name, 2745 slave->dev->name); 2746 do_failover = 1; 2747 } else { 2748 pr_info("%s: interface %s is now up\n", 2749 bond->dev->name, 2750 slave->dev->name); 2751 } 2752 } 2753 } else { 2754 /* slave->link == BOND_LINK_UP */ 2755 2756 /* not all switches will respond to an arp request 2757 * when the source ip is 0, so don't take the link down 2758 * if we don't know our ip yet 2759 */ 2760 if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) || 2761 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { 2762 2763 slave->link = BOND_LINK_DOWN; 2764 slave->state = BOND_STATE_BACKUP; 2765 2766 if (slave->link_failure_count < UINT_MAX) 2767 slave->link_failure_count++; 2768 2769 pr_info("%s: interface %s is now down.\n", 2770 bond->dev->name, 2771 slave->dev->name); 2772 2773 if (slave == oldcurrent) 2774 do_failover = 1; 2775 } 2776 } 2777 2778 /* note: if switch is in round-robin mode, all links 2779 * must tx arp to ensure all links rx an arp - otherwise 2780 * links may oscillate or not come up at all; if switch is 2781 * in something like xor mode, there is nothing we can 2782 * do - all replies will be rx'ed on same link causing slaves 2783 * to be unstable during low/no traffic periods 2784 */ 2785 if (IS_UP(slave->dev)) 2786 bond_arp_send_all(bond, slave); 2787 } 2788 2789 if 
(do_failover) { 2790 write_lock_bh(&bond->curr_slave_lock); 2791 2792 bond_select_active_slave(bond); 2793 2794 write_unlock_bh(&bond->curr_slave_lock); 2795 } 2796 2797 re_arm: 2798 if (bond->params.arp_interval) 2799 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 2800 out: 2801 read_unlock(&bond->lock); 2802 } 2803 2804 /* 2805 * Called to inspect slaves for active-backup mode ARP monitor link state 2806 * changes. Sets new_link in slaves to specify what action should take 2807 * place for the slave. Returns 0 if no changes are found, >0 if changes 2808 * to link states must be committed. 2809 * 2810 * Called with bond->lock held for read. 2811 */ 2812 static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) 2813 { 2814 struct slave *slave; 2815 int i, commit = 0; 2816 2817 bond_for_each_slave(bond, slave, i) { 2818 slave->new_link = BOND_LINK_NOCHANGE; 2819 2820 if (slave->link != BOND_LINK_UP) { 2821 if (time_before_eq(jiffies, slave_last_rx(bond, slave) + 2822 delta_in_ticks)) { 2823 slave->new_link = BOND_LINK_UP; 2824 commit++; 2825 } 2826 2827 continue; 2828 } 2829 2830 /* 2831 * Give slaves 2*delta after being enslaved or made 2832 * active. This avoids bouncing, as the last receive 2833 * times need a full ARP monitor cycle to be updated. 
2834 */ 2835 if (!time_after_eq(jiffies, slave->jiffies + 2836 2 * delta_in_ticks)) 2837 continue; 2838 2839 /* 2840 * Backup slave is down if: 2841 * - No current_arp_slave AND 2842 * - more than 3*delta since last receive AND 2843 * - the bond has an IP address 2844 * 2845 * Note: a non-null current_arp_slave indicates 2846 * the curr_active_slave went down and we are 2847 * searching for a new one; under this condition 2848 * we only take the curr_active_slave down - this 2849 * gives each slave a chance to tx/rx traffic 2850 * before being taken out 2851 */ 2852 if (slave->state == BOND_STATE_BACKUP && 2853 !bond->current_arp_slave && 2854 time_after(jiffies, slave_last_rx(bond, slave) + 2855 3 * delta_in_ticks)) { 2856 slave->new_link = BOND_LINK_DOWN; 2857 commit++; 2858 } 2859 2860 /* 2861 * Active slave is down if: 2862 * - more than 2*delta since transmitting OR 2863 * - (more than 2*delta since receive AND 2864 * the bond has an IP address) 2865 */ 2866 if ((slave->state == BOND_STATE_ACTIVE) && 2867 (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2868 2 * delta_in_ticks) || 2869 (time_after_eq(jiffies, slave_last_rx(bond, slave) 2870 + 2 * delta_in_ticks)))) { 2871 slave->new_link = BOND_LINK_DOWN; 2872 commit++; 2873 } 2874 } 2875 2876 return commit; 2877 } 2878 2879 /* 2880 * Called to commit link state changes noted by inspection step of 2881 * active-backup mode ARP monitor. 2882 * 2883 * Called with RTNL and bond->lock for read. 
2884 */ 2885 static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) 2886 { 2887 struct slave *slave; 2888 int i; 2889 2890 bond_for_each_slave(bond, slave, i) { 2891 switch (slave->new_link) { 2892 case BOND_LINK_NOCHANGE: 2893 continue; 2894 2895 case BOND_LINK_UP: 2896 if ((!bond->curr_active_slave && 2897 time_before_eq(jiffies, 2898 dev_trans_start(slave->dev) + 2899 delta_in_ticks)) || 2900 bond->curr_active_slave != slave) { 2901 slave->link = BOND_LINK_UP; 2902 bond->current_arp_slave = NULL; 2903 2904 pr_info("%s: link status definitely up for interface %s.\n", 2905 bond->dev->name, slave->dev->name); 2906 2907 if (!bond->curr_active_slave || 2908 (slave == bond->primary_slave)) 2909 goto do_failover; 2910 2911 } 2912 2913 continue; 2914 2915 case BOND_LINK_DOWN: 2916 if (slave->link_failure_count < UINT_MAX) 2917 slave->link_failure_count++; 2918 2919 slave->link = BOND_LINK_DOWN; 2920 bond_set_slave_inactive_flags(slave); 2921 2922 pr_info("%s: link status definitely down for interface %s, disabling it\n", 2923 bond->dev->name, slave->dev->name); 2924 2925 if (slave == bond->curr_active_slave) { 2926 bond->current_arp_slave = NULL; 2927 goto do_failover; 2928 } 2929 2930 continue; 2931 2932 default: 2933 pr_err("%s: impossible: new_link %d on slave %s\n", 2934 bond->dev->name, slave->new_link, 2935 slave->dev->name); 2936 continue; 2937 } 2938 2939 do_failover: 2940 ASSERT_RTNL(); 2941 write_lock_bh(&bond->curr_slave_lock); 2942 bond_select_active_slave(bond); 2943 write_unlock_bh(&bond->curr_slave_lock); 2944 } 2945 2946 bond_set_carrier(bond); 2947 } 2948 2949 /* 2950 * Send ARP probes for active-backup mode ARP monitor. 2951 * 2952 * Called with bond->lock held for read. 
2953 */ 2954 static void bond_ab_arp_probe(struct bonding *bond) 2955 { 2956 struct slave *slave; 2957 int i; 2958 2959 read_lock(&bond->curr_slave_lock); 2960 2961 if (bond->current_arp_slave && bond->curr_active_slave) 2962 pr_info("PROBE: c_arp %s && cas %s BAD\n", 2963 bond->current_arp_slave->dev->name, 2964 bond->curr_active_slave->dev->name); 2965 2966 if (bond->curr_active_slave) { 2967 bond_arp_send_all(bond, bond->curr_active_slave); 2968 read_unlock(&bond->curr_slave_lock); 2969 return; 2970 } 2971 2972 read_unlock(&bond->curr_slave_lock); 2973 2974 /* if we don't have a curr_active_slave, search for the next available 2975 * backup slave from the current_arp_slave and make it the candidate 2976 * for becoming the curr_active_slave 2977 */ 2978 2979 if (!bond->current_arp_slave) { 2980 bond->current_arp_slave = bond->first_slave; 2981 if (!bond->current_arp_slave) 2982 return; 2983 } 2984 2985 bond_set_slave_inactive_flags(bond->current_arp_slave); 2986 2987 /* search for next candidate */ 2988 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 2989 if (IS_UP(slave->dev)) { 2990 slave->link = BOND_LINK_BACK; 2991 bond_set_slave_active_flags(slave); 2992 bond_arp_send_all(bond, slave); 2993 slave->jiffies = jiffies; 2994 bond->current_arp_slave = slave; 2995 break; 2996 } 2997 2998 /* if the link state is up at this point, we 2999 * mark it down - this can happen if we have 3000 * simultaneous link failures and 3001 * reselect_active_interface doesn't make this 3002 * one the current slave so it is still marked 3003 * up when it is actually down 3004 */ 3005 if (slave->link == BOND_LINK_UP) { 3006 slave->link = BOND_LINK_DOWN; 3007 if (slave->link_failure_count < UINT_MAX) 3008 slave->link_failure_count++; 3009 3010 bond_set_slave_inactive_flags(slave); 3011 3012 pr_info("%s: backup interface %s is now down.\n", 3013 bond->dev->name, slave->dev->name); 3014 } 3015 } 3016 } 3017 3018 void bond_activebackup_arp_mon(struct work_struct 
*work) 3019 { 3020 struct bonding *bond = container_of(work, struct bonding, 3021 arp_work.work); 3022 int delta_in_ticks; 3023 3024 read_lock(&bond->lock); 3025 3026 if (bond->kill_timers) 3027 goto out; 3028 3029 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 3030 3031 if (bond->slave_cnt == 0) 3032 goto re_arm; 3033 3034 if (bond->send_grat_arp) { 3035 read_lock(&bond->curr_slave_lock); 3036 bond_send_gratuitous_arp(bond); 3037 read_unlock(&bond->curr_slave_lock); 3038 } 3039 3040 if (bond->send_unsol_na) { 3041 read_lock(&bond->curr_slave_lock); 3042 bond_send_unsolicited_na(bond); 3043 read_unlock(&bond->curr_slave_lock); 3044 } 3045 3046 if (bond_ab_arp_inspect(bond, delta_in_ticks)) { 3047 read_unlock(&bond->lock); 3048 rtnl_lock(); 3049 read_lock(&bond->lock); 3050 3051 bond_ab_arp_commit(bond, delta_in_ticks); 3052 3053 read_unlock(&bond->lock); 3054 rtnl_unlock(); 3055 read_lock(&bond->lock); 3056 } 3057 3058 bond_ab_arp_probe(bond); 3059 3060 re_arm: 3061 if (bond->params.arp_interval) 3062 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3063 out: 3064 read_unlock(&bond->lock); 3065 } 3066 3067 /*------------------------------ proc/seq_file-------------------------------*/ 3068 3069 #ifdef CONFIG_PROC_FS 3070 3071 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 3072 __acquires(&dev_base_lock) 3073 __acquires(&bond->lock) 3074 { 3075 struct bonding *bond = seq->private; 3076 loff_t off = 0; 3077 struct slave *slave; 3078 int i; 3079 3080 /* make sure the bond won't be taken away */ 3081 read_lock(&dev_base_lock); 3082 read_lock(&bond->lock); 3083 3084 if (*pos == 0) 3085 return SEQ_START_TOKEN; 3086 3087 bond_for_each_slave(bond, slave, i) { 3088 if (++off == *pos) 3089 return slave; 3090 } 3091 3092 return NULL; 3093 } 3094 3095 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3096 { 3097 struct bonding *bond = seq->private; 3098 struct slave *slave = v; 3099 3100 ++*pos; 3101 
if (v == SEQ_START_TOKEN) 3102 return bond->first_slave; 3103 3104 slave = slave->next; 3105 3106 return (slave == bond->first_slave) ? NULL : slave; 3107 } 3108 3109 static void bond_info_seq_stop(struct seq_file *seq, void *v) 3110 __releases(&bond->lock) 3111 __releases(&dev_base_lock) 3112 { 3113 struct bonding *bond = seq->private; 3114 3115 read_unlock(&bond->lock); 3116 read_unlock(&dev_base_lock); 3117 } 3118 3119 static void bond_info_show_master(struct seq_file *seq) 3120 { 3121 struct bonding *bond = seq->private; 3122 struct slave *curr; 3123 int i; 3124 3125 read_lock(&bond->curr_slave_lock); 3126 curr = bond->curr_active_slave; 3127 read_unlock(&bond->curr_slave_lock); 3128 3129 seq_printf(seq, "Bonding Mode: %s", 3130 bond_mode_name(bond->params.mode)); 3131 3132 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && 3133 bond->params.fail_over_mac) 3134 seq_printf(seq, " (fail_over_mac %s)", 3135 fail_over_mac_tbl[bond->params.fail_over_mac].modename); 3136 3137 seq_printf(seq, "\n"); 3138 3139 if (bond->params.mode == BOND_MODE_XOR || 3140 bond->params.mode == BOND_MODE_8023AD) { 3141 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 3142 xmit_hashtype_tbl[bond->params.xmit_policy].modename, 3143 bond->params.xmit_policy); 3144 } 3145 3146 if (USES_PRIMARY(bond->params.mode)) { 3147 seq_printf(seq, "Primary Slave: %s", 3148 (bond->primary_slave) ? 3149 bond->primary_slave->dev->name : "None"); 3150 if (bond->primary_slave) 3151 seq_printf(seq, " (primary_reselect %s)", 3152 pri_reselect_tbl[bond->params.primary_reselect].modename); 3153 3154 seq_printf(seq, "\nCurrently Active Slave: %s\n", 3155 (curr) ? curr->dev->name : "None"); 3156 } 3157 3158 seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? 
3159 "up" : "down"); 3160 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 3161 seq_printf(seq, "Up Delay (ms): %d\n", 3162 bond->params.updelay * bond->params.miimon); 3163 seq_printf(seq, "Down Delay (ms): %d\n", 3164 bond->params.downdelay * bond->params.miimon); 3165 3166 3167 /* ARP information */ 3168 if (bond->params.arp_interval > 0) { 3169 int printed = 0; 3170 seq_printf(seq, "ARP Polling Interval (ms): %d\n", 3171 bond->params.arp_interval); 3172 3173 seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); 3174 3175 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 3176 if (!bond->params.arp_targets[i]) 3177 break; 3178 if (printed) 3179 seq_printf(seq, ","); 3180 seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); 3181 printed = 1; 3182 } 3183 seq_printf(seq, "\n"); 3184 } 3185 3186 if (bond->params.mode == BOND_MODE_8023AD) { 3187 struct ad_info ad_info; 3188 3189 seq_puts(seq, "\n802.3ad info\n"); 3190 seq_printf(seq, "LACP rate: %s\n", 3191 (bond->params.lacp_fast) ? 
"fast" : "slow");
		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n",
			   ad_select_tbl[bond->params.ad_select].modename);

		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
			seq_printf(seq, "bond %s has no active aggregator\n",
				   bond->dev->name);
		} else {
			seq_printf(seq, "Active Aggregator Info:\n");

			seq_printf(seq, "\tAggregator ID: %d\n",
				   ad_info.aggregator_id);
			seq_printf(seq, "\tNumber of ports: %d\n",
				   ad_info.ports);
			seq_printf(seq, "\tActor Key: %d\n",
				   ad_info.actor_key);
			seq_printf(seq, "\tPartner Key: %d\n",
				   ad_info.partner_key);
			seq_printf(seq, "\tPartner Mac Address: %pM\n",
				   ad_info.partner_system);
		}
	}
}

/*
 * Print the per-slave section of the bond's seq_file output: interface
 * name, MII status ("up" only for BOND_LINK_UP), link failure count and
 * permanent HW address.  In 802.3ad mode the identifier of the aggregator
 * attached to the slave's port is printed too, or "N/A" if there is none.
 *
 * The owning bond is taken from seq->private.
 */
static void bond_info_show_slave(struct seq_file *seq,
				 const struct slave *slave)
{
	struct bonding *bond = seq->private;

	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
	seq_printf(seq, "MII Status: %s\n",
		   (slave->link == BOND_LINK_UP) ?  "up" : "down");
	seq_printf(seq, "Link Failure Count: %u\n",
		   slave->link_failure_count);

	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);

	if (bond->params.mode == BOND_MODE_8023AD) {
		const struct aggregator *agg
			= SLAVE_AD_INFO(slave).port.aggregator;

		if (agg)
			seq_printf(seq, "Aggregator ID: %d\n",
				   agg->aggregator_identifier);
		else
			seq_puts(seq, "Aggregator ID: N/A\n");
	}
}

/*
 * seq_file ->show callback.  The start token produces the driver version
 * line followed by the master section; any other element is passed to
 * bond_info_show_slave().  Always returns 0.
 */
static int bond_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%s\n", version);
		bond_info_show_master(seq);
	} else
		bond_info_show_slave(seq, v);

	return 0;
}

/* Iterator callbacks handed to seq_open() by bond_info_open(). */
static const struct seq_operations bond_info_seq_ops = {
	.start = bond_info_seq_start,
	.next  = bond_info_seq_next,
	.stop  = bond_info_seq_stop,
	.show  = bond_info_seq_show,
};

/*
 * Open handler for a bond's proc file: starts a seq_file session and, on
 * success, copies the proc entry's data pointer into seq->private so that
 * the show callbacks can find it.  Returns the result of seq_open().
 */
static int bond_info_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	struct proc_dir_entry *proc;
	int    res;

	res = seq_open(file, &bond_info_seq_ops);
	if (!res) {
		/* recover the pointer buried in proc_dir_entry data */
		seq = file->private_data;
		proc = PDE(inode);
		seq->private = proc->data;
	}

	return res;
}

/* File operations of a bond's proc file; everything but open is generic seq_file. */
static const struct file_operations bond_info_fops = {
	.owner   = THIS_MODULE,
	.open    = bond_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Create the proc file for @bond, named after the bond device, inside the
 * bonding proc directory of the device's network namespace.  Nothing is
 * done when that directory does not exist.  On success the name used is
 * saved in bond->proc_file_name (bond_remove_proc_entry() removes the
 * entry under that saved name); on failure only a warning is logged.
 */
static void bond_create_proc_entry(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);

	if (bn->proc_dir) {
		bond->proc_entry = proc_create_data(bond_dev->name,
						    S_IRUGO, bn->proc_dir,
						    &bond_info_fops, bond);
		if (bond->proc_entry == NULL)
			pr_warning("Warning: Cannot create /proc/net/%s/%s\n",
				   DRV_NAME, bond_dev->name);
		else
			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
	}
}

/*
 * Remove the proc file created by bond_create_proc_entry(), using the name
 * recorded at creation time, then clear the recorded name and entry
 * pointer.  No-op when the directory or the entry is missing.
 */
static void bond_remove_proc_entry(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);

	if (bn->proc_dir && bond->proc_entry) {
		remove_proc_entry(bond->proc_file_name, bn->proc_dir);
		memset(bond->proc_file_name, 0, IFNAMSIZ);
		bond->proc_entry = NULL;
	}
}

/* Create the bonding directory under /proc/net, if it doesn't exist yet.
 * A failure to create it is only logged.
 * Caller must hold rtnl_lock.
 */
static void __net_init bond_create_proc_dir(struct bond_net *bn)
{
	if (!bn->proc_dir) {
		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net);
		if (!bn->proc_dir)
			pr_warning("Warning: cannot create /proc/net/%s\n",
				   DRV_NAME);
	}
}

/* Destroy the bonding directory under /proc/net, if empty.
 * Caller must hold rtnl_lock.
3327 */ 3328 static void __net_exit bond_destroy_proc_dir(struct bond_net *bn) 3329 { 3330 if (bn->proc_dir) { 3331 remove_proc_entry(DRV_NAME, bn->net->proc_net); 3332 bn->proc_dir = NULL; 3333 } 3334 } 3335 3336 #else /* !CONFIG_PROC_FS */ 3337 3338 static void bond_create_proc_entry(struct bonding *bond) 3339 { 3340 } 3341 3342 static void bond_remove_proc_entry(struct bonding *bond) 3343 { 3344 } 3345 3346 static inline void bond_create_proc_dir(struct bond_net *bn) 3347 { 3348 } 3349 3350 static inline void bond_destroy_proc_dir(struct bond_net *bn) 3351 { 3352 } 3353 3354 #endif /* CONFIG_PROC_FS */ 3355 3356 3357 /*-------------------------- netdev event handling --------------------------*/ 3358 3359 /* 3360 * Change device name 3361 */ 3362 static int bond_event_changename(struct bonding *bond) 3363 { 3364 bond_remove_proc_entry(bond); 3365 bond_create_proc_entry(bond); 3366 3367 return NOTIFY_DONE; 3368 } 3369 3370 static int bond_master_netdev_event(unsigned long event, 3371 struct net_device *bond_dev) 3372 { 3373 struct bonding *event_bond = netdev_priv(bond_dev); 3374 3375 switch (event) { 3376 case NETDEV_CHANGENAME: 3377 return bond_event_changename(event_bond); 3378 default: 3379 break; 3380 } 3381 3382 return NOTIFY_DONE; 3383 } 3384 3385 static int bond_slave_netdev_event(unsigned long event, 3386 struct net_device *slave_dev) 3387 { 3388 struct net_device *bond_dev = slave_dev->master; 3389 struct bonding *bond = netdev_priv(bond_dev); 3390 3391 switch (event) { 3392 case NETDEV_UNREGISTER: 3393 if (bond_dev) { 3394 if (bond->setup_by_slave) 3395 bond_release_and_destroy(bond_dev, slave_dev); 3396 else 3397 bond_release(bond_dev, slave_dev); 3398 } 3399 break; 3400 case NETDEV_CHANGE: 3401 if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) { 3402 struct slave *slave; 3403 3404 slave = bond_get_slave_by_dev(bond, slave_dev); 3405 if (slave) { 3406 u16 old_speed = slave->speed; 3407 u16 old_duplex = slave->duplex; 3408 3409 
bond_update_speed_duplex(slave); 3410 3411 if (bond_is_lb(bond)) 3412 break; 3413 3414 if (old_speed != slave->speed) 3415 bond_3ad_adapter_speed_changed(slave); 3416 if (old_duplex != slave->duplex) 3417 bond_3ad_adapter_duplex_changed(slave); 3418 } 3419 } 3420 3421 break; 3422 case NETDEV_DOWN: 3423 /* 3424 * ... Or is it this? 3425 */ 3426 break; 3427 case NETDEV_CHANGEMTU: 3428 /* 3429 * TODO: Should slaves be allowed to 3430 * independently alter their MTU? For 3431 * an active-backup bond, slaves need 3432 * not be the same type of device, so 3433 * MTUs may vary. For other modes, 3434 * slaves arguably should have the 3435 * same MTUs. To do this, we'd need to 3436 * take over the slave's change_mtu 3437 * function for the duration of their 3438 * servitude. 3439 */ 3440 break; 3441 case NETDEV_CHANGENAME: 3442 /* 3443 * TODO: handle changing the primary's name 3444 */ 3445 break; 3446 case NETDEV_FEAT_CHANGE: 3447 bond_compute_features(bond); 3448 break; 3449 default: 3450 break; 3451 } 3452 3453 return NOTIFY_DONE; 3454 } 3455 3456 /* 3457 * bond_netdev_event: handle netdev notifier chain events. 3458 * 3459 * This function receives events for the netdev chain. The caller (an 3460 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 3461 * locks for us to safely manipulate the slave devices (RTNL lock, 3462 * dev_probe_lock). 3463 */ 3464 static int bond_netdev_event(struct notifier_block *this, 3465 unsigned long event, void *ptr) 3466 { 3467 struct net_device *event_dev = (struct net_device *)ptr; 3468 3469 pr_debug("event_dev: %s, event: %lx\n", 3470 event_dev ? 
event_dev->name : "None", 3471 event); 3472 3473 if (!(event_dev->priv_flags & IFF_BONDING)) 3474 return NOTIFY_DONE; 3475 3476 if (event_dev->flags & IFF_MASTER) { 3477 pr_debug("IFF_MASTER\n"); 3478 return bond_master_netdev_event(event, event_dev); 3479 } 3480 3481 if (event_dev->flags & IFF_SLAVE) { 3482 pr_debug("IFF_SLAVE\n"); 3483 return bond_slave_netdev_event(event, event_dev); 3484 } 3485 3486 return NOTIFY_DONE; 3487 } 3488 3489 /* 3490 * bond_inetaddr_event: handle inetaddr notifier chain events. 3491 * 3492 * We keep track of device IPs primarily to use as source addresses in 3493 * ARP monitor probes (rather than spewing out broadcasts all the time). 3494 * 3495 * We track one IP for the main device (if it has one), plus one per VLAN. 3496 */ 3497 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3498 { 3499 struct in_ifaddr *ifa = ptr; 3500 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3501 struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); 3502 struct bonding *bond; 3503 struct vlan_entry *vlan; 3504 3505 list_for_each_entry(bond, &bn->dev_list, bond_list) { 3506 if (bond->dev == event_dev) { 3507 switch (event) { 3508 case NETDEV_UP: 3509 bond->master_ip = ifa->ifa_local; 3510 return NOTIFY_OK; 3511 case NETDEV_DOWN: 3512 bond->master_ip = bond_glean_dev_ip(bond->dev); 3513 return NOTIFY_OK; 3514 default: 3515 return NOTIFY_DONE; 3516 } 3517 } 3518 3519 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 3520 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 3521 if (vlan_dev == event_dev) { 3522 switch (event) { 3523 case NETDEV_UP: 3524 vlan->vlan_ip = ifa->ifa_local; 3525 return NOTIFY_OK; 3526 case NETDEV_DOWN: 3527 vlan->vlan_ip = 3528 bond_glean_dev_ip(vlan_dev); 3529 return NOTIFY_OK; 3530 default: 3531 return NOTIFY_DONE; 3532 } 3533 } 3534 } 3535 } 3536 return NOTIFY_DONE; 3537 } 3538 3539 static struct notifier_block bond_netdev_notifier = { 3540 
.notifier_call = bond_netdev_event, 3541 }; 3542 3543 static struct notifier_block bond_inetaddr_notifier = { 3544 .notifier_call = bond_inetaddr_event, 3545 }; 3546 3547 /*-------------------------- Packet type handling ---------------------------*/ 3548 3549 /* register to receive lacpdus on a bond */ 3550 static void bond_register_lacpdu(struct bonding *bond) 3551 { 3552 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3553 3554 /* initialize packet type */ 3555 pk_type->type = PKT_TYPE_LACPDU; 3556 pk_type->dev = bond->dev; 3557 pk_type->func = bond_3ad_lacpdu_recv; 3558 3559 dev_add_pack(pk_type); 3560 } 3561 3562 /* unregister to receive lacpdus on a bond */ 3563 static void bond_unregister_lacpdu(struct bonding *bond) 3564 { 3565 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3566 } 3567 3568 void bond_register_arp(struct bonding *bond) 3569 { 3570 struct packet_type *pt = &bond->arp_mon_pt; 3571 3572 if (pt->type) 3573 return; 3574 3575 pt->type = htons(ETH_P_ARP); 3576 pt->dev = bond->dev; 3577 pt->func = bond_arp_rcv; 3578 dev_add_pack(pt); 3579 } 3580 3581 void bond_unregister_arp(struct bonding *bond) 3582 { 3583 struct packet_type *pt = &bond->arp_mon_pt; 3584 3585 dev_remove_pack(pt); 3586 pt->type = 0; 3587 } 3588 3589 /*---------------------------- Hashing Policies -----------------------------*/ 3590 3591 /* 3592 * Hash for the output device based upon layer 2 and layer 3 data. 
If 3593 * the packet is not IP mimic bond_xmit_hash_policy_l2() 3594 */ 3595 static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) 3596 { 3597 struct ethhdr *data = (struct ethhdr *)skb->data; 3598 struct iphdr *iph = ip_hdr(skb); 3599 3600 if (skb->protocol == htons(ETH_P_IP)) { 3601 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ 3602 (data->h_dest[5] ^ data->h_source[5])) % count; 3603 } 3604 3605 return (data->h_dest[5] ^ data->h_source[5]) % count; 3606 } 3607 3608 /* 3609 * Hash for the output device based upon layer 3 and layer 4 data. If 3610 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3611 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3612 */ 3613 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) 3614 { 3615 struct ethhdr *data = (struct ethhdr *)skb->data; 3616 struct iphdr *iph = ip_hdr(skb); 3617 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); 3618 int layer4_xor = 0; 3619 3620 if (skb->protocol == htons(ETH_P_IP)) { 3621 if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) && 3622 (iph->protocol == IPPROTO_TCP || 3623 iph->protocol == IPPROTO_UDP)) { 3624 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); 3625 } 3626 return (layer4_xor ^ 3627 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3628 3629 } 3630 3631 return (data->h_dest[5] ^ data->h_source[5]) % count; 3632 } 3633 3634 /* 3635 * Hash for the output device based upon layer 2 data 3636 */ 3637 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) 3638 { 3639 struct ethhdr *data = (struct ethhdr *)skb->data; 3640 3641 return (data->h_dest[5] ^ data->h_source[5]) % count; 3642 } 3643 3644 /*-------------------------- Device entry points ----------------------------*/ 3645 3646 static int bond_open(struct net_device *bond_dev) 3647 { 3648 struct bonding *bond = netdev_priv(bond_dev); 3649 3650 bond->kill_timers = 0; 3651 3652 if (bond_is_lb(bond)) { 3653 /* bond_alb_initialize must be called before 
the timer 3654 * is started. 3655 */ 3656 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3657 /* something went wrong - fail the open operation */ 3658 return -ENOMEM; 3659 } 3660 3661 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 3662 queue_delayed_work(bond->wq, &bond->alb_work, 0); 3663 } 3664 3665 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3666 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 3667 queue_delayed_work(bond->wq, &bond->mii_work, 0); 3668 } 3669 3670 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3671 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) 3672 INIT_DELAYED_WORK(&bond->arp_work, 3673 bond_activebackup_arp_mon); 3674 else 3675 INIT_DELAYED_WORK(&bond->arp_work, 3676 bond_loadbalance_arp_mon); 3677 3678 queue_delayed_work(bond->wq, &bond->arp_work, 0); 3679 if (bond->params.arp_validate) 3680 bond_register_arp(bond); 3681 } 3682 3683 if (bond->params.mode == BOND_MODE_8023AD) { 3684 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3685 queue_delayed_work(bond->wq, &bond->ad_work, 0); 3686 /* register to receive LACPDUs */ 3687 bond_register_lacpdu(bond); 3688 bond_3ad_initiate_agg_selection(bond, 1); 3689 } 3690 3691 return 0; 3692 } 3693 3694 static int bond_close(struct net_device *bond_dev) 3695 { 3696 struct bonding *bond = netdev_priv(bond_dev); 3697 3698 if (bond->params.mode == BOND_MODE_8023AD) { 3699 /* Unregister the receive of LACPDUs */ 3700 bond_unregister_lacpdu(bond); 3701 } 3702 3703 if (bond->params.arp_validate) 3704 bond_unregister_arp(bond); 3705 3706 write_lock_bh(&bond->lock); 3707 3708 bond->send_grat_arp = 0; 3709 bond->send_unsol_na = 0; 3710 3711 /* signal timers not to re-arm */ 3712 bond->kill_timers = 1; 3713 3714 write_unlock_bh(&bond->lock); 3715 3716 if (bond->params.miimon) { /* link check interval, in milliseconds. 
*/ 3717 cancel_delayed_work(&bond->mii_work); 3718 } 3719 3720 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3721 cancel_delayed_work(&bond->arp_work); 3722 } 3723 3724 switch (bond->params.mode) { 3725 case BOND_MODE_8023AD: 3726 cancel_delayed_work(&bond->ad_work); 3727 break; 3728 case BOND_MODE_TLB: 3729 case BOND_MODE_ALB: 3730 cancel_delayed_work(&bond->alb_work); 3731 break; 3732 default: 3733 break; 3734 } 3735 3736 3737 if (bond_is_lb(bond)) { 3738 /* Must be called only after all 3739 * slaves have been released 3740 */ 3741 bond_alb_deinitialize(bond); 3742 } 3743 3744 return 0; 3745 } 3746 3747 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3748 { 3749 struct bonding *bond = netdev_priv(bond_dev); 3750 struct net_device_stats *stats = &bond_dev->stats; 3751 struct net_device_stats local_stats; 3752 struct slave *slave; 3753 int i; 3754 3755 memset(&local_stats, 0, sizeof(struct net_device_stats)); 3756 3757 read_lock_bh(&bond->lock); 3758 3759 bond_for_each_slave(bond, slave, i) { 3760 const struct net_device_stats *sstats = dev_get_stats(slave->dev); 3761 3762 local_stats.rx_packets += sstats->rx_packets; 3763 local_stats.rx_bytes += sstats->rx_bytes; 3764 local_stats.rx_errors += sstats->rx_errors; 3765 local_stats.rx_dropped += sstats->rx_dropped; 3766 3767 local_stats.tx_packets += sstats->tx_packets; 3768 local_stats.tx_bytes += sstats->tx_bytes; 3769 local_stats.tx_errors += sstats->tx_errors; 3770 local_stats.tx_dropped += sstats->tx_dropped; 3771 3772 local_stats.multicast += sstats->multicast; 3773 local_stats.collisions += sstats->collisions; 3774 3775 local_stats.rx_length_errors += sstats->rx_length_errors; 3776 local_stats.rx_over_errors += sstats->rx_over_errors; 3777 local_stats.rx_crc_errors += sstats->rx_crc_errors; 3778 local_stats.rx_frame_errors += sstats->rx_frame_errors; 3779 local_stats.rx_fifo_errors += sstats->rx_fifo_errors; 3780 local_stats.rx_missed_errors += 
sstats->rx_missed_errors; 3781 3782 local_stats.tx_aborted_errors += sstats->tx_aborted_errors; 3783 local_stats.tx_carrier_errors += sstats->tx_carrier_errors; 3784 local_stats.tx_fifo_errors += sstats->tx_fifo_errors; 3785 local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3786 local_stats.tx_window_errors += sstats->tx_window_errors; 3787 } 3788 3789 memcpy(stats, &local_stats, sizeof(struct net_device_stats)); 3790 3791 read_unlock_bh(&bond->lock); 3792 3793 return stats; 3794 } 3795 3796 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3797 { 3798 struct net_device *slave_dev = NULL; 3799 struct ifbond k_binfo; 3800 struct ifbond __user *u_binfo = NULL; 3801 struct ifslave k_sinfo; 3802 struct ifslave __user *u_sinfo = NULL; 3803 struct mii_ioctl_data *mii = NULL; 3804 int res = 0; 3805 3806 pr_debug("bond_ioctl: master=%s, cmd=%d\n", bond_dev->name, cmd); 3807 3808 switch (cmd) { 3809 case SIOCGMIIPHY: 3810 mii = if_mii(ifr); 3811 if (!mii) 3812 return -EINVAL; 3813 3814 mii->phy_id = 0; 3815 /* Fall Through */ 3816 case SIOCGMIIREG: 3817 /* 3818 * We do this again just in case we were called by SIOCGMIIREG 3819 * instead of SIOCGMIIPHY. 
3820 */ 3821 mii = if_mii(ifr); 3822 if (!mii) 3823 return -EINVAL; 3824 3825 3826 if (mii->reg_num == 1) { 3827 struct bonding *bond = netdev_priv(bond_dev); 3828 mii->val_out = 0; 3829 read_lock(&bond->lock); 3830 read_lock(&bond->curr_slave_lock); 3831 if (netif_carrier_ok(bond->dev)) 3832 mii->val_out = BMSR_LSTATUS; 3833 3834 read_unlock(&bond->curr_slave_lock); 3835 read_unlock(&bond->lock); 3836 } 3837 3838 return 0; 3839 case BOND_INFO_QUERY_OLD: 3840 case SIOCBONDINFOQUERY: 3841 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3842 3843 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) 3844 return -EFAULT; 3845 3846 res = bond_info_query(bond_dev, &k_binfo); 3847 if (res == 0 && 3848 copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) 3849 return -EFAULT; 3850 3851 return res; 3852 case BOND_SLAVE_INFO_QUERY_OLD: 3853 case SIOCBONDSLAVEINFOQUERY: 3854 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3855 3856 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) 3857 return -EFAULT; 3858 3859 res = bond_slave_info_query(bond_dev, &k_sinfo); 3860 if (res == 0 && 3861 copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) 3862 return -EFAULT; 3863 3864 return res; 3865 default: 3866 /* Go on */ 3867 break; 3868 } 3869 3870 if (!capable(CAP_NET_ADMIN)) 3871 return -EPERM; 3872 3873 slave_dev = dev_get_by_name(dev_net(bond_dev), ifr->ifr_slave); 3874 3875 pr_debug("slave_dev=%p:\n", slave_dev); 3876 3877 if (!slave_dev) 3878 res = -ENODEV; 3879 else { 3880 pr_debug("slave_dev->name=%s:\n", slave_dev->name); 3881 switch (cmd) { 3882 case BOND_ENSLAVE_OLD: 3883 case SIOCBONDENSLAVE: 3884 res = bond_enslave(bond_dev, slave_dev); 3885 break; 3886 case BOND_RELEASE_OLD: 3887 case SIOCBONDRELEASE: 3888 res = bond_release(bond_dev, slave_dev); 3889 break; 3890 case BOND_SETHWADDR_OLD: 3891 case SIOCBONDSETHWADDR: 3892 res = bond_sethwaddr(bond_dev, slave_dev); 3893 break; 3894 case BOND_CHANGE_ACTIVE_OLD: 3895 case SIOCBONDCHANGEACTIVE: 3896 res = 
bond_ioctl_change_active(bond_dev, slave_dev); 3897 break; 3898 default: 3899 res = -EOPNOTSUPP; 3900 } 3901 3902 dev_put(slave_dev); 3903 } 3904 3905 return res; 3906 } 3907 3908 static void bond_set_multicast_list(struct net_device *bond_dev) 3909 { 3910 struct bonding *bond = netdev_priv(bond_dev); 3911 struct dev_mc_list *dmi; 3912 3913 /* 3914 * Do promisc before checking multicast_mode 3915 */ 3916 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) 3917 /* 3918 * FIXME: Need to handle the error when one of the multi-slaves 3919 * encounters error. 3920 */ 3921 bond_set_promiscuity(bond, 1); 3922 3923 3924 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) 3925 bond_set_promiscuity(bond, -1); 3926 3927 3928 /* set allmulti flag to slaves */ 3929 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) 3930 /* 3931 * FIXME: Need to handle the error when one of the multi-slaves 3932 * encounters error. 3933 */ 3934 bond_set_allmulti(bond, 1); 3935 3936 3937 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) 3938 bond_set_allmulti(bond, -1); 3939 3940 3941 read_lock(&bond->lock); 3942 3943 bond->flags = bond_dev->flags; 3944 3945 /* looking for addresses to add to slaves' mc list */ 3946 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3947 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) 3948 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3949 } 3950 3951 /* looking for addresses to delete from slaves' list */ 3952 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3953 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) 3954 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3955 } 3956 3957 /* save master's multicast list */ 3958 bond_mc_list_destroy(bond); 3959 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3960 3961 read_unlock(&bond->lock); 3962 } 3963 3964 static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) 3965 { 3966 struct bonding *bond 
= netdev_priv(dev); 3967 struct slave *slave = bond->first_slave; 3968 3969 if (slave) { 3970 const struct net_device_ops *slave_ops 3971 = slave->dev->netdev_ops; 3972 if (slave_ops->ndo_neigh_setup) 3973 return slave_ops->ndo_neigh_setup(slave->dev, parms); 3974 } 3975 return 0; 3976 } 3977 3978 /* 3979 * Change the MTU of all of a master's slaves to match the master 3980 */ 3981 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3982 { 3983 struct bonding *bond = netdev_priv(bond_dev); 3984 struct slave *slave, *stop_at; 3985 int res = 0; 3986 int i; 3987 3988 pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, 3989 (bond_dev ? bond_dev->name : "None"), new_mtu); 3990 3991 /* Can't hold bond->lock with bh disabled here since 3992 * some base drivers panic. On the other hand we can't 3993 * hold bond->lock without bh disabled because we'll 3994 * deadlock. The only solution is to rely on the fact 3995 * that we're under rtnl_lock here, and the slaves 3996 * list won't change. This doesn't solve the problem 3997 * of setting the slave's MTU while it is 3998 * transmitting, but the assumption is that the base 3999 * driver can handle that. 4000 * 4001 * TODO: figure out a way to safely iterate the slaves 4002 * list, but without holding a lock around the actual 4003 * call to the base driver. 4004 */ 4005 4006 bond_for_each_slave(bond, slave, i) { 4007 pr_debug("s %p s->p %p c_m %p\n", 4008 slave, 4009 slave->prev, 4010 slave->dev->netdev_ops->ndo_change_mtu); 4011 4012 res = dev_set_mtu(slave->dev, new_mtu); 4013 4014 if (res) { 4015 /* If we failed to set the slave's mtu to the new value 4016 * we must abort the operation even in ACTIVE_BACKUP 4017 * mode, because if we allow the backup slaves to have 4018 * different mtu values than the active slave we'll 4019 * need to change their mtu when doing a failover. That 4020 * means changing their mtu from timer context, which 4021 * is probably not a good idea. 
4022 */ 4023 pr_debug("err %d %s\n", res, slave->dev->name); 4024 goto unwind; 4025 } 4026 } 4027 4028 bond_dev->mtu = new_mtu; 4029 4030 return 0; 4031 4032 unwind: 4033 /* unwind from head to the slave that failed */ 4034 stop_at = slave; 4035 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4036 int tmp_res; 4037 4038 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 4039 if (tmp_res) { 4040 pr_debug("unwind err %d dev %s\n", 4041 tmp_res, slave->dev->name); 4042 } 4043 } 4044 4045 return res; 4046 } 4047 4048 /* 4049 * Change HW address 4050 * 4051 * Note that many devices must be down to change the HW address, and 4052 * downing the master releases all slaves. We can make bonds full of 4053 * bonding devices to test this, however. 4054 */ 4055 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4056 { 4057 struct bonding *bond = netdev_priv(bond_dev); 4058 struct sockaddr *sa = addr, tmp_sa; 4059 struct slave *slave, *stop_at; 4060 int res = 0; 4061 int i; 4062 4063 if (bond->params.mode == BOND_MODE_ALB) 4064 return bond_alb_set_mac_address(bond_dev, addr); 4065 4066 4067 pr_debug("bond=%p, name=%s\n", 4068 bond, bond_dev ? bond_dev->name : "None"); 4069 4070 /* 4071 * If fail_over_mac is set to active, do nothing and return 4072 * success. Returning an error causes ifenslave to fail. 4073 */ 4074 if (bond->params.fail_over_mac == BOND_FOM_ACTIVE) 4075 return 0; 4076 4077 if (!is_valid_ether_addr(sa->sa_data)) 4078 return -EADDRNOTAVAIL; 4079 4080 /* Can't hold bond->lock with bh disabled here since 4081 * some base drivers panic. On the other hand we can't 4082 * hold bond->lock without bh disabled because we'll 4083 * deadlock. The only solution is to rely on the fact 4084 * that we're under rtnl_lock here, and the slaves 4085 * list won't change. 
This doesn't solve the problem 4086 * of setting the slave's hw address while it is 4087 * transmitting, but the assumption is that the base 4088 * driver can handle that. 4089 * 4090 * TODO: figure out a way to safely iterate the slaves 4091 * list, but without holding a lock around the actual 4092 * call to the base driver. 4093 */ 4094 4095 bond_for_each_slave(bond, slave, i) { 4096 const struct net_device_ops *slave_ops = slave->dev->netdev_ops; 4097 pr_debug("slave %p %s\n", slave, slave->dev->name); 4098 4099 if (slave_ops->ndo_set_mac_address == NULL) { 4100 res = -EOPNOTSUPP; 4101 pr_debug("EOPNOTSUPP %s\n", slave->dev->name); 4102 goto unwind; 4103 } 4104 4105 res = dev_set_mac_address(slave->dev, addr); 4106 if (res) { 4107 /* TODO: consider downing the slave 4108 * and retry ? 4109 * User should expect communications 4110 * breakage anyway until ARP finish 4111 * updating, so... 4112 */ 4113 pr_debug("err %d %s\n", res, slave->dev->name); 4114 goto unwind; 4115 } 4116 } 4117 4118 /* success */ 4119 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 4120 return 0; 4121 4122 unwind: 4123 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 4124 tmp_sa.sa_family = bond_dev->type; 4125 4126 /* unwind from head to the slave that failed */ 4127 stop_at = slave; 4128 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4129 int tmp_res; 4130 4131 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 4132 if (tmp_res) { 4133 pr_debug("unwind err %d dev %s\n", 4134 tmp_res, slave->dev->name); 4135 } 4136 } 4137 4138 return res; 4139 } 4140 4141 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 4142 { 4143 struct bonding *bond = netdev_priv(bond_dev); 4144 struct slave *slave, *start_at; 4145 int i, slave_no, res = 1; 4146 struct iphdr *iph = ip_hdr(skb); 4147 4148 read_lock(&bond->lock); 4149 4150 if (!BOND_IS_OK(bond)) 4151 goto out; 4152 /* 4153 * Start with the curr_active_slave that 
joined the bond as the 4154 * default for sending IGMP traffic. For failover purposes one 4155 * needs to maintain some consistency for the interface that will 4156 * send the join/membership reports. The curr_active_slave found 4157 * will send all of this type of traffic. 4158 */ 4159 if ((iph->protocol == IPPROTO_IGMP) && 4160 (skb->protocol == htons(ETH_P_IP))) { 4161 4162 read_lock(&bond->curr_slave_lock); 4163 slave = bond->curr_active_slave; 4164 read_unlock(&bond->curr_slave_lock); 4165 4166 if (!slave) 4167 goto out; 4168 } else { 4169 /* 4170 * Concurrent TX may collide on rr_tx_counter; we accept 4171 * that as being rare enough not to justify using an 4172 * atomic op here. 4173 */ 4174 slave_no = bond->rr_tx_counter++ % bond->slave_cnt; 4175 4176 bond_for_each_slave(bond, slave, i) { 4177 slave_no--; 4178 if (slave_no < 0) 4179 break; 4180 } 4181 } 4182 4183 start_at = slave; 4184 bond_for_each_slave_from(bond, slave, i, start_at) { 4185 if (IS_UP(slave->dev) && 4186 (slave->link == BOND_LINK_UP) && 4187 (slave->state == BOND_STATE_ACTIVE)) { 4188 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4189 break; 4190 } 4191 } 4192 4193 out: 4194 if (res) { 4195 /* no suitable interface, frame not sent */ 4196 dev_kfree_skb(skb); 4197 } 4198 read_unlock(&bond->lock); 4199 return NETDEV_TX_OK; 4200 } 4201 4202 4203 /* 4204 * in active-backup mode, we know that bond->curr_active_slave is always valid if 4205 * the bond has a usable interface. 
4206 */ 4207 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 4208 { 4209 struct bonding *bond = netdev_priv(bond_dev); 4210 int res = 1; 4211 4212 read_lock(&bond->lock); 4213 read_lock(&bond->curr_slave_lock); 4214 4215 if (!BOND_IS_OK(bond)) 4216 goto out; 4217 4218 if (!bond->curr_active_slave) 4219 goto out; 4220 4221 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 4222 4223 out: 4224 if (res) 4225 /* no suitable interface, frame not sent */ 4226 dev_kfree_skb(skb); 4227 4228 read_unlock(&bond->curr_slave_lock); 4229 read_unlock(&bond->lock); 4230 return NETDEV_TX_OK; 4231 } 4232 4233 /* 4234 * In bond_xmit_xor() , we determine the output device by using a pre- 4235 * determined xmit_hash_policy(), If the selected device is not enabled, 4236 * find the next active slave. 4237 */ 4238 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 4239 { 4240 struct bonding *bond = netdev_priv(bond_dev); 4241 struct slave *slave, *start_at; 4242 int slave_no; 4243 int i; 4244 int res = 1; 4245 4246 read_lock(&bond->lock); 4247 4248 if (!BOND_IS_OK(bond)) 4249 goto out; 4250 4251 slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt); 4252 4253 bond_for_each_slave(bond, slave, i) { 4254 slave_no--; 4255 if (slave_no < 0) 4256 break; 4257 } 4258 4259 start_at = slave; 4260 4261 bond_for_each_slave_from(bond, slave, i, start_at) { 4262 if (IS_UP(slave->dev) && 4263 (slave->link == BOND_LINK_UP) && 4264 (slave->state == BOND_STATE_ACTIVE)) { 4265 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4266 break; 4267 } 4268 } 4269 4270 out: 4271 if (res) { 4272 /* no suitable interface, frame not sent */ 4273 dev_kfree_skb(skb); 4274 } 4275 read_unlock(&bond->lock); 4276 return NETDEV_TX_OK; 4277 } 4278 4279 /* 4280 * in broadcast mode, we send everything to all usable interfaces. 
4281 */ 4282 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) 4283 { 4284 struct bonding *bond = netdev_priv(bond_dev); 4285 struct slave *slave, *start_at; 4286 struct net_device *tx_dev = NULL; 4287 int i; 4288 int res = 1; 4289 4290 read_lock(&bond->lock); 4291 4292 if (!BOND_IS_OK(bond)) 4293 goto out; 4294 4295 read_lock(&bond->curr_slave_lock); 4296 start_at = bond->curr_active_slave; 4297 read_unlock(&bond->curr_slave_lock); 4298 4299 if (!start_at) 4300 goto out; 4301 4302 bond_for_each_slave_from(bond, slave, i, start_at) { 4303 if (IS_UP(slave->dev) && 4304 (slave->link == BOND_LINK_UP) && 4305 (slave->state == BOND_STATE_ACTIVE)) { 4306 if (tx_dev) { 4307 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 4308 if (!skb2) { 4309 pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n", 4310 bond_dev->name); 4311 continue; 4312 } 4313 4314 res = bond_dev_queue_xmit(bond, skb2, tx_dev); 4315 if (res) { 4316 dev_kfree_skb(skb2); 4317 continue; 4318 } 4319 } 4320 tx_dev = slave->dev; 4321 } 4322 } 4323 4324 if (tx_dev) 4325 res = bond_dev_queue_xmit(bond, skb, tx_dev); 4326 4327 out: 4328 if (res) 4329 /* no suitable interface, frame not sent */ 4330 dev_kfree_skb(skb); 4331 4332 /* frame sent to all suitable interfaces */ 4333 read_unlock(&bond->lock); 4334 return NETDEV_TX_OK; 4335 } 4336 4337 /*------------------------- Device initialization ---------------------------*/ 4338 4339 static void bond_set_xmit_hash_policy(struct bonding *bond) 4340 { 4341 switch (bond->params.xmit_policy) { 4342 case BOND_XMIT_POLICY_LAYER23: 4343 bond->xmit_hash_policy = bond_xmit_hash_policy_l23; 4344 break; 4345 case BOND_XMIT_POLICY_LAYER34: 4346 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4347 break; 4348 case BOND_XMIT_POLICY_LAYER2: 4349 default: 4350 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4351 break; 4352 } 4353 } 4354 4355 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 4356 { 
4357 const struct bonding *bond = netdev_priv(dev); 4358 4359 switch (bond->params.mode) { 4360 case BOND_MODE_ROUNDROBIN: 4361 return bond_xmit_roundrobin(skb, dev); 4362 case BOND_MODE_ACTIVEBACKUP: 4363 return bond_xmit_activebackup(skb, dev); 4364 case BOND_MODE_XOR: 4365 return bond_xmit_xor(skb, dev); 4366 case BOND_MODE_BROADCAST: 4367 return bond_xmit_broadcast(skb, dev); 4368 case BOND_MODE_8023AD: 4369 return bond_3ad_xmit_xor(skb, dev); 4370 case BOND_MODE_ALB: 4371 case BOND_MODE_TLB: 4372 return bond_alb_xmit(skb, dev); 4373 default: 4374 /* Should never happen, mode already checked */ 4375 pr_err("%s: Error: Unknown bonding mode %d\n", 4376 dev->name, bond->params.mode); 4377 WARN_ON_ONCE(1); 4378 dev_kfree_skb(skb); 4379 return NETDEV_TX_OK; 4380 } 4381 } 4382 4383 4384 /* 4385 * set bond mode specific net device operations 4386 */ 4387 void bond_set_mode_ops(struct bonding *bond, int mode) 4388 { 4389 struct net_device *bond_dev = bond->dev; 4390 4391 switch (mode) { 4392 case BOND_MODE_ROUNDROBIN: 4393 break; 4394 case BOND_MODE_ACTIVEBACKUP: 4395 break; 4396 case BOND_MODE_XOR: 4397 bond_set_xmit_hash_policy(bond); 4398 break; 4399 case BOND_MODE_BROADCAST: 4400 break; 4401 case BOND_MODE_8023AD: 4402 bond_set_master_3ad_flags(bond); 4403 bond_set_xmit_hash_policy(bond); 4404 break; 4405 case BOND_MODE_ALB: 4406 bond_set_master_alb_flags(bond); 4407 /* FALLTHRU */ 4408 case BOND_MODE_TLB: 4409 break; 4410 default: 4411 /* Should never happen, mode already checked */ 4412 pr_err("%s: Error: Unknown bonding mode %d\n", 4413 bond_dev->name, mode); 4414 break; 4415 } 4416 } 4417 4418 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, 4419 struct ethtool_drvinfo *drvinfo) 4420 { 4421 strncpy(drvinfo->driver, DRV_NAME, 32); 4422 strncpy(drvinfo->version, DRV_VERSION, 32); 4423 snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); 4424 } 4425 4426 static const struct ethtool_ops bond_ethtool_ops = { 4427 .get_drvinfo = 
bond_ethtool_get_drvinfo, 4428 .get_link = ethtool_op_get_link, 4429 .get_tx_csum = ethtool_op_get_tx_csum, 4430 .get_sg = ethtool_op_get_sg, 4431 .get_tso = ethtool_op_get_tso, 4432 .get_ufo = ethtool_op_get_ufo, 4433 .get_flags = ethtool_op_get_flags, 4434 }; 4435 4436 static const struct net_device_ops bond_netdev_ops = { 4437 .ndo_init = bond_init, 4438 .ndo_uninit = bond_uninit, 4439 .ndo_open = bond_open, 4440 .ndo_stop = bond_close, 4441 .ndo_start_xmit = bond_start_xmit, 4442 .ndo_get_stats = bond_get_stats, 4443 .ndo_do_ioctl = bond_do_ioctl, 4444 .ndo_set_multicast_list = bond_set_multicast_list, 4445 .ndo_change_mtu = bond_change_mtu, 4446 .ndo_set_mac_address = bond_set_mac_address, 4447 .ndo_neigh_setup = bond_neigh_setup, 4448 .ndo_vlan_rx_register = bond_vlan_rx_register, 4449 .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, 4450 .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, 4451 }; 4452 4453 static void bond_destructor(struct net_device *bond_dev) 4454 { 4455 struct bonding *bond = netdev_priv(bond_dev); 4456 if (bond->wq) 4457 destroy_workqueue(bond->wq); 4458 free_netdev(bond_dev); 4459 } 4460 4461 static void bond_setup(struct net_device *bond_dev) 4462 { 4463 struct bonding *bond = netdev_priv(bond_dev); 4464 4465 /* initialize rwlocks */ 4466 rwlock_init(&bond->lock); 4467 rwlock_init(&bond->curr_slave_lock); 4468 4469 bond->params = bonding_defaults; 4470 4471 /* Initialize pointers */ 4472 bond->dev = bond_dev; 4473 INIT_LIST_HEAD(&bond->vlan_list); 4474 4475 /* Initialize the device entry points */ 4476 ether_setup(bond_dev); 4477 bond_dev->netdev_ops = &bond_netdev_ops; 4478 bond_dev->ethtool_ops = &bond_ethtool_ops; 4479 bond_set_mode_ops(bond, bond->params.mode); 4480 4481 bond_dev->destructor = bond_destructor; 4482 4483 /* Initialize the device options */ 4484 bond_dev->tx_queue_len = 0; 4485 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; 4486 bond_dev->priv_flags |= IFF_BONDING; 4487 bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 4488 4489 
if (bond->params.arp_interval) 4490 bond_dev->priv_flags |= IFF_MASTER_ARPMON; 4491 4492 /* At first, we block adding VLANs. That's the only way to 4493 * prevent problems that occur when adding VLANs over an 4494 * empty bond. The block will be removed once non-challenged 4495 * slaves are enslaved. 4496 */ 4497 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 4498 4499 /* don't acquire bond device's netif_tx_lock when 4500 * transmitting */ 4501 bond_dev->features |= NETIF_F_LLTX; 4502 4503 /* By default, we declare the bond to be fully 4504 * VLAN hardware accelerated capable. Special 4505 * care is taken in the various xmit functions 4506 * when there are slaves that are not hw accel 4507 * capable 4508 */ 4509 bond_dev->features |= (NETIF_F_HW_VLAN_TX | 4510 NETIF_F_HW_VLAN_RX | 4511 NETIF_F_HW_VLAN_FILTER); 4512 4513 } 4514 4515 static void bond_work_cancel_all(struct bonding *bond) 4516 { 4517 write_lock_bh(&bond->lock); 4518 bond->kill_timers = 1; 4519 write_unlock_bh(&bond->lock); 4520 4521 if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) 4522 cancel_delayed_work(&bond->mii_work); 4523 4524 if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) 4525 cancel_delayed_work(&bond->arp_work); 4526 4527 if (bond->params.mode == BOND_MODE_ALB && 4528 delayed_work_pending(&bond->alb_work)) 4529 cancel_delayed_work(&bond->alb_work); 4530 4531 if (bond->params.mode == BOND_MODE_8023AD && 4532 delayed_work_pending(&bond->ad_work)) 4533 cancel_delayed_work(&bond->ad_work); 4534 } 4535 4536 /* 4537 * Destroy a bonding device. 4538 * Must be under rtnl_lock when this function is called. 
4539 */ 4540 static void bond_uninit(struct net_device *bond_dev) 4541 { 4542 struct bonding *bond = netdev_priv(bond_dev); 4543 4544 /* Release the bonded slaves */ 4545 bond_release_all(bond_dev); 4546 4547 list_del(&bond->bond_list); 4548 4549 bond_work_cancel_all(bond); 4550 4551 bond_remove_proc_entry(bond); 4552 4553 netif_addr_lock_bh(bond_dev); 4554 bond_mc_list_destroy(bond); 4555 netif_addr_unlock_bh(bond_dev); 4556 } 4557 4558 /*------------------------- Module initialization ---------------------------*/ 4559 4560 /* 4561 * Convert string input module parms. Accept either the 4562 * number of the mode or its string name. A bit complicated because 4563 * some mode names are substrings of other names, and calls from sysfs 4564 * may have whitespace in the name (trailing newlines, for example). 4565 */ 4566 int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) 4567 { 4568 int modeint = -1, i, rv; 4569 char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; 4570 4571 for (p = (char *)buf; *p; p++) 4572 if (!(isdigit(*p) || isspace(*p))) 4573 break; 4574 4575 if (*p) 4576 rv = sscanf(buf, "%20s", modestr); 4577 else 4578 rv = sscanf(buf, "%d", &modeint); 4579 4580 if (!rv) 4581 return -1; 4582 4583 for (i = 0; tbl[i].modename; i++) { 4584 if (modeint == tbl[i].mode) 4585 return tbl[i].mode; 4586 if (strcmp(modestr, tbl[i].modename) == 0) 4587 return tbl[i].mode; 4588 } 4589 4590 return -1; 4591 } 4592 4593 static int bond_check_params(struct bond_params *params) 4594 { 4595 int arp_validate_value, fail_over_mac_value, primary_reselect_value; 4596 4597 /* 4598 * Convert string parameters. 4599 */ 4600 if (mode) { 4601 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 4602 if (bond_mode == -1) { 4603 pr_err("Error: Invalid bonding mode \"%s\"\n", 4604 mode == NULL ? 
"NULL" : mode); 4605 return -EINVAL; 4606 } 4607 } 4608 4609 if (xmit_hash_policy) { 4610 if ((bond_mode != BOND_MODE_XOR) && 4611 (bond_mode != BOND_MODE_8023AD)) { 4612 pr_info("xmit_hash_policy param is irrelevant in mode %s\n", 4613 bond_mode_name(bond_mode)); 4614 } else { 4615 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 4616 xmit_hashtype_tbl); 4617 if (xmit_hashtype == -1) { 4618 pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", 4619 xmit_hash_policy == NULL ? "NULL" : 4620 xmit_hash_policy); 4621 return -EINVAL; 4622 } 4623 } 4624 } 4625 4626 if (lacp_rate) { 4627 if (bond_mode != BOND_MODE_8023AD) { 4628 pr_info("lacp_rate param is irrelevant in mode %s\n", 4629 bond_mode_name(bond_mode)); 4630 } else { 4631 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4632 if (lacp_fast == -1) { 4633 pr_err("Error: Invalid lacp rate \"%s\"\n", 4634 lacp_rate == NULL ? "NULL" : lacp_rate); 4635 return -EINVAL; 4636 } 4637 } 4638 } 4639 4640 if (ad_select) { 4641 params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); 4642 if (params->ad_select == -1) { 4643 pr_err("Error: Invalid ad_select \"%s\"\n", 4644 ad_select == NULL ? 
"NULL" : ad_select); 4645 return -EINVAL; 4646 } 4647 4648 if (bond_mode != BOND_MODE_8023AD) { 4649 pr_warning("ad_select param only affects 802.3ad mode\n"); 4650 } 4651 } else { 4652 params->ad_select = BOND_AD_STABLE; 4653 } 4654 4655 if (max_bonds < 0) { 4656 pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4657 max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4658 max_bonds = BOND_DEFAULT_MAX_BONDS; 4659 } 4660 4661 if (miimon < 0) { 4662 pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", 4663 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4664 miimon = BOND_LINK_MON_INTERV; 4665 } 4666 4667 if (updelay < 0) { 4668 pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", 4669 updelay, INT_MAX); 4670 updelay = 0; 4671 } 4672 4673 if (downdelay < 0) { 4674 pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", 4675 downdelay, INT_MAX); 4676 downdelay = 0; 4677 } 4678 4679 if ((use_carrier != 0) && (use_carrier != 1)) { 4680 pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", 4681 use_carrier); 4682 use_carrier = 1; 4683 } 4684 4685 if (num_grat_arp < 0 || num_grat_arp > 255) { 4686 pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1 \n", 4687 num_grat_arp); 4688 num_grat_arp = 1; 4689 } 4690 4691 if (num_unsol_na < 0 || num_unsol_na > 255) { 4692 pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1 \n", 4693 num_unsol_na); 4694 num_unsol_na = 1; 4695 } 4696 4697 /* reset values for 802.3ad */ 4698 if (bond_mode == BOND_MODE_8023AD) { 4699 if (!miimon) { 4700 pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); 4701 pr_warning("Forcing miimon to 100msec\n"); 4702 miimon = 100; 
4703 } 4704 } 4705 4706 /* reset values for TLB/ALB */ 4707 if ((bond_mode == BOND_MODE_TLB) || 4708 (bond_mode == BOND_MODE_ALB)) { 4709 if (!miimon) { 4710 pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n"); 4711 pr_warning("Forcing miimon to 100msec\n"); 4712 miimon = 100; 4713 } 4714 } 4715 4716 if (bond_mode == BOND_MODE_ALB) { 4717 pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n", 4718 updelay); 4719 } 4720 4721 if (!miimon) { 4722 if (updelay || downdelay) { 4723 /* just warn the user the up/down delay will have 4724 * no effect since miimon is zero... 4725 */ 4726 pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", 4727 updelay, downdelay); 4728 } 4729 } else { 4730 /* don't allow arp monitoring */ 4731 if (arp_interval) { 4732 pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", 4733 miimon, arp_interval); 4734 arp_interval = 0; 4735 } 4736 4737 if ((updelay % miimon) != 0) { 4738 pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", 4739 updelay, miimon, 4740 (updelay / miimon) * miimon); 4741 } 4742 4743 updelay /= miimon; 4744 4745 if ((downdelay % miimon) != 0) { 4746 pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", 4747 downdelay, miimon, 4748 (downdelay / miimon) * miimon); 4749 } 4750 4751 downdelay /= miimon; 4752 } 4753 4754 if (arp_interval < 0) { 4755 pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", 4756 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4757 
arp_interval = BOND_LINK_ARP_INTERV; 4758 } 4759 4760 for (arp_ip_count = 0; 4761 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4762 arp_ip_count++) { 4763 /* not complete check, but should be good enough to 4764 catch mistakes */ 4765 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4766 pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", 4767 arp_ip_target[arp_ip_count]); 4768 arp_interval = 0; 4769 } else { 4770 __be32 ip = in_aton(arp_ip_target[arp_ip_count]); 4771 arp_target[arp_ip_count] = ip; 4772 } 4773 } 4774 4775 if (arp_interval && !arp_ip_count) { 4776 /* don't allow arping if no arp_ip_target given... */ 4777 pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", 4778 arp_interval); 4779 arp_interval = 0; 4780 } 4781 4782 if (arp_validate) { 4783 if (bond_mode != BOND_MODE_ACTIVEBACKUP) { 4784 pr_err("arp_validate only supported in active-backup mode\n"); 4785 return -EINVAL; 4786 } 4787 if (!arp_interval) { 4788 pr_err("arp_validate requires arp_interval\n"); 4789 return -EINVAL; 4790 } 4791 4792 arp_validate_value = bond_parse_parm(arp_validate, 4793 arp_validate_tbl); 4794 if (arp_validate_value == -1) { 4795 pr_err("Error: invalid arp_validate \"%s\"\n", 4796 arp_validate == NULL ? 
"NULL" : arp_validate); 4797 return -EINVAL; 4798 } 4799 } else 4800 arp_validate_value = 0; 4801 4802 if (miimon) { 4803 pr_info("MII link monitoring set to %d ms\n", miimon); 4804 } else if (arp_interval) { 4805 int i; 4806 4807 pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", 4808 arp_interval, 4809 arp_validate_tbl[arp_validate_value].modename, 4810 arp_ip_count); 4811 4812 for (i = 0; i < arp_ip_count; i++) 4813 pr_info(" %s", arp_ip_target[i]); 4814 4815 pr_info("\n"); 4816 4817 } else if (max_bonds) { 4818 /* miimon and arp_interval not set, we need one so things 4819 * work as expected, see bonding.txt for details 4820 */ 4821 pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); 4822 } 4823 4824 if (primary && !USES_PRIMARY(bond_mode)) { 4825 /* currently, using a primary only makes sense 4826 * in active backup, TLB or ALB modes 4827 */ 4828 pr_warning("Warning: %s primary device specified but has no effect in %s mode\n", 4829 primary, bond_mode_name(bond_mode)); 4830 primary = NULL; 4831 } 4832 4833 if (primary && primary_reselect) { 4834 primary_reselect_value = bond_parse_parm(primary_reselect, 4835 pri_reselect_tbl); 4836 if (primary_reselect_value == -1) { 4837 pr_err("Error: Invalid primary_reselect \"%s\"\n", 4838 primary_reselect == 4839 NULL ? "NULL" : primary_reselect); 4840 return -EINVAL; 4841 } 4842 } else { 4843 primary_reselect_value = BOND_PRI_RESELECT_ALWAYS; 4844 } 4845 4846 if (fail_over_mac) { 4847 fail_over_mac_value = bond_parse_parm(fail_over_mac, 4848 fail_over_mac_tbl); 4849 if (fail_over_mac_value == -1) { 4850 pr_err("Error: invalid fail_over_mac \"%s\"\n", 4851 arp_validate == NULL ? 
"NULL" : arp_validate); 4852 return -EINVAL; 4853 } 4854 4855 if (bond_mode != BOND_MODE_ACTIVEBACKUP) 4856 pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); 4857 } else { 4858 fail_over_mac_value = BOND_FOM_NONE; 4859 } 4860 4861 /* fill params struct with the proper values */ 4862 params->mode = bond_mode; 4863 params->xmit_policy = xmit_hashtype; 4864 params->miimon = miimon; 4865 params->num_grat_arp = num_grat_arp; 4866 params->num_unsol_na = num_unsol_na; 4867 params->arp_interval = arp_interval; 4868 params->arp_validate = arp_validate_value; 4869 params->updelay = updelay; 4870 params->downdelay = downdelay; 4871 params->use_carrier = use_carrier; 4872 params->lacp_fast = lacp_fast; 4873 params->primary[0] = 0; 4874 params->primary_reselect = primary_reselect_value; 4875 params->fail_over_mac = fail_over_mac_value; 4876 4877 if (primary) { 4878 strncpy(params->primary, primary, IFNAMSIZ); 4879 params->primary[IFNAMSIZ - 1] = 0; 4880 } 4881 4882 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4883 4884 return 0; 4885 } 4886 4887 static struct lock_class_key bonding_netdev_xmit_lock_key; 4888 static struct lock_class_key bonding_netdev_addr_lock_key; 4889 4890 static void bond_set_lockdep_class_one(struct net_device *dev, 4891 struct netdev_queue *txq, 4892 void *_unused) 4893 { 4894 lockdep_set_class(&txq->_xmit_lock, 4895 &bonding_netdev_xmit_lock_key); 4896 } 4897 4898 static void bond_set_lockdep_class(struct net_device *dev) 4899 { 4900 lockdep_set_class(&dev->addr_list_lock, 4901 &bonding_netdev_addr_lock_key); 4902 netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); 4903 } 4904 4905 /* 4906 * Called from registration process 4907 */ 4908 static int bond_init(struct net_device *bond_dev) 4909 { 4910 struct bonding *bond = netdev_priv(bond_dev); 4911 struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); 4912 4913 pr_debug("Begin bond_init for %s\n", bond_dev->name); 4914 4915 bond->wq = 
create_singlethread_workqueue(bond_dev->name); 4916 if (!bond->wq) 4917 return -ENOMEM; 4918 4919 bond_set_lockdep_class(bond_dev); 4920 4921 netif_carrier_off(bond_dev); 4922 4923 bond_create_proc_entry(bond); 4924 list_add_tail(&bond->bond_list, &bn->dev_list); 4925 4926 bond_prepare_sysfs_group(bond); 4927 return 0; 4928 } 4929 4930 static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) 4931 { 4932 if (tb[IFLA_ADDRESS]) { 4933 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 4934 return -EINVAL; 4935 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 4936 return -EADDRNOTAVAIL; 4937 } 4938 return 0; 4939 } 4940 4941 static struct rtnl_link_ops bond_link_ops __read_mostly = { 4942 .kind = "bond", 4943 .priv_size = sizeof(struct bonding), 4944 .setup = bond_setup, 4945 .validate = bond_validate, 4946 }; 4947 4948 /* Create a new bond based on the specified name and bonding parameters. 4949 * If name is NULL, obtain a suitable "bond%d" name for us. 4950 * Caller must NOT hold rtnl_lock; we need to release it here before we 4951 * set up our sysfs entries. 4952 */ 4953 int bond_create(struct net *net, const char *name) 4954 { 4955 struct net_device *bond_dev; 4956 int res; 4957 4958 rtnl_lock(); 4959 4960 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", 4961 bond_setup); 4962 if (!bond_dev) { 4963 pr_err("%s: eek! 
can't alloc netdev!\n", name); 4964 rtnl_unlock(); 4965 return -ENOMEM; 4966 } 4967 4968 dev_net_set(bond_dev, net); 4969 bond_dev->rtnl_link_ops = &bond_link_ops; 4970 4971 if (!name) { 4972 res = dev_alloc_name(bond_dev, "bond%d"); 4973 if (res < 0) 4974 goto out; 4975 } 4976 4977 res = register_netdevice(bond_dev); 4978 4979 out: 4980 rtnl_unlock(); 4981 if (res < 0) 4982 bond_destructor(bond_dev); 4983 return res; 4984 } 4985 4986 static int __net_init bond_net_init(struct net *net) 4987 { 4988 struct bond_net *bn = net_generic(net, bond_net_id); 4989 4990 bn->net = net; 4991 INIT_LIST_HEAD(&bn->dev_list); 4992 4993 bond_create_proc_dir(bn); 4994 4995 return 0; 4996 } 4997 4998 static void __net_exit bond_net_exit(struct net *net) 4999 { 5000 struct bond_net *bn = net_generic(net, bond_net_id); 5001 5002 bond_destroy_proc_dir(bn); 5003 } 5004 5005 static struct pernet_operations bond_net_ops = { 5006 .init = bond_net_init, 5007 .exit = bond_net_exit, 5008 .id = &bond_net_id, 5009 .size = sizeof(struct bond_net), 5010 }; 5011 5012 static int __init bonding_init(void) 5013 { 5014 int i; 5015 int res; 5016 5017 pr_info("%s", version); 5018 5019 res = bond_check_params(&bonding_defaults); 5020 if (res) 5021 goto out; 5022 5023 res = register_pernet_subsys(&bond_net_ops); 5024 if (res) 5025 goto out; 5026 5027 res = rtnl_link_register(&bond_link_ops); 5028 if (res) 5029 goto err_link; 5030 5031 for (i = 0; i < max_bonds; i++) { 5032 res = bond_create(&init_net, NULL); 5033 if (res) 5034 goto err; 5035 } 5036 5037 res = bond_create_sysfs(); 5038 if (res) 5039 goto err; 5040 5041 register_netdevice_notifier(&bond_netdev_notifier); 5042 register_inetaddr_notifier(&bond_inetaddr_notifier); 5043 bond_register_ipv6_notifier(); 5044 out: 5045 return res; 5046 err: 5047 rtnl_link_unregister(&bond_link_ops); 5048 err_link: 5049 unregister_pernet_subsys(&bond_net_ops); 5050 goto out; 5051 5052 } 5053 5054 static void __exit bonding_exit(void) 5055 { 5056 
unregister_netdevice_notifier(&bond_netdev_notifier); 5057 unregister_inetaddr_notifier(&bond_inetaddr_notifier); 5058 bond_unregister_ipv6_notifier(); 5059 5060 bond_destroy_sysfs(); 5061 5062 rtnl_link_unregister(&bond_link_ops); 5063 unregister_pernet_subsys(&bond_net_ops); 5064 } 5065 5066 module_init(bonding_init); 5067 module_exit(bonding_exit); 5068 MODULE_LICENSE("GPL"); 5069 MODULE_VERSION(DRV_VERSION); 5070 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 5071 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 5072 MODULE_ALIAS_RTNL_LINK("bond"); 5073