1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 
31 * 32 */ 33 34 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 35 36 #include <linux/kernel.h> 37 #include <linux/module.h> 38 #include <linux/types.h> 39 #include <linux/fcntl.h> 40 #include <linux/interrupt.h> 41 #include <linux/ptrace.h> 42 #include <linux/ioport.h> 43 #include <linux/in.h> 44 #include <net/ip.h> 45 #include <linux/ip.h> 46 #include <linux/tcp.h> 47 #include <linux/udp.h> 48 #include <linux/slab.h> 49 #include <linux/string.h> 50 #include <linux/init.h> 51 #include <linux/timer.h> 52 #include <linux/socket.h> 53 #include <linux/ctype.h> 54 #include <linux/inet.h> 55 #include <linux/bitops.h> 56 #include <linux/io.h> 57 #include <asm/dma.h> 58 #include <linux/uaccess.h> 59 #include <linux/errno.h> 60 #include <linux/netdevice.h> 61 #include <linux/inetdevice.h> 62 #include <linux/igmp.h> 63 #include <linux/etherdevice.h> 64 #include <linux/skbuff.h> 65 #include <net/sock.h> 66 #include <linux/rtnetlink.h> 67 #include <linux/smp.h> 68 #include <linux/if_ether.h> 69 #include <net/arp.h> 70 #include <linux/mii.h> 71 #include <linux/ethtool.h> 72 #include <linux/if_vlan.h> 73 #include <linux/if_bonding.h> 74 #include <linux/jiffies.h> 75 #include <linux/preempt.h> 76 #include <net/route.h> 77 #include <net/net_namespace.h> 78 #include <net/netns/generic.h> 79 #include <net/pkt_sched.h> 80 #include <linux/rculist.h> 81 #include <net/flow_keys.h> 82 #include <linux/reciprocal_div.h> 83 #include "bonding.h" 84 #include "bond_3ad.h" 85 #include "bond_alb.h" 86 87 /*---------------------------- Module parameters ----------------------------*/ 88 89 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 90 #define BOND_LINK_MON_INTERV 0 91 #define BOND_LINK_ARP_INTERV 0 92 93 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 94 static int tx_queues = BOND_DEFAULT_TX_QUEUES; 95 static int num_peer_notif = 1; 96 static int miimon = BOND_LINK_MON_INTERV; 97 static int updelay; 98 static int downdelay; 99 static int use_carrier = 1; 100 static char *mode; 101 static char *primary; 102 static char *primary_reselect; 103 static char *lacp_rate; 104 static int min_links; 105 static char *ad_select; 106 static char *xmit_hash_policy; 107 static int arp_interval = BOND_LINK_ARP_INTERV; 108 static char *arp_ip_target[BOND_MAX_ARP_TARGETS]; 109 static char *arp_validate; 110 static char *arp_all_targets; 111 static char *fail_over_mac; 112 static int all_slaves_active; 113 static struct bond_params bonding_defaults; 114 static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; 115 static int packets_per_slave = 1; 116 static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; 117 118 module_param(max_bonds, int, 0); 119 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 120 module_param(tx_queues, int, 0); 121 MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)"); 122 module_param_named(num_grat_arp, num_peer_notif, int, 0644); 123 MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on " 124 "failover event (alias of num_unsol_na)"); 125 module_param_named(num_unsol_na, num_peer_notif, int, 0644); 126 MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on " 127 "failover event (alias of num_grat_arp)"); 128 module_param(miimon, int, 0); 129 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 130 module_param(updelay, int, 0); 131 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 132 module_param(downdelay, int, 0); 133 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 134 "in milliseconds"); 135 module_param(use_carrier, int, 0); 136 
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 137 "0 for off, 1 for on (default)"); 138 module_param(mode, charp, 0); 139 MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, " 140 "1 for active-backup, 2 for balance-xor, " 141 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 142 "6 for balance-alb"); 143 module_param(primary, charp, 0); 144 MODULE_PARM_DESC(primary, "Primary network device to use"); 145 module_param(primary_reselect, charp, 0); 146 MODULE_PARM_DESC(primary_reselect, "Reselect primary slave " 147 "once it comes up; " 148 "0 for always (default), " 149 "1 for only if speed of primary is " 150 "better, " 151 "2 for only on active slave " 152 "failure"); 153 module_param(lacp_rate, charp, 0); 154 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " 155 "0 for slow, 1 for fast"); 156 module_param(ad_select, charp, 0); 157 MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " 158 "0 for stable (default), 1 for bandwidth, " 159 "2 for count"); 160 module_param(min_links, int, 0); 161 MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier"); 162 163 module_param(xmit_hash_policy, charp, 0); 164 MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " 165 "0 for layer 2 (default), 1 for layer 3+4, " 166 "2 for layer 2+3, 3 for encap layer 2+3, " 167 "4 for encap layer 3+4"); 168 module_param(arp_interval, int, 0); 169 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 170 module_param_array(arp_ip_target, charp, NULL, 0); 171 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 172 module_param(arp_validate, charp, 0); 173 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; " 174 "0 for none (default), 1 for active, " 175 "2 for backup, 3 for all"); 176 module_param(arp_all_targets, charp, 0); 177 MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any 
(default), 1 for all"); 178 module_param(fail_over_mac, charp, 0); 179 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to " 180 "the same MAC; 0 for none (default), " 181 "1 for active, 2 for follow"); 182 module_param(all_slaves_active, int, 0); 183 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface" 184 "by setting active flag for all slaves; " 185 "0 for never (default), 1 for always."); 186 module_param(resend_igmp, int, 0); 187 MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on " 188 "link failure"); 189 module_param(packets_per_slave, int, 0); 190 MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " 191 "mode; 0 for a random slave, 1 packet per " 192 "slave (default), >1 packets per slave."); 193 module_param(lp_interval, uint, 0); 194 MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " 195 "the bonding driver sends learning packets to " 196 "each slaves peer switch. 
The default is 1."); 197 198 /*----------------------------- Global variables ----------------------------*/ 199 200 #ifdef CONFIG_NET_POLL_CONTROLLER 201 atomic_t netpoll_block_tx = ATOMIC_INIT(0); 202 #endif 203 204 int bond_net_id __read_mostly; 205 206 static __be32 arp_target[BOND_MAX_ARP_TARGETS]; 207 static int arp_ip_count; 208 static int bond_mode = BOND_MODE_ROUNDROBIN; 209 static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; 210 static int lacp_fast; 211 212 const struct bond_parm_tbl bond_lacp_tbl[] = { 213 { "slow", AD_LACP_SLOW}, 214 { "fast", AD_LACP_FAST}, 215 { NULL, -1}, 216 }; 217 218 const struct bond_parm_tbl bond_mode_tbl[] = { 219 { "balance-rr", BOND_MODE_ROUNDROBIN}, 220 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 221 { "balance-xor", BOND_MODE_XOR}, 222 { "broadcast", BOND_MODE_BROADCAST}, 223 { "802.3ad", BOND_MODE_8023AD}, 224 { "balance-tlb", BOND_MODE_TLB}, 225 { "balance-alb", BOND_MODE_ALB}, 226 { NULL, -1}, 227 }; 228 229 const struct bond_parm_tbl xmit_hashtype_tbl[] = { 230 { "layer2", BOND_XMIT_POLICY_LAYER2}, 231 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 232 { "layer2+3", BOND_XMIT_POLICY_LAYER23}, 233 { "encap2+3", BOND_XMIT_POLICY_ENCAP23}, 234 { "encap3+4", BOND_XMIT_POLICY_ENCAP34}, 235 { NULL, -1}, 236 }; 237 238 const struct bond_parm_tbl arp_all_targets_tbl[] = { 239 { "any", BOND_ARP_TARGETS_ANY}, 240 { "all", BOND_ARP_TARGETS_ALL}, 241 { NULL, -1}, 242 }; 243 244 const struct bond_parm_tbl arp_validate_tbl[] = { 245 { "none", BOND_ARP_VALIDATE_NONE}, 246 { "active", BOND_ARP_VALIDATE_ACTIVE}, 247 { "backup", BOND_ARP_VALIDATE_BACKUP}, 248 { "all", BOND_ARP_VALIDATE_ALL}, 249 { NULL, -1}, 250 }; 251 252 const struct bond_parm_tbl fail_over_mac_tbl[] = { 253 { "none", BOND_FOM_NONE}, 254 { "active", BOND_FOM_ACTIVE}, 255 { "follow", BOND_FOM_FOLLOW}, 256 { NULL, -1}, 257 }; 258 259 const struct bond_parm_tbl pri_reselect_tbl[] = { 260 { "always", BOND_PRI_RESELECT_ALWAYS}, 261 { "better", BOND_PRI_RESELECT_BETTER}, 262 { 
"failure", BOND_PRI_RESELECT_FAILURE}, 263 { NULL, -1}, 264 }; 265 266 struct bond_parm_tbl ad_select_tbl[] = { 267 { "stable", BOND_AD_STABLE}, 268 { "bandwidth", BOND_AD_BANDWIDTH}, 269 { "count", BOND_AD_COUNT}, 270 { NULL, -1}, 271 }; 272 273 /*-------------------------- Forward declarations ---------------------------*/ 274 275 static int bond_init(struct net_device *bond_dev); 276 static void bond_uninit(struct net_device *bond_dev); 277 278 /*---------------------------- General routines -----------------------------*/ 279 280 const char *bond_mode_name(int mode) 281 { 282 static const char *names[] = { 283 [BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)", 284 [BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)", 285 [BOND_MODE_XOR] = "load balancing (xor)", 286 [BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)", 287 [BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation", 288 [BOND_MODE_TLB] = "transmit load balancing", 289 [BOND_MODE_ALB] = "adaptive load balancing", 290 }; 291 292 if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB) 293 return "unknown"; 294 295 return names[mode]; 296 } 297 298 /*---------------------------------- VLAN -----------------------------------*/ 299 300 /** 301 * bond_dev_queue_xmit - Prepare skb for xmit. 302 * 303 * @bond: bond device that got this skb for tx. 
304 * @skb: hw accel VLAN tagged skb to transmit 305 * @slave_dev: slave that is supposed to xmit this skbuff 306 */ 307 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, 308 struct net_device *slave_dev) 309 { 310 skb->dev = slave_dev; 311 312 BUILD_BUG_ON(sizeof(skb->queue_mapping) != 313 sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); 314 skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; 315 316 if (unlikely(netpoll_tx_running(bond->dev))) 317 bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); 318 else 319 dev_queue_xmit(skb); 320 321 return 0; 322 } 323 324 /* 325 * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, 326 * We don't protect the slave list iteration with a lock because: 327 * a. This operation is performed in IOCTL context, 328 * b. The operation is protected by the RTNL semaphore in the 8021q code, 329 * c. Holding a lock with BH disabled while directly calling a base driver 330 * entry point is generally a BAD idea. 331 * 332 * The design of synchronization/protection for this operation in the 8021q 333 * module is good for one or more VLAN devices over a single physical device 334 * and cannot be extended for a teaming solution like bonding, so there is a 335 * potential race condition here where a net device from the vlan group might 336 * be referenced (either by a base driver or the 8021q code) while it is being 337 * removed from the system. However, it turns out we're not making matters 338 * worse, and if it works for regular VLAN usage it will work here too. 
339 */ 340 341 /** 342 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 343 * @bond_dev: bonding net device that got called 344 * @vid: vlan id being added 345 */ 346 static int bond_vlan_rx_add_vid(struct net_device *bond_dev, 347 __be16 proto, u16 vid) 348 { 349 struct bonding *bond = netdev_priv(bond_dev); 350 struct slave *slave, *rollback_slave; 351 struct list_head *iter; 352 int res; 353 354 bond_for_each_slave(bond, slave, iter) { 355 res = vlan_vid_add(slave->dev, proto, vid); 356 if (res) 357 goto unwind; 358 } 359 360 return 0; 361 362 unwind: 363 /* unwind to the slave that failed */ 364 bond_for_each_slave(bond, rollback_slave, iter) { 365 if (rollback_slave == slave) 366 break; 367 368 vlan_vid_del(rollback_slave->dev, proto, vid); 369 } 370 371 return res; 372 } 373 374 /** 375 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 376 * @bond_dev: bonding net device that got called 377 * @vid: vlan id being removed 378 */ 379 static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, 380 __be16 proto, u16 vid) 381 { 382 struct bonding *bond = netdev_priv(bond_dev); 383 struct list_head *iter; 384 struct slave *slave; 385 386 bond_for_each_slave(bond, slave, iter) 387 vlan_vid_del(slave->dev, proto, vid); 388 389 if (bond_is_lb(bond)) 390 bond_alb_clear_vlan(bond, vid); 391 392 return 0; 393 } 394 395 /*------------------------------- Link status -------------------------------*/ 396 397 /* 398 * Set the carrier state for the master according to the state of its 399 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 400 * do special 802.3ad magic. 401 * 402 * Returns zero if carrier state does not change, nonzero if it does. 
403 */ 404 static int bond_set_carrier(struct bonding *bond) 405 { 406 struct list_head *iter; 407 struct slave *slave; 408 409 if (!bond_has_slaves(bond)) 410 goto down; 411 412 if (bond->params.mode == BOND_MODE_8023AD) 413 return bond_3ad_set_carrier(bond); 414 415 bond_for_each_slave(bond, slave, iter) { 416 if (slave->link == BOND_LINK_UP) { 417 if (!netif_carrier_ok(bond->dev)) { 418 netif_carrier_on(bond->dev); 419 return 1; 420 } 421 return 0; 422 } 423 } 424 425 down: 426 if (netif_carrier_ok(bond->dev)) { 427 netif_carrier_off(bond->dev); 428 return 1; 429 } 430 return 0; 431 } 432 433 /* 434 * Get link speed and duplex from the slave's base driver 435 * using ethtool. If for some reason the call fails or the 436 * values are invalid, set speed and duplex to -1, 437 * and return. 438 */ 439 static void bond_update_speed_duplex(struct slave *slave) 440 { 441 struct net_device *slave_dev = slave->dev; 442 struct ethtool_cmd ecmd; 443 u32 slave_speed; 444 int res; 445 446 slave->speed = SPEED_UNKNOWN; 447 slave->duplex = DUPLEX_UNKNOWN; 448 449 res = __ethtool_get_settings(slave_dev, &ecmd); 450 if (res < 0) 451 return; 452 453 slave_speed = ethtool_cmd_speed(&ecmd); 454 if (slave_speed == 0 || slave_speed == ((__u32) -1)) 455 return; 456 457 switch (ecmd.duplex) { 458 case DUPLEX_FULL: 459 case DUPLEX_HALF: 460 break; 461 default: 462 return; 463 } 464 465 slave->speed = slave_speed; 466 slave->duplex = ecmd.duplex; 467 468 return; 469 } 470 471 /* 472 * if <dev> supports MII link status reporting, check its link status. 473 * 474 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 475 * depending upon the setting of the use_carrier parameter. 476 * 477 * Return either BMSR_LSTATUS, meaning that the link is up (or we 478 * can't tell and just pretend it is), or 0, meaning that the link is 479 * down. 
480 * 481 * If reporting is non-zero, instead of faking link up, return -1 if 482 * both ETHTOOL and MII ioctls fail (meaning the device does not 483 * support them). If use_carrier is set, return whatever it says. 484 * It'd be nice if there was a good way to tell if a driver supports 485 * netif_carrier, but there really isn't. 486 */ 487 static int bond_check_dev_link(struct bonding *bond, 488 struct net_device *slave_dev, int reporting) 489 { 490 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 491 int (*ioctl)(struct net_device *, struct ifreq *, int); 492 struct ifreq ifr; 493 struct mii_ioctl_data *mii; 494 495 if (!reporting && !netif_running(slave_dev)) 496 return 0; 497 498 if (bond->params.use_carrier) 499 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; 500 501 /* Try to get link status using Ethtool first. */ 502 if (slave_dev->ethtool_ops->get_link) 503 return slave_dev->ethtool_ops->get_link(slave_dev) ? 504 BMSR_LSTATUS : 0; 505 506 /* Ethtool can't be used, fallback to MII ioctls. */ 507 ioctl = slave_ops->ndo_do_ioctl; 508 if (ioctl) { 509 /* TODO: set pointer to correct ioctl on a per team member */ 510 /* bases to make this more efficient. that is, once */ 511 /* we determine the correct ioctl, we will always */ 512 /* call it and not the others for that team */ 513 /* member. */ 514 515 /* 516 * We cannot assume that SIOCGMIIPHY will also read a 517 * register; not all network drivers (e.g., e100) 518 * support that. 519 */ 520 521 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 522 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 523 mii = if_mii(&ifr); 524 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 525 mii->reg_num = MII_BMSR; 526 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) 527 return mii->val_out & BMSR_LSTATUS; 528 } 529 } 530 531 /* 532 * If reporting, report that either there's no dev->do_ioctl, 533 * or both SIOCGMIIREG and get_link failed (meaning that we 534 * cannot report link status). 
If not reporting, pretend 535 * we're ok. 536 */ 537 return reporting ? -1 : BMSR_LSTATUS; 538 } 539 540 /*----------------------------- Multicast list ------------------------------*/ 541 542 /* 543 * Push the promiscuity flag down to appropriate slaves 544 */ 545 static int bond_set_promiscuity(struct bonding *bond, int inc) 546 { 547 struct list_head *iter; 548 int err = 0; 549 550 if (USES_PRIMARY(bond->params.mode)) { 551 /* write lock already acquired */ 552 if (bond->curr_active_slave) { 553 err = dev_set_promiscuity(bond->curr_active_slave->dev, 554 inc); 555 } 556 } else { 557 struct slave *slave; 558 559 bond_for_each_slave(bond, slave, iter) { 560 err = dev_set_promiscuity(slave->dev, inc); 561 if (err) 562 return err; 563 } 564 } 565 return err; 566 } 567 568 /* 569 * Push the allmulti flag down to all slaves 570 */ 571 static int bond_set_allmulti(struct bonding *bond, int inc) 572 { 573 struct list_head *iter; 574 int err = 0; 575 576 if (USES_PRIMARY(bond->params.mode)) { 577 /* write lock already acquired */ 578 if (bond->curr_active_slave) { 579 err = dev_set_allmulti(bond->curr_active_slave->dev, 580 inc); 581 } 582 } else { 583 struct slave *slave; 584 585 bond_for_each_slave(bond, slave, iter) { 586 err = dev_set_allmulti(slave->dev, inc); 587 if (err) 588 return err; 589 } 590 } 591 return err; 592 } 593 594 /* 595 * Retrieve the list of registered multicast addresses for the bonding 596 * device and retransmit an IGMP JOIN request to the current active 597 * slave. 
598 */ 599 static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) 600 { 601 struct bonding *bond = container_of(work, struct bonding, 602 mcast_work.work); 603 604 if (!rtnl_trylock()) { 605 queue_delayed_work(bond->wq, &bond->mcast_work, 1); 606 return; 607 } 608 call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); 609 610 if (bond->igmp_retrans > 1) { 611 bond->igmp_retrans--; 612 queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); 613 } 614 rtnl_unlock(); 615 } 616 617 /* Flush bond's hardware addresses from slave 618 */ 619 static void bond_hw_addr_flush(struct net_device *bond_dev, 620 struct net_device *slave_dev) 621 { 622 struct bonding *bond = netdev_priv(bond_dev); 623 624 dev_uc_unsync(slave_dev, bond_dev); 625 dev_mc_unsync(slave_dev, bond_dev); 626 627 if (bond->params.mode == BOND_MODE_8023AD) { 628 /* del lacpdu mc addr from mc list */ 629 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 630 631 dev_mc_del(slave_dev, lacpdu_multicast); 632 } 633 } 634 635 /*--------------------------- Active slave change ---------------------------*/ 636 637 /* Update the hardware address list and promisc/allmulti for the new and 638 * old active slaves (if any). Modes that are !USES_PRIMARY keep all 639 * slaves up date at all times; only the USES_PRIMARY modes need to call 640 * this function to swap these settings during a failover. 641 */ 642 static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, 643 struct slave *old_active) 644 { 645 ASSERT_RTNL(); 646 647 if (old_active) { 648 if (bond->dev->flags & IFF_PROMISC) 649 dev_set_promiscuity(old_active->dev, -1); 650 651 if (bond->dev->flags & IFF_ALLMULTI) 652 dev_set_allmulti(old_active->dev, -1); 653 654 bond_hw_addr_flush(bond->dev, old_active->dev); 655 } 656 657 if (new_active) { 658 /* FIXME: Signal errors upstream. 
*/ 659 if (bond->dev->flags & IFF_PROMISC) 660 dev_set_promiscuity(new_active->dev, 1); 661 662 if (bond->dev->flags & IFF_ALLMULTI) 663 dev_set_allmulti(new_active->dev, 1); 664 665 netif_addr_lock_bh(bond->dev); 666 dev_uc_sync(new_active->dev, bond->dev); 667 dev_mc_sync(new_active->dev, bond->dev); 668 netif_addr_unlock_bh(bond->dev); 669 } 670 } 671 672 /** 673 * bond_set_dev_addr - clone slave's address to bond 674 * @bond_dev: bond net device 675 * @slave_dev: slave net device 676 * 677 * Should be called with RTNL held. 678 */ 679 static void bond_set_dev_addr(struct net_device *bond_dev, 680 struct net_device *slave_dev) 681 { 682 pr_debug("bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", 683 bond_dev, slave_dev, slave_dev->addr_len); 684 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 685 bond_dev->addr_assign_type = NET_ADDR_STOLEN; 686 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); 687 } 688 689 /* 690 * bond_do_fail_over_mac 691 * 692 * Perform special MAC address swapping for fail_over_mac settings 693 * 694 * Called with RTNL, curr_slave_lock for write_bh. 
695 */ 696 static void bond_do_fail_over_mac(struct bonding *bond, 697 struct slave *new_active, 698 struct slave *old_active) 699 __releases(&bond->curr_slave_lock) 700 __acquires(&bond->curr_slave_lock) 701 { 702 u8 tmp_mac[ETH_ALEN]; 703 struct sockaddr saddr; 704 int rv; 705 706 switch (bond->params.fail_over_mac) { 707 case BOND_FOM_ACTIVE: 708 if (new_active) { 709 write_unlock_bh(&bond->curr_slave_lock); 710 bond_set_dev_addr(bond->dev, new_active->dev); 711 write_lock_bh(&bond->curr_slave_lock); 712 } 713 break; 714 case BOND_FOM_FOLLOW: 715 /* 716 * if new_active && old_active, swap them 717 * if just old_active, do nothing (going to no active slave) 718 * if just new_active, set new_active to bond's MAC 719 */ 720 if (!new_active) 721 return; 722 723 write_unlock_bh(&bond->curr_slave_lock); 724 725 if (old_active) { 726 memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); 727 memcpy(saddr.sa_data, old_active->dev->dev_addr, 728 ETH_ALEN); 729 saddr.sa_family = new_active->dev->type; 730 } else { 731 memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); 732 saddr.sa_family = bond->dev->type; 733 } 734 735 rv = dev_set_mac_address(new_active->dev, &saddr); 736 if (rv) { 737 pr_err("%s: Error %d setting MAC of slave %s\n", 738 bond->dev->name, -rv, new_active->dev->name); 739 goto out; 740 } 741 742 if (!old_active) 743 goto out; 744 745 memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); 746 saddr.sa_family = old_active->dev->type; 747 748 rv = dev_set_mac_address(old_active->dev, &saddr); 749 if (rv) 750 pr_err("%s: Error %d setting MAC of slave %s\n", 751 bond->dev->name, -rv, new_active->dev->name); 752 out: 753 write_lock_bh(&bond->curr_slave_lock); 754 break; 755 default: 756 pr_err("%s: bond_do_fail_over_mac impossible: bad policy %d\n", 757 bond->dev->name, bond->params.fail_over_mac); 758 break; 759 } 760 761 } 762 763 static bool bond_should_change_active(struct bonding *bond) 764 { 765 struct slave *prim = bond->primary_slave; 766 struct slave *curr = 
bond->curr_active_slave; 767 768 if (!prim || !curr || curr->link != BOND_LINK_UP) 769 return true; 770 if (bond->force_primary) { 771 bond->force_primary = false; 772 return true; 773 } 774 if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && 775 (prim->speed < curr->speed || 776 (prim->speed == curr->speed && prim->duplex <= curr->duplex))) 777 return false; 778 if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) 779 return false; 780 return true; 781 } 782 783 /** 784 * find_best_interface - select the best available slave to be the active one 785 * @bond: our bonding struct 786 */ 787 static struct slave *bond_find_best_slave(struct bonding *bond) 788 { 789 struct slave *slave, *bestslave = NULL; 790 struct list_head *iter; 791 int mintime = bond->params.updelay; 792 793 if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && 794 bond_should_change_active(bond)) 795 return bond->primary_slave; 796 797 bond_for_each_slave(bond, slave, iter) { 798 if (slave->link == BOND_LINK_UP) 799 return slave; 800 if (slave->link == BOND_LINK_BACK && IS_UP(slave->dev) && 801 slave->delay < mintime) { 802 mintime = slave->delay; 803 bestslave = slave; 804 } 805 } 806 807 return bestslave; 808 } 809 810 static bool bond_should_notify_peers(struct bonding *bond) 811 { 812 struct slave *slave; 813 814 rcu_read_lock(); 815 slave = rcu_dereference(bond->curr_active_slave); 816 rcu_read_unlock(); 817 818 pr_debug("bond_should_notify_peers: bond %s slave %s\n", 819 bond->dev->name, slave ? slave->dev->name : "NULL"); 820 821 if (!slave || !bond->send_peer_notif || 822 test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) 823 return false; 824 825 return true; 826 } 827 828 /** 829 * change_active_interface - change the active slave into the specified one 830 * @bond: our bonding struct 831 * @new: the new slave to make the active one 832 * 833 * Set the new slave to the bond's settings and unset them on the old 834 * curr_active_slave. 
835 * Setting include flags, mc-list, promiscuity, allmulti, etc. 836 * 837 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 838 * because it is apparently the best available slave we have, even though its 839 * updelay hasn't timed out yet. 840 * 841 * If new_active is not NULL, caller must hold curr_slave_lock for write_bh. 842 */ 843 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 844 { 845 struct slave *old_active = bond->curr_active_slave; 846 847 if (old_active == new_active) 848 return; 849 850 if (new_active) { 851 new_active->jiffies = jiffies; 852 853 if (new_active->link == BOND_LINK_BACK) { 854 if (USES_PRIMARY(bond->params.mode)) { 855 pr_info("%s: making interface %s the new active one %d ms earlier.\n", 856 bond->dev->name, new_active->dev->name, 857 (bond->params.updelay - new_active->delay) * bond->params.miimon); 858 } 859 860 new_active->delay = 0; 861 new_active->link = BOND_LINK_UP; 862 863 if (bond->params.mode == BOND_MODE_8023AD) 864 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 865 866 if (bond_is_lb(bond)) 867 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 868 } else { 869 if (USES_PRIMARY(bond->params.mode)) { 870 pr_info("%s: making interface %s the new active one.\n", 871 bond->dev->name, new_active->dev->name); 872 } 873 } 874 } 875 876 if (USES_PRIMARY(bond->params.mode)) 877 bond_hw_addr_swap(bond, new_active, old_active); 878 879 if (bond_is_lb(bond)) { 880 bond_alb_handle_active_change(bond, new_active); 881 if (old_active) 882 bond_set_slave_inactive_flags(old_active); 883 if (new_active) 884 bond_set_slave_active_flags(new_active); 885 } else { 886 rcu_assign_pointer(bond->curr_active_slave, new_active); 887 } 888 889 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 890 if (old_active) 891 bond_set_slave_inactive_flags(old_active); 892 893 if (new_active) { 894 bool should_notify_peers = false; 895 896 bond_set_slave_active_flags(new_active); 897 898 
if (bond->params.fail_over_mac) 899 bond_do_fail_over_mac(bond, new_active, 900 old_active); 901 902 if (netif_running(bond->dev)) { 903 bond->send_peer_notif = 904 bond->params.num_peer_notif; 905 should_notify_peers = 906 bond_should_notify_peers(bond); 907 } 908 909 write_unlock_bh(&bond->curr_slave_lock); 910 911 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); 912 if (should_notify_peers) 913 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, 914 bond->dev); 915 916 write_lock_bh(&bond->curr_slave_lock); 917 } 918 } 919 920 /* resend IGMP joins since active slave has changed or 921 * all were sent on curr_active_slave. 922 * resend only if bond is brought up with the affected 923 * bonding modes and the retransmission is enabled */ 924 if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && 925 ((USES_PRIMARY(bond->params.mode) && new_active) || 926 bond->params.mode == BOND_MODE_ROUNDROBIN)) { 927 bond->igmp_retrans = bond->params.resend_igmp; 928 queue_delayed_work(bond->wq, &bond->mcast_work, 1); 929 } 930 } 931 932 /** 933 * bond_select_active_slave - select a new active slave, if needed 934 * @bond: our bonding struct 935 * 936 * This functions should be called when one of the following occurs: 937 * - The old curr_active_slave has been released or lost its link. 938 * - The primary_slave has got its link back. 939 * - A slave has got its link back and there's no old curr_active_slave. 940 * 941 * Caller must hold curr_slave_lock for write_bh. 
942 */ 943 void bond_select_active_slave(struct bonding *bond) 944 { 945 struct slave *best_slave; 946 int rv; 947 948 best_slave = bond_find_best_slave(bond); 949 if (best_slave != bond->curr_active_slave) { 950 bond_change_active_slave(bond, best_slave); 951 rv = bond_set_carrier(bond); 952 if (!rv) 953 return; 954 955 if (netif_carrier_ok(bond->dev)) { 956 pr_info("%s: first active interface up!\n", 957 bond->dev->name); 958 } else { 959 pr_info("%s: now running without any active interface !\n", 960 bond->dev->name); 961 } 962 } 963 } 964 965 #ifdef CONFIG_NET_POLL_CONTROLLER 966 static inline int slave_enable_netpoll(struct slave *slave) 967 { 968 struct netpoll *np; 969 int err = 0; 970 971 np = kzalloc(sizeof(*np), GFP_ATOMIC); 972 err = -ENOMEM; 973 if (!np) 974 goto out; 975 976 err = __netpoll_setup(np, slave->dev, GFP_ATOMIC); 977 if (err) { 978 kfree(np); 979 goto out; 980 } 981 slave->np = np; 982 out: 983 return err; 984 } 985 static inline void slave_disable_netpoll(struct slave *slave) 986 { 987 struct netpoll *np = slave->np; 988 989 if (!np) 990 return; 991 992 slave->np = NULL; 993 __netpoll_free_async(np); 994 } 995 static inline bool slave_dev_support_netpoll(struct net_device *slave_dev) 996 { 997 if (slave_dev->priv_flags & IFF_DISABLE_NETPOLL) 998 return false; 999 if (!slave_dev->netdev_ops->ndo_poll_controller) 1000 return false; 1001 return true; 1002 } 1003 1004 static void bond_poll_controller(struct net_device *bond_dev) 1005 { 1006 } 1007 1008 static void bond_netpoll_cleanup(struct net_device *bond_dev) 1009 { 1010 struct bonding *bond = netdev_priv(bond_dev); 1011 struct list_head *iter; 1012 struct slave *slave; 1013 1014 bond_for_each_slave(bond, slave, iter) 1015 if (IS_UP(slave->dev)) 1016 slave_disable_netpoll(slave); 1017 } 1018 1019 static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) 1020 { 1021 struct bonding *bond = netdev_priv(dev); 1022 struct list_head *iter; 1023 struct slave 
*slave; 1024 int err = 0; 1025 1026 bond_for_each_slave(bond, slave, iter) { 1027 err = slave_enable_netpoll(slave); 1028 if (err) { 1029 bond_netpoll_cleanup(dev); 1030 break; 1031 } 1032 } 1033 return err; 1034 } 1035 #else 1036 static inline int slave_enable_netpoll(struct slave *slave) 1037 { 1038 return 0; 1039 } 1040 static inline void slave_disable_netpoll(struct slave *slave) 1041 { 1042 } 1043 static void bond_netpoll_cleanup(struct net_device *bond_dev) 1044 { 1045 } 1046 #endif 1047 1048 /*---------------------------------- IOCTL ----------------------------------*/ 1049 1050 static netdev_features_t bond_fix_features(struct net_device *dev, 1051 netdev_features_t features) 1052 { 1053 struct bonding *bond = netdev_priv(dev); 1054 struct list_head *iter; 1055 netdev_features_t mask; 1056 struct slave *slave; 1057 1058 if (!bond_has_slaves(bond)) { 1059 /* Disable adding VLANs to empty bond. But why? --mq */ 1060 features |= NETIF_F_VLAN_CHALLENGED; 1061 return features; 1062 } 1063 1064 mask = features; 1065 features &= ~NETIF_F_ONE_FOR_ALL; 1066 features |= NETIF_F_ALL_FOR_ALL; 1067 1068 bond_for_each_slave(bond, slave, iter) { 1069 features = netdev_increment_features(features, 1070 slave->dev->features, 1071 mask); 1072 } 1073 features = netdev_add_tso_features(features, mask); 1074 1075 return features; 1076 } 1077 1078 #define BOND_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ 1079 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ 1080 NETIF_F_HIGHDMA | NETIF_F_LRO) 1081 1082 static void bond_compute_features(struct bonding *bond) 1083 { 1084 unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; 1085 netdev_features_t vlan_features = BOND_VLAN_FEATURES; 1086 struct net_device *bond_dev = bond->dev; 1087 struct list_head *iter; 1088 struct slave *slave; 1089 unsigned short max_hard_header_len = ETH_HLEN; 1090 unsigned int gso_max_size = GSO_MAX_SIZE; 1091 u16 gso_max_segs = GSO_MAX_SEGS; 1092 1093 if (!bond_has_slaves(bond)) 1094 goto done; 1095 1096 
bond_for_each_slave(bond, slave, iter) { 1097 vlan_features = netdev_increment_features(vlan_features, 1098 slave->dev->vlan_features, BOND_VLAN_FEATURES); 1099 1100 dst_release_flag &= slave->dev->priv_flags; 1101 if (slave->dev->hard_header_len > max_hard_header_len) 1102 max_hard_header_len = slave->dev->hard_header_len; 1103 1104 gso_max_size = min(gso_max_size, slave->dev->gso_max_size); 1105 gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs); 1106 } 1107 1108 done: 1109 bond_dev->vlan_features = vlan_features; 1110 bond_dev->hard_header_len = max_hard_header_len; 1111 bond_dev->gso_max_segs = gso_max_segs; 1112 netif_set_gso_max_size(bond_dev, gso_max_size); 1113 1114 flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE; 1115 bond_dev->priv_flags = flags | dst_release_flag; 1116 1117 netdev_change_features(bond_dev); 1118 } 1119 1120 static void bond_setup_by_slave(struct net_device *bond_dev, 1121 struct net_device *slave_dev) 1122 { 1123 bond_dev->header_ops = slave_dev->header_ops; 1124 1125 bond_dev->type = slave_dev->type; 1126 bond_dev->hard_header_len = slave_dev->hard_header_len; 1127 bond_dev->addr_len = slave_dev->addr_len; 1128 1129 memcpy(bond_dev->broadcast, slave_dev->broadcast, 1130 slave_dev->addr_len); 1131 } 1132 1133 /* On bonding slaves other than the currently active slave, suppress 1134 * duplicates except for alb non-mcast/bcast. 
1135 */ 1136 static bool bond_should_deliver_exact_match(struct sk_buff *skb, 1137 struct slave *slave, 1138 struct bonding *bond) 1139 { 1140 if (bond_is_slave_inactive(slave)) { 1141 if (bond->params.mode == BOND_MODE_ALB && 1142 skb->pkt_type != PACKET_BROADCAST && 1143 skb->pkt_type != PACKET_MULTICAST) 1144 return false; 1145 return true; 1146 } 1147 return false; 1148 } 1149 1150 static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) 1151 { 1152 struct sk_buff *skb = *pskb; 1153 struct slave *slave; 1154 struct bonding *bond; 1155 int (*recv_probe)(const struct sk_buff *, struct bonding *, 1156 struct slave *); 1157 int ret = RX_HANDLER_ANOTHER; 1158 1159 skb = skb_share_check(skb, GFP_ATOMIC); 1160 if (unlikely(!skb)) 1161 return RX_HANDLER_CONSUMED; 1162 1163 *pskb = skb; 1164 1165 slave = bond_slave_get_rcu(skb->dev); 1166 bond = slave->bond; 1167 1168 if (bond->params.arp_interval) 1169 slave->dev->last_rx = jiffies; 1170 1171 recv_probe = ACCESS_ONCE(bond->recv_probe); 1172 if (recv_probe) { 1173 ret = recv_probe(skb, bond, slave); 1174 if (ret == RX_HANDLER_CONSUMED) { 1175 consume_skb(skb); 1176 return ret; 1177 } 1178 } 1179 1180 if (bond_should_deliver_exact_match(skb, slave, bond)) { 1181 return RX_HANDLER_EXACT; 1182 } 1183 1184 skb->dev = bond->dev; 1185 1186 if (bond->params.mode == BOND_MODE_ALB && 1187 bond->dev->priv_flags & IFF_BRIDGE_PORT && 1188 skb->pkt_type == PACKET_HOST) { 1189 1190 if (unlikely(skb_cow_head(skb, 1191 skb->data - skb_mac_header(skb)))) { 1192 kfree_skb(skb); 1193 return RX_HANDLER_CONSUMED; 1194 } 1195 memcpy(eth_hdr(skb)->h_dest, bond->dev->dev_addr, ETH_ALEN); 1196 } 1197 1198 return ret; 1199 } 1200 1201 static int bond_master_upper_dev_link(struct net_device *bond_dev, 1202 struct net_device *slave_dev, 1203 struct slave *slave) 1204 { 1205 int err; 1206 1207 err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); 1208 if (err) 1209 return err; 1210 slave_dev->flags |= IFF_SLAVE; 1211 
rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); 1212 return 0; 1213 } 1214 1215 static void bond_upper_dev_unlink(struct net_device *bond_dev, 1216 struct net_device *slave_dev) 1217 { 1218 netdev_upper_dev_unlink(slave_dev, bond_dev); 1219 slave_dev->flags &= ~IFF_SLAVE; 1220 rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); 1221 } 1222 1223 /* enslave device <slave> to bond device <master> */ 1224 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1225 { 1226 struct bonding *bond = netdev_priv(bond_dev); 1227 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 1228 struct slave *new_slave = NULL, *prev_slave; 1229 struct sockaddr addr; 1230 int link_reporting; 1231 int res = 0, i; 1232 1233 if (!bond->params.use_carrier && 1234 slave_dev->ethtool_ops->get_link == NULL && 1235 slave_ops->ndo_do_ioctl == NULL) { 1236 pr_warning("%s: Warning: no link monitoring support for %s\n", 1237 bond_dev->name, slave_dev->name); 1238 } 1239 1240 /* already enslaved */ 1241 if (slave_dev->flags & IFF_SLAVE) { 1242 pr_debug("Error, Device was already enslaved\n"); 1243 return -EBUSY; 1244 } 1245 1246 /* vlan challenged mutual exclusion */ 1247 /* no need to lock since we're protected by rtnl_lock */ 1248 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1249 pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1250 if (vlan_uses_dev(bond_dev)) { 1251 pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n", 1252 bond_dev->name, slave_dev->name, bond_dev->name); 1253 return -EPERM; 1254 } else { 1255 pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", 1256 bond_dev->name, slave_dev->name, 1257 slave_dev->name, bond_dev->name); 1258 } 1259 } else { 1260 pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1261 } 1262 1263 /* 1264 * Old ifenslave binaries are no longer supported. 
These can 1265 * be identified with moderate accuracy by the state of the slave: 1266 * the current ifenslave will set the interface down prior to 1267 * enslaving it; the old ifenslave will not. 1268 */ 1269 if ((slave_dev->flags & IFF_UP)) { 1270 pr_err("%s is up. This may be due to an out of date ifenslave.\n", 1271 slave_dev->name); 1272 res = -EPERM; 1273 goto err_undo_flags; 1274 } 1275 1276 /* set bonding device ether type by slave - bonding netdevices are 1277 * created with ether_setup, so when the slave type is not ARPHRD_ETHER 1278 * there is a need to override some of the type dependent attribs/funcs. 1279 * 1280 * bond ether type mutual exclusion - don't allow slaves of dissimilar 1281 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond 1282 */ 1283 if (!bond_has_slaves(bond)) { 1284 if (bond_dev->type != slave_dev->type) { 1285 pr_debug("%s: change device type from %d to %d\n", 1286 bond_dev->name, 1287 bond_dev->type, slave_dev->type); 1288 1289 res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, 1290 bond_dev); 1291 res = notifier_to_errno(res); 1292 if (res) { 1293 pr_err("%s: refused to change device type\n", 1294 bond_dev->name); 1295 res = -EBUSY; 1296 goto err_undo_flags; 1297 } 1298 1299 /* Flush unicast and multicast addresses */ 1300 dev_uc_flush(bond_dev); 1301 dev_mc_flush(bond_dev); 1302 1303 if (slave_dev->type != ARPHRD_ETHER) 1304 bond_setup_by_slave(bond_dev, slave_dev); 1305 else { 1306 ether_setup(bond_dev); 1307 bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1308 } 1309 1310 call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, 1311 bond_dev); 1312 } 1313 } else if (bond_dev->type != slave_dev->type) { 1314 pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", 1315 slave_dev->name, 1316 slave_dev->type, bond_dev->type); 1317 res = -EINVAL; 1318 goto err_undo_flags; 1319 } 1320 1321 if (slave_ops->ndo_set_mac_address == NULL) { 1322 if (!bond_has_slaves(bond)) { 1323 
pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", 1324 bond_dev->name); 1325 bond->params.fail_over_mac = BOND_FOM_ACTIVE; 1326 } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { 1327 pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n", 1328 bond_dev->name); 1329 res = -EOPNOTSUPP; 1330 goto err_undo_flags; 1331 } 1332 } 1333 1334 call_netdevice_notifiers(NETDEV_JOIN, slave_dev); 1335 1336 /* If this is the first slave, then we need to set the master's hardware 1337 * address to be the same as the slave's. */ 1338 if (!bond_has_slaves(bond) && 1339 bond->dev->addr_assign_type == NET_ADDR_RANDOM) 1340 bond_set_dev_addr(bond->dev, slave_dev); 1341 1342 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); 1343 if (!new_slave) { 1344 res = -ENOMEM; 1345 goto err_undo_flags; 1346 } 1347 /* 1348 * Set the new_slave's queue_id to be zero. Queue ID mapping 1349 * is set via sysfs or module option if desired. 1350 */ 1351 new_slave->queue_id = 0; 1352 1353 /* Save slave's original mtu and then set it to match the bond */ 1354 new_slave->original_mtu = slave_dev->mtu; 1355 res = dev_set_mtu(slave_dev, bond->dev->mtu); 1356 if (res) { 1357 pr_debug("Error %d calling dev_set_mtu\n", res); 1358 goto err_free; 1359 } 1360 1361 /* 1362 * Save slave's original ("permanent") mac address for modes 1363 * that need it, and for restoring it upon release, and then 1364 * set it to the master's address 1365 */ 1366 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1367 1368 if (!bond->params.fail_over_mac) { 1369 /* 1370 * Set slave to master's mac address. 
The application already 1371 * set the master's mac address to that of the first slave 1372 */ 1373 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1374 addr.sa_family = slave_dev->type; 1375 res = dev_set_mac_address(slave_dev, &addr); 1376 if (res) { 1377 pr_debug("Error %d calling set_mac_address\n", res); 1378 goto err_restore_mtu; 1379 } 1380 } 1381 1382 /* open the slave since the application closed it */ 1383 res = dev_open(slave_dev); 1384 if (res) { 1385 pr_debug("Opening slave %s failed\n", slave_dev->name); 1386 goto err_restore_mac; 1387 } 1388 1389 new_slave->bond = bond; 1390 new_slave->dev = slave_dev; 1391 slave_dev->priv_flags |= IFF_BONDING; 1392 1393 if (bond_is_lb(bond)) { 1394 /* bond_alb_init_slave() must be called before all other stages since 1395 * it might fail and we do not want to have to undo everything 1396 */ 1397 res = bond_alb_init_slave(bond, new_slave); 1398 if (res) 1399 goto err_close; 1400 } 1401 1402 /* If the mode USES_PRIMARY, then the following is handled by 1403 * bond_change_active_slave(). 
1404 */ 1405 if (!USES_PRIMARY(bond->params.mode)) { 1406 /* set promiscuity level to new slave */ 1407 if (bond_dev->flags & IFF_PROMISC) { 1408 res = dev_set_promiscuity(slave_dev, 1); 1409 if (res) 1410 goto err_close; 1411 } 1412 1413 /* set allmulti level to new slave */ 1414 if (bond_dev->flags & IFF_ALLMULTI) { 1415 res = dev_set_allmulti(slave_dev, 1); 1416 if (res) 1417 goto err_close; 1418 } 1419 1420 netif_addr_lock_bh(bond_dev); 1421 1422 dev_mc_sync_multiple(slave_dev, bond_dev); 1423 dev_uc_sync_multiple(slave_dev, bond_dev); 1424 1425 netif_addr_unlock_bh(bond_dev); 1426 } 1427 1428 if (bond->params.mode == BOND_MODE_8023AD) { 1429 /* add lacpdu mc addr to mc list */ 1430 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1431 1432 dev_mc_add(slave_dev, lacpdu_multicast); 1433 } 1434 1435 res = vlan_vids_add_by_dev(slave_dev, bond_dev); 1436 if (res) { 1437 pr_err("%s: Error: Couldn't add bond vlan ids to %s\n", 1438 bond_dev->name, slave_dev->name); 1439 goto err_close; 1440 } 1441 1442 prev_slave = bond_last_slave(bond); 1443 1444 new_slave->delay = 0; 1445 new_slave->link_failure_count = 0; 1446 1447 bond_update_speed_duplex(new_slave); 1448 1449 new_slave->last_arp_rx = jiffies - 1450 (msecs_to_jiffies(bond->params.arp_interval) + 1); 1451 for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) 1452 new_slave->target_last_arp_rx[i] = new_slave->last_arp_rx; 1453 1454 if (bond->params.miimon && !bond->params.use_carrier) { 1455 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1456 1457 if ((link_reporting == -1) && !bond->params.arp_interval) { 1458 /* 1459 * miimon is set but a bonded network driver 1460 * does not support ETHTOOL/MII and 1461 * arp_interval is not set. Note: if 1462 * use_carrier is enabled, we will never go 1463 * here (because netif_carrier is always 1464 * supported); thus, we don't need to change 1465 * the messages for netif_carrier. 
1466 */ 1467 pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details.\n", 1468 bond_dev->name, slave_dev->name); 1469 } else if (link_reporting == -1) { 1470 /* unable get link status using mii/ethtool */ 1471 pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n", 1472 bond_dev->name, slave_dev->name); 1473 } 1474 } 1475 1476 /* check for initial state */ 1477 if (bond->params.miimon) { 1478 if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { 1479 if (bond->params.updelay) { 1480 new_slave->link = BOND_LINK_BACK; 1481 new_slave->delay = bond->params.updelay; 1482 } else { 1483 new_slave->link = BOND_LINK_UP; 1484 } 1485 } else { 1486 new_slave->link = BOND_LINK_DOWN; 1487 } 1488 } else if (bond->params.arp_interval) { 1489 new_slave->link = (netif_carrier_ok(slave_dev) ? 1490 BOND_LINK_UP : BOND_LINK_DOWN); 1491 } else { 1492 new_slave->link = BOND_LINK_UP; 1493 } 1494 1495 if (new_slave->link != BOND_LINK_DOWN) 1496 new_slave->jiffies = jiffies; 1497 pr_debug("Initial state of slave_dev is BOND_LINK_%s\n", 1498 new_slave->link == BOND_LINK_DOWN ? "DOWN" : 1499 (new_slave->link == BOND_LINK_UP ? 
"UP" : "BACK")); 1500 1501 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1502 /* if there is a primary slave, remember it */ 1503 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1504 bond->primary_slave = new_slave; 1505 bond->force_primary = true; 1506 } 1507 } 1508 1509 switch (bond->params.mode) { 1510 case BOND_MODE_ACTIVEBACKUP: 1511 bond_set_slave_inactive_flags(new_slave); 1512 break; 1513 case BOND_MODE_8023AD: 1514 /* in 802.3ad mode, the internal mechanism 1515 * will activate the slaves in the selected 1516 * aggregator 1517 */ 1518 bond_set_slave_inactive_flags(new_slave); 1519 /* if this is the first slave */ 1520 if (!prev_slave) { 1521 SLAVE_AD_INFO(new_slave).id = 1; 1522 /* Initialize AD with the number of times that the AD timer is called in 1 second 1523 * can be called only after the mac address of the bond is set 1524 */ 1525 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL); 1526 } else { 1527 SLAVE_AD_INFO(new_slave).id = 1528 SLAVE_AD_INFO(prev_slave).id + 1; 1529 } 1530 1531 bond_3ad_bind_slave(new_slave); 1532 break; 1533 case BOND_MODE_TLB: 1534 case BOND_MODE_ALB: 1535 bond_set_active_slave(new_slave); 1536 bond_set_slave_inactive_flags(new_slave); 1537 break; 1538 default: 1539 pr_debug("This slave is always active in trunk mode\n"); 1540 1541 /* always active in trunk mode */ 1542 bond_set_active_slave(new_slave); 1543 1544 /* In trunking mode there is little meaning to curr_active_slave 1545 * anyway (it holds no special properties of the bond device), 1546 * so we can change it without calling change_active_interface() 1547 */ 1548 if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) 1549 rcu_assign_pointer(bond->curr_active_slave, new_slave); 1550 1551 break; 1552 } /* switch(bond_mode) */ 1553 1554 #ifdef CONFIG_NET_POLL_CONTROLLER 1555 slave_dev->npinfo = bond->dev->npinfo; 1556 if (slave_dev->npinfo) { 1557 if (slave_enable_netpoll(new_slave)) { 1558 read_unlock(&bond->lock); 1559 
pr_info("Error, %s: master_dev is using netpoll, " 1560 "but new slave device does not support netpoll.\n", 1561 bond_dev->name); 1562 res = -EBUSY; 1563 goto err_detach; 1564 } 1565 } 1566 #endif 1567 1568 res = netdev_rx_handler_register(slave_dev, bond_handle_frame, 1569 new_slave); 1570 if (res) { 1571 pr_debug("Error %d calling netdev_rx_handler_register\n", res); 1572 goto err_detach; 1573 } 1574 1575 res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); 1576 if (res) { 1577 pr_debug("Error %d calling bond_master_upper_dev_link\n", res); 1578 goto err_unregister; 1579 } 1580 1581 bond->slave_cnt++; 1582 bond_compute_features(bond); 1583 bond_set_carrier(bond); 1584 1585 if (USES_PRIMARY(bond->params.mode)) { 1586 write_lock_bh(&bond->curr_slave_lock); 1587 bond_select_active_slave(bond); 1588 write_unlock_bh(&bond->curr_slave_lock); 1589 } 1590 1591 pr_info("%s: enslaving %s as a%s interface with a%s link.\n", 1592 bond_dev->name, slave_dev->name, 1593 bond_is_active_slave(new_slave) ? "n active" : " backup", 1594 new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); 1595 1596 /* enslave is successful */ 1597 return 0; 1598 1599 /* Undo stages on error */ 1600 err_unregister: 1601 netdev_rx_handler_unregister(slave_dev); 1602 1603 err_detach: 1604 if (!USES_PRIMARY(bond->params.mode)) 1605 bond_hw_addr_flush(bond_dev, slave_dev); 1606 1607 vlan_vids_del_by_dev(slave_dev, bond_dev); 1608 if (bond->primary_slave == new_slave) 1609 bond->primary_slave = NULL; 1610 if (bond->curr_active_slave == new_slave) { 1611 write_lock_bh(&bond->curr_slave_lock); 1612 bond_change_active_slave(bond, NULL); 1613 bond_select_active_slave(bond); 1614 write_unlock_bh(&bond->curr_slave_lock); 1615 } 1616 slave_disable_netpoll(new_slave); 1617 1618 err_close: 1619 slave_dev->priv_flags &= ~IFF_BONDING; 1620 dev_close(slave_dev); 1621 1622 err_restore_mac: 1623 if (!bond->params.fail_over_mac) { 1624 /* XXX TODO - fom follow mode needs to change master's 1625 * MAC if this slave's MAC is in use by the bond, or at 1626 * least print a warning. 1627 */ 1628 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1629 addr.sa_family = slave_dev->type; 1630 dev_set_mac_address(slave_dev, &addr); 1631 } 1632 1633 err_restore_mtu: 1634 dev_set_mtu(slave_dev, new_slave->original_mtu); 1635 1636 err_free: 1637 kfree(new_slave); 1638 1639 err_undo_flags: 1640 /* Enslave of first slave has failed and we need to fix master's mac */ 1641 if (!bond_has_slaves(bond) && 1642 ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) 1643 eth_hw_addr_random(bond_dev); 1644 1645 return res; 1646 } 1647 1648 /* 1649 * Try to release the slave device <slave> from the bond device <master> 1650 * It is legal to access curr_active_slave without a lock because all the function 1651 * is write-locked. If "all" is true it means that the function is being called 1652 * while destroying a bond interface and all slaves are being released. 
1653 * 1654 * The rules for slave state should be: 1655 * for Active/Backup: 1656 * Active stays on all backups go down 1657 * for Bonded connections: 1658 * The first up interface should be left on and all others downed. 1659 */ 1660 static int __bond_release_one(struct net_device *bond_dev, 1661 struct net_device *slave_dev, 1662 bool all) 1663 { 1664 struct bonding *bond = netdev_priv(bond_dev); 1665 struct slave *slave, *oldcurrent; 1666 struct sockaddr addr; 1667 int old_flags = bond_dev->flags; 1668 netdev_features_t old_features = bond_dev->features; 1669 1670 /* slave is not a slave or master is not master of this slave */ 1671 if (!(slave_dev->flags & IFF_SLAVE) || 1672 !netdev_has_upper_dev(slave_dev, bond_dev)) { 1673 pr_err("%s: Error: cannot release %s.\n", 1674 bond_dev->name, slave_dev->name); 1675 return -EINVAL; 1676 } 1677 1678 block_netpoll_tx(); 1679 1680 slave = bond_get_slave_by_dev(bond, slave_dev); 1681 if (!slave) { 1682 /* not a slave of this bond */ 1683 pr_info("%s: %s not enslaved\n", 1684 bond_dev->name, slave_dev->name); 1685 unblock_netpoll_tx(); 1686 return -EINVAL; 1687 } 1688 1689 /* release the slave from its bond */ 1690 bond->slave_cnt--; 1691 1692 bond_upper_dev_unlink(bond_dev, slave_dev); 1693 /* unregister rx_handler early so bond_handle_frame wouldn't be called 1694 * for this slave anymore. 1695 */ 1696 netdev_rx_handler_unregister(slave_dev); 1697 write_lock_bh(&bond->lock); 1698 1699 /* Inform AD package of unbinding of slave. */ 1700 if (bond->params.mode == BOND_MODE_8023AD) 1701 bond_3ad_unbind_slave(slave); 1702 1703 write_unlock_bh(&bond->lock); 1704 1705 pr_info("%s: releasing %s interface %s\n", 1706 bond_dev->name, 1707 bond_is_active_slave(slave) ? 
"active" : "backup", 1708 slave_dev->name); 1709 1710 oldcurrent = bond->curr_active_slave; 1711 1712 bond->current_arp_slave = NULL; 1713 1714 if (!all && !bond->params.fail_over_mac) { 1715 if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) && 1716 bond_has_slaves(bond)) 1717 pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", 1718 bond_dev->name, slave_dev->name, 1719 slave->perm_hwaddr, 1720 bond_dev->name, slave_dev->name); 1721 } 1722 1723 if (bond->primary_slave == slave) 1724 bond->primary_slave = NULL; 1725 1726 if (oldcurrent == slave) { 1727 write_lock_bh(&bond->curr_slave_lock); 1728 bond_change_active_slave(bond, NULL); 1729 write_unlock_bh(&bond->curr_slave_lock); 1730 } 1731 1732 if (bond_is_lb(bond)) { 1733 /* Must be called only after the slave has been 1734 * detached from the list and the curr_active_slave 1735 * has been cleared (if our_slave == old_current), 1736 * but before a new active slave is selected. 1737 */ 1738 bond_alb_deinit_slave(bond, slave); 1739 } 1740 1741 if (all) { 1742 rcu_assign_pointer(bond->curr_active_slave, NULL); 1743 } else if (oldcurrent == slave) { 1744 /* 1745 * Note that we hold RTNL over this sequence, so there 1746 * is no concern that another slave add/remove event 1747 * will interfere. 
1748 */ 1749 write_lock_bh(&bond->curr_slave_lock); 1750 1751 bond_select_active_slave(bond); 1752 1753 write_unlock_bh(&bond->curr_slave_lock); 1754 } 1755 1756 if (!bond_has_slaves(bond)) { 1757 bond_set_carrier(bond); 1758 eth_hw_addr_random(bond_dev); 1759 1760 if (vlan_uses_dev(bond_dev)) { 1761 pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", 1762 bond_dev->name, bond_dev->name); 1763 pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", 1764 bond_dev->name); 1765 } 1766 } 1767 1768 unblock_netpoll_tx(); 1769 synchronize_rcu(); 1770 1771 if (!bond_has_slaves(bond)) { 1772 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); 1773 call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); 1774 } 1775 1776 bond_compute_features(bond); 1777 if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1778 (old_features & NETIF_F_VLAN_CHALLENGED)) 1779 pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", 1780 bond_dev->name, slave_dev->name, bond_dev->name); 1781 1782 /* must do this from outside any spinlocks */ 1783 vlan_vids_del_by_dev(slave_dev, bond_dev); 1784 1785 /* If the mode USES_PRIMARY, then this cases was handled above by 1786 * bond_change_active_slave(..., NULL) 1787 */ 1788 if (!USES_PRIMARY(bond->params.mode)) { 1789 /* unset promiscuity level from slave 1790 * NOTE: The NETDEV_CHANGEADDR call above may change the value 1791 * of the IFF_PROMISC flag in the bond_dev, but we need the 1792 * value of that flag before that change, as that was the value 1793 * when this slave was attached, so we cache at the start of the 1794 * function and use it here. 
Same goes for ALLMULTI below 1795 */ 1796 if (old_flags & IFF_PROMISC) 1797 dev_set_promiscuity(slave_dev, -1); 1798 1799 /* unset allmulti level from slave */ 1800 if (old_flags & IFF_ALLMULTI) 1801 dev_set_allmulti(slave_dev, -1); 1802 1803 bond_hw_addr_flush(bond_dev, slave_dev); 1804 } 1805 1806 slave_disable_netpoll(slave); 1807 1808 /* close slave before restoring its mac address */ 1809 dev_close(slave_dev); 1810 1811 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { 1812 /* restore original ("permanent") mac address */ 1813 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1814 addr.sa_family = slave_dev->type; 1815 dev_set_mac_address(slave_dev, &addr); 1816 } 1817 1818 dev_set_mtu(slave_dev, slave->original_mtu); 1819 1820 slave_dev->priv_flags &= ~IFF_BONDING; 1821 1822 kfree(slave); 1823 1824 return 0; /* deletion OK */ 1825 } 1826 1827 /* A wrapper used because of ndo_del_link */ 1828 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1829 { 1830 return __bond_release_one(bond_dev, slave_dev, false); 1831 } 1832 1833 /* 1834 * First release a slave and then destroy the bond if no more slaves are left. 1835 * Must be under rtnl_lock when this function is called. 
 */
static int bond_release_and_destroy(struct net_device *bond_dev,
				    struct net_device *slave_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	int ret;

	ret = bond_release(bond_dev, slave_dev);
	/* only tear the bond down when the release worked and it was the
	 * last slave
	 */
	if (ret == 0 && !bond_has_slaves(bond)) {
		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
		pr_info("%s: destroying bond %s.\n",
			bond_dev->name, bond_dev->name);
		unregister_netdevice(bond_dev);
	}
	return ret;
}

/* Fill @info with the bond's mode, miimon value and slave count.
 * The slave count is read under bond->lock.  Always returns 0.
 */
static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
{
	struct bonding *bond = netdev_priv(bond_dev);

	info->bond_mode = bond->params.mode;
	info->miimon = bond->params.miimon;

	read_lock(&bond->lock);
	info->num_slaves = bond->slave_cnt;
	read_unlock(&bond->lock);

	return 0;
}

/* Fill @info for the slave at position info->slave_id in the bond's slave
 * iteration order (0-based).  Returns 0 when that slave exists, -ENODEV
 * otherwise.  The walk happens under bond->lock held for reading.
 */
static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	int i = 0, res = -ENODEV;
	struct slave *slave;

	read_lock(&bond->lock);
	bond_for_each_slave(bond, slave, iter) {
		if (i++ == (int)info->slave_id) {
			res = 0;
			/* NOTE(review): unbounded copy - assumes slave_name is
			 * at least as large as the device name buffer; confirm
			 * against struct ifslave.
			 */
			strcpy(info->slave_name, slave->dev->name);
			info->link = slave->link;
			info->state = bond_slave_state(slave);
			info->link_failure_count = slave->link_failure_count;
			break;
		}
	}
	read_unlock(&bond->lock);

	return res;
}

/*-------------------------------- Monitoring -------------------------------*/


/* Inspection phase of the MII monitor.
 *
 * Samples the link of every slave (RCU iteration) and advances the per-slave
 * UP -> FAIL -> DOWN and DOWN -> BACK -> UP transitions, counting
 * slave->delay down by one per call while a transition is pending.  Final
 * transitions are not applied here: they are only recorded in
 * slave->new_link for bond_miimon_commit().
 *
 * While the bond has no curr_active_slave the up-delay is skipped for the
 * first slave that comes back.
 *
 * Returns the number of slaves with a transition recorded in new_link.
 */
static int bond_miimon_inspect(struct bonding *bond)
{
	int link_state, commit = 0;
	struct list_head *iter;
	struct slave *slave;
	bool ignore_updelay;

	ignore_updelay = !bond->curr_active_slave ? true : false;

	bond_for_each_slave_rcu(bond, slave, iter) {
		slave->new_link = BOND_LINK_NOCHANGE;

		link_state = bond_check_dev_link(bond, slave->dev, 0);

		switch (slave->link) {
		case BOND_LINK_UP:
			if (link_state)
				continue;

			/* link just dropped: start the down-delay countdown */
			slave->link = BOND_LINK_FAIL;
			slave->delay = bond->params.downdelay;
			if (slave->delay) {
				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n",
					bond->dev->name,
					(bond->params.mode ==
					 BOND_MODE_ACTIVEBACKUP) ?
					(bond_is_active_slave(slave) ?
					 "active " : "backup ") : "",
					slave->dev->name,
					bond->params.downdelay * bond->params.miimon);
			}
			/*FALLTHRU*/
		case BOND_LINK_FAIL:
			if (link_state) {
				/*
				 * recovered before downdelay expired
				 */
				slave->link = BOND_LINK_UP;
				slave->jiffies = jiffies;
				pr_info("%s: link status up again after %d ms for interface %s.\n",
					bond->dev->name,
					(bond->params.downdelay - slave->delay) *
					bond->params.miimon,
					slave->dev->name);
				continue;
			}

			/* countdown expired: ask the commit phase to take it down */
			if (slave->delay <= 0) {
				slave->new_link = BOND_LINK_DOWN;
				commit++;
				continue;
			}

			slave->delay--;
			break;

		case BOND_LINK_DOWN:
			if (!link_state)
				continue;

			/* link just came back: start the up-delay countdown */
			slave->link = BOND_LINK_BACK;
			slave->delay = bond->params.updelay;

			if (slave->delay) {
				pr_info("%s: link status up for interface %s, enabling it in %d ms.\n",
					bond->dev->name, slave->dev->name,
					ignore_updelay ? 0 :
					bond->params.updelay *
					bond->params.miimon);
			}
			/*FALLTHRU*/
		case BOND_LINK_BACK:
			if (!link_state) {
				slave->link = BOND_LINK_DOWN;
				pr_info("%s: link status down again after %d ms for interface %s.\n",
					bond->dev->name,
					(bond->params.updelay - slave->delay) *
					bond->params.miimon,
					slave->dev->name);

				continue;
			}

			if (ignore_updelay)
				slave->delay = 0;

			if (slave->delay <= 0) {
				slave->new_link = BOND_LINK_UP;
				commit++;
				/* only the first recovering slave skips the delay */
				ignore_updelay = false;
				continue;
			}

			slave->delay--;
			break;
		}
	}

	return commit;
}

/* Commit phase of the MII monitor.
 *
 * Applies the transitions that bond_miimon_inspect() recorded in
 * slave->new_link: updates slave->link, the active/backup flags, notifies
 * the 802.3ad and ALB code, and re-selects the active slave when needed.
 * Finishes by refreshing the bond's carrier state.
 *
 * The failover path asserts that RTNL is held (ASSERT_RTNL()).
 */
static void bond_miimon_commit(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter) {
		switch (slave->new_link) {
		case BOND_LINK_NOCHANGE:
			continue;

		case BOND_LINK_UP:
			slave->link = BOND_LINK_UP;
			slave->jiffies = jiffies;

			if (bond->params.mode == BOND_MODE_8023AD) {
				/* prevent it from being the active one */
				bond_set_backup_slave(slave);
			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
				/* make it immediately active */
				bond_set_active_slave(slave);
			} else if (slave != bond->primary_slave) {
				/* prevent it from being the active one */
				bond_set_backup_slave(slave);
			}

			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex.\n",
				bond->dev->name, slave->dev->name,
				slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,
				slave->duplex ? "full" : "half");

			/* notify ad that the link status has changed */
			if (bond->params.mode == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(slave, BOND_LINK_UP);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, slave,
							    BOND_LINK_UP);

			/* re-select when there is no active slave yet or the
			 * primary came back
			 */
			if (!bond->curr_active_slave ||
			    (slave == bond->primary_slave))
				goto do_failover;

			continue;

		case BOND_LINK_DOWN:
			/* saturating counter */
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

			slave->link = BOND_LINK_DOWN;

			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
			    bond->params.mode == BOND_MODE_8023AD)
				bond_set_slave_inactive_flags(slave);

			pr_info("%s: link status definitely down for interface %s, disabling it\n",
				bond->dev->name, slave->dev->name);

			if (bond->params.mode == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(slave,
							    BOND_LINK_DOWN);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, slave,
							    BOND_LINK_DOWN);

			if (slave == bond->curr_active_slave)
				goto do_failover;

			continue;

		default:
			pr_err("%s: invalid new link %d on slave %s\n",
			       bond->dev->name, slave->new_link,
			       slave->dev->name);
			slave->new_link = BOND_LINK_NOCHANGE;

			continue;
		}

		/* reached only through the gotos above; every other path in
		 * the switch continues with the next slave
		 */
do_failover:
		ASSERT_RTNL();
		block_netpoll_tx();
		write_lock_bh(&bond->curr_slave_lock);
		bond_select_active_slave(bond);
		write_unlock_bh(&bond->curr_slave_lock);
		unblock_netpoll_tx();
	}

	bond_set_carrier(bond);
}

/*
 * bond_mii_monitor
 *
 * Really a wrapper that splits the mii monitor into two phases: an
 * inspection, then (if inspection indicates something needs to be done)
 * an acquisition of appropriate locks followed by a commit phase to
 * implement whatever link state changes are indicated.
2092 */ 2093 void bond_mii_monitor(struct work_struct *work) 2094 { 2095 struct bonding *bond = container_of(work, struct bonding, 2096 mii_work.work); 2097 bool should_notify_peers = false; 2098 unsigned long delay; 2099 2100 delay = msecs_to_jiffies(bond->params.miimon); 2101 2102 if (!bond_has_slaves(bond)) 2103 goto re_arm; 2104 2105 rcu_read_lock(); 2106 2107 should_notify_peers = bond_should_notify_peers(bond); 2108 2109 if (bond_miimon_inspect(bond)) { 2110 rcu_read_unlock(); 2111 2112 /* Race avoidance with bond_close cancel of workqueue */ 2113 if (!rtnl_trylock()) { 2114 delay = 1; 2115 should_notify_peers = false; 2116 goto re_arm; 2117 } 2118 2119 bond_miimon_commit(bond); 2120 2121 rtnl_unlock(); /* might sleep, hold no other locks */ 2122 } else 2123 rcu_read_unlock(); 2124 2125 re_arm: 2126 if (bond->params.miimon) 2127 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2128 2129 if (should_notify_peers) { 2130 if (!rtnl_trylock()) 2131 return; 2132 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); 2133 rtnl_unlock(); 2134 } 2135 } 2136 2137 static bool bond_has_this_ip(struct bonding *bond, __be32 ip) 2138 { 2139 struct net_device *upper; 2140 struct list_head *iter; 2141 bool ret = false; 2142 2143 if (ip == bond_confirm_addr(bond->dev, 0, ip)) 2144 return true; 2145 2146 rcu_read_lock(); 2147 netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { 2148 if (ip == bond_confirm_addr(upper, 0, ip)) { 2149 ret = true; 2150 break; 2151 } 2152 } 2153 rcu_read_unlock(); 2154 2155 return ret; 2156 } 2157 2158 /* 2159 * We go to the (large) trouble of VLAN tagging ARP frames because 2160 * switches in VLAN mode (especially if ports are configured as 2161 * "native" to a VLAN) might not pass non-tagged frames. 
2162 */ 2163 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) 2164 { 2165 struct sk_buff *skb; 2166 2167 pr_debug("arp %d on slave %s: dst %pI4 src %pI4 vid %d\n", arp_op, 2168 slave_dev->name, &dest_ip, &src_ip, vlan_id); 2169 2170 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2171 NULL, slave_dev->dev_addr, NULL); 2172 2173 if (!skb) { 2174 pr_err("ARP packet allocation failed\n"); 2175 return; 2176 } 2177 if (vlan_id) { 2178 skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_id); 2179 if (!skb) { 2180 pr_err("failed to insert VLAN tag\n"); 2181 return; 2182 } 2183 } 2184 arp_xmit(skb); 2185 } 2186 2187 2188 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2189 { 2190 struct net_device *upper, *vlan_upper; 2191 struct list_head *iter, *vlan_iter; 2192 struct rtable *rt; 2193 __be32 *targets = bond->params.arp_targets, addr; 2194 int i, vlan_id; 2195 2196 for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { 2197 pr_debug("basa: target %pI4\n", &targets[i]); 2198 2199 /* Find out through which dev should the packet go */ 2200 rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 2201 RTO_ONLINK, 0); 2202 if (IS_ERR(rt)) { 2203 pr_debug("%s: no route to arp_ip_target %pI4\n", 2204 bond->dev->name, &targets[i]); 2205 continue; 2206 } 2207 2208 vlan_id = 0; 2209 2210 /* bond device itself */ 2211 if (rt->dst.dev == bond->dev) 2212 goto found; 2213 2214 rcu_read_lock(); 2215 /* first we search only for vlan devices. for every vlan 2216 * found we verify its upper dev list, searching for the 2217 * rt->dst.dev. If found we save the tag of the vlan and 2218 * proceed to send the packet. 2219 * 2220 * TODO: QinQ? 
2221 */ 2222 netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, 2223 vlan_iter) { 2224 if (!is_vlan_dev(vlan_upper)) 2225 continue; 2226 netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, 2227 iter) { 2228 if (upper == rt->dst.dev) { 2229 vlan_id = vlan_dev_vlan_id(vlan_upper); 2230 rcu_read_unlock(); 2231 goto found; 2232 } 2233 } 2234 } 2235 2236 /* if the device we're looking for is not on top of any of 2237 * our upper vlans, then just search for any dev that 2238 * matches, and in case it's a vlan - save the id 2239 */ 2240 netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { 2241 if (upper == rt->dst.dev) { 2242 /* if it's a vlan - get its VID */ 2243 if (is_vlan_dev(upper)) 2244 vlan_id = vlan_dev_vlan_id(upper); 2245 2246 rcu_read_unlock(); 2247 goto found; 2248 } 2249 } 2250 rcu_read_unlock(); 2251 2252 /* Not our device - skip */ 2253 pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", 2254 bond->dev->name, &targets[i], 2255 rt->dst.dev ? rt->dst.dev->name : "NULL"); 2256 2257 ip_rt_put(rt); 2258 continue; 2259 2260 found: 2261 addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); 2262 ip_rt_put(rt); 2263 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2264 addr, vlan_id); 2265 } 2266 } 2267 2268 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 2269 { 2270 int i; 2271 2272 if (!sip || !bond_has_this_ip(bond, tip)) { 2273 pr_debug("bva: sip %pI4 tip %pI4 not found\n", &sip, &tip); 2274 return; 2275 } 2276 2277 i = bond_get_targets_ip(bond->params.arp_targets, sip); 2278 if (i == -1) { 2279 pr_debug("bva: sip %pI4 not found in targets\n", &sip); 2280 return; 2281 } 2282 slave->last_arp_rx = jiffies; 2283 slave->target_last_arp_rx[i] = jiffies; 2284 } 2285 2286 int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, 2287 struct slave *slave) 2288 { 2289 struct arphdr *arp = (struct arphdr *)skb->data; 2290 unsigned char *arp_ptr; 2291 __be32 sip, tip; 2292 int alen; 2293 
2294 if (skb->protocol != __cpu_to_be16(ETH_P_ARP)) 2295 return RX_HANDLER_ANOTHER; 2296 2297 read_lock(&bond->lock); 2298 2299 if (!slave_do_arp_validate(bond, slave)) 2300 goto out_unlock; 2301 2302 alen = arp_hdr_len(bond->dev); 2303 2304 pr_debug("bond_arp_rcv: bond %s skb->dev %s\n", 2305 bond->dev->name, skb->dev->name); 2306 2307 if (alen > skb_headlen(skb)) { 2308 arp = kmalloc(alen, GFP_ATOMIC); 2309 if (!arp) 2310 goto out_unlock; 2311 if (skb_copy_bits(skb, 0, arp, alen) < 0) 2312 goto out_unlock; 2313 } 2314 2315 if (arp->ar_hln != bond->dev->addr_len || 2316 skb->pkt_type == PACKET_OTHERHOST || 2317 skb->pkt_type == PACKET_LOOPBACK || 2318 arp->ar_hrd != htons(ARPHRD_ETHER) || 2319 arp->ar_pro != htons(ETH_P_IP) || 2320 arp->ar_pln != 4) 2321 goto out_unlock; 2322 2323 arp_ptr = (unsigned char *)(arp + 1); 2324 arp_ptr += bond->dev->addr_len; 2325 memcpy(&sip, arp_ptr, 4); 2326 arp_ptr += 4 + bond->dev->addr_len; 2327 memcpy(&tip, arp_ptr, 4); 2328 2329 pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n", 2330 bond->dev->name, slave->dev->name, bond_slave_state(slave), 2331 bond->params.arp_validate, slave_do_arp_validate(bond, slave), 2332 &sip, &tip); 2333 2334 /* 2335 * Backup slaves won't see the ARP reply, but do come through 2336 * here for each ARP probe (so we swap the sip/tip to validate 2337 * the probe). In a "redundant switch, common router" type of 2338 * configuration, the ARP probe will (hopefully) travel from 2339 * the active, through one switch, the router, then the other 2340 * switch before reaching the backup. 2341 * 2342 * We 'trust' the arp requests if there is an active slave and 2343 * it received valid arp reply(s) after it became active. This 2344 * is done to avoid endless looping when we can't reach the 2345 * arp_ip_target and fool ourselves with our own arp requests. 
2346 */ 2347 if (bond_is_active_slave(slave)) 2348 bond_validate_arp(bond, slave, sip, tip); 2349 else if (bond->curr_active_slave && 2350 time_after(slave_last_rx(bond, bond->curr_active_slave), 2351 bond->curr_active_slave->jiffies)) 2352 bond_validate_arp(bond, slave, tip, sip); 2353 2354 out_unlock: 2355 read_unlock(&bond->lock); 2356 if (arp != (struct arphdr *)skb->data) 2357 kfree(arp); 2358 return RX_HANDLER_ANOTHER; 2359 } 2360 2361 /* function to verify if we're in the arp_interval timeslice, returns true if 2362 * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + 2363 * arp_interval/2) . the arp_interval/2 is needed for really fast networks. 2364 */ 2365 static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, 2366 int mod) 2367 { 2368 int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2369 2370 return time_in_range(jiffies, 2371 last_act - delta_in_ticks, 2372 last_act + mod * delta_in_ticks + delta_in_ticks/2); 2373 } 2374 2375 /* 2376 * this function is called regularly to monitor each slave's link 2377 * ensuring that traffic is being sent and received when arp monitoring 2378 * is used in load-balancing mode. if the adapter has been dormant, then an 2379 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2380 * arp monitoring in active backup mode. 2381 */ 2382 void bond_loadbalance_arp_mon(struct work_struct *work) 2383 { 2384 struct bonding *bond = container_of(work, struct bonding, 2385 arp_work.work); 2386 struct slave *slave, *oldcurrent; 2387 struct list_head *iter; 2388 int do_failover = 0; 2389 2390 if (!bond_has_slaves(bond)) 2391 goto re_arm; 2392 2393 rcu_read_lock(); 2394 2395 oldcurrent = ACCESS_ONCE(bond->curr_active_slave); 2396 /* see if any of the previous devices are up now (i.e. they have 2397 * xmt and rcv traffic). the curr_active_slave does not come into 2398 * the picture unless it is null. 
also, slave->jiffies is not needed 2399 * here because we send an arp on each slave and give a slave as 2400 * long as it needs to get the tx/rx within the delta. 2401 * TODO: what about up/down delay in arp mode? it wasn't here before 2402 * so it can wait 2403 */ 2404 bond_for_each_slave_rcu(bond, slave, iter) { 2405 unsigned long trans_start = dev_trans_start(slave->dev); 2406 2407 if (slave->link != BOND_LINK_UP) { 2408 if (bond_time_in_interval(bond, trans_start, 1) && 2409 bond_time_in_interval(bond, slave->dev->last_rx, 1)) { 2410 2411 slave->link = BOND_LINK_UP; 2412 bond_set_active_slave(slave); 2413 2414 /* primary_slave has no meaning in round-robin 2415 * mode. the window of a slave being up and 2416 * curr_active_slave being null after enslaving 2417 * is closed. 2418 */ 2419 if (!oldcurrent) { 2420 pr_info("%s: link status definitely up for interface %s, ", 2421 bond->dev->name, 2422 slave->dev->name); 2423 do_failover = 1; 2424 } else { 2425 pr_info("%s: interface %s is now up\n", 2426 bond->dev->name, 2427 slave->dev->name); 2428 } 2429 } 2430 } else { 2431 /* slave->link == BOND_LINK_UP */ 2432 2433 /* not all switches will respond to an arp request 2434 * when the source ip is 0, so don't take the link down 2435 * if we don't know our ip yet 2436 */ 2437 if (!bond_time_in_interval(bond, trans_start, 2) || 2438 !bond_time_in_interval(bond, slave->dev->last_rx, 2)) { 2439 2440 slave->link = BOND_LINK_DOWN; 2441 bond_set_backup_slave(slave); 2442 2443 if (slave->link_failure_count < UINT_MAX) 2444 slave->link_failure_count++; 2445 2446 pr_info("%s: interface %s is now down.\n", 2447 bond->dev->name, 2448 slave->dev->name); 2449 2450 if (slave == oldcurrent) 2451 do_failover = 1; 2452 } 2453 } 2454 2455 /* note: if switch is in round-robin mode, all links 2456 * must tx arp to ensure all links rx an arp - otherwise 2457 * links may oscillate or not come up at all; if switch is 2458 * in something like xor mode, there is nothing we can 2459 * do - all 
replies will be rx'ed on same link causing slaves 2460 * to be unstable during low/no traffic periods 2461 */ 2462 if (IS_UP(slave->dev)) 2463 bond_arp_send_all(bond, slave); 2464 } 2465 2466 rcu_read_unlock(); 2467 2468 if (do_failover) { 2469 /* the bond_select_active_slave must hold RTNL 2470 * and curr_slave_lock for write. 2471 */ 2472 if (!rtnl_trylock()) 2473 goto re_arm; 2474 block_netpoll_tx(); 2475 write_lock_bh(&bond->curr_slave_lock); 2476 2477 bond_select_active_slave(bond); 2478 2479 write_unlock_bh(&bond->curr_slave_lock); 2480 unblock_netpoll_tx(); 2481 rtnl_unlock(); 2482 } 2483 2484 re_arm: 2485 if (bond->params.arp_interval) 2486 queue_delayed_work(bond->wq, &bond->arp_work, 2487 msecs_to_jiffies(bond->params.arp_interval)); 2488 } 2489 2490 /* 2491 * Called to inspect slaves for active-backup mode ARP monitor link state 2492 * changes. Sets new_link in slaves to specify what action should take 2493 * place for the slave. Returns 0 if no changes are found, >0 if changes 2494 * to link states must be committed. 2495 * 2496 * Called with rcu_read_lock hold. 2497 */ 2498 static int bond_ab_arp_inspect(struct bonding *bond) 2499 { 2500 unsigned long trans_start, last_rx; 2501 struct list_head *iter; 2502 struct slave *slave; 2503 int commit = 0; 2504 2505 bond_for_each_slave_rcu(bond, slave, iter) { 2506 slave->new_link = BOND_LINK_NOCHANGE; 2507 last_rx = slave_last_rx(bond, slave); 2508 2509 if (slave->link != BOND_LINK_UP) { 2510 if (bond_time_in_interval(bond, last_rx, 1)) { 2511 slave->new_link = BOND_LINK_UP; 2512 commit++; 2513 } 2514 continue; 2515 } 2516 2517 /* 2518 * Give slaves 2*delta after being enslaved or made 2519 * active. This avoids bouncing, as the last receive 2520 * times need a full ARP monitor cycle to be updated. 
2521 */ 2522 if (bond_time_in_interval(bond, slave->jiffies, 2)) 2523 continue; 2524 2525 /* 2526 * Backup slave is down if: 2527 * - No current_arp_slave AND 2528 * - more than 3*delta since last receive AND 2529 * - the bond has an IP address 2530 * 2531 * Note: a non-null current_arp_slave indicates 2532 * the curr_active_slave went down and we are 2533 * searching for a new one; under this condition 2534 * we only take the curr_active_slave down - this 2535 * gives each slave a chance to tx/rx traffic 2536 * before being taken out 2537 */ 2538 if (!bond_is_active_slave(slave) && 2539 !bond->current_arp_slave && 2540 !bond_time_in_interval(bond, last_rx, 3)) { 2541 slave->new_link = BOND_LINK_DOWN; 2542 commit++; 2543 } 2544 2545 /* 2546 * Active slave is down if: 2547 * - more than 2*delta since transmitting OR 2548 * - (more than 2*delta since receive AND 2549 * the bond has an IP address) 2550 */ 2551 trans_start = dev_trans_start(slave->dev); 2552 if (bond_is_active_slave(slave) && 2553 (!bond_time_in_interval(bond, trans_start, 2) || 2554 !bond_time_in_interval(bond, last_rx, 2))) { 2555 slave->new_link = BOND_LINK_DOWN; 2556 commit++; 2557 } 2558 } 2559 2560 return commit; 2561 } 2562 2563 /* 2564 * Called to commit link state changes noted by inspection step of 2565 * active-backup mode ARP monitor. 2566 * 2567 * Called with RTNL hold. 
 */
static void bond_ab_arp_commit(struct bonding *bond)
{
	unsigned long trans_start;
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter) {
		switch (slave->new_link) {
		case BOND_LINK_NOCHANGE:
			continue;

		case BOND_LINK_UP:
			trans_start = dev_trans_start(slave->dev);
			/* accept the transition for any slave other than the
			 * current active one; the second operand additionally
			 * requires a recent transmit when there is no active
			 * slave
			 */
			if (bond->curr_active_slave != slave ||
			    (!bond->curr_active_slave &&
			     bond_time_in_interval(bond, trans_start, 1))) {
				slave->link = BOND_LINK_UP;
				/* a slave is confirmed up: stop probing via
				 * the current_arp_slave
				 */
				if (bond->current_arp_slave) {
					bond_set_slave_inactive_flags(
						bond->current_arp_slave);
					bond->current_arp_slave = NULL;
				}

				pr_info("%s: link status definitely up for interface %s.\n",
					bond->dev->name, slave->dev->name);

				/* reselect when there is no active slave or
				 * the primary just came up
				 */
				if (!bond->curr_active_slave ||
				    (slave == bond->primary_slave))
					goto do_failover;

			}

			continue;

		case BOND_LINK_DOWN:
			/* saturating counter: never wraps past UINT_MAX */
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

			slave->link = BOND_LINK_DOWN;
			bond_set_slave_inactive_flags(slave);

			pr_info("%s: link status definitely down for interface %s, disabling it\n",
				bond->dev->name, slave->dev->name);

			/* losing the active slave forces a reselection */
			if (slave == bond->curr_active_slave) {
				bond->current_arp_slave = NULL;
				goto do_failover;
			}

			continue;

		default:
			pr_err("%s: impossible: new_link %d on slave %s\n",
			       bond->dev->name, slave->new_link,
			       slave->dev->name);
			continue;
		}

		/* Every case above ends in "continue", so this tail is only
		 * reached through "goto do_failover"; the loop then moves on
		 * to the next slave.
		 */
do_failover:
		ASSERT_RTNL();
		block_netpoll_tx();
		write_lock_bh(&bond->curr_slave_lock);
		bond_select_active_slave(bond);
		write_unlock_bh(&bond->curr_slave_lock);
		unblock_netpoll_tx();
	}

	/* recompute the bond's carrier once all slaves are processed */
	bond_set_carrier(bond);
}

/*
 * Send ARP probes for active-backup mode ARP monitor.
 *
 * Called with rcu_read_lock held.
2643 */ 2644 static void bond_ab_arp_probe(struct bonding *bond) 2645 { 2646 struct slave *slave, *before = NULL, *new_slave = NULL, 2647 *curr_arp_slave = rcu_dereference(bond->current_arp_slave); 2648 struct list_head *iter; 2649 bool found = false; 2650 2651 read_lock(&bond->curr_slave_lock); 2652 2653 if (curr_arp_slave && bond->curr_active_slave) 2654 pr_info("PROBE: c_arp %s && cas %s BAD\n", 2655 curr_arp_slave->dev->name, 2656 bond->curr_active_slave->dev->name); 2657 2658 if (bond->curr_active_slave) { 2659 bond_arp_send_all(bond, bond->curr_active_slave); 2660 read_unlock(&bond->curr_slave_lock); 2661 return; 2662 } 2663 2664 read_unlock(&bond->curr_slave_lock); 2665 2666 /* if we don't have a curr_active_slave, search for the next available 2667 * backup slave from the current_arp_slave and make it the candidate 2668 * for becoming the curr_active_slave 2669 */ 2670 2671 if (!curr_arp_slave) { 2672 curr_arp_slave = bond_first_slave_rcu(bond); 2673 if (!curr_arp_slave) 2674 return; 2675 } 2676 2677 bond_set_slave_inactive_flags(curr_arp_slave); 2678 2679 bond_for_each_slave_rcu(bond, slave, iter) { 2680 if (!found && !before && IS_UP(slave->dev)) 2681 before = slave; 2682 2683 if (found && !new_slave && IS_UP(slave->dev)) 2684 new_slave = slave; 2685 /* if the link state is up at this point, we 2686 * mark it down - this can happen if we have 2687 * simultaneous link failures and 2688 * reselect_active_interface doesn't make this 2689 * one the current slave so it is still marked 2690 * up when it is actually down 2691 */ 2692 if (!IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { 2693 slave->link = BOND_LINK_DOWN; 2694 if (slave->link_failure_count < UINT_MAX) 2695 slave->link_failure_count++; 2696 2697 bond_set_slave_inactive_flags(slave); 2698 2699 pr_info("%s: backup interface %s is now down.\n", 2700 bond->dev->name, slave->dev->name); 2701 } 2702 if (slave == curr_arp_slave) 2703 found = true; 2704 } 2705 2706 if (!new_slave && before) 2707 
new_slave = before; 2708 2709 if (!new_slave) 2710 return; 2711 2712 new_slave->link = BOND_LINK_BACK; 2713 bond_set_slave_active_flags(new_slave); 2714 bond_arp_send_all(bond, new_slave); 2715 new_slave->jiffies = jiffies; 2716 rcu_assign_pointer(bond->current_arp_slave, new_slave); 2717 } 2718 2719 void bond_activebackup_arp_mon(struct work_struct *work) 2720 { 2721 struct bonding *bond = container_of(work, struct bonding, 2722 arp_work.work); 2723 bool should_notify_peers = false; 2724 int delta_in_ticks; 2725 2726 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 2727 2728 if (!bond_has_slaves(bond)) 2729 goto re_arm; 2730 2731 rcu_read_lock(); 2732 2733 should_notify_peers = bond_should_notify_peers(bond); 2734 2735 if (bond_ab_arp_inspect(bond)) { 2736 rcu_read_unlock(); 2737 2738 /* Race avoidance with bond_close flush of workqueue */ 2739 if (!rtnl_trylock()) { 2740 delta_in_ticks = 1; 2741 should_notify_peers = false; 2742 goto re_arm; 2743 } 2744 2745 bond_ab_arp_commit(bond); 2746 2747 rtnl_unlock(); 2748 rcu_read_lock(); 2749 } 2750 2751 bond_ab_arp_probe(bond); 2752 rcu_read_unlock(); 2753 2754 re_arm: 2755 if (bond->params.arp_interval) 2756 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 2757 2758 if (should_notify_peers) { 2759 if (!rtnl_trylock()) 2760 return; 2761 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); 2762 rtnl_unlock(); 2763 } 2764 } 2765 2766 /*-------------------------- netdev event handling --------------------------*/ 2767 2768 /* 2769 * Change device name 2770 */ 2771 static int bond_event_changename(struct bonding *bond) 2772 { 2773 bond_remove_proc_entry(bond); 2774 bond_create_proc_entry(bond); 2775 2776 bond_debug_reregister(bond); 2777 2778 return NOTIFY_DONE; 2779 } 2780 2781 static int bond_master_netdev_event(unsigned long event, 2782 struct net_device *bond_dev) 2783 { 2784 struct bonding *event_bond = netdev_priv(bond_dev); 2785 2786 switch (event) { 2787 case NETDEV_CHANGENAME: 2788 
return bond_event_changename(event_bond); 2789 case NETDEV_UNREGISTER: 2790 bond_remove_proc_entry(event_bond); 2791 break; 2792 case NETDEV_REGISTER: 2793 bond_create_proc_entry(event_bond); 2794 break; 2795 case NETDEV_NOTIFY_PEERS: 2796 if (event_bond->send_peer_notif) 2797 event_bond->send_peer_notif--; 2798 break; 2799 default: 2800 break; 2801 } 2802 2803 return NOTIFY_DONE; 2804 } 2805 2806 static int bond_slave_netdev_event(unsigned long event, 2807 struct net_device *slave_dev) 2808 { 2809 struct slave *slave = bond_slave_get_rtnl(slave_dev); 2810 struct bonding *bond; 2811 struct net_device *bond_dev; 2812 u32 old_speed; 2813 u8 old_duplex; 2814 2815 /* A netdev event can be generated while enslaving a device 2816 * before netdev_rx_handler_register is called in which case 2817 * slave will be NULL 2818 */ 2819 if (!slave) 2820 return NOTIFY_DONE; 2821 bond_dev = slave->bond->dev; 2822 bond = slave->bond; 2823 2824 switch (event) { 2825 case NETDEV_UNREGISTER: 2826 if (bond_dev->type != ARPHRD_ETHER) 2827 bond_release_and_destroy(bond_dev, slave_dev); 2828 else 2829 bond_release(bond_dev, slave_dev); 2830 break; 2831 case NETDEV_UP: 2832 case NETDEV_CHANGE: 2833 old_speed = slave->speed; 2834 old_duplex = slave->duplex; 2835 2836 bond_update_speed_duplex(slave); 2837 2838 if (bond->params.mode == BOND_MODE_8023AD) { 2839 if (old_speed != slave->speed) 2840 bond_3ad_adapter_speed_changed(slave); 2841 if (old_duplex != slave->duplex) 2842 bond_3ad_adapter_duplex_changed(slave); 2843 } 2844 break; 2845 case NETDEV_DOWN: 2846 /* 2847 * ... Or is it this? 2848 */ 2849 break; 2850 case NETDEV_CHANGEMTU: 2851 /* 2852 * TODO: Should slaves be allowed to 2853 * independently alter their MTU? For 2854 * an active-backup bond, slaves need 2855 * not be the same type of device, so 2856 * MTUs may vary. For other modes, 2857 * slaves arguably should have the 2858 * same MTUs. 
To do this, we'd need to 2859 * take over the slave's change_mtu 2860 * function for the duration of their 2861 * servitude. 2862 */ 2863 break; 2864 case NETDEV_CHANGENAME: 2865 /* 2866 * TODO: handle changing the primary's name 2867 */ 2868 break; 2869 case NETDEV_FEAT_CHANGE: 2870 bond_compute_features(bond); 2871 break; 2872 case NETDEV_RESEND_IGMP: 2873 /* Propagate to master device */ 2874 call_netdevice_notifiers(event, slave->bond->dev); 2875 break; 2876 default: 2877 break; 2878 } 2879 2880 return NOTIFY_DONE; 2881 } 2882 2883 /* 2884 * bond_netdev_event: handle netdev notifier chain events. 2885 * 2886 * This function receives events for the netdev chain. The caller (an 2887 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 2888 * locks for us to safely manipulate the slave devices (RTNL lock, 2889 * dev_probe_lock). 2890 */ 2891 static int bond_netdev_event(struct notifier_block *this, 2892 unsigned long event, void *ptr) 2893 { 2894 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 2895 2896 pr_debug("event_dev: %s, event: %lx\n", 2897 event_dev ? 
event_dev->name : "None", 2898 event); 2899 2900 if (!(event_dev->priv_flags & IFF_BONDING)) 2901 return NOTIFY_DONE; 2902 2903 if (event_dev->flags & IFF_MASTER) { 2904 pr_debug("IFF_MASTER\n"); 2905 return bond_master_netdev_event(event, event_dev); 2906 } 2907 2908 if (event_dev->flags & IFF_SLAVE) { 2909 pr_debug("IFF_SLAVE\n"); 2910 return bond_slave_netdev_event(event, event_dev); 2911 } 2912 2913 return NOTIFY_DONE; 2914 } 2915 2916 static struct notifier_block bond_netdev_notifier = { 2917 .notifier_call = bond_netdev_event, 2918 }; 2919 2920 /*---------------------------- Hashing Policies -----------------------------*/ 2921 2922 /* L2 hash helper */ 2923 static inline u32 bond_eth_hash(struct sk_buff *skb) 2924 { 2925 struct ethhdr *data = (struct ethhdr *)skb->data; 2926 2927 if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) 2928 return data->h_dest[5] ^ data->h_source[5]; 2929 2930 return 0; 2931 } 2932 2933 /* Extract the appropriate headers based on bond's xmit policy */ 2934 static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, 2935 struct flow_keys *fk) 2936 { 2937 const struct ipv6hdr *iph6; 2938 const struct iphdr *iph; 2939 int noff, proto = -1; 2940 2941 if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) 2942 return skb_flow_dissect(skb, fk); 2943 2944 fk->ports = 0; 2945 noff = skb_network_offset(skb); 2946 if (skb->protocol == htons(ETH_P_IP)) { 2947 if (!pskb_may_pull(skb, noff + sizeof(*iph))) 2948 return false; 2949 iph = ip_hdr(skb); 2950 fk->src = iph->saddr; 2951 fk->dst = iph->daddr; 2952 noff += iph->ihl << 2; 2953 if (!ip_is_fragment(iph)) 2954 proto = iph->protocol; 2955 } else if (skb->protocol == htons(ETH_P_IPV6)) { 2956 if (!pskb_may_pull(skb, noff + sizeof(*iph6))) 2957 return false; 2958 iph6 = ipv6_hdr(skb); 2959 fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); 2960 fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); 2961 noff += sizeof(*iph6); 2962 proto = iph6->nexthdr; 2963 } else 
{ 2964 return false; 2965 } 2966 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) 2967 fk->ports = skb_flow_get_ports(skb, noff, proto); 2968 2969 return true; 2970 } 2971 2972 /** 2973 * bond_xmit_hash - generate a hash value based on the xmit policy 2974 * @bond: bonding device 2975 * @skb: buffer to use for headers 2976 * @count: modulo value 2977 * 2978 * This function will extract the necessary headers from the skb buffer and use 2979 * them to generate a hash based on the xmit_policy set in the bonding device 2980 * which will be reduced modulo count before returning. 2981 */ 2982 int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count) 2983 { 2984 struct flow_keys flow; 2985 u32 hash; 2986 2987 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || 2988 !bond_flow_dissect(bond, skb, &flow)) 2989 return bond_eth_hash(skb) % count; 2990 2991 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || 2992 bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) 2993 hash = bond_eth_hash(skb); 2994 else 2995 hash = (__force u32)flow.ports; 2996 hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; 2997 hash ^= (hash >> 16); 2998 hash ^= (hash >> 8); 2999 3000 return hash % count; 3001 } 3002 3003 /*-------------------------- Device entry points ----------------------------*/ 3004 3005 static void bond_work_init_all(struct bonding *bond) 3006 { 3007 INIT_DELAYED_WORK(&bond->mcast_work, 3008 bond_resend_igmp_join_requests_delayed); 3009 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 3010 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 3011 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) 3012 INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon); 3013 else 3014 INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); 3015 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3016 } 3017 3018 static void bond_work_cancel_all(struct bonding *bond) 3019 { 3020 
cancel_delayed_work_sync(&bond->mii_work); 3021 cancel_delayed_work_sync(&bond->arp_work); 3022 cancel_delayed_work_sync(&bond->alb_work); 3023 cancel_delayed_work_sync(&bond->ad_work); 3024 cancel_delayed_work_sync(&bond->mcast_work); 3025 } 3026 3027 static int bond_open(struct net_device *bond_dev) 3028 { 3029 struct bonding *bond = netdev_priv(bond_dev); 3030 struct list_head *iter; 3031 struct slave *slave; 3032 3033 /* reset slave->backup and slave->inactive */ 3034 read_lock(&bond->lock); 3035 if (bond_has_slaves(bond)) { 3036 read_lock(&bond->curr_slave_lock); 3037 bond_for_each_slave(bond, slave, iter) { 3038 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 3039 && (slave != bond->curr_active_slave)) { 3040 bond_set_slave_inactive_flags(slave); 3041 } else { 3042 bond_set_slave_active_flags(slave); 3043 } 3044 } 3045 read_unlock(&bond->curr_slave_lock); 3046 } 3047 read_unlock(&bond->lock); 3048 3049 bond_work_init_all(bond); 3050 3051 if (bond_is_lb(bond)) { 3052 /* bond_alb_initialize must be called before the timer 3053 * is started. 3054 */ 3055 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) 3056 return -ENOMEM; 3057 queue_delayed_work(bond->wq, &bond->alb_work, 0); 3058 } 3059 3060 if (bond->params.miimon) /* link check interval, in milliseconds. */ 3061 queue_delayed_work(bond->wq, &bond->mii_work, 0); 3062 3063 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ 3064 queue_delayed_work(bond->wq, &bond->arp_work, 0); 3065 if (bond->params.arp_validate) 3066 bond->recv_probe = bond_arp_rcv; 3067 } 3068 3069 if (bond->params.mode == BOND_MODE_8023AD) { 3070 queue_delayed_work(bond->wq, &bond->ad_work, 0); 3071 /* register to receive LACPDUs */ 3072 bond->recv_probe = bond_3ad_lacpdu_recv; 3073 bond_3ad_initiate_agg_selection(bond, 1); 3074 } 3075 3076 return 0; 3077 } 3078 3079 static int bond_close(struct net_device *bond_dev) 3080 { 3081 struct bonding *bond = netdev_priv(bond_dev); 3082 3083 bond_work_cancel_all(bond); 3084 bond->send_peer_notif = 0; 3085 if (bond_is_lb(bond)) 3086 bond_alb_deinitialize(bond); 3087 bond->recv_probe = NULL; 3088 3089 return 0; 3090 } 3091 3092 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, 3093 struct rtnl_link_stats64 *stats) 3094 { 3095 struct bonding *bond = netdev_priv(bond_dev); 3096 struct rtnl_link_stats64 temp; 3097 struct list_head *iter; 3098 struct slave *slave; 3099 3100 memset(stats, 0, sizeof(*stats)); 3101 3102 read_lock_bh(&bond->lock); 3103 bond_for_each_slave(bond, slave, iter) { 3104 const struct rtnl_link_stats64 *sstats = 3105 dev_get_stats(slave->dev, &temp); 3106 3107 stats->rx_packets += sstats->rx_packets; 3108 stats->rx_bytes += sstats->rx_bytes; 3109 stats->rx_errors += sstats->rx_errors; 3110 stats->rx_dropped += sstats->rx_dropped; 3111 3112 stats->tx_packets += sstats->tx_packets; 3113 stats->tx_bytes += sstats->tx_bytes; 3114 stats->tx_errors += sstats->tx_errors; 3115 stats->tx_dropped += sstats->tx_dropped; 3116 3117 stats->multicast += sstats->multicast; 3118 stats->collisions += sstats->collisions; 3119 3120 stats->rx_length_errors += sstats->rx_length_errors; 3121 stats->rx_over_errors += sstats->rx_over_errors; 3122 stats->rx_crc_errors += sstats->rx_crc_errors; 3123 stats->rx_frame_errors += sstats->rx_frame_errors; 3124 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3125 stats->rx_missed_errors += 
sstats->rx_missed_errors; 3126 3127 stats->tx_aborted_errors += sstats->tx_aborted_errors; 3128 stats->tx_carrier_errors += sstats->tx_carrier_errors; 3129 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3130 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3131 stats->tx_window_errors += sstats->tx_window_errors; 3132 } 3133 read_unlock_bh(&bond->lock); 3134 3135 return stats; 3136 } 3137 3138 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3139 { 3140 struct bonding *bond = netdev_priv(bond_dev); 3141 struct net_device *slave_dev = NULL; 3142 struct ifbond k_binfo; 3143 struct ifbond __user *u_binfo = NULL; 3144 struct ifslave k_sinfo; 3145 struct ifslave __user *u_sinfo = NULL; 3146 struct mii_ioctl_data *mii = NULL; 3147 struct net *net; 3148 int res = 0; 3149 3150 pr_debug("bond_ioctl: master=%s, cmd=%d\n", bond_dev->name, cmd); 3151 3152 switch (cmd) { 3153 case SIOCGMIIPHY: 3154 mii = if_mii(ifr); 3155 if (!mii) 3156 return -EINVAL; 3157 3158 mii->phy_id = 0; 3159 /* Fall Through */ 3160 case SIOCGMIIREG: 3161 /* 3162 * We do this again just in case we were called by SIOCGMIIREG 3163 * instead of SIOCGMIIPHY. 
3164 */ 3165 mii = if_mii(ifr); 3166 if (!mii) 3167 return -EINVAL; 3168 3169 3170 if (mii->reg_num == 1) { 3171 mii->val_out = 0; 3172 read_lock(&bond->lock); 3173 read_lock(&bond->curr_slave_lock); 3174 if (netif_carrier_ok(bond->dev)) 3175 mii->val_out = BMSR_LSTATUS; 3176 3177 read_unlock(&bond->curr_slave_lock); 3178 read_unlock(&bond->lock); 3179 } 3180 3181 return 0; 3182 case BOND_INFO_QUERY_OLD: 3183 case SIOCBONDINFOQUERY: 3184 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3185 3186 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) 3187 return -EFAULT; 3188 3189 res = bond_info_query(bond_dev, &k_binfo); 3190 if (res == 0 && 3191 copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) 3192 return -EFAULT; 3193 3194 return res; 3195 case BOND_SLAVE_INFO_QUERY_OLD: 3196 case SIOCBONDSLAVEINFOQUERY: 3197 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3198 3199 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) 3200 return -EFAULT; 3201 3202 res = bond_slave_info_query(bond_dev, &k_sinfo); 3203 if (res == 0 && 3204 copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) 3205 return -EFAULT; 3206 3207 return res; 3208 default: 3209 /* Go on */ 3210 break; 3211 } 3212 3213 net = dev_net(bond_dev); 3214 3215 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 3216 return -EPERM; 3217 3218 slave_dev = dev_get_by_name(net, ifr->ifr_slave); 3219 3220 pr_debug("slave_dev=%p:\n", slave_dev); 3221 3222 if (!slave_dev) 3223 res = -ENODEV; 3224 else { 3225 pr_debug("slave_dev->name=%s:\n", slave_dev->name); 3226 switch (cmd) { 3227 case BOND_ENSLAVE_OLD: 3228 case SIOCBONDENSLAVE: 3229 res = bond_enslave(bond_dev, slave_dev); 3230 break; 3231 case BOND_RELEASE_OLD: 3232 case SIOCBONDRELEASE: 3233 res = bond_release(bond_dev, slave_dev); 3234 break; 3235 case BOND_SETHWADDR_OLD: 3236 case SIOCBONDSETHWADDR: 3237 bond_set_dev_addr(bond_dev, slave_dev); 3238 res = 0; 3239 break; 3240 case BOND_CHANGE_ACTIVE_OLD: 3241 case SIOCBONDCHANGEACTIVE: 3242 res = 
bond_option_active_slave_set(bond, slave_dev); 3243 break; 3244 default: 3245 res = -EOPNOTSUPP; 3246 } 3247 3248 dev_put(slave_dev); 3249 } 3250 3251 return res; 3252 } 3253 3254 static void bond_change_rx_flags(struct net_device *bond_dev, int change) 3255 { 3256 struct bonding *bond = netdev_priv(bond_dev); 3257 3258 if (change & IFF_PROMISC) 3259 bond_set_promiscuity(bond, 3260 bond_dev->flags & IFF_PROMISC ? 1 : -1); 3261 3262 if (change & IFF_ALLMULTI) 3263 bond_set_allmulti(bond, 3264 bond_dev->flags & IFF_ALLMULTI ? 1 : -1); 3265 } 3266 3267 static void bond_set_rx_mode(struct net_device *bond_dev) 3268 { 3269 struct bonding *bond = netdev_priv(bond_dev); 3270 struct list_head *iter; 3271 struct slave *slave; 3272 3273 3274 rcu_read_lock(); 3275 if (USES_PRIMARY(bond->params.mode)) { 3276 slave = rcu_dereference(bond->curr_active_slave); 3277 if (slave) { 3278 dev_uc_sync(slave->dev, bond_dev); 3279 dev_mc_sync(slave->dev, bond_dev); 3280 } 3281 } else { 3282 bond_for_each_slave_rcu(bond, slave, iter) { 3283 dev_uc_sync_multiple(slave->dev, bond_dev); 3284 dev_mc_sync_multiple(slave->dev, bond_dev); 3285 } 3286 } 3287 rcu_read_unlock(); 3288 } 3289 3290 static int bond_neigh_init(struct neighbour *n) 3291 { 3292 struct bonding *bond = netdev_priv(n->dev); 3293 const struct net_device_ops *slave_ops; 3294 struct neigh_parms parms; 3295 struct slave *slave; 3296 int ret; 3297 3298 slave = bond_first_slave(bond); 3299 if (!slave) 3300 return 0; 3301 slave_ops = slave->dev->netdev_ops; 3302 if (!slave_ops->ndo_neigh_setup) 3303 return 0; 3304 3305 parms.neigh_setup = NULL; 3306 parms.neigh_cleanup = NULL; 3307 ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); 3308 if (ret) 3309 return ret; 3310 3311 /* 3312 * Assign slave's neigh_cleanup to neighbour in case cleanup is called 3313 * after the last slave has been detached. Assumes that all slaves 3314 * utilize the same neigh_cleanup (true at this writing as only user 3315 * is ipoib). 
3316 */ 3317 n->parms->neigh_cleanup = parms.neigh_cleanup; 3318 3319 if (!parms.neigh_setup) 3320 return 0; 3321 3322 return parms.neigh_setup(n); 3323 } 3324 3325 /* 3326 * The bonding ndo_neigh_setup is called at init time beofre any 3327 * slave exists. So we must declare proxy setup function which will 3328 * be used at run time to resolve the actual slave neigh param setup. 3329 * 3330 * It's also called by master devices (such as vlans) to setup their 3331 * underlying devices. In that case - do nothing, we're already set up from 3332 * our init. 3333 */ 3334 static int bond_neigh_setup(struct net_device *dev, 3335 struct neigh_parms *parms) 3336 { 3337 /* modify only our neigh_parms */ 3338 if (parms->dev == dev) 3339 parms->neigh_setup = bond_neigh_init; 3340 3341 return 0; 3342 } 3343 3344 /* 3345 * Change the MTU of all of a master's slaves to match the master 3346 */ 3347 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3348 { 3349 struct bonding *bond = netdev_priv(bond_dev); 3350 struct slave *slave, *rollback_slave; 3351 struct list_head *iter; 3352 int res = 0; 3353 3354 pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, 3355 (bond_dev ? bond_dev->name : "None"), new_mtu); 3356 3357 /* Can't hold bond->lock with bh disabled here since 3358 * some base drivers panic. On the other hand we can't 3359 * hold bond->lock without bh disabled because we'll 3360 * deadlock. The only solution is to rely on the fact 3361 * that we're under rtnl_lock here, and the slaves 3362 * list won't change. This doesn't solve the problem 3363 * of setting the slave's MTU while it is 3364 * transmitting, but the assumption is that the base 3365 * driver can handle that. 3366 * 3367 * TODO: figure out a way to safely iterate the slaves 3368 * list, but without holding a lock around the actual 3369 * call to the base driver. 
3370 */ 3371 3372 bond_for_each_slave(bond, slave, iter) { 3373 pr_debug("s %p c_m %p\n", 3374 slave, 3375 slave->dev->netdev_ops->ndo_change_mtu); 3376 3377 res = dev_set_mtu(slave->dev, new_mtu); 3378 3379 if (res) { 3380 /* If we failed to set the slave's mtu to the new value 3381 * we must abort the operation even in ACTIVE_BACKUP 3382 * mode, because if we allow the backup slaves to have 3383 * different mtu values than the active slave we'll 3384 * need to change their mtu when doing a failover. That 3385 * means changing their mtu from timer context, which 3386 * is probably not a good idea. 3387 */ 3388 pr_debug("err %d %s\n", res, slave->dev->name); 3389 goto unwind; 3390 } 3391 } 3392 3393 bond_dev->mtu = new_mtu; 3394 3395 return 0; 3396 3397 unwind: 3398 /* unwind from head to the slave that failed */ 3399 bond_for_each_slave(bond, rollback_slave, iter) { 3400 int tmp_res; 3401 3402 if (rollback_slave == slave) 3403 break; 3404 3405 tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); 3406 if (tmp_res) { 3407 pr_debug("unwind err %d dev %s\n", 3408 tmp_res, rollback_slave->dev->name); 3409 } 3410 } 3411 3412 return res; 3413 } 3414 3415 /* 3416 * Change HW address 3417 * 3418 * Note that many devices must be down to change the HW address, and 3419 * downing the master releases all slaves. We can make bonds full of 3420 * bonding devices to test this, however. 3421 */ 3422 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 3423 { 3424 struct bonding *bond = netdev_priv(bond_dev); 3425 struct slave *slave, *rollback_slave; 3426 struct sockaddr *sa = addr, tmp_sa; 3427 struct list_head *iter; 3428 int res = 0; 3429 3430 if (bond->params.mode == BOND_MODE_ALB) 3431 return bond_alb_set_mac_address(bond_dev, addr); 3432 3433 3434 pr_debug("bond=%p, name=%s\n", 3435 bond, bond_dev ? bond_dev->name : "None"); 3436 3437 /* If fail_over_mac is enabled, do nothing and return success. 3438 * Returning an error causes ifenslave to fail. 
3439 */ 3440 if (bond->params.fail_over_mac) 3441 return 0; 3442 3443 if (!is_valid_ether_addr(sa->sa_data)) 3444 return -EADDRNOTAVAIL; 3445 3446 /* Can't hold bond->lock with bh disabled here since 3447 * some base drivers panic. On the other hand we can't 3448 * hold bond->lock without bh disabled because we'll 3449 * deadlock. The only solution is to rely on the fact 3450 * that we're under rtnl_lock here, and the slaves 3451 * list won't change. This doesn't solve the problem 3452 * of setting the slave's hw address while it is 3453 * transmitting, but the assumption is that the base 3454 * driver can handle that. 3455 * 3456 * TODO: figure out a way to safely iterate the slaves 3457 * list, but without holding a lock around the actual 3458 * call to the base driver. 3459 */ 3460 3461 bond_for_each_slave(bond, slave, iter) { 3462 const struct net_device_ops *slave_ops = slave->dev->netdev_ops; 3463 pr_debug("slave %p %s\n", slave, slave->dev->name); 3464 3465 if (slave_ops->ndo_set_mac_address == NULL) { 3466 res = -EOPNOTSUPP; 3467 pr_debug("EOPNOTSUPP %s\n", slave->dev->name); 3468 goto unwind; 3469 } 3470 3471 res = dev_set_mac_address(slave->dev, addr); 3472 if (res) { 3473 /* TODO: consider downing the slave 3474 * and retry ? 3475 * User should expect communications 3476 * breakage anyway until ARP finish 3477 * updating, so... 
3478 */ 3479 pr_debug("err %d %s\n", res, slave->dev->name); 3480 goto unwind; 3481 } 3482 } 3483 3484 /* success */ 3485 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 3486 return 0; 3487 3488 unwind: 3489 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 3490 tmp_sa.sa_family = bond_dev->type; 3491 3492 /* unwind from head to the slave that failed */ 3493 bond_for_each_slave(bond, rollback_slave, iter) { 3494 int tmp_res; 3495 3496 if (rollback_slave == slave) 3497 break; 3498 3499 tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa); 3500 if (tmp_res) { 3501 pr_debug("unwind err %d dev %s\n", 3502 tmp_res, rollback_slave->dev->name); 3503 } 3504 } 3505 3506 return res; 3507 } 3508 3509 /** 3510 * bond_xmit_slave_id - transmit skb through slave with slave_id 3511 * @bond: bonding device that is transmitting 3512 * @skb: buffer to transmit 3513 * @slave_id: slave id up to slave_cnt-1 through which to transmit 3514 * 3515 * This function tries to transmit through slave with slave_id but in case 3516 * it fails, it tries to find the first available slave for transmission. 3517 * The skb is consumed in all cases, thus the function is void. 
3518 */ 3519 static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) 3520 { 3521 struct list_head *iter; 3522 struct slave *slave; 3523 int i = slave_id; 3524 3525 /* Here we start from the slave with slave_id */ 3526 bond_for_each_slave_rcu(bond, slave, iter) { 3527 if (--i < 0) { 3528 if (slave_can_tx(slave)) { 3529 bond_dev_queue_xmit(bond, skb, slave->dev); 3530 return; 3531 } 3532 } 3533 } 3534 3535 /* Here we start from the first slave up to slave_id */ 3536 i = slave_id; 3537 bond_for_each_slave_rcu(bond, slave, iter) { 3538 if (--i < 0) 3539 break; 3540 if (slave_can_tx(slave)) { 3541 bond_dev_queue_xmit(bond, skb, slave->dev); 3542 return; 3543 } 3544 } 3545 /* no slave that can tx has been found */ 3546 kfree_skb(skb); 3547 } 3548 3549 /** 3550 * bond_rr_gen_slave_id - generate slave id based on packets_per_slave 3551 * @bond: bonding device to use 3552 * 3553 * Based on the value of the bonding device's packets_per_slave parameter 3554 * this function generates a slave id, which is usually used as the next 3555 * slave to transmit through. 3556 */ 3557 static u32 bond_rr_gen_slave_id(struct bonding *bond) 3558 { 3559 int packets_per_slave = bond->params.packets_per_slave; 3560 u32 slave_id; 3561 3562 switch (packets_per_slave) { 3563 case 0: 3564 slave_id = prandom_u32(); 3565 break; 3566 case 1: 3567 slave_id = bond->rr_tx_counter; 3568 break; 3569 default: 3570 slave_id = reciprocal_divide(bond->rr_tx_counter, 3571 packets_per_slave); 3572 break; 3573 } 3574 bond->rr_tx_counter++; 3575 3576 return slave_id; 3577 } 3578 3579 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 3580 { 3581 struct bonding *bond = netdev_priv(bond_dev); 3582 struct iphdr *iph = ip_hdr(skb); 3583 struct slave *slave; 3584 u32 slave_id; 3585 3586 /* Start with the curr_active_slave that joined the bond as the 3587 * default for sending IGMP traffic. 
For failover purposes one 3588 * needs to maintain some consistency for the interface that will 3589 * send the join/membership reports. The curr_active_slave found 3590 * will send all of this type of traffic. 3591 */ 3592 if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { 3593 slave = rcu_dereference(bond->curr_active_slave); 3594 if (slave && slave_can_tx(slave)) 3595 bond_dev_queue_xmit(bond, skb, slave->dev); 3596 else 3597 bond_xmit_slave_id(bond, skb, 0); 3598 } else { 3599 slave_id = bond_rr_gen_slave_id(bond); 3600 bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt); 3601 } 3602 3603 return NETDEV_TX_OK; 3604 } 3605 3606 /* 3607 * in active-backup mode, we know that bond->curr_active_slave is always valid if 3608 * the bond has a usable interface. 3609 */ 3610 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 3611 { 3612 struct bonding *bond = netdev_priv(bond_dev); 3613 struct slave *slave; 3614 3615 slave = rcu_dereference(bond->curr_active_slave); 3616 if (slave) 3617 bond_dev_queue_xmit(bond, skb, slave->dev); 3618 else 3619 kfree_skb(skb); 3620 3621 return NETDEV_TX_OK; 3622 } 3623 3624 /* In bond_xmit_xor() , we determine the output device by using a pre- 3625 * determined xmit_hash_policy(), If the selected device is not enabled, 3626 * find the next active slave. 3627 */ 3628 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 3629 { 3630 struct bonding *bond = netdev_priv(bond_dev); 3631 3632 bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt)); 3633 3634 return NETDEV_TX_OK; 3635 } 3636 3637 /* in broadcast mode, we send everything to all usable interfaces. 
*/
/*
 * Every slave except the last one that is up with link BOND_LINK_UP gets
 * a clone of the skb; the last slave gets the original skb if it is
 * usable, otherwise the original is freed.  Always returns NETDEV_TX_OK.
 */
static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave = NULL;
	struct list_head *iter;

	bond_for_each_slave_rcu(bond, slave, iter) {
		/* Stop on the last slave: it is handled after the loop. */
		if (bond_is_last_slave(bond, slave))
			break;
		if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (!skb2) {
				/* NOTE(review): this message is not rate
				 * limited although it sits on the tx path.
				 */
				pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
				       bond_dev->name);
				continue;
			}
			/* bond_dev_queue_xmit always returns 0 */
			bond_dev_queue_xmit(bond, skb2, slave->dev);
		}
	}
	if (slave && IS_UP(slave->dev) && slave->link == BOND_LINK_UP)
		bond_dev_queue_xmit(bond, skb, slave->dev);
	else
		kfree_skb(skb);

	return NETDEV_TX_OK;
}

/*------------------------- Device initialization ---------------------------*/

/*
 * Lookup the slave that corresponds to a qid
 *
 * Returns 1 when the skb has no queue mapping or no usable slave owns
 * that queue id, meaning the caller should apply the mode's normal
 * transmit policy.  Otherwise the skb is handed to the matching slave
 * and the result of bond_dev_queue_xmit() is returned.
 */
static inline int bond_slave_override(struct bonding *bond,
				      struct sk_buff *skb)
{
	struct slave *slave = NULL;
	struct slave *check_slave;
	struct list_head *iter;
	int res = 1;

	if (!skb->queue_mapping)
		return 1;

	/* Find out if any slaves have the same mapping as this skb. */
	bond_for_each_slave_rcu(bond, check_slave, iter) {
		if (check_slave->queue_id == skb->queue_mapping) {
			slave = check_slave;
			break;
		}
	}

	/* If the slave isn't UP, use default transmit policy. */
	if (slave && slave->queue_id && IS_UP(slave->dev) &&
	    (slave->link == BOND_LINK_UP)) {
		res = bond_dev_queue_xmit(bond, skb, slave->dev);
	}

	return res;
}


/*
 * ndo_select_queue handler: returns the recorded rx queue of the skb (or
 * 0 if none was recorded), reduced into the range of the device's real
 * tx queues.
 */
static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	/*
	 * This helper function exists to help dev_pick_tx get the correct
	 * destination queue.  Using a helper function skips a call to
	 * skb_tx_hash and will put the skbs in the queue we expect on their
	 * way down to the bonding driver.
	 */
	u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;

	/*
	 * Save the original txq to restore before passing to the driver
	 */
	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;

	/* Bring txq below real_num_tx_queues by repeated subtraction. */
	if (unlikely(txq >= dev->real_num_tx_queues)) {
		do {
			txq -= dev->real_num_tx_queues;
		} while (txq >= dev->real_num_tx_queues);
	}
	return txq;
}

/*
 * Dispatch an skb to the transmit routine of the bond's current mode.
 * In modes selected by TX_QUEUE_OVERRIDE() a per-queue slave override is
 * tried first; a zero return from it means the skb was already handled.
 * An unknown mode logs an error, warns once and drops the skb.
 */
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct bonding *bond = netdev_priv(dev);

	if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
		if (!bond_slave_override(bond, skb))
			return NETDEV_TX_OK;
	}

	switch (bond->params.mode) {
	case BOND_MODE_ROUNDROBIN:
		return bond_xmit_roundrobin(skb, dev);
	case BOND_MODE_ACTIVEBACKUP:
		return bond_xmit_activebackup(skb, dev);
	case BOND_MODE_XOR:
		return bond_xmit_xor(skb, dev);
	case BOND_MODE_BROADCAST:
		return bond_xmit_broadcast(skb, dev);
	case BOND_MODE_8023AD:
		return bond_3ad_xmit_xor(skb, dev);
	case BOND_MODE_ALB:
	case BOND_MODE_TLB:
		return bond_alb_xmit(skb, dev);
	default:
		/* Should never happen, mode already checked */
		pr_err("%s: Error: Unknown bonding mode %d\n",
		       dev->name, bond->params.mode);
		WARN_ON_ONCE(1);
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
}

/*
 * ndo_start_xmit handler.  Returns NETDEV_TX_BUSY when netpoll tx is
 * blocked for this device; otherwise, under rcu_read_lock(), either
 * dispatches the skb via __bond_start_xmit() or frees it when the bond
 * has no slaves.
 */
static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct bonding *bond = netdev_priv(dev);
	netdev_tx_t ret = NETDEV_TX_OK;

	/*
	 * If we risk deadlock from transmitting this in the
	 * netpoll path, tell netpoll to queue the frame for later tx
	 */
	if (is_netpoll_tx_blocked(dev))
		return NETDEV_TX_BUSY;

	rcu_read_lock();
	if (bond_has_slaves(bond))
		ret = __bond_start_xmit(skb, dev);
	else
		kfree_skb(skb);
	rcu_read_unlock();

	return ret;
}

/*
 * ethtool get_settings handler: reports the summed speed of all slaves
 * for which SLAVE_IS_OK() holds (SPEED_UNKNOWN if the sum is zero) and
 * the duplex of the first such slave whose duplex is known.  The port is
 * always PORT_OTHER.  Always returns 0.
 */
static int bond_ethtool_get_settings(struct net_device *bond_dev,
				     struct ethtool_cmd *ecmd)
{
	struct bonding *bond = netdev_priv(bond_dev);
	unsigned long speed = 0;
	struct list_head *iter;
	struct slave *slave;

	ecmd->duplex = DUPLEX_UNKNOWN;
	ecmd->port = PORT_OTHER;

	/* Since SLAVE_IS_OK returns false for all inactive or down slaves, we
	 * do not need to check mode.  Though link speed might not represent
	 * the true receive or transmit bandwidth (not all modes are symmetric)
	 * this is an accurate maximum.
	 */
	read_lock(&bond->lock);
	bond_for_each_slave(bond, slave, iter) {
		if (SLAVE_IS_OK(slave)) {
			if (slave->speed != SPEED_UNKNOWN)
				speed += slave->speed;
			if (ecmd->duplex == DUPLEX_UNKNOWN &&
			    slave->duplex != DUPLEX_UNKNOWN)
				ecmd->duplex = slave->duplex;
		}
	}
	ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
	read_unlock(&bond->lock);

	return 0;
}

/*
 * ethtool get_drvinfo handler: fills in the driver name, the driver
 * version and, in the fw_version field, the bonding ABI version.
 */
static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
				     struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d",
		 BOND_ABI_VERSION);
}

/* ethtool entry points of a bond device. */
static const struct ethtool_ops bond_ethtool_ops = {
	.get_drvinfo		= bond_ethtool_get_drvinfo,
	.get_settings		= bond_ethtool_get_settings,
	.get_link		= ethtool_op_get_link,
};

/* net_device entry points of a bond device. */
static const struct net_device_ops bond_netdev_ops = {
	.ndo_init		= bond_init,
	.ndo_uninit		= bond_uninit,
	.ndo_open		= bond_open,
	.ndo_stop		= bond_close,
	.ndo_start_xmit		= bond_start_xmit,
	.ndo_select_queue	= bond_select_queue,
	.ndo_get_stats64	= bond_get_stats,
	.ndo_do_ioctl		= bond_do_ioctl,
	.ndo_change_rx_flags	= bond_change_rx_flags,
	.ndo_set_rx_mode	= bond_set_rx_mode,
	.ndo_change_mtu		= bond_change_mtu,
	.ndo_set_mac_address	= bond_set_mac_address,
	.ndo_neigh_setup	= bond_neigh_setup,
	.ndo_vlan_rx_add_vid	= bond_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_setup	= bond_netpoll_setup,
	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
	.ndo_poll_controller	= bond_poll_controller,
#endif
	.ndo_add_slave		= bond_enslave,
	.ndo_del_slave		= bond_release,
	.ndo_fix_features	= bond_fix_features,
};

/* Device type attached to bond devices by bond_setup(). */
static const struct device_type bond_type = {
	.name = "bond",
};

/*
 * net_device destructor: destroys the bond's workqueue if one exists,
 * then frees the net_device.
 */
static void bond_destructor(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	if (bond->wq)
		destroy_workqueue(bond->wq);
	free_netdev(bond_dev);
}

/*
 * Set up a freshly allocated bond net_device: initialise the private
 * locks and default parameters, install the ops tables, destructor and
 * device type, and set the device flags and feature bits.
 */
void bond_setup(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	/* initialize rwlocks */
	rwlock_init(&bond->lock);
	rwlock_init(&bond->curr_slave_lock);
	bond->params = bonding_defaults;

	/* Initialize pointers */
	bond->dev = bond_dev;

	/* Initialize the device entry points */
	ether_setup(bond_dev);
	bond_dev->netdev_ops = &bond_netdev_ops;
	bond_dev->ethtool_ops = &bond_ethtool_ops;

	bond_dev->destructor = bond_destructor;

	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);

	/* Initialize the device options */
	bond_dev->tx_queue_len = 0;
	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
	bond_dev->priv_flags |= IFF_BONDING;
	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);

	/* At first, we block adding VLANs. That's the only way to
	 * prevent problems that occur when adding VLANs over an
	 * empty bond. The block will be removed once non-challenged
	 * slaves are enslaved.
	 */
	bond_dev->features |= NETIF_F_VLAN_CHALLENGED;

	/* don't acquire bond device's netif_tx_lock when
	 * transmitting */
	bond_dev->features |= NETIF_F_LLTX;

	/* By default, we declare the bond to be fully
	 * VLAN hardware accelerated capable. Special
	 * care is taken in the various xmit functions
	 * when there are slaves that are not hw accel
	 * capable
	 */

	bond_dev->hw_features = BOND_VLAN_FEATURES |
				NETIF_F_HW_VLAN_CTAG_TX |
				NETIF_F_HW_VLAN_CTAG_RX |
				NETIF_F_HW_VLAN_CTAG_FILTER;

	/* Of the checksum feature bits, keep only NETIF_F_HW_CSUM. */
	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM);
	bond_dev->features |= bond_dev->hw_features;
}

/*
 * Destroy a bonding device.
 * Must be under rtnl_lock when this function is called.
3921 */ 3922 static void bond_uninit(struct net_device *bond_dev) 3923 { 3924 struct bonding *bond = netdev_priv(bond_dev); 3925 struct list_head *iter; 3926 struct slave *slave; 3927 3928 bond_netpoll_cleanup(bond_dev); 3929 3930 /* Release the bonded slaves */ 3931 bond_for_each_slave(bond, slave, iter) 3932 __bond_release_one(bond_dev, slave->dev, true); 3933 pr_info("%s: released all slaves\n", bond_dev->name); 3934 3935 list_del(&bond->bond_list); 3936 3937 bond_debug_unregister(bond); 3938 } 3939 3940 /*------------------------- Module initialization ---------------------------*/ 3941 3942 /* 3943 * Convert string input module parms. Accept either the 3944 * number of the mode or its string name. A bit complicated because 3945 * some mode names are substrings of other names, and calls from sysfs 3946 * may have whitespace in the name (trailing newlines, for example). 3947 */ 3948 int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) 3949 { 3950 int modeint = -1, i, rv; 3951 char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; 3952 3953 for (p = (char *)buf; *p; p++) 3954 if (!(isdigit(*p) || isspace(*p))) 3955 break; 3956 3957 if (*p) 3958 rv = sscanf(buf, "%20s", modestr); 3959 else 3960 rv = sscanf(buf, "%d", &modeint); 3961 3962 if (!rv) 3963 return -1; 3964 3965 for (i = 0; tbl[i].modename; i++) { 3966 if (modeint == tbl[i].mode) 3967 return tbl[i].mode; 3968 if (strcmp(modestr, tbl[i].modename) == 0) 3969 return tbl[i].mode; 3970 } 3971 3972 return -1; 3973 } 3974 3975 static int bond_check_params(struct bond_params *params) 3976 { 3977 int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; 3978 int arp_all_targets_value; 3979 3980 /* 3981 * Convert string parameters. 3982 */ 3983 if (mode) { 3984 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 3985 if (bond_mode == -1) { 3986 pr_err("Error: Invalid bonding mode \"%s\"\n", 3987 mode == NULL ? 
"NULL" : mode); 3988 return -EINVAL; 3989 } 3990 } 3991 3992 if (xmit_hash_policy) { 3993 if ((bond_mode != BOND_MODE_XOR) && 3994 (bond_mode != BOND_MODE_8023AD)) { 3995 pr_info("xmit_hash_policy param is irrelevant in mode %s\n", 3996 bond_mode_name(bond_mode)); 3997 } else { 3998 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 3999 xmit_hashtype_tbl); 4000 if (xmit_hashtype == -1) { 4001 pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", 4002 xmit_hash_policy == NULL ? "NULL" : 4003 xmit_hash_policy); 4004 return -EINVAL; 4005 } 4006 } 4007 } 4008 4009 if (lacp_rate) { 4010 if (bond_mode != BOND_MODE_8023AD) { 4011 pr_info("lacp_rate param is irrelevant in mode %s\n", 4012 bond_mode_name(bond_mode)); 4013 } else { 4014 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4015 if (lacp_fast == -1) { 4016 pr_err("Error: Invalid lacp rate \"%s\"\n", 4017 lacp_rate == NULL ? "NULL" : lacp_rate); 4018 return -EINVAL; 4019 } 4020 } 4021 } 4022 4023 if (ad_select) { 4024 params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); 4025 if (params->ad_select == -1) { 4026 pr_err("Error: Invalid ad_select \"%s\"\n", 4027 ad_select == NULL ? 
"NULL" : ad_select); 4028 return -EINVAL; 4029 } 4030 4031 if (bond_mode != BOND_MODE_8023AD) { 4032 pr_warning("ad_select param only affects 802.3ad mode\n"); 4033 } 4034 } else { 4035 params->ad_select = BOND_AD_STABLE; 4036 } 4037 4038 if (max_bonds < 0) { 4039 pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4040 max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4041 max_bonds = BOND_DEFAULT_MAX_BONDS; 4042 } 4043 4044 if (miimon < 0) { 4045 pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", 4046 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4047 miimon = BOND_LINK_MON_INTERV; 4048 } 4049 4050 if (updelay < 0) { 4051 pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", 4052 updelay, INT_MAX); 4053 updelay = 0; 4054 } 4055 4056 if (downdelay < 0) { 4057 pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", 4058 downdelay, INT_MAX); 4059 downdelay = 0; 4060 } 4061 4062 if ((use_carrier != 0) && (use_carrier != 1)) { 4063 pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", 4064 use_carrier); 4065 use_carrier = 1; 4066 } 4067 4068 if (num_peer_notif < 0 || num_peer_notif > 255) { 4069 pr_warning("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", 4070 num_peer_notif); 4071 num_peer_notif = 1; 4072 } 4073 4074 /* reset values for 802.3ad/TLB/ALB */ 4075 if (BOND_NO_USES_ARP(bond_mode)) { 4076 if (!miimon) { 4077 pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); 4078 pr_warning("Forcing miimon to 100msec\n"); 4079 miimon = BOND_DEFAULT_MIIMON; 4080 } 4081 } 4082 4083 if (tx_queues < 1 || tx_queues > 255) { 4084 pr_warning("Warning: tx_queues (%d) should be between " 4085 "1 and 255, resetting to 
%d\n", 4086 tx_queues, BOND_DEFAULT_TX_QUEUES); 4087 tx_queues = BOND_DEFAULT_TX_QUEUES; 4088 } 4089 4090 if ((all_slaves_active != 0) && (all_slaves_active != 1)) { 4091 pr_warning("Warning: all_slaves_active module parameter (%d), " 4092 "not of valid value (0/1), so it was set to " 4093 "0\n", all_slaves_active); 4094 all_slaves_active = 0; 4095 } 4096 4097 if (resend_igmp < 0 || resend_igmp > 255) { 4098 pr_warning("Warning: resend_igmp (%d) should be between " 4099 "0 and 255, resetting to %d\n", 4100 resend_igmp, BOND_DEFAULT_RESEND_IGMP); 4101 resend_igmp = BOND_DEFAULT_RESEND_IGMP; 4102 } 4103 4104 if (packets_per_slave < 0 || packets_per_slave > USHRT_MAX) { 4105 pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", 4106 packets_per_slave, USHRT_MAX); 4107 packets_per_slave = 1; 4108 } 4109 4110 if (bond_mode == BOND_MODE_ALB) { 4111 pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n", 4112 updelay); 4113 } 4114 4115 if (!miimon) { 4116 if (updelay || downdelay) { 4117 /* just warn the user the up/down delay will have 4118 * no effect since miimon is zero... 
4119 */ 4120 pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", 4121 updelay, downdelay); 4122 } 4123 } else { 4124 /* don't allow arp monitoring */ 4125 if (arp_interval) { 4126 pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", 4127 miimon, arp_interval); 4128 arp_interval = 0; 4129 } 4130 4131 if ((updelay % miimon) != 0) { 4132 pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", 4133 updelay, miimon, 4134 (updelay / miimon) * miimon); 4135 } 4136 4137 updelay /= miimon; 4138 4139 if ((downdelay % miimon) != 0) { 4140 pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", 4141 downdelay, miimon, 4142 (downdelay / miimon) * miimon); 4143 } 4144 4145 downdelay /= miimon; 4146 } 4147 4148 if (arp_interval < 0) { 4149 pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", 4150 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4151 arp_interval = BOND_LINK_ARP_INTERV; 4152 } 4153 4154 for (arp_ip_count = 0, i = 0; 4155 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { 4156 /* not complete check, but should be good enough to 4157 catch mistakes */ 4158 __be32 ip; 4159 if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || 4160 IS_IP_TARGET_UNUSABLE_ADDRESS(ip)) { 4161 pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", 4162 arp_ip_target[i]); 4163 arp_interval = 0; 4164 } else { 4165 if (bond_get_targets_ip(arp_target, ip) == -1) 4166 arp_target[arp_ip_count++] = ip; 4167 else 4168 pr_warning("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", 4169 &ip); 4170 } 4171 } 4172 4173 if (arp_interval && !arp_ip_count) { 4174 /* don't allow arping if no arp_ip_target given... 
*/ 4175 pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", 4176 arp_interval); 4177 arp_interval = 0; 4178 } 4179 4180 if (arp_validate) { 4181 if (bond_mode != BOND_MODE_ACTIVEBACKUP) { 4182 pr_err("arp_validate only supported in active-backup mode\n"); 4183 return -EINVAL; 4184 } 4185 if (!arp_interval) { 4186 pr_err("arp_validate requires arp_interval\n"); 4187 return -EINVAL; 4188 } 4189 4190 arp_validate_value = bond_parse_parm(arp_validate, 4191 arp_validate_tbl); 4192 if (arp_validate_value == -1) { 4193 pr_err("Error: invalid arp_validate \"%s\"\n", 4194 arp_validate == NULL ? "NULL" : arp_validate); 4195 return -EINVAL; 4196 } 4197 } else 4198 arp_validate_value = 0; 4199 4200 arp_all_targets_value = 0; 4201 if (arp_all_targets) { 4202 arp_all_targets_value = bond_parse_parm(arp_all_targets, 4203 arp_all_targets_tbl); 4204 4205 if (arp_all_targets_value == -1) { 4206 pr_err("Error: invalid arp_all_targets_value \"%s\"\n", 4207 arp_all_targets); 4208 arp_all_targets_value = 0; 4209 } 4210 } 4211 4212 if (miimon) { 4213 pr_info("MII link monitoring set to %d ms\n", miimon); 4214 } else if (arp_interval) { 4215 pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", 4216 arp_interval, 4217 arp_validate_tbl[arp_validate_value].modename, 4218 arp_ip_count); 4219 4220 for (i = 0; i < arp_ip_count; i++) 4221 pr_info(" %s", arp_ip_target[i]); 4222 4223 pr_info("\n"); 4224 4225 } else if (max_bonds) { 4226 /* miimon and arp_interval not set, we need one so things 4227 * work as expected, see bonding.txt for details 4228 */ 4229 pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! 
see bonding.txt for details.\n"); 4230 } 4231 4232 if (primary && !USES_PRIMARY(bond_mode)) { 4233 /* currently, using a primary only makes sense 4234 * in active backup, TLB or ALB modes 4235 */ 4236 pr_warning("Warning: %s primary device specified but has no effect in %s mode\n", 4237 primary, bond_mode_name(bond_mode)); 4238 primary = NULL; 4239 } 4240 4241 if (primary && primary_reselect) { 4242 primary_reselect_value = bond_parse_parm(primary_reselect, 4243 pri_reselect_tbl); 4244 if (primary_reselect_value == -1) { 4245 pr_err("Error: Invalid primary_reselect \"%s\"\n", 4246 primary_reselect == 4247 NULL ? "NULL" : primary_reselect); 4248 return -EINVAL; 4249 } 4250 } else { 4251 primary_reselect_value = BOND_PRI_RESELECT_ALWAYS; 4252 } 4253 4254 if (fail_over_mac) { 4255 fail_over_mac_value = bond_parse_parm(fail_over_mac, 4256 fail_over_mac_tbl); 4257 if (fail_over_mac_value == -1) { 4258 pr_err("Error: invalid fail_over_mac \"%s\"\n", 4259 arp_validate == NULL ? "NULL" : arp_validate); 4260 return -EINVAL; 4261 } 4262 4263 if (bond_mode != BOND_MODE_ACTIVEBACKUP) 4264 pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); 4265 } else { 4266 fail_over_mac_value = BOND_FOM_NONE; 4267 } 4268 4269 if (lp_interval == 0) { 4270 pr_warning("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", 4271 INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); 4272 lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; 4273 } 4274 4275 /* fill params struct with the proper values */ 4276 params->mode = bond_mode; 4277 params->xmit_policy = xmit_hashtype; 4278 params->miimon = miimon; 4279 params->num_peer_notif = num_peer_notif; 4280 params->arp_interval = arp_interval; 4281 params->arp_validate = arp_validate_value; 4282 params->arp_all_targets = arp_all_targets_value; 4283 params->updelay = updelay; 4284 params->downdelay = downdelay; 4285 params->use_carrier = use_carrier; 4286 params->lacp_fast = lacp_fast; 4287 params->primary[0] = 0; 4288 
params->primary_reselect = primary_reselect_value; 4289 params->fail_over_mac = fail_over_mac_value; 4290 params->tx_queues = tx_queues; 4291 params->all_slaves_active = all_slaves_active; 4292 params->resend_igmp = resend_igmp; 4293 params->min_links = min_links; 4294 params->lp_interval = lp_interval; 4295 if (packets_per_slave > 1) 4296 params->packets_per_slave = reciprocal_value(packets_per_slave); 4297 else 4298 params->packets_per_slave = packets_per_slave; 4299 if (primary) { 4300 strncpy(params->primary, primary, IFNAMSIZ); 4301 params->primary[IFNAMSIZ - 1] = 0; 4302 } 4303 4304 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4305 4306 return 0; 4307 } 4308 4309 static struct lock_class_key bonding_netdev_xmit_lock_key; 4310 static struct lock_class_key bonding_netdev_addr_lock_key; 4311 static struct lock_class_key bonding_tx_busylock_key; 4312 4313 static void bond_set_lockdep_class_one(struct net_device *dev, 4314 struct netdev_queue *txq, 4315 void *_unused) 4316 { 4317 lockdep_set_class(&txq->_xmit_lock, 4318 &bonding_netdev_xmit_lock_key); 4319 } 4320 4321 static void bond_set_lockdep_class(struct net_device *dev) 4322 { 4323 lockdep_set_class(&dev->addr_list_lock, 4324 &bonding_netdev_addr_lock_key); 4325 netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); 4326 dev->qdisc_tx_busylock = &bonding_tx_busylock_key; 4327 } 4328 4329 /* 4330 * Called from registration process 4331 */ 4332 static int bond_init(struct net_device *bond_dev) 4333 { 4334 struct bonding *bond = netdev_priv(bond_dev); 4335 struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); 4336 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 4337 4338 pr_debug("Begin bond_init for %s\n", bond_dev->name); 4339 4340 /* 4341 * Initialize locks that may be required during 4342 * en/deslave operations. 
All of the bond_open work 4343 * (of which this is part) should really be moved to 4344 * a phase prior to dev_open 4345 */ 4346 spin_lock_init(&(bond_info->tx_hashtbl_lock)); 4347 spin_lock_init(&(bond_info->rx_hashtbl_lock)); 4348 4349 bond->wq = create_singlethread_workqueue(bond_dev->name); 4350 if (!bond->wq) 4351 return -ENOMEM; 4352 4353 bond_set_lockdep_class(bond_dev); 4354 4355 list_add_tail(&bond->bond_list, &bn->dev_list); 4356 4357 bond_prepare_sysfs_group(bond); 4358 4359 bond_debug_register(bond); 4360 4361 /* Ensure valid dev_addr */ 4362 if (is_zero_ether_addr(bond_dev->dev_addr) && 4363 bond_dev->addr_assign_type == NET_ADDR_PERM) 4364 eth_hw_addr_random(bond_dev); 4365 4366 return 0; 4367 } 4368 4369 unsigned int bond_get_num_tx_queues(void) 4370 { 4371 return tx_queues; 4372 } 4373 4374 /* Create a new bond based on the specified name and bonding parameters. 4375 * If name is NULL, obtain a suitable "bond%d" name for us. 4376 * Caller must NOT hold rtnl_lock; we need to release it here before we 4377 * set up our sysfs entries. 4378 */ 4379 int bond_create(struct net *net, const char *name) 4380 { 4381 struct net_device *bond_dev; 4382 int res; 4383 4384 rtnl_lock(); 4385 4386 bond_dev = alloc_netdev_mq(sizeof(struct bonding), 4387 name ? name : "bond%d", 4388 bond_setup, tx_queues); 4389 if (!bond_dev) { 4390 pr_err("%s: eek! 
can't alloc netdev!\n", name); 4391 rtnl_unlock(); 4392 return -ENOMEM; 4393 } 4394 4395 dev_net_set(bond_dev, net); 4396 bond_dev->rtnl_link_ops = &bond_link_ops; 4397 4398 res = register_netdevice(bond_dev); 4399 4400 netif_carrier_off(bond_dev); 4401 4402 rtnl_unlock(); 4403 if (res < 0) 4404 bond_destructor(bond_dev); 4405 return res; 4406 } 4407 4408 static int __net_init bond_net_init(struct net *net) 4409 { 4410 struct bond_net *bn = net_generic(net, bond_net_id); 4411 4412 bn->net = net; 4413 INIT_LIST_HEAD(&bn->dev_list); 4414 4415 bond_create_proc_dir(bn); 4416 bond_create_sysfs(bn); 4417 4418 return 0; 4419 } 4420 4421 static void __net_exit bond_net_exit(struct net *net) 4422 { 4423 struct bond_net *bn = net_generic(net, bond_net_id); 4424 struct bonding *bond, *tmp_bond; 4425 LIST_HEAD(list); 4426 4427 bond_destroy_sysfs(bn); 4428 bond_destroy_proc_dir(bn); 4429 4430 /* Kill off any bonds created after unregistering bond rtnl ops */ 4431 rtnl_lock(); 4432 list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) 4433 unregister_netdevice_queue(bond->dev, &list); 4434 unregister_netdevice_many(&list); 4435 rtnl_unlock(); 4436 } 4437 4438 static struct pernet_operations bond_net_ops = { 4439 .init = bond_net_init, 4440 .exit = bond_net_exit, 4441 .id = &bond_net_id, 4442 .size = sizeof(struct bond_net), 4443 }; 4444 4445 static int __init bonding_init(void) 4446 { 4447 int i; 4448 int res; 4449 4450 pr_info("%s", bond_version); 4451 4452 res = bond_check_params(&bonding_defaults); 4453 if (res) 4454 goto out; 4455 4456 res = register_pernet_subsys(&bond_net_ops); 4457 if (res) 4458 goto out; 4459 4460 res = bond_netlink_init(); 4461 if (res) 4462 goto err_link; 4463 4464 bond_create_debugfs(); 4465 4466 for (i = 0; i < max_bonds; i++) { 4467 res = bond_create(&init_net, NULL); 4468 if (res) 4469 goto err; 4470 } 4471 4472 register_netdevice_notifier(&bond_netdev_notifier); 4473 out: 4474 return res; 4475 err: 4476 bond_netlink_fini(); 4477 
err_link: 4478 unregister_pernet_subsys(&bond_net_ops); 4479 goto out; 4480 4481 } 4482 4483 static void __exit bonding_exit(void) 4484 { 4485 unregister_netdevice_notifier(&bond_netdev_notifier); 4486 4487 bond_destroy_debugfs(); 4488 4489 bond_netlink_fini(); 4490 unregister_pernet_subsys(&bond_net_ops); 4491 4492 #ifdef CONFIG_NET_POLL_CONTROLLER 4493 /* 4494 * Make sure we don't have an imbalance on our netpoll blocking 4495 */ 4496 WARN_ON(atomic_read(&netpoll_block_tx)); 4497 #endif 4498 } 4499 4500 module_init(bonding_init); 4501 module_exit(bonding_exit); 4502 MODULE_LICENSE("GPL"); 4503 MODULE_VERSION(DRV_VERSION); 4504 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 4505 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 4506