// SPDX-License-Identifier: GPL-1.0+
/*
 * originally based on the dummy device.
 *
 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
 * Based on dummy.c, and eql.c devices.
 *
 * bonding.c: an Ethernet Bonding driver
 *
 * This is useful to talk to Cisco EtherChannel compatible equipment:
 *	Cisco 5500
 *	Sun Trunking (Solaris)
 *	Alteon AceDirector Trunks
 *	Linux Bonding
 *	and probably many L2 switches ...
 *
 * How it works:
 *	ifconfig bond0 ipaddress netmask up
 *		will set up a network device with an ip address. No mac
 *		address will be assigned at this time. The hw mac address
 *		will come from the first slave bonded to the channel. All
 *		slaves will then use this hw mac address.
 *
 *	ifconfig bond0 down
 *		will release all slaves, marking them as down.
 *
 *	ifenslave bond0 eth0
 *		will attach eth0 to bond0 as a slave. eth0's hw mac address
 *		will either
 *		a: be used as the initial mac address, or
 *		b: if a hw mac address already is there, eth0's hw mac
 *		   address will then be set from bond0.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/bitops.h>
#include <linux/io.h>
#include <asm/dma.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <linux/phy.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
#include <net/flow_dissector.h>
#include <net/xfrm.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
#include <net/bond_alb.h>
#if IS_ENABLED(CONFIG_TLS_DEVICE)
#include <net/tls.h>
#endif
#include <net/ip6_route.h>
#include <net/xdp.h>

#include "bonding_priv.h"

/*---------------------------- Module parameters ----------------------------*/
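/* Illustrative only: with the parameters declared below, a typical load
 * of this module might look like
 *
 *	modprobe bonding mode=802.3ad miimon=100 lacp_rate=fast
 *
 * though on modern systems these settings are usually applied per-bond
 * through sysfs (/sys/class/net/<bond>/bonding/) or netlink rather than
 * as module options.
 */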
/* monitor all links that often (in milliseconds). <=0 disables monitoring */

static int max_bonds = BOND_DEFAULT_MAX_BONDS;
static int tx_queues = BOND_DEFAULT_TX_QUEUES;
static int num_peer_notif = 1;
static int miimon;
static int updelay;
static int downdelay;
static int use_carrier = 1;
static char *mode;
static char *primary;
static char *primary_reselect;
static char *lacp_rate;
static int min_links;
static char *ad_select;
static char *xmit_hash_policy;
static int arp_interval;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
static char *arp_validate;
static char *arp_all_targets;
static char *fail_over_mac;
static int all_slaves_active;
static struct bond_params bonding_defaults;
static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
static int packets_per_slave = 1;
static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;

module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(tx_queues, int, 0);
MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
module_param_named(num_grat_arp, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on "
			       "failover event (alias of num_unsol_na)");
module_param_named(num_unsol_na, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on "
			       "failover event (alias of num_grat_arp)");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
			    "in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
			      "0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "
		       "1 for active-backup, 2 for balance-xor, "
		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
		       "6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(primary_reselect, charp, 0);
MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
				   "once it comes up; "
				   "0 for always (default), "
				   "1 for only if speed of primary is "
				   "better, "
				   "2 for only on active slave "
				   "failure");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
			    "0 for slow, 1 for fast");
module_param(ad_select, charp, 0);
MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
			    "0 for stable (default), 1 for bandwidth, "
			    "2 for count");
module_param(min_links, int, 0);
MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier");

module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; "
				   "0 for layer 2 (default), 1 for layer 3+4, "
				   "2 for layer 2+3, 3 for encap layer 2+3, "
				   "4 for encap layer 3+4, 5 for vlan+srcmac");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; "
			       "0 for none (default), 1 for active, "
			       "2 for backup, 3 for all");
module_param(arp_all_targets, charp, 0);
MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to "
				"the same MAC; 0 for none (default), "
				"1 for active, 2 for follow");
module_param(all_slaves_active, int, 0);
MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface "
				    "by setting active flag for all slaves; "
				    "0 for never (default), 1 for always.");
module_param(resend_igmp, int, 0);
MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on "
			      "link failure");
module_param(packets_per_slave, int, 0);
MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr "
				    "mode; 0 for a random slave, 1 packet per "
				    "slave (default), >1 packets per slave.");
module_param(lp_interval, uint, 0);
MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
			      "the bonding driver sends learning packets to "
			      "each slave's peer switch. The default is 1.");

/*----------------------------- Global variables ----------------------------*/

#ifdef CONFIG_NET_POLL_CONTROLLER
atomic_t netpoll_block_tx = ATOMIC_INIT(0);
#endif

unsigned int bond_net_id __read_mostly;

static const struct flow_dissector_key flow_keys_bonding_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC,
		.offset = offsetof(struct flow_keys, addrs.tipckey),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_ICMP,
		.offset = offsetof(struct flow_keys, icmp),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
};

static struct flow_dissector flow_keys_bonding __read_mostly;
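/* flow_keys_bonding is initialised from the key list above at module
 * load and is what the transmit hash policies dissect packets with:
 * the xmit-hash helpers (later in this file) feed the extracted
 * L2/L3/L4 fields into slave selection.
 */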
/*-------------------------- Forward declarations ---------------------------*/

static int bond_init(struct net_device *bond_dev);
static void bond_uninit(struct net_device *bond_dev);
static void bond_get_stats(struct net_device *bond_dev,
			   struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
				  int mod);
static void bond_netdev_notify_work(struct work_struct *work);

/*---------------------------- General routines -----------------------------*/

const char *bond_mode_name(int mode)
{
	static const char *names[] = {
		[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
		[BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
		[BOND_MODE_XOR] = "load balancing (xor)",
		[BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
		[BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation",
		[BOND_MODE_TLB] = "transmit load balancing",
		[BOND_MODE_ALB] = "adaptive load balancing",
	};

	if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB)
		return "unknown";

	return names[mode];
}

/**
 * bond_dev_queue_xmit - Prepare skb for xmit.
 *
 * @bond: bond device that got this skb for tx.
 * @skb: hw accel VLAN tagged skb to transmit
 * @slave_dev: slave that is supposed to xmit this skbuff
 */
netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
				struct net_device *slave_dev)
{
	skb->dev = slave_dev;

	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);

	if (unlikely(netpoll_tx_running(bond->dev)))
		return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);

	return dev_queue_xmit(skb);
}

static bool bond_sk_check(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_8023AD:
	case BOND_MODE_XOR:
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			return true;
		fallthrough;
	default:
		return false;
	}
}

static bool bond_xdp_check(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_ROUNDROBIN:
	case BOND_MODE_ACTIVEBACKUP:
		return true;
	case BOND_MODE_8023AD:
	case BOND_MODE_XOR:
		/* vlan+srcmac is not supported with XDP as in most cases the
		 * 802.1q payload is not in the packet due to hardware offload.
		 */
		if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
			return true;
		fallthrough;
	default:
		return false;
	}
}

/*---------------------------------- VLAN -----------------------------------*/

/* In the following two functions, bond_vlan_rx_add_vid and
 * bond_vlan_rx_kill_vid, we don't protect the slave list iteration with a
 * lock because:
 * a. This operation is performed in IOCTL context,
 * b. The operation is protected by the RTNL semaphore in the 8021q code,
 * c. Holding a lock with BH disabled while directly calling a base driver
 *    entry point is generally a BAD idea.
 *
 * The design of synchronization/protection for this operation in the 8021q
 * module is good for one or more VLAN devices over a single physical device
 * and cannot be extended for a teaming solution like bonding, so there is a
 * potential race condition here where a net device from the vlan group might
 * be referenced (either by a base driver or the 8021q code) while it is being
 * removed from the system. However, it turns out we're not making matters
 * worse, and if it works for regular VLAN usage it will work here too.
 */
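/* For example (illustrative): creating a VLAN on top of the bond with
 *
 *	ip link add link bond0 name bond0.100 type vlan id 100
 *
 * ends up in bond_vlan_rx_add_vid() below, which registers VID 100 with
 * every slave so that slave VLAN filters stay in sync with the bond.
 */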
/**
 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being added
 */
static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
				__be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	int res;

	bond_for_each_slave(bond, slave, iter) {
		res = vlan_vid_add(slave->dev, proto, vid);
		if (res)
			goto unwind;
	}

	return 0;

unwind:
	/* unwind to the slave that failed */
	bond_for_each_slave(bond, rollback_slave, iter) {
		if (rollback_slave == slave)
			break;

		vlan_vid_del(rollback_slave->dev, proto, vid);
	}

	return res;
}

/**
 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being removed
 */
static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
				 __be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		vlan_vid_del(slave->dev, proto, vid);

	if (bond_is_lb(bond))
		bond_alb_clear_vlan(bond, vid);

	return 0;
}

/*---------------------------------- XFRM -----------------------------------*/

#ifdef CONFIG_XFRM_OFFLOAD
/**
 * bond_ipsec_dev - Get active device for IPsec offload
 * @xs: pointer to transformer state struct
 *
 * Context: caller must hold rcu_read_lock.
 *
 * Return: the device for ipsec offload, or NULL if none exists.
 **/
static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct bonding *bond;
	struct slave *slave;

	bond = netdev_priv(bond_dev);
	if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
		return NULL;

	slave = rcu_dereference(bond->curr_active_slave);
	if (!slave)
		return NULL;

	if (!xs->xso.real_dev)
		return NULL;

	if (xs->xso.real_dev != slave->dev)
		pr_warn_ratelimited("%s: (slave %s): not same with IPsec offload real dev %s\n",
				    bond_dev->name, slave->dev->name, xs->xso.real_dev->name);

	return slave->dev;
}

/**
 * bond_ipsec_add_sa - program device with a security association
 * @xs: pointer to transformer state struct
 * @extack: extack pointer to fill with the failure reason
 **/
static int bond_ipsec_add_sa(struct xfrm_state *xs,
			     struct netlink_ext_ack *extack)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;
	int err;

	if (!bond_dev)
		return -EINVAL;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
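	/* The active slave can change as soon as the RCU lock is dropped,
	 * and the callback invoked below may sleep, so pin real_dev with a
	 * tracked reference (taken under RCU, hence GFP_ATOMIC) before
	 * unlocking.
	 */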
	netdev_hold(real_dev, &tracker, GFP_ATOMIC);
	rcu_read_unlock();
	if (!real_dev) {
		err = -ENODEV;
		goto out;
	}

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_add ||
	    netif_is_bond_master(real_dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
		err = -EINVAL;
		goto out;
	}

	ipsec = kmalloc(sizeof(*ipsec), GFP_KERNEL);
	if (!ipsec) {
		err = -ENOMEM;
		goto out;
	}

	xs->xso.real_dev = real_dev;
	err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
	if (!err) {
		ipsec->xs = xs;
		INIT_LIST_HEAD(&ipsec->list);
		mutex_lock(&bond->ipsec_lock);
		list_add(&ipsec->list, &bond->ipsec_list);
		mutex_unlock(&bond->ipsec_lock);
	} else {
		kfree(ipsec);
	}
out:
	netdev_put(real_dev, &tracker);
	return err;
}

static void bond_ipsec_add_sa_all(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct net_device *real_dev;
	struct bond_ipsec *ipsec;
	struct slave *slave;

	slave = rtnl_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	if (!real_dev)
		return;

	mutex_lock(&bond->ipsec_lock);
	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_add ||
	    netif_is_bond_master(real_dev)) {
		if (!list_empty(&bond->ipsec_list))
			slave_warn(bond_dev, real_dev,
				   "%s: no slave xdo_dev_state_add\n",
				   __func__);
		goto out;
	}

	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		/* If a new state was added before ipsec_lock was acquired */
		if (ipsec->xs->xso.real_dev == real_dev)
			continue;

		ipsec->xs->xso.real_dev = real_dev;
		if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
			slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
			ipsec->xs->xso.real_dev = NULL;
		}
	}
out:
	mutex_unlock(&bond->ipsec_lock);
}

/**
 * bond_ipsec_del_sa - clear out this specific SA
 * @xs: pointer to transformer state struct
 **/
static void bond_ipsec_del_sa(struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;

	if (!bond_dev)
		return;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	netdev_hold(real_dev, &tracker, GFP_ATOMIC);
	rcu_read_unlock();

	if (!slave)
		goto out;

	if (!xs->xso.real_dev)
		goto out;

	WARN_ON(xs->xso.real_dev != real_dev);

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
	    netif_is_bond_master(real_dev)) {
		slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
		goto out;
	}

	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
out:
	netdev_put(real_dev, &tracker);
	mutex_lock(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (ipsec->xs == xs) {
			list_del(&ipsec->list);
			kfree(ipsec);
			break;
		}
	}
	mutex_unlock(&bond->ipsec_lock);
}

static void bond_ipsec_del_sa_all(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct net_device *real_dev;
	struct bond_ipsec *ipsec;
	struct slave *slave;

	slave = rtnl_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	if (!real_dev)
		return;

	mutex_lock(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (!ipsec->xs->xso.real_dev)
			continue;

		if (!real_dev->xfrmdev_ops ||
		    !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
		    netif_is_bond_master(real_dev)) {
			slave_warn(bond_dev, real_dev,
				   "%s: no slave xdo_dev_state_delete\n",
				   __func__);
		} else {
			real_dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs);
			if (real_dev->xfrmdev_ops->xdo_dev_state_free)
				real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
		}
	}
	mutex_unlock(&bond->ipsec_lock);
}

static void bond_ipsec_free_sa(struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	netdevice_tracker tracker;
	struct bonding *bond;
	struct slave *slave;

	if (!bond_dev)
		return;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	real_dev = slave ? slave->dev : NULL;
	netdev_hold(real_dev, &tracker, GFP_ATOMIC);
	rcu_read_unlock();

	if (!slave)
		goto out;

	if (!xs->xso.real_dev)
		goto out;

	WARN_ON(xs->xso.real_dev != real_dev);

	if (real_dev && real_dev->xfrmdev_ops &&
	    real_dev->xfrmdev_ops->xdo_dev_state_free)
		real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
out:
	netdev_put(real_dev, &tracker);
}

/**
 * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
 * @skb: current data packet
 * @xs: pointer to transformer state struct
 **/
static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
{
	struct net_device *real_dev;
	bool ok = false;

	rcu_read_lock();
	real_dev = bond_ipsec_dev(xs);
	if (!real_dev)
		goto out;

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_offload_ok ||
	    netif_is_bond_master(real_dev))
		goto out;

	ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs);
out:
	rcu_read_unlock();
	return ok;
}

/**
 * bond_advance_esn_state - ESN support for IPSec HW offload
 * @xs: pointer to transformer state struct
 **/
static void bond_advance_esn_state(struct xfrm_state *xs)
{
	struct net_device *real_dev;

	rcu_read_lock();
	real_dev = bond_ipsec_dev(xs);
	if (!real_dev)
		goto out;

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
		pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_advance_esn\n", __func__, real_dev->name);
		goto out;
	}

	real_dev->xfrmdev_ops->xdo_dev_state_advance_esn(xs);
out:
	rcu_read_unlock();
}

/**
 * bond_xfrm_update_stats - Update xfrm state
 * @xs: pointer to transformer state struct
 **/
static void bond_xfrm_update_stats(struct xfrm_state *xs)
{
	struct net_device *real_dev;

	rcu_read_lock();
	real_dev = bond_ipsec_dev(xs);
	if (!real_dev)
		goto out;

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_state_update_stats) {
		pr_warn_ratelimited("%s: %s doesn't support xdo_dev_state_update_stats\n", __func__, real_dev->name);
		goto out;
	}

	real_dev->xfrmdev_ops->xdo_dev_state_update_stats(xs);
out:
	rcu_read_unlock();
}

static const struct xfrmdev_ops bond_xfrmdev_ops = {
	.xdo_dev_state_add = bond_ipsec_add_sa,
	.xdo_dev_state_delete = bond_ipsec_del_sa,
	.xdo_dev_state_free = bond_ipsec_free_sa,
	.xdo_dev_offload_ok = bond_ipsec_offload_ok,
	.xdo_dev_state_advance_esn = bond_advance_esn_state,
	.xdo_dev_state_update_stats = bond_xfrm_update_stats,
};
#endif /* CONFIG_XFRM_OFFLOAD */

/*------------------------------- Link status -------------------------------*/

/* Set the carrier state for the master according to the state of its
 * slaves. If any slaves are up, the master is up. In 802.3ad mode,
 * do special 802.3ad magic.
 *
 * Returns zero if carrier state does not change, nonzero if it does.
 */
int bond_set_carrier(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;

	if (!bond_has_slaves(bond))
		goto down;

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		return bond_3ad_set_carrier(bond);

	bond_for_each_slave(bond, slave, iter) {
		if (slave->link == BOND_LINK_UP) {
			if (!netif_carrier_ok(bond->dev)) {
				netif_carrier_on(bond->dev);
				return 1;
			}
			return 0;
		}
	}

down:
	if (netif_carrier_ok(bond->dev)) {
		netif_carrier_off(bond->dev);
		return 1;
	}
	return 0;
}

/* Get link speed and duplex from the slave's base driver using ethtool.
 * If for some reason the call fails or the values are invalid, set speed
 * and duplex to SPEED_UNKNOWN and DUPLEX_UNKNOWN, and return. Return 1
 * if speed or duplex settings are UNKNOWN; 0 otherwise.
 */
static int bond_update_speed_duplex(struct slave *slave)
{
	struct net_device *slave_dev = slave->dev;
	struct ethtool_link_ksettings ecmd;
	int res;

	slave->speed = SPEED_UNKNOWN;
	slave->duplex = DUPLEX_UNKNOWN;

	res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
	if (res < 0)
		return 1;
	if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
		return 1;
	switch (ecmd.base.duplex) {
	case DUPLEX_FULL:
	case DUPLEX_HALF:
		break;
	default:
		return 1;
	}

	slave->speed = ecmd.base.speed;
	slave->duplex = ecmd.base.duplex;

	return 0;
}

const char *bond_slave_link_status(s8 link)
{
	switch (link) {
	case BOND_LINK_UP:
		return "up";
	case BOND_LINK_FAIL:
		return "going down";
	case BOND_LINK_DOWN:
		return "down";
	case BOND_LINK_BACK:
		return "going back";
	default:
		return "unknown";
	}
}

/* If <dev> supports MII link status reporting, check its link status.
 *
 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
 * depending upon the setting of the use_carrier parameter.
 *
 * Return either BMSR_LSTATUS, meaning that the link is up (or we
 * can't tell and just pretend it is), or 0, meaning that the link is
 * down.
 *
 * If reporting is non-zero, instead of faking link up, return -1 if
 * both ETHTOOL and MII ioctls fail (meaning the device does not
 * support them). If use_carrier is set, return whatever it says.
 * It'd be nice if there was a good way to tell if a driver supports
 * netif_carrier, but there really isn't.
 */
static int bond_check_dev_link(struct bonding *bond,
			       struct net_device *slave_dev, int reporting)
{
	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
	int (*ioctl)(struct net_device *, struct ifreq *, int);
	struct ifreq ifr;
	struct mii_ioctl_data *mii;

	if (!reporting && !netif_running(slave_dev))
		return 0;

	if (bond->params.use_carrier)
		return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;

	/* Try to get link status using Ethtool first. */
	if (slave_dev->ethtool_ops->get_link)
		return slave_dev->ethtool_ops->get_link(slave_dev) ?
			BMSR_LSTATUS : 0;

	/* Ethtool can't be used, fall back to MII ioctls. */
	ioctl = slave_ops->ndo_eth_ioctl;
	if (ioctl) {
		/* TODO: set pointer to correct ioctl on a per team member
		 *       basis to make this more efficient. That is, once
		 *       we determine the correct ioctl, we will always
		 *       call it and not the others for that team
		 *       member.
		 */
		/* We cannot assume that SIOCGMIIPHY will also read a
		 * register; not all network drivers (e.g., e100)
		 * support that.
		 */

		/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
		strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
		mii = if_mii(&ifr);
		if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
			mii->reg_num = MII_BMSR;
			if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
				return mii->val_out & BMSR_LSTATUS;
		}
	}

	/* If reporting, report that either there's no ndo_eth_ioctl,
	 * or both SIOCGMIIREG and get_link failed (meaning that we
	 * cannot report link status). If not reporting, pretend
	 * we're ok.
	 */
	return reporting ? -1 : BMSR_LSTATUS;
}

/*----------------------------- Multicast list ------------------------------*/

/* Push the promiscuity flag down to appropriate slaves */
static int bond_set_promiscuity(struct bonding *bond, int inc)
{
	struct list_head *iter;
	int err = 0;

	if (bond_uses_primary(bond)) {
		struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);

		if (curr_active)
			err = dev_set_promiscuity(curr_active->dev, inc);
	} else {
		struct slave *slave;

		bond_for_each_slave(bond, slave, iter) {
			err = dev_set_promiscuity(slave->dev, inc);
			if (err)
				return err;
		}
	}
	return err;
}

/* Push the allmulti flag down to all slaves */
static int bond_set_allmulti(struct bonding *bond, int inc)
{
	struct list_head *iter;
	int err = 0;

	if (bond_uses_primary(bond)) {
		struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);

		if (curr_active)
			err = dev_set_allmulti(curr_active->dev, inc);
	} else {
		struct slave *slave;

		bond_for_each_slave(bond, slave, iter) {
			err = dev_set_allmulti(slave->dev, inc);
			if (err)
				return err;
		}
	}
	return err;
}

/* Retrieve the list of registered multicast addresses for the bonding
 * device and retransmit an IGMP JOIN request to the current active
 * slave.
 */
static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    mcast_work.work);

	if (!rtnl_trylock()) {
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
		return;
	}
	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev);

	if (bond->igmp_retrans > 1) {
		bond->igmp_retrans--;
		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
	}
	rtnl_unlock();
}

/* Flush bond's hardware addresses from slave */
static void bond_hw_addr_flush(struct net_device *bond_dev,
			       struct net_device *slave_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	dev_uc_unsync(slave_dev, bond_dev);
	dev_mc_unsync(slave_dev, bond_dev);

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		dev_mc_del(slave_dev, lacpdu_mcast_addr);
}

/*--------------------------- Active slave change ---------------------------*/

/* Update the hardware address list and promisc/allmulti for the new and
 * old active slaves (if any). Modes that are not using primary keep all
 * slaves up to date at all times; only the modes that use primary need to
 * call this function to swap these settings during a failover.
 */
static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
			      struct slave *old_active)
{
	if (old_active) {
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(old_active->dev, -1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(old_active->dev, -1);

		if (bond->dev->flags & IFF_UP)
			bond_hw_addr_flush(bond->dev, old_active->dev);

		bond_slave_ns_maddrs_add(bond, old_active);
	}

	if (new_active) {
		/* FIXME: Signal errors upstream. */
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(new_active->dev, 1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(new_active->dev, 1);

		if (bond->dev->flags & IFF_UP) {
			netif_addr_lock_bh(bond->dev);
			dev_uc_sync(new_active->dev, bond->dev);
			dev_mc_sync(new_active->dev, bond->dev);
			netif_addr_unlock_bh(bond->dev);
		}

		bond_slave_ns_maddrs_del(bond, new_active);
	}
}

/**
 * bond_set_dev_addr - clone slave's address to bond
 * @bond_dev: bond net device
 * @slave_dev: slave net device
 *
 * Should be called with RTNL held.
 */
static int bond_set_dev_addr(struct net_device *bond_dev,
			     struct net_device *slave_dev)
{
	int err;

	slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
		  bond_dev, slave_dev, slave_dev->addr_len);
	err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL);
	if (err)
		return err;

	__dev_addr_set(bond_dev, slave_dev->dev_addr, slave_dev->addr_len);
	bond_dev->addr_assign_type = NET_ADDR_STOLEN;
	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
	return 0;
}

static struct slave *bond_get_old_active(struct bonding *bond,
					 struct slave *new_active)
{
	struct slave *slave;
	struct list_head *iter;

	bond_for_each_slave(bond, slave, iter) {
		if (slave == new_active)
			continue;

		if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr))
			return slave;
	}

	return NULL;
}

/* bond_do_fail_over_mac
 *
 * Perform special MAC address swapping for fail_over_mac settings
 *
 * Called with RTNL
 */
static void bond_do_fail_over_mac(struct bonding *bond,
				  struct slave *new_active,
				  struct slave *old_active)
{
	u8 tmp_mac[MAX_ADDR_LEN];
	struct sockaddr_storage ss;
	int rv;

	switch (bond->params.fail_over_mac) {
	case BOND_FOM_ACTIVE:
		if (new_active) {
			rv = bond_set_dev_addr(bond->dev, new_active->dev);
			if (rv)
				slave_err(bond->dev, new_active->dev, "Error %d setting bond MAC from slave\n",
					  -rv);
		}
		break;
	case BOND_FOM_FOLLOW:
		/* if new_active && old_active, swap them
		 * if just old_active, do nothing (going to no active slave)
		 * if just new_active, set new_active to bond's MAC
		 */
		if (!new_active)
			return;

		if (!old_active)
			old_active = bond_get_old_active(bond, new_active);

		if (old_active) {
			bond_hw_addr_copy(tmp_mac, new_active->dev->dev_addr,
					  new_active->dev->addr_len);
			bond_hw_addr_copy(ss.__data,
					  old_active->dev->dev_addr,
					  old_active->dev->addr_len);
			ss.ss_family = new_active->dev->type;
		} else {
			bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
					  bond->dev->addr_len);
			ss.ss_family = bond->dev->type;
		}

		rv = dev_set_mac_address(new_active->dev,
					 (struct sockaddr *)&ss, NULL);
		if (rv) {
			slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
				  -rv);
			goto out;
		}

		if (!old_active)
			goto out;

		bond_hw_addr_copy(ss.__data, tmp_mac,
				  new_active->dev->addr_len);
		ss.ss_family = old_active->dev->type;

		rv = dev_set_mac_address(old_active->dev,
					 (struct sockaddr *)&ss, NULL);
		if (rv)
			slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
				  -rv);
out:
		break;
	default:
		netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
			   bond->params.fail_over_mac);
		break;
	}
}

/**
 * bond_choose_primary_or_current - select the primary or high priority slave
 * @bond: our bonding struct
 *
 * - Check if there is a primary link. If the primary link was set and is up,
 *   go on and do link reselection.
 *
 * - If primary link is not set or down, find the highest priority link.
 *   If the highest priority link is not the current slave, set it as primary
 *   link and do link reselection.
 */
static struct slave *bond_choose_primary_or_current(struct bonding *bond)
{
	struct slave *prim = rtnl_dereference(bond->primary_slave);
	struct slave *curr = rtnl_dereference(bond->curr_active_slave);
	struct slave *slave, *hprio = NULL;
	struct list_head *iter;

	if (!prim || prim->link != BOND_LINK_UP) {
		bond_for_each_slave(bond, slave, iter) {
			if (slave->link == BOND_LINK_UP) {
				hprio = hprio ?: slave;
				if (slave->prio > hprio->prio)
					hprio = slave;
			}
		}

		if (hprio && hprio != curr) {
			prim = hprio;
			goto link_reselect;
		}

		if (!curr || curr->link != BOND_LINK_UP)
			return NULL;
		return curr;
	}

	if (bond->force_primary) {
		bond->force_primary = false;
		return prim;
	}

link_reselect:
	if (!curr || curr->link != BOND_LINK_UP)
		return prim;

	/* At this point, prim and curr are both up */
	switch (bond->params.primary_reselect) {
	case BOND_PRI_RESELECT_ALWAYS:
		return prim;
	case BOND_PRI_RESELECT_BETTER:
		if (prim->speed < curr->speed)
			return curr;
		if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
			return curr;
		return prim;
	case BOND_PRI_RESELECT_FAILURE:
		return curr;
	default:
		netdev_err(bond->dev, "impossible primary_reselect %d\n",
			   bond->params.primary_reselect);
		return curr;
	}
}

/**
 * bond_find_best_slave - select the best available slave to be the active one
 * @bond: our bonding struct
 */
static struct slave *bond_find_best_slave(struct bonding *bond)
{
	struct slave *slave, *bestslave = NULL;
	struct list_head *iter;
	int mintime = bond->params.updelay;

	slave = bond_choose_primary_or_current(bond);
	if (slave)
		return slave;

	bond_for_each_slave(bond, slave, iter) {
		if (slave->link == BOND_LINK_UP)
			return slave;
		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) &&
		    slave->delay < mintime) {
			mintime = slave->delay;
			bestslave = slave;
		}
	}

	return bestslave;
}

/* must be called in RCU critical section or with RTNL held */
static bool bond_should_notify_peers(struct bonding *bond)
{
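	/* Worked example (illustrative): with num_peer_notif = 2 and
	 * peer_notif_delay = 3 (stored in units of monitor intervals),
	 * send_peer_notif starts at 6 and is decremented once per monitor
	 * pass, so the modulo test below fires only at 6 and 3, i.e. two
	 * notifications spaced three intervals apart.
	 */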
	struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);

	if (!slave || !bond->send_peer_notif ||
	    bond->send_peer_notif %
	    max(1, bond->params.peer_notif_delay) != 0 ||
	    !netif_carrier_ok(bond->dev) ||
	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
		return false;

	netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
		   slave ? slave->dev->name : "NULL");

	return true;
}

/**
 * bond_change_active_slave - change the active slave into the specified one
 * @bond: our bonding struct
 * @new_active: the new slave to make the active one
 *
 * Set the new slave to the bond's settings and unset them on the old
 * curr_active_slave.
 * Settings include flags, mc-list, promiscuity, allmulti, etc.
 *
 * If @new_active's link state is %BOND_LINK_BACK we'll set it to
 * %BOND_LINK_UP, because it is apparently the best available slave we
 * have, even though its updelay hasn't timed out yet.
 *
 * Caller must hold RTNL.
 */
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
	struct slave *old_active;

	ASSERT_RTNL();

	old_active = rtnl_dereference(bond->curr_active_slave);

	if (old_active == new_active)
		return;

#ifdef CONFIG_XFRM_OFFLOAD
	bond_ipsec_del_sa_all(bond);
#endif /* CONFIG_XFRM_OFFLOAD */

	if (new_active) {
		new_active->last_link_up = jiffies;

		if (new_active->link == BOND_LINK_BACK) {
			if (bond_uses_primary(bond)) {
				slave_info(bond->dev, new_active->dev, "making interface the new active one %d ms earlier\n",
					   (bond->params.updelay - new_active->delay) * bond->params.miimon);
			}

			new_active->delay = 0;
			bond_set_slave_link_state(new_active, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);

			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
		} else {
			if (bond_uses_primary(bond))
				slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
		}
	}

	if (bond_uses_primary(bond))
		bond_hw_addr_swap(bond, new_active, old_active);

	if (bond_is_lb(bond)) {
		bond_alb_handle_active_change(bond, new_active);
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);
		if (new_active)
			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);
	} else {
		rcu_assign_pointer(bond->curr_active_slave, new_active);
	}

	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);

		if (new_active) {
			bool should_notify_peers = false;

			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);

			if (bond->params.fail_over_mac)
				bond_do_fail_over_mac(bond, new_active,
						      old_active);

			if (netif_running(bond->dev)) {
				bond->send_peer_notif =
					bond->params.num_peer_notif *
					max(1, bond->params.peer_notif_delay);
				should_notify_peers =
					bond_should_notify_peers(bond);
			}

			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
			if (should_notify_peers) {
				bond->send_peer_notif--;
				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
							 bond->dev);
			}
		}
	}
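	/* The SAs torn off the old active slave by bond_ipsec_del_sa_all()
	 * at the top of this function are now re-programmed into the new
	 * active slave's hardware.
	 */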
#ifdef CONFIG_XFRM_OFFLOAD
	bond_ipsec_add_sa_all(bond);
#endif /* CONFIG_XFRM_OFFLOAD */

	/* resend IGMP joins since active slave has changed or
	 * all were sent on curr_active_slave.
	 * resend only if bond is brought up with the affected
	 * bonding modes and the retransmission is enabled
	 */
	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
	    ((bond_uses_primary(bond) && new_active) ||
	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
		bond->igmp_retrans = bond->params.resend_igmp;
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
	}
}

/**
 * bond_select_active_slave - select a new active slave, if needed
 * @bond: our bonding struct
 *
 * This function should be called when one of the following occurs:
 * - The old curr_active_slave has been released or lost its link.
 * - The primary_slave has got its link back.
 * - A slave has got its link back and there's no old curr_active_slave.
 *
 * Caller must hold RTNL.
 */
void bond_select_active_slave(struct bonding *bond)
{
	struct slave *best_slave;
	int rv;

	ASSERT_RTNL();

	best_slave = bond_find_best_slave(bond);
	if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
		bond_change_active_slave(bond, best_slave);
		rv = bond_set_carrier(bond);
		if (!rv)
			return;

		if (netif_carrier_ok(bond->dev))
			netdev_info(bond->dev, "active interface up!\n");
		else
			netdev_info(bond->dev, "now running without any active interface!\n");
	}
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static inline int slave_enable_netpoll(struct slave *slave)
{
	struct netpoll *np;
	int err = 0;

	np = kzalloc(sizeof(*np), GFP_KERNEL);
	err = -ENOMEM;
	if (!np)
		goto out;

	err = __netpoll_setup(np, slave->dev);
	if (err) {
		kfree(np);
		goto out;
	}
	slave->np = np;
out:
	return err;
}
static inline void slave_disable_netpoll(struct slave *slave)
{
	struct netpoll *np = slave->np;

	if (!np)
		return;

	slave->np = NULL;

	__netpoll_free(np);
}

static void bond_poll_controller(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave = NULL;
	struct list_head *iter;
	struct ad_info ad_info;

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		if (bond_3ad_get_active_agg_info(bond, &ad_info))
			return;

	bond_for_each_slave_rcu(bond, slave, iter) {
		if (!bond_slave_is_up(slave))
			continue;

		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
			struct aggregator *agg =
				SLAVE_AD_INFO(slave)->port.aggregator;

			if (agg &&
			    agg->aggregator_identifier != ad_info.aggregator_id)
				continue;
		}

		netpoll_poll_dev(slave->dev);
	}
}

static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		if (bond_slave_is_up(slave))
			slave_disable_netpoll(slave);
}

static int bond_netpoll_setup(struct net_device *dev)
{
	struct bonding *bond = netdev_priv(dev);
	struct list_head *iter;
	struct slave *slave;
	int err = 0;

	bond_for_each_slave(bond, slave, iter) {
		err = slave_enable_netpoll(slave);
		if (err) {
			bond_netpoll_cleanup(dev);
			break;
		}
	}
	return err;
}
#else
static inline int slave_enable_netpoll(struct slave *slave)
{
	return 0;
}
static inline void slave_disable_netpoll(struct slave *slave)
{
}
static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
}
#endif

/*---------------------------------- IOCTL ----------------------------------*/

static netdev_features_t bond_fix_features(struct net_device *dev,
					   netdev_features_t features)
{
	struct bonding *bond = netdev_priv(dev);
	struct list_head *iter;
	netdev_features_t mask;
	struct slave *slave;

	mask = features;
	features = netdev_base_features(features);

	bond_for_each_slave(bond, slave, iter) {
		features = netdev_increment_features(features,
						     slave->dev->features,
						     mask);
	}
	features = netdev_add_tso_features(features, mask);

	return features;
}

#define BOND_VLAN_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
				 NETIF_F_GSO_ENCAP_ALL | \
				 NETIF_F_HIGHDMA | NETIF_F_LRO)

#define BOND_ENC_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \
				 NETIF_F_GSO_PARTIAL)

#define BOND_MPLS_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_GSO_SOFTWARE)

#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP)


static void bond_compute_features(struct bonding *bond)
{
	netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES;
	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
					IFF_XMIT_DST_RELEASE_PERM;
	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
	netdev_features_t enc_features = BOND_ENC_FEATURES;
#ifdef CONFIG_XFRM_OFFLOAD
	netdev_features_t xfrm_features = BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
	netdev_features_t mpls_features = BOND_MPLS_FEATURES;
	struct net_device *bond_dev = bond->dev;
	struct list_head *iter;
	struct slave *slave;
	unsigned short max_hard_header_len = ETH_HLEN;
	unsigned int tso_max_size = TSO_MAX_SIZE;
	u16 tso_max_segs = TSO_MAX_SEGS;

	if (!bond_has_slaves(bond))
		goto done;

	vlan_features = netdev_base_features(vlan_features);
	mpls_features = netdev_base_features(mpls_features);

	bond_for_each_slave(bond, slave, iter) {
		vlan_features = netdev_increment_features(vlan_features,
							  slave->dev->vlan_features,
							  BOND_VLAN_FEATURES);

		enc_features = netdev_increment_features(enc_features,
							 slave->dev->hw_enc_features,
							 BOND_ENC_FEATURES);

#ifdef CONFIG_XFRM_OFFLOAD
		xfrm_features = netdev_increment_features(xfrm_features,
							  slave->dev->hw_enc_features,
							  BOND_XFRM_FEATURES);
#endif /* CONFIG_XFRM_OFFLOAD */

		gso_partial_features = netdev_increment_features(gso_partial_features,
								 slave->dev->gso_partial_features,
								 BOND_GSO_PARTIAL_FEATURES);

		mpls_features = netdev_increment_features(mpls_features,
							  slave->dev->mpls_features,
							  BOND_MPLS_FEATURES);

		dst_release_flag &= slave->dev->priv_flags;
		if (slave->dev->hard_header_len > max_hard_header_len)
			max_hard_header_len = slave->dev->hard_header_len;

		tso_max_size = min(tso_max_size, slave->dev->tso_max_size);
		tso_max_segs = min(tso_max_segs, slave->dev->tso_max_segs);
	}
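	/* The loop above leaves each set at the "lowest common denominator"
	 * of the slaves: feature flags are combined with
	 * netdev_increment_features() (honouring its one-for-all /
	 * all-for-all semantics) and the TSO limits are clamped to the most
	 * restrictive slave.
	 */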
	bond_dev->hard_header_len = max_hard_header_len;

done:
	bond_dev->gso_partial_features = gso_partial_features;
	bond_dev->vlan_features = vlan_features;
	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
				    NETIF_F_HW_VLAN_CTAG_TX |
				    NETIF_F_HW_VLAN_STAG_TX;
#ifdef CONFIG_XFRM_OFFLOAD
	bond_dev->hw_enc_features |= xfrm_features;
#endif /* CONFIG_XFRM_OFFLOAD */
	bond_dev->mpls_features = mpls_features;
	netif_set_tso_max_segs(bond_dev, tso_max_segs);
	netif_set_tso_max_size(bond_dev, tso_max_size);

	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
	if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
	    dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
		bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;

	netdev_change_features(bond_dev);
}

static void bond_setup_by_slave(struct net_device *bond_dev,
				struct net_device *slave_dev)
{
	bool was_up = !!(bond_dev->flags & IFF_UP);

	dev_close(bond_dev);

	bond_dev->header_ops = slave_dev->header_ops;

	bond_dev->type = slave_dev->type;
	bond_dev->hard_header_len = slave_dev->hard_header_len;
	bond_dev->needed_headroom = slave_dev->needed_headroom;
	bond_dev->addr_len = slave_dev->addr_len;

	memcpy(bond_dev->broadcast, slave_dev->broadcast,
	       slave_dev->addr_len);

	if (slave_dev->flags & IFF_POINTOPOINT) {
		bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
		bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
	}
	if (was_up)
		dev_open(bond_dev, NULL);
}

/* On bonding slaves other than the currently active slave, suppress
 * duplicates except for alb non-mcast/bcast.
 */
static bool bond_should_deliver_exact_match(struct sk_buff *skb,
					    struct slave *slave,
					    struct bonding *bond)
{
	if (bond_is_slave_inactive(slave)) {
		if (BOND_MODE(bond) == BOND_MODE_ALB &&
		    skb->pkt_type != PACKET_BROADCAST &&
		    skb->pkt_type != PACKET_MULTICAST)
			return false;
		return true;
	}
	return false;
}

static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct slave *slave;
	struct bonding *bond;
	int (*recv_probe)(const struct sk_buff *, struct bonding *,
			  struct slave *);
	int ret = RX_HANDLER_ANOTHER;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return RX_HANDLER_CONSUMED;

	*pskb = skb;

	slave = bond_slave_get_rcu(skb->dev);
	bond = slave->bond;

	recv_probe = READ_ONCE(bond->recv_probe);
	if (recv_probe) {
		ret = recv_probe(skb, bond, slave);
		if (ret == RX_HANDLER_CONSUMED) {
			consume_skb(skb);
			return ret;
		}
	}

	/*
	 * For packets determined by bond_should_deliver_exact_match() call to
	 * be suppressed we want to make an exception for link-local packets.
	 * This is necessary for e.g. LLDP daemons to be able to monitor
	 * inactive slave links without being forced to bind to them
	 * explicitly.
	 *
	 * At the same time, packets that are passed to the bonding master
	 * (including link-local ones) can have their originating interface
	 * determined via PACKET_ORIGDEV socket option.
	 */
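	/* For instance, an LLDP frame (dst 01:80:c2:00:00:0e) arriving on a
	 * backup slave takes the RX_HANDLER_PASS branch below and is
	 * delivered with skb->dev still pointing at the slave, so an LLDP
	 * daemon can see per-port neighbours.
	 */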
	if (bond_should_deliver_exact_match(skb, slave, bond)) {
		if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
			return RX_HANDLER_PASS;
		return RX_HANDLER_EXACT;
	}

	skb->dev = bond->dev;

	if (BOND_MODE(bond) == BOND_MODE_ALB &&
	    netif_is_bridge_port(bond->dev) &&
	    skb->pkt_type == PACKET_HOST) {

		if (unlikely(skb_cow_head(skb,
					  skb->data - skb_mac_header(skb)))) {
			kfree_skb(skb);
			return RX_HANDLER_CONSUMED;
		}
		bond_hw_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr,
				  bond->dev->addr_len);
	}

	return ret;
}

static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_ROUNDROBIN:
		return NETDEV_LAG_TX_TYPE_ROUNDROBIN;
	case BOND_MODE_ACTIVEBACKUP:
		return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
	case BOND_MODE_BROADCAST:
		return NETDEV_LAG_TX_TYPE_BROADCAST;
	case BOND_MODE_XOR:
	case BOND_MODE_8023AD:
		return NETDEV_LAG_TX_TYPE_HASH;
	default:
		return NETDEV_LAG_TX_TYPE_UNKNOWN;
	}
}

static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
					       enum netdev_lag_tx_type type)
{
	if (type != NETDEV_LAG_TX_TYPE_HASH)
		return NETDEV_LAG_HASH_NONE;

	switch (bond->params.xmit_policy) {
	case BOND_XMIT_POLICY_LAYER2:
		return NETDEV_LAG_HASH_L2;
	case BOND_XMIT_POLICY_LAYER34:
		return NETDEV_LAG_HASH_L34;
	case BOND_XMIT_POLICY_LAYER23:
		return NETDEV_LAG_HASH_L23;
	case BOND_XMIT_POLICY_ENCAP23:
		return NETDEV_LAG_HASH_E23;
	case BOND_XMIT_POLICY_ENCAP34:
		return NETDEV_LAG_HASH_E34;
	case BOND_XMIT_POLICY_VLAN_SRCMAC:
		return NETDEV_LAG_HASH_VLAN_SRCMAC;
	default:
		return NETDEV_LAG_HASH_UNKNOWN;
	}
}

static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
				      struct netlink_ext_ack *extack)
{
	struct netdev_lag_upper_info lag_upper_info;
	enum netdev_lag_tx_type type;
	int err;

	type = bond_lag_tx_type(bond);
	lag_upper_info.tx_type = type;
	lag_upper_info.hash_type = bond_lag_hash_type(bond, type);

	err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
					   &lag_upper_info, extack);
	if (err)
		return err;

	slave->dev->flags |= IFF_SLAVE;
	return 0;
}

static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
{
	netdev_upper_dev_unlink(slave->dev, bond->dev);
	slave->dev->flags &= ~IFF_SLAVE;
}

static void slave_kobj_release(struct kobject *kobj)
{
	struct slave *slave = to_slave(kobj);
	struct bonding *bond = bond_get_bond_by_slave(slave);

	cancel_delayed_work_sync(&slave->notify_work);
	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		kfree(SLAVE_AD_INFO(slave));

	kfree(slave);
}

static struct kobj_type slave_ktype = {
	.release = slave_kobj_release,
#ifdef CONFIG_SYSFS
	.sysfs_ops = &slave_sysfs_ops,
#endif
};

static int bond_kobj_init(struct slave *slave)
{
	int err;

	err = kobject_init_and_add(&slave->kobj, &slave_ktype,
				   &(slave->dev->dev.kobj), "bonding_slave");
	if (err)
		kobject_put(&slave->kobj);

	return err;
}

static struct slave *bond_alloc_slave(struct bonding *bond,
				      struct net_device *slave_dev)
{
	struct slave *slave = NULL;
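	/* Lifetime note: once bond_kobj_init() has initialised slave->kobj,
	 * the slave is freed through kobject_put() -> slave_kobj_release(),
	 * which is why the error paths below never call kfree() directly
	 * after that point.
	 */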
	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
	if (!slave)
		return NULL;

	slave->bond = bond;
	slave->dev = slave_dev;
	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);

	if (bond_kobj_init(slave))
		return NULL;

	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info),
					       GFP_KERNEL);
		if (!SLAVE_AD_INFO(slave)) {
			kobject_put(&slave->kobj);
			return NULL;
		}
	}

	return slave;
}

static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
{
	info->bond_mode = BOND_MODE(bond);
	info->miimon = bond->params.miimon;
	info->num_slaves = bond->slave_cnt;
}

static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
{
	strcpy(info->slave_name, slave->dev->name);
	info->link = slave->link;
	info->state = bond_slave_state(slave);
	info->link_failure_count = slave->link_failure_count;
}

static void bond_netdev_notify_work(struct work_struct *_work)
{
	struct slave *slave = container_of(_work, struct slave,
					   notify_work.work);

	if (rtnl_trylock()) {
		struct netdev_bonding_info binfo;

		bond_fill_ifslave(slave, &binfo.slave);
		bond_fill_ifbond(slave->bond, &binfo.master);
		netdev_bonding_info_change(slave->dev, &binfo);
		rtnl_unlock();
	} else {
		queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
	}
}

void bond_queue_slave_event(struct slave *slave)
{
	queue_delayed_work(slave->bond->wq, &slave->notify_work, 0);
}

void bond_lower_state_changed(struct slave *slave)
{
	struct netdev_lag_lower_state_info info;

	info.link_up = slave->link == BOND_LINK_UP ||
		       slave->link == BOND_LINK_FAIL;
	info.tx_enabled = bond_is_active_slave(slave);
	netdev_lower_state_changed(slave->dev, &info);
}

#define BOND_NL_ERR(bond_dev, extack, errmsg) do {		\
	if (extack)						\
		NL_SET_ERR_MSG(extack, errmsg);			\
	else							\
		netdev_err(bond_dev, "Error: %s\n", errmsg);	\
} while (0)

#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do {		\
	if (extack)							\
		NL_SET_ERR_MSG(extack, errmsg);				\
	else								\
		slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg);	\
} while (0)

/* The bonding driver uses ether_setup() to convert a master bond device
 * to ARPHRD_ETHER, which resets the target netdevice's flags, so we always
 * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP
 * if they were set.
 */
static void bond_ether_setup(struct net_device *bond_dev)
{
	unsigned int flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);

	ether_setup(bond_dev);
	bond_dev->flags |= IFF_MASTER | flags;
	bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}

void bond_xdp_set_features(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	xdp_features_t val = NETDEV_XDP_ACT_MASK;
	struct list_head *iter;
	struct slave *slave;

	ASSERT_RTNL();

	if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) {
		xdp_clear_features_flag(bond_dev);
		return;
	}

	bond_for_each_slave(bond, slave, iter)
		val &= slave->dev->xdp_features;

	val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;

	xdp_set_features_flag(bond_dev, val);
}
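/* Note: the bond advertises only the intersection of its slaves'
 * xdp_features, so one slave lacking a capability masks it for the
 * whole bond; XSK zerocopy is always cleared, as bonding does not
 * support it.
 */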
device <slave> to bond device <master> */ 1951 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, 1952 struct netlink_ext_ack *extack) 1953 { 1954 struct bonding *bond = netdev_priv(bond_dev); 1955 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 1956 struct slave *new_slave = NULL, *prev_slave; 1957 struct sockaddr_storage ss; 1958 int link_reporting; 1959 int res = 0, i; 1960 1961 if (slave_dev->flags & IFF_MASTER && 1962 !netif_is_bond_master(slave_dev)) { 1963 BOND_NL_ERR(bond_dev, extack, 1964 "Device type (master device) cannot be enslaved"); 1965 return -EPERM; 1966 } 1967 1968 if (!bond->params.use_carrier && 1969 slave_dev->ethtool_ops->get_link == NULL && 1970 slave_ops->ndo_eth_ioctl == NULL) { 1971 slave_warn(bond_dev, slave_dev, "no link monitoring support\n"); 1972 } 1973 1974 /* already in-use? */ 1975 if (netdev_is_rx_handler_busy(slave_dev)) { 1976 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 1977 "Device is in use and cannot be enslaved"); 1978 return -EBUSY; 1979 } 1980 1981 if (bond_dev == slave_dev) { 1982 BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself."); 1983 return -EPERM; 1984 } 1985 1986 /* vlan challenged mutual exclusion */ 1987 /* no need to lock since we're protected by rtnl_lock */ 1988 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1989 slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n"); 1990 if (vlan_uses_dev(bond_dev)) { 1991 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 1992 "Can not enslave VLAN challenged device to VLAN enabled bond"); 1993 return -EPERM; 1994 } else { 1995 slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n"); 1996 } 1997 } else { 1998 slave_dbg(bond_dev, slave_dev, "is !NETIF_F_VLAN_CHALLENGED\n"); 1999 } 2000 2001 if (slave_dev->features & NETIF_F_HW_ESP) 2002 slave_dbg(bond_dev, slave_dev, "is esp-hw-offload capable\n"); 2003 2004 /* Old ifenslave binaries are no longer supported. These can 2005 * be identified with moderate accuracy by the state of the slave: 2006 * the current ifenslave will set the interface down prior to 2007 * enslaving it; the old ifenslave will not. 2008 */ 2009 if (slave_dev->flags & IFF_UP) { 2010 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 2011 "Device can not be enslaved while up"); 2012 return -EPERM; 2013 } 2014 2015 /* set bonding device ether type by slave - bonding netdevices are 2016 * created with ether_setup, so when the slave type is not ARPHRD_ETHER 2017 * there is a need to override some of the type dependent attribs/funcs. 
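         *
         * Illustrative example (hypothetical devices): enslaving ib0
         * (ARPHRD_INFINIBAND) into an empty bond re-types the bond via
         * bond_setup_by_slave(); a later attempt to add eth0 (ARPHRD_ETHER)
         * is then rejected with -EINVAL by the type check below.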
         *
         * bond ether type mutual exclusion - don't allow slaves of dissimilar
         * ether type (e.g. ARPHRD_ETHER and ARPHRD_INFINIBAND) to share the
         * same bond
         */
        if (!bond_has_slaves(bond)) {
                if (bond_dev->type != slave_dev->type) {
                        slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n",
                                  bond_dev->type, slave_dev->type);

                        res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
                                                       bond_dev);
                        res = notifier_to_errno(res);
                        if (res) {
                                slave_err(bond_dev, slave_dev, "refused to change device type\n");
                                return -EBUSY;
                        }

                        /* Flush unicast and multicast addresses */
                        dev_uc_flush(bond_dev);
                        dev_mc_flush(bond_dev);

                        if (slave_dev->type != ARPHRD_ETHER)
                                bond_setup_by_slave(bond_dev, slave_dev);
                        else
                                bond_ether_setup(bond_dev);

                        call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
                                                 bond_dev);
                }
        } else if (bond_dev->type != slave_dev->type) {
                SLAVE_NL_ERR(bond_dev, slave_dev, extack,
                             "Device type is different from other slaves");
                return -EINVAL;
        }

        if (slave_dev->type == ARPHRD_INFINIBAND &&
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
                SLAVE_NL_ERR(bond_dev, slave_dev, extack,
                             "Only active-backup mode is supported for infiniband slaves");
                res = -EOPNOTSUPP;
                goto err_undo_flags;
        }

        if (!slave_ops->ndo_set_mac_address ||
            slave_dev->type == ARPHRD_INFINIBAND) {
                slave_warn(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address\n");
                if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
                    bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
                        if (!bond_has_slaves(bond)) {
                                bond->params.fail_over_mac = BOND_FOM_ACTIVE;
                                slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
                        } else {
                                SLAVE_NL_ERR(bond_dev, slave_dev, extack,
                                             "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
                                res = -EOPNOTSUPP;
                                goto err_undo_flags;
                        }
                }
        }

        call_netdevice_notifiers(NETDEV_JOIN, slave_dev);

        /* If this is the first slave, then we need to set the master's hardware
         * address to be the same as the slave's.
         */
        if (!bond_has_slaves(bond) &&
            bond->dev->addr_assign_type == NET_ADDR_RANDOM) {
                res = bond_set_dev_addr(bond->dev, slave_dev);
                if (res)
                        goto err_undo_flags;
        }

        new_slave = bond_alloc_slave(bond, slave_dev);
        if (!new_slave) {
                res = -ENOMEM;
                goto err_undo_flags;
        }

        /* Set the new_slave's queue_id to be zero. Queue ID mapping
         * is set via sysfs or module option if desired.
         */
        new_slave->queue_id = 0;

        /* Save slave's original mtu and then set it to match the bond */
        new_slave->original_mtu = slave_dev->mtu;
        res = dev_set_mtu(slave_dev, bond->dev->mtu);
        if (res) {
                slave_err(bond_dev, slave_dev, "Error %d calling dev_set_mtu\n", res);
                goto err_free;
        }

        /* Save slave's original ("permanent") mac address for modes
         * that need it, and for restoring it upon release, and then
         * set it to the master's address
         */
        bond_hw_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr,
                          slave_dev->addr_len);

        if (!bond->params.fail_over_mac ||
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
                /* Set slave to master's mac address.
The application already 2119 * set the master's mac address to that of the first slave 2120 */ 2121 memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 2122 ss.ss_family = slave_dev->type; 2123 res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, 2124 extack); 2125 if (res) { 2126 slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); 2127 goto err_restore_mtu; 2128 } 2129 } 2130 2131 /* set no_addrconf flag before open to prevent IPv6 addrconf */ 2132 slave_dev->priv_flags |= IFF_NO_ADDRCONF; 2133 2134 /* open the slave since the application closed it */ 2135 res = dev_open(slave_dev, extack); 2136 if (res) { 2137 slave_err(bond_dev, slave_dev, "Opening slave failed\n"); 2138 goto err_restore_mac; 2139 } 2140 2141 slave_dev->priv_flags |= IFF_BONDING; 2142 /* initialize slave stats */ 2143 dev_get_stats(new_slave->dev, &new_slave->slave_stats); 2144 2145 if (bond_is_lb(bond)) { 2146 /* bond_alb_init_slave() must be called before all other stages since 2147 * it might fail and we do not want to have to undo everything 2148 */ 2149 res = bond_alb_init_slave(bond, new_slave); 2150 if (res) 2151 goto err_close; 2152 } 2153 2154 res = vlan_vids_add_by_dev(slave_dev, bond_dev); 2155 if (res) { 2156 slave_err(bond_dev, slave_dev, "Couldn't add bond vlan ids\n"); 2157 goto err_close; 2158 } 2159 2160 prev_slave = bond_last_slave(bond); 2161 2162 new_slave->delay = 0; 2163 new_slave->link_failure_count = 0; 2164 2165 if (bond_update_speed_duplex(new_slave) && 2166 bond_needs_speed_duplex(bond)) 2167 new_slave->link = BOND_LINK_DOWN; 2168 2169 new_slave->last_rx = jiffies - 2170 (msecs_to_jiffies(bond->params.arp_interval) + 1); 2171 for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) 2172 new_slave->target_last_arp_rx[i] = new_slave->last_rx; 2173 2174 new_slave->last_tx = new_slave->last_rx; 2175 2176 if (bond->params.miimon && !bond->params.use_carrier) { 2177 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 2178 2179 if ((link_reporting == -1) && !bond->params.arp_interval) { 2180 /* miimon is set but a bonded network driver 2181 * does not support ETHTOOL/MII and 2182 * arp_interval is not set. Note: if 2183 * use_carrier is enabled, we will never go 2184 * here (because netif_carrier is always 2185 * supported); thus, we don't need to change 2186 * the messages for netif_carrier. 2187 */ 2188 slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! 
see bonding.txt for details\n");
                } else if (link_reporting == -1) {
                        /* unable to get link status using mii/ethtool */
                        slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
                }
        }

        /* check for initial state */
        new_slave->link = BOND_LINK_NOCHANGE;
        if (bond->params.miimon) {
                if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
                        if (bond->params.updelay) {
                                bond_set_slave_link_state(new_slave,
                                                          BOND_LINK_BACK,
                                                          BOND_SLAVE_NOTIFY_NOW);
                                new_slave->delay = bond->params.updelay;
                        } else {
                                bond_set_slave_link_state(new_slave,
                                                          BOND_LINK_UP,
                                                          BOND_SLAVE_NOTIFY_NOW);
                        }
                } else {
                        bond_set_slave_link_state(new_slave, BOND_LINK_DOWN,
                                                  BOND_SLAVE_NOTIFY_NOW);
                }
        } else if (bond->params.arp_interval) {
                bond_set_slave_link_state(new_slave,
                                          (netif_carrier_ok(slave_dev) ?
                                          BOND_LINK_UP : BOND_LINK_DOWN),
                                          BOND_SLAVE_NOTIFY_NOW);
        } else {
                bond_set_slave_link_state(new_slave, BOND_LINK_UP,
                                          BOND_SLAVE_NOTIFY_NOW);
        }

        if (new_slave->link != BOND_LINK_DOWN)
                new_slave->last_link_up = jiffies;
        slave_dbg(bond_dev, slave_dev, "Initial state of slave is BOND_LINK_%s\n",
                  new_slave->link == BOND_LINK_DOWN ? "DOWN" :
                  (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));

        if (bond_uses_primary(bond) && bond->params.primary[0]) {
                /* if there is a primary slave, remember it */
                if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
                        rcu_assign_pointer(bond->primary_slave, new_slave);
                        bond->force_primary = true;
                }
        }

        switch (BOND_MODE(bond)) {
        case BOND_MODE_ACTIVEBACKUP:
                bond_set_slave_inactive_flags(new_slave,
                                              BOND_SLAVE_NOTIFY_NOW);
                break;
        case BOND_MODE_8023AD:
                /* in 802.3ad mode, the internal mechanism
                 * will activate the slaves in the selected
                 * aggregator
                 */
                bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
                /* if this is the first slave */
                if (!prev_slave) {
                        SLAVE_AD_INFO(new_slave)->id = 1;
                        /* Initialize AD with the number of times that the AD
                         * timer is called in 1 second; can be called only
                         * after the mac address of the bond is set
                         */
                        bond_3ad_initialize(bond);
                } else {
                        SLAVE_AD_INFO(new_slave)->id =
                                SLAVE_AD_INFO(prev_slave)->id + 1;
                }

                bond_3ad_bind_slave(new_slave);
                break;
        case BOND_MODE_TLB:
        case BOND_MODE_ALB:
                bond_set_active_slave(new_slave);
                bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
                break;
        default:
                slave_dbg(bond_dev, slave_dev, "This slave is always active in trunk mode\n");

                /* always active in trunk mode */
                bond_set_active_slave(new_slave);

                /* In trunking mode there is little meaning to curr_active_slave
                 * anyway (it holds no special properties of the bond device),
                 * so we can change it without calling change_active_interface()
                 */
                if (!rcu_access_pointer(bond->curr_active_slave) &&
                    new_slave->link == BOND_LINK_UP)
                        rcu_assign_pointer(bond->curr_active_slave, new_slave);

                break;
        } /* switch(bond_mode) */

#ifdef CONFIG_NET_POLL_CONTROLLER
        if (bond->dev->npinfo) {
                if (slave_enable_netpoll(new_slave)) {
                        slave_info(bond_dev, slave_dev,
"master_dev is using netpoll, but new slave device does not support netpoll\n"); 2288 res = -EBUSY; 2289 goto err_detach; 2290 } 2291 } 2292 #endif 2293 2294 if (!(bond_dev->features & NETIF_F_LRO)) 2295 dev_disable_lro(slave_dev); 2296 2297 res = netdev_rx_handler_register(slave_dev, bond_handle_frame, 2298 new_slave); 2299 if (res) { 2300 slave_dbg(bond_dev, slave_dev, "Error %d calling netdev_rx_handler_register\n", res); 2301 goto err_detach; 2302 } 2303 2304 res = bond_master_upper_dev_link(bond, new_slave, extack); 2305 if (res) { 2306 slave_dbg(bond_dev, slave_dev, "Error %d calling bond_master_upper_dev_link\n", res); 2307 goto err_unregister; 2308 } 2309 2310 bond_lower_state_changed(new_slave); 2311 2312 res = bond_sysfs_slave_add(new_slave); 2313 if (res) { 2314 slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); 2315 goto err_upper_unlink; 2316 } 2317 2318 /* If the mode uses primary, then the following is handled by 2319 * bond_change_active_slave(). 2320 */ 2321 if (!bond_uses_primary(bond)) { 2322 /* set promiscuity level to new slave */ 2323 if (bond_dev->flags & IFF_PROMISC) { 2324 res = dev_set_promiscuity(slave_dev, 1); 2325 if (res) 2326 goto err_sysfs_del; 2327 } 2328 2329 /* set allmulti level to new slave */ 2330 if (bond_dev->flags & IFF_ALLMULTI) { 2331 res = dev_set_allmulti(slave_dev, 1); 2332 if (res) { 2333 if (bond_dev->flags & IFF_PROMISC) 2334 dev_set_promiscuity(slave_dev, -1); 2335 goto err_sysfs_del; 2336 } 2337 } 2338 2339 if (bond_dev->flags & IFF_UP) { 2340 netif_addr_lock_bh(bond_dev); 2341 dev_mc_sync_multiple(slave_dev, bond_dev); 2342 dev_uc_sync_multiple(slave_dev, bond_dev); 2343 netif_addr_unlock_bh(bond_dev); 2344 2345 if (BOND_MODE(bond) == BOND_MODE_8023AD) 2346 dev_mc_add(slave_dev, lacpdu_mcast_addr); 2347 } 2348 } 2349 2350 bond->slave_cnt++; 2351 bond_compute_features(bond); 2352 bond_set_carrier(bond); 2353 2354 /* Needs to be called before bond_select_active_slave(), which will 2355 * remove the maddrs if the slave is selected as active slave. 2356 */ 2357 bond_slave_ns_maddrs_add(bond, new_slave); 2358 2359 if (bond_uses_primary(bond)) { 2360 block_netpoll_tx(); 2361 bond_select_active_slave(bond); 2362 unblock_netpoll_tx(); 2363 } 2364 2365 if (bond_mode_can_use_xmit_hash(bond)) 2366 bond_update_slave_arr(bond, NULL); 2367 2368 if (!slave_dev->netdev_ops->ndo_bpf || 2369 !slave_dev->netdev_ops->ndo_xdp_xmit) { 2370 if (bond->xdp_prog) { 2371 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 2372 "Slave does not support XDP"); 2373 res = -EOPNOTSUPP; 2374 goto err_sysfs_del; 2375 } 2376 } else if (bond->xdp_prog) { 2377 struct netdev_bpf xdp = { 2378 .command = XDP_SETUP_PROG, 2379 .flags = 0, 2380 .prog = bond->xdp_prog, 2381 .extack = extack, 2382 }; 2383 2384 if (dev_xdp_prog_count(slave_dev) > 0) { 2385 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 2386 "Slave has XDP program loaded, please unload before enslaving"); 2387 res = -EOPNOTSUPP; 2388 goto err_sysfs_del; 2389 } 2390 2391 res = dev_xdp_propagate(slave_dev, &xdp); 2392 if (res < 0) { 2393 /* ndo_bpf() sets extack error message */ 2394 slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); 2395 goto err_sysfs_del; 2396 } 2397 if (bond->xdp_prog) 2398 bpf_prog_inc(bond->xdp_prog); 2399 } 2400 2401 bond_xdp_set_features(bond_dev); 2402 2403 slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", 2404 bond_is_active_slave(new_slave) ? "an active" : "a backup", 2405 new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); 2406 2407 /* enslave is successful */ 2408 bond_queue_slave_event(new_slave); 2409 return 0; 2410 2411 /* Undo stages on error */ 2412 err_sysfs_del: 2413 bond_sysfs_slave_del(new_slave); 2414 2415 err_upper_unlink: 2416 bond_upper_dev_unlink(bond, new_slave); 2417 2418 err_unregister: 2419 netdev_rx_handler_unregister(slave_dev); 2420 2421 err_detach: 2422 vlan_vids_del_by_dev(slave_dev, bond_dev); 2423 if (rcu_access_pointer(bond->primary_slave) == new_slave) 2424 RCU_INIT_POINTER(bond->primary_slave, NULL); 2425 if (rcu_access_pointer(bond->curr_active_slave) == new_slave) { 2426 block_netpoll_tx(); 2427 bond_change_active_slave(bond, NULL); 2428 bond_select_active_slave(bond); 2429 unblock_netpoll_tx(); 2430 } 2431 /* either primary_slave or curr_active_slave might've changed */ 2432 synchronize_rcu(); 2433 slave_disable_netpoll(new_slave); 2434 2435 err_close: 2436 if (!netif_is_bond_master(slave_dev)) 2437 slave_dev->priv_flags &= ~IFF_BONDING; 2438 dev_close(slave_dev); 2439 2440 err_restore_mac: 2441 slave_dev->priv_flags &= ~IFF_NO_ADDRCONF; 2442 if (!bond->params.fail_over_mac || 2443 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2444 /* XXX TODO - fom follow mode needs to change master's 2445 * MAC if this slave's MAC is in use by the bond, or at 2446 * least print a warning. 2447 */ 2448 bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr, 2449 new_slave->dev->addr_len); 2450 ss.ss_family = slave_dev->type; 2451 dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); 2452 } 2453 2454 err_restore_mtu: 2455 dev_set_mtu(slave_dev, new_slave->original_mtu); 2456 2457 err_free: 2458 kobject_put(&new_slave->kobj); 2459 2460 err_undo_flags: 2461 /* Enslave of first slave has failed and we need to fix master's mac */ 2462 if (!bond_has_slaves(bond)) { 2463 if (ether_addr_equal_64bits(bond_dev->dev_addr, 2464 slave_dev->dev_addr)) 2465 eth_hw_addr_random(bond_dev); 2466 if (bond_dev->type != ARPHRD_ETHER) { 2467 dev_close(bond_dev); 2468 bond_ether_setup(bond_dev); 2469 } 2470 } 2471 2472 return res; 2473 } 2474 2475 /* Try to release the slave device <slave> from the bond device <master> 2476 * It is legal to access curr_active_slave without a lock because all the function 2477 * is RTNL-locked. If "all" is true it means that the function is being called 2478 * while destroying a bond interface and all slaves are being released. 2479 * 2480 * The rules for slave state should be: 2481 * for Active/Backup: 2482 * Active stays on all backups go down 2483 * for Bonded connections: 2484 * The first up interface should be left on and all others downed. 
2485 */ 2486 static int __bond_release_one(struct net_device *bond_dev, 2487 struct net_device *slave_dev, 2488 bool all, bool unregister) 2489 { 2490 struct bonding *bond = netdev_priv(bond_dev); 2491 struct slave *slave, *oldcurrent; 2492 struct sockaddr_storage ss; 2493 int old_flags = bond_dev->flags; 2494 netdev_features_t old_features = bond_dev->features; 2495 2496 /* slave is not a slave or master is not master of this slave */ 2497 if (!(slave_dev->flags & IFF_SLAVE) || 2498 !netdev_has_upper_dev(slave_dev, bond_dev)) { 2499 slave_dbg(bond_dev, slave_dev, "cannot release slave\n"); 2500 return -EINVAL; 2501 } 2502 2503 block_netpoll_tx(); 2504 2505 slave = bond_get_slave_by_dev(bond, slave_dev); 2506 if (!slave) { 2507 /* not a slave of this bond */ 2508 slave_info(bond_dev, slave_dev, "interface not enslaved\n"); 2509 unblock_netpoll_tx(); 2510 return -EINVAL; 2511 } 2512 2513 bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); 2514 2515 bond_sysfs_slave_del(slave); 2516 2517 /* recompute stats just before removing the slave */ 2518 bond_get_stats(bond->dev, &bond->bond_stats); 2519 2520 if (bond->xdp_prog) { 2521 struct netdev_bpf xdp = { 2522 .command = XDP_SETUP_PROG, 2523 .flags = 0, 2524 .prog = NULL, 2525 .extack = NULL, 2526 }; 2527 if (dev_xdp_propagate(slave_dev, &xdp)) 2528 slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); 2529 } 2530 2531 /* unregister rx_handler early so bond_handle_frame wouldn't be called 2532 * for this slave anymore. 2533 */ 2534 netdev_rx_handler_unregister(slave_dev); 2535 2536 if (BOND_MODE(bond) == BOND_MODE_8023AD) 2537 bond_3ad_unbind_slave(slave); 2538 2539 bond_upper_dev_unlink(bond, slave); 2540 2541 if (bond_mode_can_use_xmit_hash(bond)) 2542 bond_update_slave_arr(bond, slave); 2543 2544 slave_info(bond_dev, slave_dev, "Releasing %s interface\n", 2545 bond_is_active_slave(slave) ? "active" : "backup"); 2546 2547 oldcurrent = rcu_access_pointer(bond->curr_active_slave); 2548 2549 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 2550 2551 if (!all && (!bond->params.fail_over_mac || 2552 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { 2553 if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && 2554 bond_has_slaves(bond)) 2555 slave_warn(bond_dev, slave_dev, "the permanent HWaddr of slave - %pM - is still in use by bond - set the HWaddr of slave to a different address to avoid conflicts\n", 2556 slave->perm_hwaddr); 2557 } 2558 2559 if (rtnl_dereference(bond->primary_slave) == slave) 2560 RCU_INIT_POINTER(bond->primary_slave, NULL); 2561 2562 if (oldcurrent == slave) 2563 bond_change_active_slave(bond, NULL); 2564 2565 /* Must be called after bond_change_active_slave () as the slave 2566 * might change from an active slave to a backup slave. Then it is 2567 * necessary to clear the maddrs on the backup slave. 2568 */ 2569 bond_slave_ns_maddrs_del(bond, slave); 2570 2571 if (bond_is_lb(bond)) { 2572 /* Must be called only after the slave has been 2573 * detached from the list and the curr_active_slave 2574 * has been cleared (if our_slave == old_current), 2575 * but before a new active slave is selected. 2576 */ 2577 bond_alb_deinit_slave(bond, slave); 2578 } 2579 2580 if (all) { 2581 RCU_INIT_POINTER(bond->curr_active_slave, NULL); 2582 } else if (oldcurrent == slave) { 2583 /* Note that we hold RTNL over this sequence, so there 2584 * is no concern that another slave add/remove event 2585 * will interfere. 
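                 * The same RTNL protection is also what makes the unlocked
                 * curr_active_slave accesses in this function safe.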
2586 */ 2587 bond_select_active_slave(bond); 2588 } 2589 2590 bond_set_carrier(bond); 2591 if (!bond_has_slaves(bond)) 2592 eth_hw_addr_random(bond_dev); 2593 2594 unblock_netpoll_tx(); 2595 synchronize_rcu(); 2596 bond->slave_cnt--; 2597 2598 if (!bond_has_slaves(bond)) { 2599 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); 2600 call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); 2601 } 2602 2603 bond_compute_features(bond); 2604 if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 2605 (old_features & NETIF_F_VLAN_CHALLENGED)) 2606 slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n"); 2607 2608 vlan_vids_del_by_dev(slave_dev, bond_dev); 2609 2610 /* If the mode uses primary, then this case was handled above by 2611 * bond_change_active_slave(..., NULL) 2612 */ 2613 if (!bond_uses_primary(bond)) { 2614 /* unset promiscuity level from slave 2615 * NOTE: The NETDEV_CHANGEADDR call above may change the value 2616 * of the IFF_PROMISC flag in the bond_dev, but we need the 2617 * value of that flag before that change, as that was the value 2618 * when this slave was attached, so we cache at the start of the 2619 * function and use it here. Same goes for ALLMULTI below 2620 */ 2621 if (old_flags & IFF_PROMISC) 2622 dev_set_promiscuity(slave_dev, -1); 2623 2624 /* unset allmulti level from slave */ 2625 if (old_flags & IFF_ALLMULTI) 2626 dev_set_allmulti(slave_dev, -1); 2627 2628 if (old_flags & IFF_UP) 2629 bond_hw_addr_flush(bond_dev, slave_dev); 2630 } 2631 2632 slave_disable_netpoll(slave); 2633 2634 /* close slave before restoring its mac address */ 2635 dev_close(slave_dev); 2636 2637 slave_dev->priv_flags &= ~IFF_NO_ADDRCONF; 2638 2639 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || 2640 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2641 /* restore original ("permanent") mac address */ 2642 bond_hw_addr_copy(ss.__data, slave->perm_hwaddr, 2643 slave->dev->addr_len); 2644 ss.ss_family = slave_dev->type; 2645 dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); 2646 } 2647 2648 if (unregister) 2649 __dev_set_mtu(slave_dev, slave->original_mtu); 2650 else 2651 dev_set_mtu(slave_dev, slave->original_mtu); 2652 2653 if (!netif_is_bond_master(slave_dev)) 2654 slave_dev->priv_flags &= ~IFF_BONDING; 2655 2656 bond_xdp_set_features(bond_dev); 2657 kobject_put(&slave->kobj); 2658 2659 return 0; 2660 } 2661 2662 /* A wrapper used because of ndo_del_link */ 2663 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 2664 { 2665 return __bond_release_one(bond_dev, slave_dev, false, false); 2666 } 2667 2668 /* First release a slave and then destroy the bond if no more slaves are left. 2669 * Must be under rtnl_lock when this function is called. 
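 *
 * Caller sketch (assumed, mirroring the rule above):
 *
 *      ASSERT_RTNL();
 *      ret = bond_release_and_destroy(bond_dev, slave_dev);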
2670 */ 2671 static int bond_release_and_destroy(struct net_device *bond_dev, 2672 struct net_device *slave_dev) 2673 { 2674 struct bonding *bond = netdev_priv(bond_dev); 2675 int ret; 2676 2677 ret = __bond_release_one(bond_dev, slave_dev, false, true); 2678 if (ret == 0 && !bond_has_slaves(bond) && 2679 bond_dev->reg_state != NETREG_UNREGISTERING) { 2680 bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; 2681 netdev_info(bond_dev, "Destroying bond\n"); 2682 bond_remove_proc_entry(bond); 2683 unregister_netdevice(bond_dev); 2684 } 2685 return ret; 2686 } 2687 2688 static void bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2689 { 2690 struct bonding *bond = netdev_priv(bond_dev); 2691 2692 bond_fill_ifbond(bond, info); 2693 } 2694 2695 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 2696 { 2697 struct bonding *bond = netdev_priv(bond_dev); 2698 struct list_head *iter; 2699 int i = 0, res = -ENODEV; 2700 struct slave *slave; 2701 2702 bond_for_each_slave(bond, slave, iter) { 2703 if (i++ == (int)info->slave_id) { 2704 res = 0; 2705 bond_fill_ifslave(slave, info); 2706 break; 2707 } 2708 } 2709 2710 return res; 2711 } 2712 2713 /*-------------------------------- Monitoring -------------------------------*/ 2714 2715 /* called with rcu_read_lock() */ 2716 static int bond_miimon_inspect(struct bonding *bond) 2717 { 2718 bool ignore_updelay = false; 2719 int link_state, commit = 0; 2720 struct list_head *iter; 2721 struct slave *slave; 2722 2723 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { 2724 ignore_updelay = !rcu_dereference(bond->curr_active_slave); 2725 } else { 2726 struct bond_up_slave *usable_slaves; 2727 2728 usable_slaves = rcu_dereference(bond->usable_slaves); 2729 2730 if (usable_slaves && usable_slaves->count == 0) 2731 ignore_updelay = true; 2732 } 2733 2734 bond_for_each_slave_rcu(bond, slave, iter) { 2735 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 2736 2737 link_state = bond_check_dev_link(bond, slave->dev, 0); 2738 2739 switch (slave->link) { 2740 case BOND_LINK_UP: 2741 if (link_state) 2742 continue; 2743 2744 bond_propose_link_state(slave, BOND_LINK_FAIL); 2745 commit++; 2746 slave->delay = bond->params.downdelay; 2747 if (slave->delay && net_ratelimit()) { 2748 slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n", 2749 (BOND_MODE(bond) == 2750 BOND_MODE_ACTIVEBACKUP) ? 2751 (bond_is_active_slave(slave) ? 2752 "active " : "backup ") : "", 2753 bond->params.downdelay * bond->params.miimon); 2754 } 2755 fallthrough; 2756 case BOND_LINK_FAIL: 2757 if (link_state) { 2758 /* recovered before downdelay expired */ 2759 bond_propose_link_state(slave, BOND_LINK_UP); 2760 slave->last_link_up = jiffies; 2761 if (net_ratelimit()) 2762 slave_info(bond->dev, slave->dev, "link status up again after %d ms\n", 2763 (bond->params.downdelay - slave->delay) * 2764 bond->params.miimon); 2765 commit++; 2766 continue; 2767 } 2768 2769 if (slave->delay <= 0) { 2770 bond_propose_link_state(slave, BOND_LINK_DOWN); 2771 commit++; 2772 continue; 2773 } 2774 2775 slave->delay--; 2776 break; 2777 2778 case BOND_LINK_DOWN: 2779 if (!link_state) 2780 continue; 2781 2782 bond_propose_link_state(slave, BOND_LINK_BACK); 2783 commit++; 2784 slave->delay = bond->params.updelay; 2785 2786 if (slave->delay && net_ratelimit()) { 2787 slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n", 2788 ignore_updelay ? 
0 : 2789 bond->params.updelay * 2790 bond->params.miimon); 2791 } 2792 fallthrough; 2793 case BOND_LINK_BACK: 2794 if (!link_state) { 2795 bond_propose_link_state(slave, BOND_LINK_DOWN); 2796 if (net_ratelimit()) 2797 slave_info(bond->dev, slave->dev, "link status down again after %d ms\n", 2798 (bond->params.updelay - slave->delay) * 2799 bond->params.miimon); 2800 commit++; 2801 continue; 2802 } 2803 2804 if (ignore_updelay) 2805 slave->delay = 0; 2806 2807 if (slave->delay <= 0) { 2808 bond_propose_link_state(slave, BOND_LINK_UP); 2809 commit++; 2810 ignore_updelay = false; 2811 continue; 2812 } 2813 2814 slave->delay--; 2815 break; 2816 } 2817 } 2818 2819 return commit; 2820 } 2821 2822 static void bond_miimon_link_change(struct bonding *bond, 2823 struct slave *slave, 2824 char link) 2825 { 2826 switch (BOND_MODE(bond)) { 2827 case BOND_MODE_8023AD: 2828 bond_3ad_handle_link_change(slave, link); 2829 break; 2830 case BOND_MODE_TLB: 2831 case BOND_MODE_ALB: 2832 bond_alb_handle_link_change(bond, slave, link); 2833 break; 2834 case BOND_MODE_XOR: 2835 bond_update_slave_arr(bond, NULL); 2836 break; 2837 } 2838 } 2839 2840 static void bond_miimon_commit(struct bonding *bond) 2841 { 2842 struct slave *slave, *primary, *active; 2843 bool do_failover = false; 2844 struct list_head *iter; 2845 2846 ASSERT_RTNL(); 2847 2848 bond_for_each_slave(bond, slave, iter) { 2849 switch (slave->link_new_state) { 2850 case BOND_LINK_NOCHANGE: 2851 /* For 802.3ad mode, check current slave speed and 2852 * duplex again in case its port was disabled after 2853 * invalid speed/duplex reporting but recovered before 2854 * link monitoring could make a decision on the actual 2855 * link status 2856 */ 2857 if (BOND_MODE(bond) == BOND_MODE_8023AD && 2858 slave->link == BOND_LINK_UP) 2859 bond_3ad_adapter_speed_duplex_changed(slave); 2860 continue; 2861 2862 case BOND_LINK_UP: 2863 if (bond_update_speed_duplex(slave) && 2864 bond_needs_speed_duplex(bond)) { 2865 slave->link = BOND_LINK_DOWN; 2866 if (net_ratelimit()) 2867 slave_warn(bond->dev, slave->dev, 2868 "failed to get link speed/duplex\n"); 2869 continue; 2870 } 2871 bond_set_slave_link_state(slave, BOND_LINK_UP, 2872 BOND_SLAVE_NOTIFY_NOW); 2873 slave->last_link_up = jiffies; 2874 2875 primary = rtnl_dereference(bond->primary_slave); 2876 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 2877 /* prevent it from being the active one */ 2878 bond_set_backup_slave(slave); 2879 } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2880 /* make it immediately active */ 2881 bond_set_active_slave(slave); 2882 } 2883 2884 slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n", 2885 slave->speed == SPEED_UNKNOWN ? 0 : slave->speed, 2886 slave->duplex ? 
"full" : "half"); 2887 2888 bond_miimon_link_change(bond, slave, BOND_LINK_UP); 2889 2890 active = rtnl_dereference(bond->curr_active_slave); 2891 if (!active || slave == primary || slave->prio > active->prio) 2892 do_failover = true; 2893 2894 continue; 2895 2896 case BOND_LINK_DOWN: 2897 if (slave->link_failure_count < UINT_MAX) 2898 slave->link_failure_count++; 2899 2900 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 2901 BOND_SLAVE_NOTIFY_NOW); 2902 2903 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || 2904 BOND_MODE(bond) == BOND_MODE_8023AD) 2905 bond_set_slave_inactive_flags(slave, 2906 BOND_SLAVE_NOTIFY_NOW); 2907 2908 slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); 2909 2910 bond_miimon_link_change(bond, slave, BOND_LINK_DOWN); 2911 2912 if (slave == rcu_access_pointer(bond->curr_active_slave)) 2913 do_failover = true; 2914 2915 continue; 2916 2917 default: 2918 slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", 2919 slave->link_new_state); 2920 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 2921 2922 continue; 2923 } 2924 } 2925 2926 if (do_failover) { 2927 block_netpoll_tx(); 2928 bond_select_active_slave(bond); 2929 unblock_netpoll_tx(); 2930 } 2931 2932 bond_set_carrier(bond); 2933 } 2934 2935 /* bond_mii_monitor 2936 * 2937 * Really a wrapper that splits the mii monitor into two phases: an 2938 * inspection, then (if inspection indicates something needs to be done) 2939 * an acquisition of appropriate locks followed by a commit phase to 2940 * implement whatever link state changes are indicated. 2941 */ 2942 static void bond_mii_monitor(struct work_struct *work) 2943 { 2944 struct bonding *bond = container_of(work, struct bonding, 2945 mii_work.work); 2946 bool should_notify_peers = false; 2947 bool commit; 2948 unsigned long delay; 2949 struct slave *slave; 2950 struct list_head *iter; 2951 2952 delay = msecs_to_jiffies(bond->params.miimon); 2953 2954 if (!bond_has_slaves(bond)) 2955 goto re_arm; 2956 2957 rcu_read_lock(); 2958 should_notify_peers = bond_should_notify_peers(bond); 2959 commit = !!bond_miimon_inspect(bond); 2960 if (bond->send_peer_notif) { 2961 rcu_read_unlock(); 2962 if (rtnl_trylock()) { 2963 bond->send_peer_notif--; 2964 rtnl_unlock(); 2965 } 2966 } else { 2967 rcu_read_unlock(); 2968 } 2969 2970 if (commit) { 2971 /* Race avoidance with bond_close cancel of workqueue */ 2972 if (!rtnl_trylock()) { 2973 delay = 1; 2974 should_notify_peers = false; 2975 goto re_arm; 2976 } 2977 2978 bond_for_each_slave(bond, slave, iter) { 2979 bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); 2980 } 2981 bond_miimon_commit(bond); 2982 2983 rtnl_unlock(); /* might sleep, hold no other locks */ 2984 } 2985 2986 re_arm: 2987 if (bond->params.miimon) 2988 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2989 2990 if (should_notify_peers) { 2991 if (!rtnl_trylock()) 2992 return; 2993 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); 2994 rtnl_unlock(); 2995 } 2996 } 2997 2998 static int bond_upper_dev_walk(struct net_device *upper, 2999 struct netdev_nested_priv *priv) 3000 { 3001 __be32 ip = *(__be32 *)priv->data; 3002 3003 return ip == bond_confirm_addr(upper, 0, ip); 3004 } 3005 3006 static bool bond_has_this_ip(struct bonding *bond, __be32 ip) 3007 { 3008 struct netdev_nested_priv priv = { 3009 .data = (void *)&ip, 3010 }; 3011 bool ret = false; 3012 3013 if (ip == bond_confirm_addr(bond->dev, 0, ip)) 3014 return true; 3015 3016 rcu_read_lock(); 3017 if (netdev_walk_all_upper_dev_rcu(bond->dev, 
bond_upper_dev_walk, &priv)) 3018 ret = true; 3019 rcu_read_unlock(); 3020 3021 return ret; 3022 } 3023 3024 #define BOND_VLAN_PROTO_NONE cpu_to_be16(0xffff) 3025 3026 static bool bond_handle_vlan(struct slave *slave, struct bond_vlan_tag *tags, 3027 struct sk_buff *skb) 3028 { 3029 struct net_device *bond_dev = slave->bond->dev; 3030 struct net_device *slave_dev = slave->dev; 3031 struct bond_vlan_tag *outer_tag = tags; 3032 3033 if (!tags || tags->vlan_proto == BOND_VLAN_PROTO_NONE) 3034 return true; 3035 3036 tags++; 3037 3038 /* Go through all the tags backwards and add them to the packet */ 3039 while (tags->vlan_proto != BOND_VLAN_PROTO_NONE) { 3040 if (!tags->vlan_id) { 3041 tags++; 3042 continue; 3043 } 3044 3045 slave_dbg(bond_dev, slave_dev, "inner tag: proto %X vid %X\n", 3046 ntohs(outer_tag->vlan_proto), tags->vlan_id); 3047 skb = vlan_insert_tag_set_proto(skb, tags->vlan_proto, 3048 tags->vlan_id); 3049 if (!skb) { 3050 net_err_ratelimited("failed to insert inner VLAN tag\n"); 3051 return false; 3052 } 3053 3054 tags++; 3055 } 3056 /* Set the outer tag */ 3057 if (outer_tag->vlan_id) { 3058 slave_dbg(bond_dev, slave_dev, "outer tag: proto %X vid %X\n", 3059 ntohs(outer_tag->vlan_proto), outer_tag->vlan_id); 3060 __vlan_hwaccel_put_tag(skb, outer_tag->vlan_proto, 3061 outer_tag->vlan_id); 3062 } 3063 3064 return true; 3065 } 3066 3067 /* We go to the (large) trouble of VLAN tagging ARP frames because 3068 * switches in VLAN mode (especially if ports are configured as 3069 * "native" to a VLAN) might not pass non-tagged frames. 3070 */ 3071 static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, 3072 __be32 src_ip, struct bond_vlan_tag *tags) 3073 { 3074 struct net_device *bond_dev = slave->bond->dev; 3075 struct net_device *slave_dev = slave->dev; 3076 struct sk_buff *skb; 3077 3078 slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n", 3079 arp_op, &dest_ip, &src_ip); 3080 3081 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 3082 NULL, slave_dev->dev_addr, NULL); 3083 3084 if (!skb) { 3085 net_err_ratelimited("ARP packet allocation failed\n"); 3086 return; 3087 } 3088 3089 if (bond_handle_vlan(slave, tags, skb)) { 3090 slave_update_last_tx(slave); 3091 arp_xmit(skb); 3092 } 3093 3094 return; 3095 } 3096 3097 /* Validate the device path between the @start_dev and the @end_dev. 3098 * The path is valid if the @end_dev is reachable through device 3099 * stacking. 3100 * When the path is validated, collect any vlan information in the 3101 * path. 
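 *
 * Worked example (hypothetical Q-in-Q stacking): for
 *
 *      bond0 <- bond0.100 (ETH_P_8021AD) <- bond0.100.200 (ETH_P_8021Q)
 *
 * a walk from bond0 to bond0.100.200 returns a three-entry array:
 *
 *      tags[0] = { ETH_P_8021AD, 100 }         (outer tag)
 *      tags[1] = { ETH_P_8021Q, 200 }          (inner tag)
 *      tags[2] = { BOND_VLAN_PROTO_NONE, 0 }   (terminator)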
3102 */ 3103 struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, 3104 struct net_device *end_dev, 3105 int level) 3106 { 3107 struct bond_vlan_tag *tags; 3108 struct net_device *upper; 3109 struct list_head *iter; 3110 3111 if (start_dev == end_dev) { 3112 tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC); 3113 if (!tags) 3114 return ERR_PTR(-ENOMEM); 3115 tags[level].vlan_proto = BOND_VLAN_PROTO_NONE; 3116 return tags; 3117 } 3118 3119 netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { 3120 tags = bond_verify_device_path(upper, end_dev, level + 1); 3121 if (IS_ERR_OR_NULL(tags)) { 3122 if (IS_ERR(tags)) 3123 return tags; 3124 continue; 3125 } 3126 if (is_vlan_dev(upper)) { 3127 tags[level].vlan_proto = vlan_dev_vlan_proto(upper); 3128 tags[level].vlan_id = vlan_dev_vlan_id(upper); 3129 } 3130 3131 return tags; 3132 } 3133 3134 return NULL; 3135 } 3136 3137 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 3138 { 3139 struct rtable *rt; 3140 struct bond_vlan_tag *tags; 3141 __be32 *targets = bond->params.arp_targets, addr; 3142 int i; 3143 3144 for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { 3145 slave_dbg(bond->dev, slave->dev, "%s: target %pI4\n", 3146 __func__, &targets[i]); 3147 tags = NULL; 3148 3149 /* Find out through which dev should the packet go */ 3150 rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0, 3151 RT_SCOPE_LINK); 3152 if (IS_ERR(rt)) { 3153 /* there's no route to target - try to send arp 3154 * probe to generate any traffic (arp_validate=0) 3155 */ 3156 if (bond->params.arp_validate) 3157 pr_warn_once("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", 3158 bond->dev->name, 3159 &targets[i]); 3160 bond_arp_send(slave, ARPOP_REQUEST, targets[i], 3161 0, tags); 3162 continue; 3163 } 3164 3165 /* bond device itself */ 3166 if (rt->dst.dev == bond->dev) 3167 goto found; 3168 3169 rcu_read_lock(); 3170 tags = bond_verify_device_path(bond->dev, rt->dst.dev, 0); 3171 rcu_read_unlock(); 3172 3173 if (!IS_ERR_OR_NULL(tags)) 3174 goto found; 3175 3176 /* Not our device - skip */ 3177 slave_dbg(bond->dev, slave->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n", 3178 &targets[i], rt->dst.dev ? 
rt->dst.dev->name : "NULL"); 3179 3180 ip_rt_put(rt); 3181 continue; 3182 3183 found: 3184 addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); 3185 ip_rt_put(rt); 3186 bond_arp_send(slave, ARPOP_REQUEST, targets[i], addr, tags); 3187 kfree(tags); 3188 } 3189 } 3190 3191 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 3192 { 3193 int i; 3194 3195 if (!sip || !bond_has_this_ip(bond, tip)) { 3196 slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 tip %pI4 not found\n", 3197 __func__, &sip, &tip); 3198 return; 3199 } 3200 3201 i = bond_get_targets_ip(bond->params.arp_targets, sip); 3202 if (i == -1) { 3203 slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 not found in targets\n", 3204 __func__, &sip); 3205 return; 3206 } 3207 slave->last_rx = jiffies; 3208 slave->target_last_arp_rx[i] = jiffies; 3209 } 3210 3211 static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, 3212 struct slave *slave) 3213 { 3214 struct arphdr *arp = (struct arphdr *)skb->data; 3215 struct slave *curr_active_slave, *curr_arp_slave; 3216 unsigned char *arp_ptr; 3217 __be32 sip, tip; 3218 unsigned int alen; 3219 3220 alen = arp_hdr_len(bond->dev); 3221 3222 if (alen > skb_headlen(skb)) { 3223 arp = kmalloc(alen, GFP_ATOMIC); 3224 if (!arp) 3225 goto out_unlock; 3226 if (skb_copy_bits(skb, 0, arp, alen) < 0) 3227 goto out_unlock; 3228 } 3229 3230 if (arp->ar_hln != bond->dev->addr_len || 3231 skb->pkt_type == PACKET_OTHERHOST || 3232 skb->pkt_type == PACKET_LOOPBACK || 3233 arp->ar_hrd != htons(ARPHRD_ETHER) || 3234 arp->ar_pro != htons(ETH_P_IP) || 3235 arp->ar_pln != 4) 3236 goto out_unlock; 3237 3238 arp_ptr = (unsigned char *)(arp + 1); 3239 arp_ptr += bond->dev->addr_len; 3240 memcpy(&sip, arp_ptr, 4); 3241 arp_ptr += 4 + bond->dev->addr_len; 3242 memcpy(&tip, arp_ptr, 4); 3243 3244 slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI4 tip %pI4\n", 3245 __func__, slave->dev->name, bond_slave_state(slave), 3246 bond->params.arp_validate, slave_do_arp_validate(bond, slave), 3247 &sip, &tip); 3248 3249 curr_active_slave = rcu_dereference(bond->curr_active_slave); 3250 curr_arp_slave = rcu_dereference(bond->current_arp_slave); 3251 3252 /* We 'trust' the received ARP enough to validate it if: 3253 * 3254 * (a) the slave receiving the ARP is active (which includes the 3255 * current ARP slave, if any), or 3256 * 3257 * (b) the receiving slave isn't active, but there is a currently 3258 * active slave and it received valid arp reply(s) after it became 3259 * the currently active slave, or 3260 * 3261 * (c) there is an ARP slave that sent an ARP during the prior ARP 3262 * interval, and we receive an ARP reply on any slave. We accept 3263 * these because switch FDB update delays may deliver the ARP 3264 * reply to a slave other than the sender of the ARP request. 3265 * 3266 * Note: for (b), backup slaves are receiving the broadcast ARP 3267 * request, not a reply. This request passes from the sending 3268 * slave through the L2 switch(es) to the receiving slave. Since 3269 * this is checking the request, sip/tip are swapped for 3270 * validation. 3271 * 3272 * This is done to avoid endless looping when we can't reach the 3273 * arp_ip_target and fool ourselves with our own arp requests. 
3274 */ 3275 if (bond_is_active_slave(slave)) 3276 bond_validate_arp(bond, slave, sip, tip); 3277 else if (curr_active_slave && 3278 time_after(slave_last_rx(bond, curr_active_slave), 3279 curr_active_slave->last_link_up)) 3280 bond_validate_arp(bond, slave, tip, sip); 3281 else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && 3282 bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) 3283 bond_validate_arp(bond, slave, sip, tip); 3284 3285 out_unlock: 3286 if (arp != (struct arphdr *)skb->data) 3287 kfree(arp); 3288 return RX_HANDLER_ANOTHER; 3289 } 3290 3291 #if IS_ENABLED(CONFIG_IPV6) 3292 static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr, 3293 const struct in6_addr *saddr, struct bond_vlan_tag *tags) 3294 { 3295 struct net_device *bond_dev = slave->bond->dev; 3296 struct net_device *slave_dev = slave->dev; 3297 struct in6_addr mcaddr; 3298 struct sk_buff *skb; 3299 3300 slave_dbg(bond_dev, slave_dev, "NS on slave: dst %pI6c src %pI6c\n", 3301 daddr, saddr); 3302 3303 skb = ndisc_ns_create(slave_dev, daddr, saddr, 0); 3304 if (!skb) { 3305 net_err_ratelimited("NS packet allocation failed\n"); 3306 return; 3307 } 3308 3309 addrconf_addr_solict_mult(daddr, &mcaddr); 3310 if (bond_handle_vlan(slave, tags, skb)) { 3311 slave_update_last_tx(slave); 3312 ndisc_send_skb(skb, &mcaddr, saddr); 3313 } 3314 } 3315 3316 static void bond_ns_send_all(struct bonding *bond, struct slave *slave) 3317 { 3318 struct in6_addr *targets = bond->params.ns_targets; 3319 struct bond_vlan_tag *tags; 3320 struct dst_entry *dst; 3321 struct in6_addr saddr; 3322 struct flowi6 fl6; 3323 int i; 3324 3325 for (i = 0; i < BOND_MAX_NS_TARGETS && !ipv6_addr_any(&targets[i]); i++) { 3326 slave_dbg(bond->dev, slave->dev, "%s: target %pI6c\n", 3327 __func__, &targets[i]); 3328 tags = NULL; 3329 3330 /* Find out through which dev should the packet go */ 3331 memset(&fl6, 0, sizeof(struct flowi6)); 3332 fl6.daddr = targets[i]; 3333 fl6.flowi6_oif = bond->dev->ifindex; 3334 3335 dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6); 3336 if (dst->error) { 3337 dst_release(dst); 3338 /* there's no route to target - try to send arp 3339 * probe to generate any traffic (arp_validate=0) 3340 */ 3341 if (bond->params.arp_validate) 3342 pr_warn_once("%s: no route to ns_ip6_target %pI6c and arp_validate is set\n", 3343 bond->dev->name, 3344 &targets[i]); 3345 bond_ns_send(slave, &targets[i], &in6addr_any, tags); 3346 continue; 3347 } 3348 3349 /* bond device itself */ 3350 if (dst->dev == bond->dev) 3351 goto found; 3352 3353 rcu_read_lock(); 3354 tags = bond_verify_device_path(bond->dev, dst->dev, 0); 3355 rcu_read_unlock(); 3356 3357 if (!IS_ERR_OR_NULL(tags)) 3358 goto found; 3359 3360 /* Not our device - skip */ 3361 slave_dbg(bond->dev, slave->dev, "no path to ns_ip6_target %pI6c via dst->dev %s\n", 3362 &targets[i], dst->dev ? 
dst->dev->name : "NULL"); 3363 3364 dst_release(dst); 3365 continue; 3366 3367 found: 3368 if (!ipv6_dev_get_saddr(dev_net(dst->dev), dst->dev, &targets[i], 0, &saddr)) 3369 bond_ns_send(slave, &targets[i], &saddr, tags); 3370 else 3371 bond_ns_send(slave, &targets[i], &in6addr_any, tags); 3372 3373 dst_release(dst); 3374 kfree(tags); 3375 } 3376 } 3377 3378 static int bond_confirm_addr6(struct net_device *dev, 3379 struct netdev_nested_priv *priv) 3380 { 3381 struct in6_addr *addr = (struct in6_addr *)priv->data; 3382 3383 return ipv6_chk_addr(dev_net(dev), addr, dev, 0); 3384 } 3385 3386 static bool bond_has_this_ip6(struct bonding *bond, struct in6_addr *addr) 3387 { 3388 struct netdev_nested_priv priv = { 3389 .data = addr, 3390 }; 3391 int ret = false; 3392 3393 if (bond_confirm_addr6(bond->dev, &priv)) 3394 return true; 3395 3396 rcu_read_lock(); 3397 if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_confirm_addr6, &priv)) 3398 ret = true; 3399 rcu_read_unlock(); 3400 3401 return ret; 3402 } 3403 3404 static void bond_validate_na(struct bonding *bond, struct slave *slave, 3405 struct in6_addr *saddr, struct in6_addr *daddr) 3406 { 3407 int i; 3408 3409 /* Ignore NAs that: 3410 * 1. Source address is unspecified address. 3411 * 2. Dest address is neither all-nodes multicast address nor 3412 * exist on bond interface. 3413 */ 3414 if (ipv6_addr_any(saddr) || 3415 (!ipv6_addr_equal(daddr, &in6addr_linklocal_allnodes) && 3416 !bond_has_this_ip6(bond, daddr))) { 3417 slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c tip %pI6c not found\n", 3418 __func__, saddr, daddr); 3419 return; 3420 } 3421 3422 i = bond_get_targets_ip6(bond->params.ns_targets, saddr); 3423 if (i == -1) { 3424 slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c not found in targets\n", 3425 __func__, saddr); 3426 return; 3427 } 3428 slave->last_rx = jiffies; 3429 slave->target_last_arp_rx[i] = jiffies; 3430 } 3431 3432 static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, 3433 struct slave *slave) 3434 { 3435 struct slave *curr_active_slave, *curr_arp_slave; 3436 struct in6_addr *saddr, *daddr; 3437 struct { 3438 struct ipv6hdr ip6; 3439 struct icmp6hdr icmp6; 3440 } *combined, _combined; 3441 3442 if (skb->pkt_type == PACKET_OTHERHOST || 3443 skb->pkt_type == PACKET_LOOPBACK) 3444 goto out; 3445 3446 combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); 3447 if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || 3448 (combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && 3449 combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) 3450 goto out; 3451 3452 saddr = &combined->ip6.saddr; 3453 daddr = &combined->ip6.daddr; 3454 3455 slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", 3456 __func__, slave->dev->name, bond_slave_state(slave), 3457 bond->params.arp_validate, slave_do_arp_validate(bond, slave), 3458 saddr, daddr); 3459 3460 curr_active_slave = rcu_dereference(bond->curr_active_slave); 3461 curr_arp_slave = rcu_dereference(bond->current_arp_slave); 3462 3463 /* We 'trust' the received ARP enough to validate it if: 3464 * see bond_arp_rcv(). 
3465 */ 3466 if (bond_is_active_slave(slave)) 3467 bond_validate_na(bond, slave, saddr, daddr); 3468 else if (curr_active_slave && 3469 time_after(slave_last_rx(bond, curr_active_slave), 3470 curr_active_slave->last_link_up)) 3471 bond_validate_na(bond, slave, daddr, saddr); 3472 else if (curr_arp_slave && 3473 bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) 3474 bond_validate_na(bond, slave, saddr, daddr); 3475 3476 out: 3477 return RX_HANDLER_ANOTHER; 3478 } 3479 #endif 3480 3481 int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, 3482 struct slave *slave) 3483 { 3484 #if IS_ENABLED(CONFIG_IPV6) 3485 bool is_ipv6 = skb->protocol == __cpu_to_be16(ETH_P_IPV6); 3486 #endif 3487 bool is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); 3488 3489 slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n", 3490 __func__, skb->dev->name); 3491 3492 /* Use arp validate logic for both ARP and NS */ 3493 if (!slave_do_arp_validate(bond, slave)) { 3494 if ((slave_do_arp_validate_only(bond) && is_arp) || 3495 #if IS_ENABLED(CONFIG_IPV6) 3496 (slave_do_arp_validate_only(bond) && is_ipv6) || 3497 #endif 3498 !slave_do_arp_validate_only(bond)) 3499 slave->last_rx = jiffies; 3500 return RX_HANDLER_ANOTHER; 3501 } else if (is_arp) { 3502 return bond_arp_rcv(skb, bond, slave); 3503 #if IS_ENABLED(CONFIG_IPV6) 3504 } else if (is_ipv6) { 3505 return bond_na_rcv(skb, bond, slave); 3506 #endif 3507 } else { 3508 return RX_HANDLER_ANOTHER; 3509 } 3510 } 3511 3512 static void bond_send_validate(struct bonding *bond, struct slave *slave) 3513 { 3514 bond_arp_send_all(bond, slave); 3515 #if IS_ENABLED(CONFIG_IPV6) 3516 bond_ns_send_all(bond, slave); 3517 #endif 3518 } 3519 3520 /* function to verify if we're in the arp_interval timeslice, returns true if 3521 * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + 3522 * arp_interval/2) . the arp_interval/2 is needed for really fast networks. 3523 */ 3524 static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, 3525 int mod) 3526 { 3527 int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 3528 3529 return time_in_range(jiffies, 3530 last_act - delta_in_ticks, 3531 last_act + mod * delta_in_ticks + delta_in_ticks/2); 3532 } 3533 3534 /* This function is called regularly to monitor each slave's link 3535 * ensuring that traffic is being sent and received when arp monitoring 3536 * is used in load-balancing mode. if the adapter has been dormant, then an 3537 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 3538 * arp monitoring in active backup mode. 3539 */ 3540 static void bond_loadbalance_arp_mon(struct bonding *bond) 3541 { 3542 struct slave *slave, *oldcurrent; 3543 struct list_head *iter; 3544 int do_failover = 0, slave_state_changed = 0; 3545 3546 if (!bond_has_slaves(bond)) 3547 goto re_arm; 3548 3549 rcu_read_lock(); 3550 3551 oldcurrent = rcu_dereference(bond->curr_active_slave); 3552 /* see if any of the previous devices are up now (i.e. they have 3553 * xmt and rcv traffic). the curr_active_slave does not come into 3554 * the picture unless it is null. also, slave->last_link_up is not 3555 * needed here because we send an arp on each slave and give a slave 3556 * as long as it needs to get the tx/rx within the delta. 3557 * TODO: what about up/down delay in arp mode? 
it wasn't here before 3558 * so it can wait 3559 */ 3560 bond_for_each_slave_rcu(bond, slave, iter) { 3561 unsigned long last_tx = slave_last_tx(slave); 3562 3563 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 3564 3565 if (slave->link != BOND_LINK_UP) { 3566 if (bond_time_in_interval(bond, last_tx, 1) && 3567 bond_time_in_interval(bond, slave->last_rx, 1)) { 3568 3569 bond_propose_link_state(slave, BOND_LINK_UP); 3570 slave_state_changed = 1; 3571 3572 /* primary_slave has no meaning in round-robin 3573 * mode. the window of a slave being up and 3574 * curr_active_slave being null after enslaving 3575 * is closed. 3576 */ 3577 if (!oldcurrent) { 3578 slave_info(bond->dev, slave->dev, "link status definitely up\n"); 3579 do_failover = 1; 3580 } else { 3581 slave_info(bond->dev, slave->dev, "interface is now up\n"); 3582 } 3583 } 3584 } else { 3585 /* slave->link == BOND_LINK_UP */ 3586 3587 /* not all switches will respond to an arp request 3588 * when the source ip is 0, so don't take the link down 3589 * if we don't know our ip yet 3590 */ 3591 if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) || 3592 !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { 3593 3594 bond_propose_link_state(slave, BOND_LINK_DOWN); 3595 slave_state_changed = 1; 3596 3597 if (slave->link_failure_count < UINT_MAX) 3598 slave->link_failure_count++; 3599 3600 slave_info(bond->dev, slave->dev, "interface is now down\n"); 3601 3602 if (slave == oldcurrent) 3603 do_failover = 1; 3604 } 3605 } 3606 3607 /* note: if switch is in round-robin mode, all links 3608 * must tx arp to ensure all links rx an arp - otherwise 3609 * links may oscillate or not come up at all; if switch is 3610 * in something like xor mode, there is nothing we can 3611 * do - all replies will be rx'ed on same link causing slaves 3612 * to be unstable during low/no traffic periods 3613 */ 3614 if (bond_slave_is_up(slave)) 3615 bond_send_validate(bond, slave); 3616 } 3617 3618 rcu_read_unlock(); 3619 3620 if (do_failover || slave_state_changed) { 3621 if (!rtnl_trylock()) 3622 goto re_arm; 3623 3624 bond_for_each_slave(bond, slave, iter) { 3625 if (slave->link_new_state != BOND_LINK_NOCHANGE) 3626 slave->link = slave->link_new_state; 3627 } 3628 3629 if (slave_state_changed) { 3630 bond_slave_state_change(bond); 3631 if (BOND_MODE(bond) == BOND_MODE_XOR) 3632 bond_update_slave_arr(bond, NULL); 3633 } 3634 if (do_failover) { 3635 block_netpoll_tx(); 3636 bond_select_active_slave(bond); 3637 unblock_netpoll_tx(); 3638 } 3639 rtnl_unlock(); 3640 } 3641 3642 re_arm: 3643 if (bond->params.arp_interval) 3644 queue_delayed_work(bond->wq, &bond->arp_work, 3645 msecs_to_jiffies(bond->params.arp_interval)); 3646 } 3647 3648 /* Called to inspect slaves for active-backup mode ARP monitor link state 3649 * changes. Sets proposed link state in slaves to specify what action 3650 * should take place for the slave. Returns 0 if no changes are found, >0 3651 * if changes to link states must be committed. 3652 * 3653 * Called with rcu_read_lock held. 
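 *
 * E.g. if one backup slave has started receiving again while the
 * active slave has gone quiet, two link-state proposals are recorded
 * and 2 is returned, making the caller take RTNL and run the commit
 * phase.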
 */
static int bond_ab_arp_inspect(struct bonding *bond)
{
        unsigned long last_tx, last_rx;
        struct list_head *iter;
        struct slave *slave;
        int commit = 0;

        bond_for_each_slave_rcu(bond, slave, iter) {
                bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
                last_rx = slave_last_rx(bond, slave);

                if (slave->link != BOND_LINK_UP) {
                        if (bond_time_in_interval(bond, last_rx, 1)) {
                                bond_propose_link_state(slave, BOND_LINK_UP);
                                commit++;
                        } else if (slave->link == BOND_LINK_BACK) {
                                bond_propose_link_state(slave, BOND_LINK_FAIL);
                                commit++;
                        }
                        continue;
                }

                /* Give slaves 2*delta after being enslaved or made
                 * active. This avoids bouncing, as the last receive
                 * times need a full ARP monitor cycle to be updated.
                 */
                if (bond_time_in_interval(bond, slave->last_link_up, 2))
                        continue;

                /* Backup slave is down if:
                 * - No current_arp_slave AND
                 * - more than (missed_max+1)*delta since last receive AND
                 * - the bond has an IP address
                 *
                 * Note: a non-null current_arp_slave indicates
                 * the curr_active_slave went down and we are
                 * searching for a new one; under this condition
                 * we only take the curr_active_slave down - this
                 * gives each slave a chance to tx/rx traffic
                 * before being taken out
                 */
                if (!bond_is_active_slave(slave) &&
                    !rcu_access_pointer(bond->current_arp_slave) &&
                    !bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) {
                        bond_propose_link_state(slave, BOND_LINK_DOWN);
                        commit++;
                }

                /* Active slave is down if:
                 * - more than missed_max*delta since transmitting OR
                 * - (more than missed_max*delta since receive AND
                 *    the bond has an IP address)
                 */
                last_tx = slave_last_tx(slave);
                if (bond_is_active_slave(slave) &&
                    (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
                     !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
                        bond_propose_link_state(slave, BOND_LINK_DOWN);
                        commit++;
                }
        }

        return commit;
}

/* Called to commit link state changes noted by inspection step of
 * active-backup mode ARP monitor.
 *
 * Called with RTNL held.
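 *
 * RTNL is what permits the rtnl_dereference() calls below and keeps
 * the commit atomic with respect to enslave/release.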
3724 */ 3725 static void bond_ab_arp_commit(struct bonding *bond) 3726 { 3727 bool do_failover = false; 3728 struct list_head *iter; 3729 unsigned long last_tx; 3730 struct slave *slave; 3731 3732 bond_for_each_slave(bond, slave, iter) { 3733 switch (slave->link_new_state) { 3734 case BOND_LINK_NOCHANGE: 3735 continue; 3736 3737 case BOND_LINK_UP: 3738 last_tx = slave_last_tx(slave); 3739 if (rtnl_dereference(bond->curr_active_slave) != slave || 3740 (!rtnl_dereference(bond->curr_active_slave) && 3741 bond_time_in_interval(bond, last_tx, 1))) { 3742 struct slave *current_arp_slave; 3743 3744 current_arp_slave = rtnl_dereference(bond->current_arp_slave); 3745 bond_set_slave_link_state(slave, BOND_LINK_UP, 3746 BOND_SLAVE_NOTIFY_NOW); 3747 if (current_arp_slave) { 3748 bond_set_slave_inactive_flags( 3749 current_arp_slave, 3750 BOND_SLAVE_NOTIFY_NOW); 3751 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3752 } 3753 3754 slave_info(bond->dev, slave->dev, "link status definitely up\n"); 3755 3756 if (!rtnl_dereference(bond->curr_active_slave) || 3757 slave == rtnl_dereference(bond->primary_slave) || 3758 slave->prio > rtnl_dereference(bond->curr_active_slave)->prio) 3759 do_failover = true; 3760 3761 } 3762 3763 continue; 3764 3765 case BOND_LINK_DOWN: 3766 if (slave->link_failure_count < UINT_MAX) 3767 slave->link_failure_count++; 3768 3769 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 3770 BOND_SLAVE_NOTIFY_NOW); 3771 bond_set_slave_inactive_flags(slave, 3772 BOND_SLAVE_NOTIFY_NOW); 3773 3774 slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); 3775 3776 if (slave == rtnl_dereference(bond->curr_active_slave)) { 3777 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3778 do_failover = true; 3779 } 3780 3781 continue; 3782 3783 case BOND_LINK_FAIL: 3784 bond_set_slave_link_state(slave, BOND_LINK_FAIL, 3785 BOND_SLAVE_NOTIFY_NOW); 3786 bond_set_slave_inactive_flags(slave, 3787 BOND_SLAVE_NOTIFY_NOW); 3788 3789 /* A slave has just been enslaved and has become 3790 * the current active slave. 3791 */ 3792 if (rtnl_dereference(bond->curr_active_slave)) 3793 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3794 continue; 3795 3796 default: 3797 slave_err(bond->dev, slave->dev, 3798 "impossible: link_new_state %d on slave\n", 3799 slave->link_new_state); 3800 continue; 3801 } 3802 } 3803 3804 if (do_failover) { 3805 block_netpoll_tx(); 3806 bond_select_active_slave(bond); 3807 unblock_netpoll_tx(); 3808 } 3809 3810 bond_set_carrier(bond); 3811 } 3812 3813 /* Send ARP probes for active-backup mode ARP monitor. 3814 * 3815 * Called with rcu_read_lock held. 
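 *
 * Returns BOND_SLAVE_NOTIFY_NOW (true) when some slave still has a
 * deferred state or link notification pending, telling the caller to
 * take RTNL and run bond_slave_state_notify()/bond_slave_link_notify();
 * otherwise returns BOND_SLAVE_NOTIFY_LATER (false).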
3816 */ 3817 static bool bond_ab_arp_probe(struct bonding *bond) 3818 { 3819 struct slave *slave, *before = NULL, *new_slave = NULL, 3820 *curr_arp_slave = rcu_dereference(bond->current_arp_slave), 3821 *curr_active_slave = rcu_dereference(bond->curr_active_slave); 3822 struct list_head *iter; 3823 bool found = false; 3824 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; 3825 3826 if (curr_arp_slave && curr_active_slave) 3827 netdev_info(bond->dev, "PROBE: c_arp %s && cas %s BAD\n", 3828 curr_arp_slave->dev->name, 3829 curr_active_slave->dev->name); 3830 3831 if (curr_active_slave) { 3832 bond_send_validate(bond, curr_active_slave); 3833 return should_notify_rtnl; 3834 } 3835 3836 /* if we don't have a curr_active_slave, search for the next available 3837 * backup slave from the current_arp_slave and make it the candidate 3838 * for becoming the curr_active_slave 3839 */ 3840 3841 if (!curr_arp_slave) { 3842 curr_arp_slave = bond_first_slave_rcu(bond); 3843 if (!curr_arp_slave) 3844 return should_notify_rtnl; 3845 } 3846 3847 bond_for_each_slave_rcu(bond, slave, iter) { 3848 if (!found && !before && bond_slave_is_up(slave)) 3849 before = slave; 3850 3851 if (found && !new_slave && bond_slave_is_up(slave)) 3852 new_slave = slave; 3853 /* if the link state is up at this point, we 3854 * mark it down - this can happen if we have 3855 * simultaneous link failures and 3856 * reselect_active_interface doesn't make this 3857 * one the current slave so it is still marked 3858 * up when it is actually down 3859 */ 3860 if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { 3861 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 3862 BOND_SLAVE_NOTIFY_LATER); 3863 if (slave->link_failure_count < UINT_MAX) 3864 slave->link_failure_count++; 3865 3866 bond_set_slave_inactive_flags(slave, 3867 BOND_SLAVE_NOTIFY_LATER); 3868 3869 slave_info(bond->dev, slave->dev, "backup interface is now down\n"); 3870 } 3871 if (slave == curr_arp_slave) 3872 found = true; 3873 } 3874 3875 if (!new_slave && before) 3876 new_slave = before; 3877 3878 if (!new_slave) 3879 goto check_state; 3880 3881 bond_set_slave_link_state(new_slave, BOND_LINK_BACK, 3882 BOND_SLAVE_NOTIFY_LATER); 3883 bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); 3884 bond_send_validate(bond, new_slave); 3885 new_slave->last_link_up = jiffies; 3886 rcu_assign_pointer(bond->current_arp_slave, new_slave); 3887 3888 check_state: 3889 bond_for_each_slave_rcu(bond, slave, iter) { 3890 if (slave->should_notify || slave->should_notify_link) { 3891 should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; 3892 break; 3893 } 3894 } 3895 return should_notify_rtnl; 3896 } 3897 3898 static void bond_activebackup_arp_mon(struct bonding *bond) 3899 { 3900 bool should_notify_peers = false; 3901 bool should_notify_rtnl = false; 3902 int delta_in_ticks; 3903 3904 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 3905 3906 if (!bond_has_slaves(bond)) 3907 goto re_arm; 3908 3909 rcu_read_lock(); 3910 3911 should_notify_peers = bond_should_notify_peers(bond); 3912 3913 if (bond_ab_arp_inspect(bond)) { 3914 rcu_read_unlock(); 3915 3916 /* Race avoidance with bond_close flush of workqueue */ 3917 if (!rtnl_trylock()) { 3918 delta_in_ticks = 1; 3919 should_notify_peers = false; 3920 goto re_arm; 3921 } 3922 3923 bond_ab_arp_commit(bond); 3924 3925 rtnl_unlock(); 3926 rcu_read_lock(); 3927 } 3928 3929 should_notify_rtnl = bond_ab_arp_probe(bond); 3930 rcu_read_unlock(); 3931 3932 re_arm: 3933 if (bond->params.arp_interval) 3934 
queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3935 3936 if (should_notify_peers || should_notify_rtnl) { 3937 if (!rtnl_trylock()) 3938 return; 3939 3940 if (should_notify_peers) { 3941 bond->send_peer_notif--; 3942 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, 3943 bond->dev); 3944 } 3945 if (should_notify_rtnl) { 3946 bond_slave_state_notify(bond); 3947 bond_slave_link_notify(bond); 3948 } 3949 3950 rtnl_unlock(); 3951 } 3952 } 3953 3954 static void bond_arp_monitor(struct work_struct *work) 3955 { 3956 struct bonding *bond = container_of(work, struct bonding, 3957 arp_work.work); 3958 3959 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) 3960 bond_activebackup_arp_mon(bond); 3961 else 3962 bond_loadbalance_arp_mon(bond); 3963 } 3964 3965 /*-------------------------- netdev event handling --------------------------*/ 3966 3967 /* Change device name */ 3968 static int bond_event_changename(struct bonding *bond) 3969 { 3970 bond_remove_proc_entry(bond); 3971 bond_create_proc_entry(bond); 3972 3973 bond_debug_reregister(bond); 3974 3975 return NOTIFY_DONE; 3976 } 3977 3978 static int bond_master_netdev_event(unsigned long event, 3979 struct net_device *bond_dev) 3980 { 3981 struct bonding *event_bond = netdev_priv(bond_dev); 3982 3983 netdev_dbg(bond_dev, "%s called\n", __func__); 3984 3985 switch (event) { 3986 case NETDEV_CHANGENAME: 3987 return bond_event_changename(event_bond); 3988 case NETDEV_UNREGISTER: 3989 bond_remove_proc_entry(event_bond); 3990 #ifdef CONFIG_XFRM_OFFLOAD 3991 xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); 3992 #endif /* CONFIG_XFRM_OFFLOAD */ 3993 break; 3994 case NETDEV_REGISTER: 3995 bond_create_proc_entry(event_bond); 3996 break; 3997 default: 3998 break; 3999 } 4000 4001 return NOTIFY_DONE; 4002 } 4003 4004 static int bond_slave_netdev_event(unsigned long event, 4005 struct net_device *slave_dev) 4006 { 4007 struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary; 4008 struct bonding *bond; 4009 struct net_device *bond_dev; 4010 4011 /* A netdev event can be generated while enslaving a device 4012 * before netdev_rx_handler_register is called in which case 4013 * slave will be NULL 4014 */ 4015 if (!slave) { 4016 netdev_dbg(slave_dev, "%s called on NULL slave\n", __func__); 4017 return NOTIFY_DONE; 4018 } 4019 4020 bond_dev = slave->bond->dev; 4021 bond = slave->bond; 4022 primary = rtnl_dereference(bond->primary_slave); 4023 4024 slave_dbg(bond_dev, slave_dev, "%s called\n", __func__); 4025 4026 switch (event) { 4027 case NETDEV_UNREGISTER: 4028 if (bond_dev->type != ARPHRD_ETHER) 4029 bond_release_and_destroy(bond_dev, slave_dev); 4030 else 4031 __bond_release_one(bond_dev, slave_dev, false, true); 4032 break; 4033 case NETDEV_UP: 4034 case NETDEV_CHANGE: 4035 /* For 802.3ad mode only: 4036 * Getting invalid Speed/Duplex values here will put slave 4037 * in weird state. Mark it as link-fail if the link was 4038 * previously up or link-down if it hasn't yet come up, and 4039 * let link-monitoring (miimon) set it right when correct 4040 * speeds/duplex are available. 4041 */ 4042 if (bond_update_speed_duplex(slave) && 4043 BOND_MODE(bond) == BOND_MODE_8023AD) { 4044 if (slave->last_link_up) 4045 slave->link = BOND_LINK_FAIL; 4046 else 4047 slave->link = BOND_LINK_DOWN; 4048 } 4049 4050 if (BOND_MODE(bond) == BOND_MODE_8023AD) 4051 bond_3ad_adapter_speed_duplex_changed(slave); 4052 fallthrough; 4053 case NETDEV_DOWN: 4054 /* Refresh slave-array if applicable! 
		 * If the setup does not use miimon or arpmon (mode-specific!),
		 * then these events will not cause the slave-array to be
		 * refreshed. This will cause xmit to use a slave that is not
		 * usable. Avoid such a situation by refreshing the array at
		 * these events. If these (miimon/arpmon) parameters are
		 * configured then the array gets refreshed twice and that
		 * should be fine!
		 */
		if (bond_mode_can_use_xmit_hash(bond))
			bond_update_slave_arr(bond, NULL);
		break;
	case NETDEV_CHANGEMTU:
		/* TODO: Should slaves be allowed to
		 * independently alter their MTU? For
		 * an active-backup bond, slaves need
		 * not be the same type of device, so
		 * MTUs may vary. For other modes,
		 * slaves arguably should have the
		 * same MTUs. To do this, we'd need to
		 * take over the slave's change_mtu
		 * function for the duration of their
		 * servitude.
		 */
		break;
	case NETDEV_CHANGENAME:
		/* we don't care if we don't have primary set */
		if (!bond_uses_primary(bond) ||
		    !bond->params.primary[0])
			break;

		if (slave == primary) {
			/* slave's name changed - it's no longer the primary */
			RCU_INIT_POINTER(bond->primary_slave, NULL);
		} else if (!strcmp(slave_dev->name, bond->params.primary)) {
			/* we have a new primary slave */
			rcu_assign_pointer(bond->primary_slave, slave);
		} else { /* we didn't change primary - exit */
			break;
		}

		netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
			    primary ? slave_dev->name : "none");

		block_netpoll_tx();
		bond_select_active_slave(bond);
		unblock_netpoll_tx();
		break;
	case NETDEV_FEAT_CHANGE:
		if (!bond->notifier_ctx) {
			bond->notifier_ctx = true;
			bond_compute_features(bond);
			bond->notifier_ctx = false;
		}
		break;
	case NETDEV_RESEND_IGMP:
		/* Propagate to master device */
		call_netdevice_notifiers(event, slave->bond->dev);
		break;
	case NETDEV_XDP_FEAT_CHANGE:
		bond_xdp_set_features(bond_dev);
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}

/* bond_netdev_event: handle netdev notifier chain events.
 *
 * This function receives events for the netdev chain. The caller (an
 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
 * locks for us to safely manipulate the slave devices (RTNL lock,
 * dev_probe_lock).
 */
static int bond_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

	netdev_dbg(event_dev, "%s received %s\n",
		   __func__, netdev_cmd_to_name(event));

	if (!(event_dev->priv_flags & IFF_BONDING))
		return NOTIFY_DONE;

	if (event_dev->flags & IFF_MASTER) {
		int ret;

		ret = bond_master_netdev_event(event, event_dev);
		if (ret != NOTIFY_DONE)
			return ret;
	}

	if (event_dev->flags & IFF_SLAVE)
		return bond_slave_netdev_event(event, event_dev);

	return NOTIFY_DONE;
}

static struct notifier_block bond_netdev_notifier = {
	.notifier_call = bond_netdev_event,
};

/*---------------------------- Hashing Policies -----------------------------*/

/* Helper to access data in a packet, with or without a backing skb.
 * If skb is given the data is linearized if necessary via pskb_may_pull.
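 *
 * Illustrative use, mirroring bond_eth_hash() below (a sketch, not a
 * separate API):
 *
 *	data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
 *	if (!data)
 *		return 0;	(needed bytes unavailable even after pull)
 *	ep = (struct ethhdr *)(data + mhoff);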
 */
static inline const void *bond_pull_data(struct sk_buff *skb,
					 const void *data, int hlen, int n)
{
	if (likely(n <= hlen))
		return data;
	else if (skb && likely(pskb_may_pull(skb, n)))
		return skb->data;

	return NULL;
}

/* L2 hash helper */
static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
	struct ethhdr *ep;

	data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
	if (!data)
		return 0;

	ep = (struct ethhdr *)(data + mhoff);
	return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
}

static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
			 int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
{
	const struct ipv6hdr *iph6;
	const struct iphdr *iph;

	if (l2_proto == htons(ETH_P_IP)) {
		data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
		if (!data)
			return false;

		iph = (const struct iphdr *)(data + *nhoff);
		iph_to_flow_copy_v4addrs(fk, iph);
		*nhoff += iph->ihl << 2;
		if (!ip_is_fragment(iph))
			*ip_proto = iph->protocol;
	} else if (l2_proto == htons(ETH_P_IPV6)) {
		data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
		if (!data)
			return false;

		iph6 = (const struct ipv6hdr *)(data + *nhoff);
		iph_to_flow_copy_v6addrs(fk, iph6);
		*nhoff += sizeof(*iph6);
		*ip_proto = iph6->nexthdr;
	} else {
		return false;
	}

	if (l34 && *ip_proto >= 0)
		fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);

	return true;
}

static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
	u32 srcmac_vendor = 0, srcmac_dev = 0;
	struct ethhdr *mac_hdr;
	u16 vlan = 0;
	int i;

	data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
	if (!data)
		return 0;
	mac_hdr = (struct ethhdr *)(data + mhoff);

	for (i = 0; i < 3; i++)
		srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];

	for (i = 3; i < ETH_ALEN; i++)
		srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];

	if (skb && skb_vlan_tag_present(skb))
		vlan = skb_vlan_tag_get(skb);

	return vlan ^ srcmac_vendor ^ srcmac_dev;
}

/* Extract the appropriate headers based on bond's xmit policy */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
			      __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
{
	bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
	int ip_proto = -1;

	switch (bond->params.xmit_policy) {
	case BOND_XMIT_POLICY_ENCAP23:
	case BOND_XMIT_POLICY_ENCAP34:
		memset(fk, 0, sizeof(*fk));
		return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
					  fk, data, l2_proto, nhoff, hlen, 0);
	default:
		break;
	}

	fk->ports.ports = 0;
	memset(&fk->icmp, 0, sizeof(fk->icmp));
	if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
		return false;

	/* ICMP error packets contain at least 8 bytes of the header
	 * of the packet which generated the error. Use this information
	 * to correlate ICMP error packets within the same flow which
	 * generated the error.
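 *
 * For example: an ICMPv4 "port unreachable" arriving in response to a
 * UDP packet hashes onto the same slave as the UDP flow itself, because
 * the code below advances nhoff past the icmphdr and re-parses the
 * embedded original IP header via a second bond_flow_ip() call.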
4272 */ 4273 if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { 4274 skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); 4275 if (ip_proto == IPPROTO_ICMP) { 4276 if (!icmp_is_err(fk->icmp.type)) 4277 return true; 4278 4279 nhoff += sizeof(struct icmphdr); 4280 } else if (ip_proto == IPPROTO_ICMPV6) { 4281 if (!icmpv6_is_err(fk->icmp.type)) 4282 return true; 4283 4284 nhoff += sizeof(struct icmp6hdr); 4285 } 4286 return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); 4287 } 4288 4289 return true; 4290 } 4291 4292 static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy) 4293 { 4294 hash ^= (__force u32)flow_get_u32_dst(flow) ^ 4295 (__force u32)flow_get_u32_src(flow); 4296 hash ^= (hash >> 16); 4297 hash ^= (hash >> 8); 4298 4299 /* discard lowest hash bit to deal with the common even ports pattern */ 4300 if (xmit_policy == BOND_XMIT_POLICY_LAYER34 || 4301 xmit_policy == BOND_XMIT_POLICY_ENCAP34) 4302 return hash >> 1; 4303 4304 return hash; 4305 } 4306 4307 /* Generate hash based on xmit policy. If @skb is given it is used to linearize 4308 * the data as required, but this function can be used without it if the data is 4309 * known to be linear (e.g. with xdp_buff). 4310 */ 4311 static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data, 4312 __be16 l2_proto, int mhoff, int nhoff, int hlen) 4313 { 4314 struct flow_keys flow; 4315 u32 hash; 4316 4317 if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) 4318 return bond_vlan_srcmac_hash(skb, data, mhoff, hlen); 4319 4320 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || 4321 !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) 4322 return bond_eth_hash(skb, data, mhoff, hlen); 4323 4324 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || 4325 bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { 4326 hash = bond_eth_hash(skb, data, mhoff, hlen); 4327 } else { 4328 if (flow.icmp.id) 4329 memcpy(&hash, &flow.icmp, sizeof(hash)); 4330 else 4331 memcpy(&hash, &flow.ports.ports, sizeof(hash)); 4332 } 4333 4334 return bond_ip_hash(hash, &flow, bond->params.xmit_policy); 4335 } 4336 4337 /** 4338 * bond_xmit_hash - generate a hash value based on the xmit policy 4339 * @bond: bonding device 4340 * @skb: buffer to use for headers 4341 * 4342 * This function will extract the necessary headers from the skb buffer and use 4343 * them to generate a hash based on the xmit_policy set in the bonding device 4344 */ 4345 u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) 4346 { 4347 if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && 4348 skb->l4_hash) 4349 return skb->hash; 4350 4351 return __bond_xmit_hash(bond, skb, skb->data, skb->protocol, 4352 0, skb_network_offset(skb), 4353 skb_headlen(skb)); 4354 } 4355 4356 /** 4357 * bond_xmit_hash_xdp - generate a hash value based on the xmit policy 4358 * @bond: bonding device 4359 * @xdp: buffer to use for headers 4360 * 4361 * The XDP variant of bond_xmit_hash. 
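 *
 * Assumes the frame data is linear with the Ethernet header starting at
 * xdp->data; frames too short for a full ethhdr hash to 0.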
4362 */ 4363 static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) 4364 { 4365 struct ethhdr *eth; 4366 4367 if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) 4368 return 0; 4369 4370 eth = (struct ethhdr *)xdp->data; 4371 4372 return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, 4373 sizeof(struct ethhdr), xdp->data_end - xdp->data); 4374 } 4375 4376 /*-------------------------- Device entry points ----------------------------*/ 4377 4378 void bond_work_init_all(struct bonding *bond) 4379 { 4380 INIT_DELAYED_WORK(&bond->mcast_work, 4381 bond_resend_igmp_join_requests_delayed); 4382 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 4383 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 4384 INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor); 4385 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 4386 INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler); 4387 } 4388 4389 static void bond_work_cancel_all(struct bonding *bond) 4390 { 4391 cancel_delayed_work_sync(&bond->mii_work); 4392 cancel_delayed_work_sync(&bond->arp_work); 4393 cancel_delayed_work_sync(&bond->alb_work); 4394 cancel_delayed_work_sync(&bond->ad_work); 4395 cancel_delayed_work_sync(&bond->mcast_work); 4396 cancel_delayed_work_sync(&bond->slave_arr_work); 4397 } 4398 4399 static int bond_open(struct net_device *bond_dev) 4400 { 4401 struct bonding *bond = netdev_priv(bond_dev); 4402 struct list_head *iter; 4403 struct slave *slave; 4404 4405 if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) { 4406 bond->rr_tx_counter = alloc_percpu(u32); 4407 if (!bond->rr_tx_counter) 4408 return -ENOMEM; 4409 } 4410 4411 /* reset slave->backup and slave->inactive */ 4412 if (bond_has_slaves(bond)) { 4413 bond_for_each_slave(bond, slave, iter) { 4414 if (bond_uses_primary(bond) && 4415 slave != rcu_access_pointer(bond->curr_active_slave)) { 4416 bond_set_slave_inactive_flags(slave, 4417 BOND_SLAVE_NOTIFY_NOW); 4418 } else if (BOND_MODE(bond) != BOND_MODE_8023AD) { 4419 bond_set_slave_active_flags(slave, 4420 BOND_SLAVE_NOTIFY_NOW); 4421 } 4422 } 4423 } 4424 4425 if (bond_is_lb(bond)) { 4426 /* bond_alb_initialize must be called before the timer 4427 * is started. 4428 */ 4429 if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB))) 4430 return -ENOMEM; 4431 if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB) 4432 queue_delayed_work(bond->wq, &bond->alb_work, 0); 4433 } 4434 4435 if (bond->params.miimon) /* link check interval, in milliseconds. */ 4436 queue_delayed_work(bond->wq, &bond->mii_work, 0); 4437 4438 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ 4439 queue_delayed_work(bond->wq, &bond->arp_work, 0); 4440 bond->recv_probe = bond_rcv_validate; 4441 } 4442 4443 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 4444 queue_delayed_work(bond->wq, &bond->ad_work, 0); 4445 /* register to receive LACPDUs */ 4446 bond->recv_probe = bond_3ad_lacpdu_recv; 4447 bond_3ad_initiate_agg_selection(bond, 1); 4448 4449 bond_for_each_slave(bond, slave, iter) 4450 dev_mc_add(slave->dev, lacpdu_mcast_addr); 4451 } 4452 4453 if (bond_mode_can_use_xmit_hash(bond)) 4454 bond_update_slave_arr(bond, NULL); 4455 4456 return 0; 4457 } 4458 4459 static int bond_close(struct net_device *bond_dev) 4460 { 4461 struct bonding *bond = netdev_priv(bond_dev); 4462 struct slave *slave; 4463 4464 bond_work_cancel_all(bond); 4465 bond->send_peer_notif = 0; 4466 if (bond_is_lb(bond)) 4467 bond_alb_deinitialize(bond); 4468 bond->recv_probe = NULL; 4469 4470 if (bond_uses_primary(bond)) { 4471 rcu_read_lock(); 4472 slave = rcu_dereference(bond->curr_active_slave); 4473 if (slave) 4474 bond_hw_addr_flush(bond_dev, slave->dev); 4475 rcu_read_unlock(); 4476 } else { 4477 struct list_head *iter; 4478 4479 bond_for_each_slave(bond, slave, iter) 4480 bond_hw_addr_flush(bond_dev, slave->dev); 4481 } 4482 4483 return 0; 4484 } 4485 4486 /* fold stats, assuming all rtnl_link_stats64 fields are u64, but 4487 * that some drivers can provide 32bit values only. 4488 */ 4489 static void bond_fold_stats(struct rtnl_link_stats64 *_res, 4490 const struct rtnl_link_stats64 *_new, 4491 const struct rtnl_link_stats64 *_old) 4492 { 4493 const u64 *new = (const u64 *)_new; 4494 const u64 *old = (const u64 *)_old; 4495 u64 *res = (u64 *)_res; 4496 int i; 4497 4498 for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) { 4499 u64 nv = new[i]; 4500 u64 ov = old[i]; 4501 s64 delta = nv - ov; 4502 4503 /* detects if this particular field is 32bit only */ 4504 if (((nv | ov) >> 32) == 0) 4505 delta = (s64)(s32)((u32)nv - (u32)ov); 4506 4507 /* filter anomalies, some drivers reset their stats 4508 * at down/up events. 
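 *
 * Worked example of the 32-bit detection above: a driver whose u32
 * counter wraps from 0xffffffff to 9 yields
 * (s64)(s32)((u32)9 - (u32)0xffffffff) = 10, a small positive delta
 * rather than a huge bogus one. A negative delta (e.g. from a driver
 * that zeroed its counters across a down/up cycle) is simply discarded
 * below.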
4509 */ 4510 if (delta > 0) 4511 res[i] += delta; 4512 } 4513 } 4514 4515 #ifdef CONFIG_LOCKDEP 4516 static int bond_get_lowest_level_rcu(struct net_device *dev) 4517 { 4518 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 4519 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 4520 int cur = 0, max = 0; 4521 4522 now = dev; 4523 iter = &dev->adj_list.lower; 4524 4525 while (1) { 4526 next = NULL; 4527 while (1) { 4528 ldev = netdev_next_lower_dev_rcu(now, &iter); 4529 if (!ldev) 4530 break; 4531 4532 next = ldev; 4533 niter = &ldev->adj_list.lower; 4534 dev_stack[cur] = now; 4535 iter_stack[cur++] = iter; 4536 if (max <= cur) 4537 max = cur; 4538 break; 4539 } 4540 4541 if (!next) { 4542 if (!cur) 4543 return max; 4544 next = dev_stack[--cur]; 4545 niter = iter_stack[cur]; 4546 } 4547 4548 now = next; 4549 iter = niter; 4550 } 4551 4552 return max; 4553 } 4554 #endif 4555 4556 static void bond_get_stats(struct net_device *bond_dev, 4557 struct rtnl_link_stats64 *stats) 4558 { 4559 struct bonding *bond = netdev_priv(bond_dev); 4560 struct rtnl_link_stats64 temp; 4561 struct list_head *iter; 4562 struct slave *slave; 4563 int nest_level = 0; 4564 4565 4566 rcu_read_lock(); 4567 #ifdef CONFIG_LOCKDEP 4568 nest_level = bond_get_lowest_level_rcu(bond_dev); 4569 #endif 4570 4571 spin_lock_nested(&bond->stats_lock, nest_level); 4572 memcpy(stats, &bond->bond_stats, sizeof(*stats)); 4573 4574 bond_for_each_slave_rcu(bond, slave, iter) { 4575 const struct rtnl_link_stats64 *new = 4576 dev_get_stats(slave->dev, &temp); 4577 4578 bond_fold_stats(stats, new, &slave->slave_stats); 4579 4580 /* save off the slave stats for the next run */ 4581 memcpy(&slave->slave_stats, new, sizeof(*new)); 4582 } 4583 4584 memcpy(&bond->bond_stats, stats, sizeof(*stats)); 4585 spin_unlock(&bond->stats_lock); 4586 rcu_read_unlock(); 4587 } 4588 4589 static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 4590 { 4591 struct bonding *bond = netdev_priv(bond_dev); 4592 struct mii_ioctl_data *mii = NULL; 4593 4594 netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd); 4595 4596 switch (cmd) { 4597 case SIOCGMIIPHY: 4598 mii = if_mii(ifr); 4599 if (!mii) 4600 return -EINVAL; 4601 4602 mii->phy_id = 0; 4603 fallthrough; 4604 case SIOCGMIIREG: 4605 /* We do this again just in case we were called by SIOCGMIIREG 4606 * instead of SIOCGMIIPHY. 
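 *
 * Only register 1 (MII_BMSR) is emulated: BMSR_LSTATUS is reported iff
 * the bond currently has a carrier, so tools that poll the MII link bit
 * (mii-tool style) see the aggregate link state of the bond.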
4607 */ 4608 mii = if_mii(ifr); 4609 if (!mii) 4610 return -EINVAL; 4611 4612 if (mii->reg_num == 1) { 4613 mii->val_out = 0; 4614 if (netif_carrier_ok(bond->dev)) 4615 mii->val_out = BMSR_LSTATUS; 4616 } 4617 4618 break; 4619 default: 4620 return -EOPNOTSUPP; 4621 } 4622 4623 return 0; 4624 } 4625 4626 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 4627 { 4628 struct bonding *bond = netdev_priv(bond_dev); 4629 struct net_device *slave_dev = NULL; 4630 struct ifbond k_binfo; 4631 struct ifbond __user *u_binfo = NULL; 4632 struct ifslave k_sinfo; 4633 struct ifslave __user *u_sinfo = NULL; 4634 struct bond_opt_value newval; 4635 struct net *net; 4636 int res = 0; 4637 4638 netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); 4639 4640 switch (cmd) { 4641 case SIOCBONDINFOQUERY: 4642 u_binfo = (struct ifbond __user *)ifr->ifr_data; 4643 4644 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) 4645 return -EFAULT; 4646 4647 bond_info_query(bond_dev, &k_binfo); 4648 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) 4649 return -EFAULT; 4650 4651 return 0; 4652 case SIOCBONDSLAVEINFOQUERY: 4653 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 4654 4655 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) 4656 return -EFAULT; 4657 4658 res = bond_slave_info_query(bond_dev, &k_sinfo); 4659 if (res == 0 && 4660 copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) 4661 return -EFAULT; 4662 4663 return res; 4664 default: 4665 break; 4666 } 4667 4668 net = dev_net(bond_dev); 4669 4670 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 4671 return -EPERM; 4672 4673 slave_dev = __dev_get_by_name(net, ifr->ifr_slave); 4674 4675 slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); 4676 4677 if (!slave_dev) 4678 return -ENODEV; 4679 4680 switch (cmd) { 4681 case SIOCBONDENSLAVE: 4682 res = bond_enslave(bond_dev, slave_dev, NULL); 4683 break; 4684 case SIOCBONDRELEASE: 4685 res = bond_release(bond_dev, slave_dev); 4686 break; 4687 case SIOCBONDSETHWADDR: 4688 res = bond_set_dev_addr(bond_dev, slave_dev); 4689 break; 4690 case SIOCBONDCHANGEACTIVE: 4691 bond_opt_initstr(&newval, slave_dev->name); 4692 res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE, 4693 &newval); 4694 break; 4695 default: 4696 res = -EOPNOTSUPP; 4697 } 4698 4699 return res; 4700 } 4701 4702 static int bond_siocdevprivate(struct net_device *bond_dev, struct ifreq *ifr, 4703 void __user *data, int cmd) 4704 { 4705 struct ifreq ifrdata = { .ifr_data = data }; 4706 4707 switch (cmd) { 4708 case BOND_INFO_QUERY_OLD: 4709 return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDINFOQUERY); 4710 case BOND_SLAVE_INFO_QUERY_OLD: 4711 return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDSLAVEINFOQUERY); 4712 case BOND_ENSLAVE_OLD: 4713 return bond_do_ioctl(bond_dev, ifr, SIOCBONDENSLAVE); 4714 case BOND_RELEASE_OLD: 4715 return bond_do_ioctl(bond_dev, ifr, SIOCBONDRELEASE); 4716 case BOND_SETHWADDR_OLD: 4717 return bond_do_ioctl(bond_dev, ifr, SIOCBONDSETHWADDR); 4718 case BOND_CHANGE_ACTIVE_OLD: 4719 return bond_do_ioctl(bond_dev, ifr, SIOCBONDCHANGEACTIVE); 4720 } 4721 4722 return -EOPNOTSUPP; 4723 } 4724 4725 static void bond_change_rx_flags(struct net_device *bond_dev, int change) 4726 { 4727 struct bonding *bond = netdev_priv(bond_dev); 4728 4729 if (change & IFF_PROMISC) 4730 bond_set_promiscuity(bond, 4731 bond_dev->flags & IFF_PROMISC ? 1 : -1); 4732 4733 if (change & IFF_ALLMULTI) 4734 bond_set_allmulti(bond, 4735 bond_dev->flags & IFF_ALLMULTI ? 
				  1 : -1);
}

static void bond_set_rx_mode(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	rcu_read_lock();
	if (bond_uses_primary(bond)) {
		slave = rcu_dereference(bond->curr_active_slave);
		if (slave) {
			dev_uc_sync(slave->dev, bond_dev);
			dev_mc_sync(slave->dev, bond_dev);
		}
	} else {
		bond_for_each_slave_rcu(bond, slave, iter) {
			dev_uc_sync_multiple(slave->dev, bond_dev);
			dev_mc_sync_multiple(slave->dev, bond_dev);
		}
	}
	rcu_read_unlock();
}

static int bond_neigh_init(struct neighbour *n)
{
	struct bonding *bond = netdev_priv(n->dev);
	const struct net_device_ops *slave_ops;
	struct neigh_parms parms;
	struct slave *slave;
	int ret = 0;

	rcu_read_lock();
	slave = bond_first_slave_rcu(bond);
	if (!slave)
		goto out;
	slave_ops = slave->dev->netdev_ops;
	if (!slave_ops->ndo_neigh_setup)
		goto out;

	/* TODO: find another way [1] to implement this.
	 * Passing a zeroed structure is fragile,
	 * but at least we do not pass garbage.
	 *
	 * [1] One way would be that ndo_neigh_setup() never touch
	 *     struct neigh_parms, but propagate the new neigh_setup()
	 *     back to ___neigh_create() / neigh_parms_alloc()
	 */
	memset(&parms, 0, sizeof(parms));
	ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);

	if (ret)
		goto out;

	if (parms.neigh_setup)
		ret = parms.neigh_setup(n);
out:
	rcu_read_unlock();
	return ret;
}

/* The bonding ndo_neigh_setup is called at init time before any
 * slave exists. So we must declare a proxy setup function which will
 * be used at run time to resolve the actual slave neigh param setup.
 *
 * It's also called by master devices (such as vlans) to setup their
 * underlying devices. In that case - do nothing, we're already set up from
 * our init.
 */
static int bond_neigh_setup(struct net_device *dev,
			    struct neigh_parms *parms)
{
	/* modify only our neigh_parms */
	if (parms->dev == dev)
		parms->neigh_setup = bond_neigh_init;

	return 0;
}

/* Change the MTU of all of a master's slaves to match the master */
static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	int res = 0;

	netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);

	bond_for_each_slave(bond, slave, iter) {
		slave_dbg(bond_dev, slave->dev, "s %p c_m %p\n",
			  slave, slave->dev->netdev_ops->ndo_change_mtu);

		res = dev_set_mtu(slave->dev, new_mtu);

		if (res) {
			/* If we failed to set the slave's mtu to the new value
			 * we must abort the operation even in ACTIVE_BACKUP
			 * mode, because if we allow the backup slaves to have
			 * different mtu values than the active slave we'll
			 * need to change their mtu when doing a failover. That
			 * means changing their mtu from timer context, which
			 * is probably not a good idea.
4839 */ 4840 slave_dbg(bond_dev, slave->dev, "err %d setting mtu to %d\n", 4841 res, new_mtu); 4842 goto unwind; 4843 } 4844 } 4845 4846 WRITE_ONCE(bond_dev->mtu, new_mtu); 4847 4848 return 0; 4849 4850 unwind: 4851 /* unwind from head to the slave that failed */ 4852 bond_for_each_slave(bond, rollback_slave, iter) { 4853 int tmp_res; 4854 4855 if (rollback_slave == slave) 4856 break; 4857 4858 tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); 4859 if (tmp_res) 4860 slave_dbg(bond_dev, rollback_slave->dev, "unwind err %d\n", 4861 tmp_res); 4862 } 4863 4864 return res; 4865 } 4866 4867 /* Change HW address 4868 * 4869 * Note that many devices must be down to change the HW address, and 4870 * downing the master releases all slaves. We can make bonds full of 4871 * bonding devices to test this, however. 4872 */ 4873 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4874 { 4875 struct bonding *bond = netdev_priv(bond_dev); 4876 struct slave *slave, *rollback_slave; 4877 struct sockaddr_storage *ss = addr, tmp_ss; 4878 struct list_head *iter; 4879 int res = 0; 4880 4881 if (BOND_MODE(bond) == BOND_MODE_ALB) 4882 return bond_alb_set_mac_address(bond_dev, addr); 4883 4884 4885 netdev_dbg(bond_dev, "%s: bond=%p\n", __func__, bond); 4886 4887 /* If fail_over_mac is enabled, do nothing and return success. 4888 * Returning an error causes ifenslave to fail. 4889 */ 4890 if (bond->params.fail_over_mac && 4891 BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) 4892 return 0; 4893 4894 if (!is_valid_ether_addr(ss->__data)) 4895 return -EADDRNOTAVAIL; 4896 4897 bond_for_each_slave(bond, slave, iter) { 4898 slave_dbg(bond_dev, slave->dev, "%s: slave=%p\n", 4899 __func__, slave); 4900 res = dev_set_mac_address(slave->dev, addr, NULL); 4901 if (res) { 4902 /* TODO: consider downing the slave 4903 * and retry ? 4904 * User should expect communications 4905 * breakage anyway until ARP finish 4906 * updating, so... 4907 */ 4908 slave_dbg(bond_dev, slave->dev, "%s: err %d\n", 4909 __func__, res); 4910 goto unwind; 4911 } 4912 } 4913 4914 /* success */ 4915 dev_addr_set(bond_dev, ss->__data); 4916 return 0; 4917 4918 unwind: 4919 memcpy(tmp_ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 4920 tmp_ss.ss_family = bond_dev->type; 4921 4922 /* unwind from head to the slave that failed */ 4923 bond_for_each_slave(bond, rollback_slave, iter) { 4924 int tmp_res; 4925 4926 if (rollback_slave == slave) 4927 break; 4928 4929 tmp_res = dev_set_mac_address(rollback_slave->dev, 4930 (struct sockaddr *)&tmp_ss, NULL); 4931 if (tmp_res) { 4932 slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n", 4933 __func__, tmp_res); 4934 } 4935 } 4936 4937 return res; 4938 } 4939 4940 /** 4941 * bond_get_slave_by_id - get xmit slave with slave_id 4942 * @bond: bonding device that is transmitting 4943 * @slave_id: slave id up to slave_cnt-1 through which to transmit 4944 * 4945 * This function tries to get slave with slave_id but in case 4946 * it fails, it tries to find the first available slave for transmission. 
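 *
 * For example, with three slaves [A, B, C] and slave_id == 2, the
 * candidates are tried in the order C, A, B, and the first one that
 * can tx is returned.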
4947 */ 4948 static struct slave *bond_get_slave_by_id(struct bonding *bond, 4949 int slave_id) 4950 { 4951 struct list_head *iter; 4952 struct slave *slave; 4953 int i = slave_id; 4954 4955 /* Here we start from the slave with slave_id */ 4956 bond_for_each_slave_rcu(bond, slave, iter) { 4957 if (--i < 0) { 4958 if (bond_slave_can_tx(slave)) 4959 return slave; 4960 } 4961 } 4962 4963 /* Here we start from the first slave up to slave_id */ 4964 i = slave_id; 4965 bond_for_each_slave_rcu(bond, slave, iter) { 4966 if (--i < 0) 4967 break; 4968 if (bond_slave_can_tx(slave)) 4969 return slave; 4970 } 4971 /* no slave that can tx has been found */ 4972 return NULL; 4973 } 4974 4975 /** 4976 * bond_rr_gen_slave_id - generate slave id based on packets_per_slave 4977 * @bond: bonding device to use 4978 * 4979 * Based on the value of the bonding device's packets_per_slave parameter 4980 * this function generates a slave id, which is usually used as the next 4981 * slave to transmit through. 4982 */ 4983 static u32 bond_rr_gen_slave_id(struct bonding *bond) 4984 { 4985 u32 slave_id; 4986 struct reciprocal_value reciprocal_packets_per_slave; 4987 int packets_per_slave = bond->params.packets_per_slave; 4988 4989 switch (packets_per_slave) { 4990 case 0: 4991 slave_id = get_random_u32(); 4992 break; 4993 case 1: 4994 slave_id = this_cpu_inc_return(*bond->rr_tx_counter); 4995 break; 4996 default: 4997 reciprocal_packets_per_slave = 4998 bond->params.reciprocal_packets_per_slave; 4999 slave_id = this_cpu_inc_return(*bond->rr_tx_counter); 5000 slave_id = reciprocal_divide(slave_id, 5001 reciprocal_packets_per_slave); 5002 break; 5003 } 5004 5005 return slave_id; 5006 } 5007 5008 static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, 5009 struct sk_buff *skb) 5010 { 5011 struct slave *slave; 5012 int slave_cnt; 5013 u32 slave_id; 5014 5015 /* Start with the curr_active_slave that joined the bond as the 5016 * default for sending IGMP traffic. For failover purposes one 5017 * needs to maintain some consistency for the interface that will 5018 * send the join/membership reports. The curr_active_slave found 5019 * will send all of this type of traffic. 
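 *
 * Everything that is not IGMP falls through to the non_igmp path below,
 * which spreads transmits across the slaves via
 * bond_rr_gen_slave_id() % slave_cnt.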
5020 */ 5021 if (skb->protocol == htons(ETH_P_IP)) { 5022 int noff = skb_network_offset(skb); 5023 struct iphdr *iph; 5024 5025 if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) 5026 goto non_igmp; 5027 5028 iph = ip_hdr(skb); 5029 if (iph->protocol == IPPROTO_IGMP) { 5030 slave = rcu_dereference(bond->curr_active_slave); 5031 if (slave) 5032 return slave; 5033 return bond_get_slave_by_id(bond, 0); 5034 } 5035 } 5036 5037 non_igmp: 5038 slave_cnt = READ_ONCE(bond->slave_cnt); 5039 if (likely(slave_cnt)) { 5040 slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; 5041 return bond_get_slave_by_id(bond, slave_id); 5042 } 5043 return NULL; 5044 } 5045 5046 static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, 5047 struct xdp_buff *xdp) 5048 { 5049 struct slave *slave; 5050 int slave_cnt; 5051 u32 slave_id; 5052 const struct ethhdr *eth; 5053 void *data = xdp->data; 5054 5055 if (data + sizeof(struct ethhdr) > xdp->data_end) 5056 goto non_igmp; 5057 5058 eth = (struct ethhdr *)data; 5059 data += sizeof(struct ethhdr); 5060 5061 /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ 5062 if (eth->h_proto == htons(ETH_P_IP)) { 5063 const struct iphdr *iph; 5064 5065 if (data + sizeof(struct iphdr) > xdp->data_end) 5066 goto non_igmp; 5067 5068 iph = (struct iphdr *)data; 5069 5070 if (iph->protocol == IPPROTO_IGMP) { 5071 slave = rcu_dereference(bond->curr_active_slave); 5072 if (slave) 5073 return slave; 5074 return bond_get_slave_by_id(bond, 0); 5075 } 5076 } 5077 5078 non_igmp: 5079 slave_cnt = READ_ONCE(bond->slave_cnt); 5080 if (likely(slave_cnt)) { 5081 slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; 5082 return bond_get_slave_by_id(bond, slave_id); 5083 } 5084 return NULL; 5085 } 5086 5087 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, 5088 struct net_device *bond_dev) 5089 { 5090 struct bonding *bond = netdev_priv(bond_dev); 5091 struct slave *slave; 5092 5093 slave = bond_xmit_roundrobin_slave_get(bond, skb); 5094 if (likely(slave)) 5095 return bond_dev_queue_xmit(bond, skb, slave->dev); 5096 5097 return bond_tx_drop(bond_dev, skb); 5098 } 5099 5100 static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) 5101 { 5102 return rcu_dereference(bond->curr_active_slave); 5103 } 5104 5105 /* In active-backup mode, we know that bond->curr_active_slave is always valid if 5106 * the bond has a usable interface. 5107 */ 5108 static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, 5109 struct net_device *bond_dev) 5110 { 5111 struct bonding *bond = netdev_priv(bond_dev); 5112 struct slave *slave; 5113 5114 slave = bond_xmit_activebackup_slave_get(bond); 5115 if (slave) 5116 return bond_dev_queue_xmit(bond, skb, slave->dev); 5117 5118 return bond_tx_drop(bond_dev, skb); 5119 } 5120 5121 /* Use this to update slave_array when (a) it's not appropriate to update 5122 * slave_array right away (note that update_slave_array() may sleep) 5123 * and / or (b) RTNL is not held. 5124 */ 5125 void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay) 5126 { 5127 queue_delayed_work(bond->wq, &bond->slave_arr_work, delay); 5128 } 5129 5130 /* Slave array work handler. 
Holds only RTNL */
static void bond_slave_arr_handler(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    slave_arr_work.work);
	int ret;

	if (!rtnl_trylock())
		goto err;

	ret = bond_update_slave_arr(bond, NULL);
	rtnl_unlock();
	if (ret) {
		pr_warn_ratelimited("Failed to update slave array from WT\n");
		goto err;
	}
	return;

err:
	bond_slave_arr_work_rearm(bond, 1);
}

static void bond_skip_slave(struct bond_up_slave *slaves,
			    struct slave *skipslave)
{
	int idx;

	/* Rare situation where caller has asked to skip a specific
	 * slave but allocation failed (most likely!). BTW this is
	 * only possible when the call is initiated from
	 * __bond_release_one(). In this situation, overwrite the
	 * skipslave entry in the array with the last entry from the
	 * array to avoid a situation where the xmit path may choose
	 * this to-be-skipped slave to send a packet out.
	 */
	for (idx = 0; slaves && idx < slaves->count; idx++) {
		if (skipslave == slaves->arr[idx]) {
			slaves->arr[idx] =
				slaves->arr[slaves->count - 1];
			slaves->count--;
			break;
		}
	}
}

static void bond_set_slave_arr(struct bonding *bond,
			       struct bond_up_slave *usable_slaves,
			       struct bond_up_slave *all_slaves)
{
	struct bond_up_slave *usable, *all;

	usable = rtnl_dereference(bond->usable_slaves);
	rcu_assign_pointer(bond->usable_slaves, usable_slaves);
	kfree_rcu(usable, rcu);

	all = rtnl_dereference(bond->all_slaves);
	rcu_assign_pointer(bond->all_slaves, all_slaves);
	kfree_rcu(all, rcu);
}

static void bond_reset_slave_arr(struct bonding *bond)
{
	bond_set_slave_arr(bond, NULL, NULL);
}

/* Build the usable slaves array in control path for modes that use xmit-hash
 * to determine the slave interface -
 * (a) BOND_MODE_8023AD
 * (b) BOND_MODE_XOR
 * (c) (BOND_MODE_TLB || BOND_MODE_ALB) && tlb_dynamic_lb == 0
 *
 * The caller is expected to hold RTNL only and NO other lock!
 */
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
{
	struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL;
	struct slave *slave;
	struct list_head *iter;
	int agg_id = 0;
	int ret = 0;

	might_sleep();

	usable_slaves = kzalloc(struct_size(usable_slaves, arr,
					    bond->slave_cnt), GFP_KERNEL);
	all_slaves = kzalloc(struct_size(all_slaves, arr,
					 bond->slave_cnt), GFP_KERNEL);
	if (!usable_slaves || !all_slaves) {
		ret = -ENOMEM;
		goto out;
	}
	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
		struct ad_info ad_info;

		spin_lock_bh(&bond->mode_lock);
		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
			spin_unlock_bh(&bond->mode_lock);
			pr_debug("bond_3ad_get_active_agg_info failed\n");
			/* No active aggregator means it's not safe to use
			 * the previous array.
5230 */ 5231 bond_reset_slave_arr(bond); 5232 goto out; 5233 } 5234 spin_unlock_bh(&bond->mode_lock); 5235 agg_id = ad_info.aggregator_id; 5236 } 5237 bond_for_each_slave(bond, slave, iter) { 5238 if (skipslave == slave) 5239 continue; 5240 5241 all_slaves->arr[all_slaves->count++] = slave; 5242 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 5243 struct aggregator *agg; 5244 5245 agg = SLAVE_AD_INFO(slave)->port.aggregator; 5246 if (!agg || agg->aggregator_identifier != agg_id) 5247 continue; 5248 } 5249 if (!bond_slave_can_tx(slave)) 5250 continue; 5251 5252 slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", 5253 usable_slaves->count); 5254 5255 usable_slaves->arr[usable_slaves->count++] = slave; 5256 } 5257 5258 bond_set_slave_arr(bond, usable_slaves, all_slaves); 5259 return ret; 5260 out: 5261 if (ret != 0 && skipslave) { 5262 bond_skip_slave(rtnl_dereference(bond->all_slaves), 5263 skipslave); 5264 bond_skip_slave(rtnl_dereference(bond->usable_slaves), 5265 skipslave); 5266 } 5267 kfree_rcu(all_slaves, rcu); 5268 kfree_rcu(usable_slaves, rcu); 5269 5270 return ret; 5271 } 5272 5273 static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, 5274 struct sk_buff *skb, 5275 struct bond_up_slave *slaves) 5276 { 5277 struct slave *slave; 5278 unsigned int count; 5279 u32 hash; 5280 5281 hash = bond_xmit_hash(bond, skb); 5282 count = slaves ? READ_ONCE(slaves->count) : 0; 5283 if (unlikely(!count)) 5284 return NULL; 5285 5286 slave = slaves->arr[hash % count]; 5287 return slave; 5288 } 5289 5290 static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, 5291 struct xdp_buff *xdp) 5292 { 5293 struct bond_up_slave *slaves; 5294 unsigned int count; 5295 u32 hash; 5296 5297 hash = bond_xmit_hash_xdp(bond, xdp); 5298 slaves = rcu_dereference(bond->usable_slaves); 5299 count = slaves ? READ_ONCE(slaves->count) : 0; 5300 if (unlikely(!count)) 5301 return NULL; 5302 5303 return slaves->arr[hash % count]; 5304 } 5305 5306 /* Use this Xmit function for 3AD as well as XOR modes. The current 5307 * usable slave array is formed in the control path. The xmit function 5308 * just calculates hash and sends the packet out. 5309 */ 5310 static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, 5311 struct net_device *dev) 5312 { 5313 struct bonding *bond = netdev_priv(dev); 5314 struct bond_up_slave *slaves; 5315 struct slave *slave; 5316 5317 slaves = rcu_dereference(bond->usable_slaves); 5318 slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); 5319 if (likely(slave)) 5320 return bond_dev_queue_xmit(bond, skb, slave->dev); 5321 5322 return bond_tx_drop(dev, skb); 5323 } 5324 5325 /* in broadcast mode, we send everything to all usable interfaces. 
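 *
 * The last usable slave is sent the original skb while every earlier
 * one gets its own clone, so exactly one reference to the skb is
 * consumed even when a clone fails; if no slave was usable the skb is
 * freed.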
*/ 5326 static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, 5327 struct net_device *bond_dev) 5328 { 5329 struct bonding *bond = netdev_priv(bond_dev); 5330 struct slave *slave = NULL; 5331 struct list_head *iter; 5332 bool xmit_suc = false; 5333 bool skb_used = false; 5334 5335 bond_for_each_slave_rcu(bond, slave, iter) { 5336 struct sk_buff *skb2; 5337 5338 if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) 5339 continue; 5340 5341 if (bond_is_last_slave(bond, slave)) { 5342 skb2 = skb; 5343 skb_used = true; 5344 } else { 5345 skb2 = skb_clone(skb, GFP_ATOMIC); 5346 if (!skb2) { 5347 net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", 5348 bond_dev->name, __func__); 5349 continue; 5350 } 5351 } 5352 5353 if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK) 5354 xmit_suc = true; 5355 } 5356 5357 if (!skb_used) 5358 dev_kfree_skb_any(skb); 5359 5360 if (xmit_suc) 5361 return NETDEV_TX_OK; 5362 5363 dev_core_stats_tx_dropped_inc(bond_dev); 5364 return NET_XMIT_DROP; 5365 } 5366 5367 /*------------------------- Device initialization ---------------------------*/ 5368 5369 /* Lookup the slave that corresponds to a qid */ 5370 static inline int bond_slave_override(struct bonding *bond, 5371 struct sk_buff *skb) 5372 { 5373 struct slave *slave = NULL; 5374 struct list_head *iter; 5375 5376 if (!skb_rx_queue_recorded(skb)) 5377 return 1; 5378 5379 /* Find out if any slaves have the same mapping as this skb. */ 5380 bond_for_each_slave_rcu(bond, slave, iter) { 5381 if (READ_ONCE(slave->queue_id) == skb_get_queue_mapping(skb)) { 5382 if (bond_slave_is_up(slave) && 5383 slave->link == BOND_LINK_UP) { 5384 bond_dev_queue_xmit(bond, skb, slave->dev); 5385 return 0; 5386 } 5387 /* If the slave isn't UP, use default transmit policy. */ 5388 break; 5389 } 5390 } 5391 5392 return 1; 5393 } 5394 5395 5396 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, 5397 struct net_device *sb_dev) 5398 { 5399 /* This helper function exists to help dev_pick_tx get the correct 5400 * destination queue. Using a helper function skips a call to 5401 * skb_tx_hash and will put the skbs in the queue we expect on their 5402 * way down to the bonding driver. 5403 */ 5404 u16 txq = skb_rx_queue_recorded(skb) ? 
skb_get_rx_queue(skb) : 0; 5405 5406 /* Save the original txq to restore before passing to the driver */ 5407 qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb_get_queue_mapping(skb); 5408 5409 if (unlikely(txq >= dev->real_num_tx_queues)) { 5410 do { 5411 txq -= dev->real_num_tx_queues; 5412 } while (txq >= dev->real_num_tx_queues); 5413 } 5414 return txq; 5415 } 5416 5417 static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, 5418 struct sk_buff *skb, 5419 bool all_slaves) 5420 { 5421 struct bonding *bond = netdev_priv(master_dev); 5422 struct bond_up_slave *slaves; 5423 struct slave *slave = NULL; 5424 5425 switch (BOND_MODE(bond)) { 5426 case BOND_MODE_ROUNDROBIN: 5427 slave = bond_xmit_roundrobin_slave_get(bond, skb); 5428 break; 5429 case BOND_MODE_ACTIVEBACKUP: 5430 slave = bond_xmit_activebackup_slave_get(bond); 5431 break; 5432 case BOND_MODE_8023AD: 5433 case BOND_MODE_XOR: 5434 if (all_slaves) 5435 slaves = rcu_dereference(bond->all_slaves); 5436 else 5437 slaves = rcu_dereference(bond->usable_slaves); 5438 slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); 5439 break; 5440 case BOND_MODE_BROADCAST: 5441 break; 5442 case BOND_MODE_ALB: 5443 slave = bond_xmit_alb_slave_get(bond, skb); 5444 break; 5445 case BOND_MODE_TLB: 5446 slave = bond_xmit_tlb_slave_get(bond, skb); 5447 break; 5448 default: 5449 /* Should never happen, mode already checked */ 5450 WARN_ONCE(true, "Unknown bonding mode"); 5451 break; 5452 } 5453 5454 if (slave) 5455 return slave->dev; 5456 return NULL; 5457 } 5458 5459 static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow) 5460 { 5461 switch (sk->sk_family) { 5462 #if IS_ENABLED(CONFIG_IPV6) 5463 case AF_INET6: 5464 if (ipv6_only_sock(sk) || 5465 ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { 5466 flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 5467 flow->addrs.v6addrs.src = inet6_sk(sk)->saddr; 5468 flow->addrs.v6addrs.dst = sk->sk_v6_daddr; 5469 break; 5470 } 5471 fallthrough; 5472 #endif 5473 default: /* AF_INET */ 5474 flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 5475 flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr; 5476 flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr; 5477 break; 5478 } 5479 5480 flow->ports.src = inet_sk(sk)->inet_sport; 5481 flow->ports.dst = inet_sk(sk)->inet_dport; 5482 } 5483 5484 /** 5485 * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields 5486 * @sk: socket to use for headers 5487 * 5488 * This function will extract the necessary field from the socket and use 5489 * them to generate a hash based on the LAYER34 xmit_policy. 5490 * Assumes that sk is a TCP or UDP socket. 5491 */ 5492 static u32 bond_sk_hash_l34(struct sock *sk) 5493 { 5494 struct flow_keys flow; 5495 u32 hash; 5496 5497 bond_sk_to_flow(sk, &flow); 5498 5499 /* L4 */ 5500 memcpy(&hash, &flow.ports.ports, sizeof(hash)); 5501 /* L3 */ 5502 return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34); 5503 } 5504 5505 static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond, 5506 struct sock *sk) 5507 { 5508 struct bond_up_slave *slaves; 5509 struct slave *slave; 5510 unsigned int count; 5511 u32 hash; 5512 5513 slaves = rcu_dereference(bond->usable_slaves); 5514 count = slaves ? 
READ_ONCE(slaves->count) : 0; 5515 if (unlikely(!count)) 5516 return NULL; 5517 5518 hash = bond_sk_hash_l34(sk); 5519 slave = slaves->arr[hash % count]; 5520 5521 return slave->dev; 5522 } 5523 5524 static struct net_device *bond_sk_get_lower_dev(struct net_device *dev, 5525 struct sock *sk) 5526 { 5527 struct bonding *bond = netdev_priv(dev); 5528 struct net_device *lower = NULL; 5529 5530 rcu_read_lock(); 5531 if (bond_sk_check(bond)) 5532 lower = __bond_sk_get_lower_dev(bond, sk); 5533 rcu_read_unlock(); 5534 5535 return lower; 5536 } 5537 5538 #if IS_ENABLED(CONFIG_TLS_DEVICE) 5539 static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *skb, 5540 struct net_device *dev) 5541 { 5542 struct net_device *tls_netdev = rcu_dereference(tls_get_ctx(skb->sk)->netdev); 5543 5544 /* tls_netdev might become NULL, even if tls_is_skb_tx_device_offloaded 5545 * was true, if tls_device_down is running in parallel, but it's OK, 5546 * because bond_get_slave_by_dev has a NULL check. 5547 */ 5548 if (likely(bond_get_slave_by_dev(bond, tls_netdev))) 5549 return bond_dev_queue_xmit(bond, skb, tls_netdev); 5550 return bond_tx_drop(dev, skb); 5551 } 5552 #endif 5553 5554 static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 5555 { 5556 struct bonding *bond = netdev_priv(dev); 5557 5558 if (bond_should_override_tx_queue(bond) && 5559 !bond_slave_override(bond, skb)) 5560 return NETDEV_TX_OK; 5561 5562 #if IS_ENABLED(CONFIG_TLS_DEVICE) 5563 if (tls_is_skb_tx_device_offloaded(skb)) 5564 return bond_tls_device_xmit(bond, skb, dev); 5565 #endif 5566 5567 switch (BOND_MODE(bond)) { 5568 case BOND_MODE_ROUNDROBIN: 5569 return bond_xmit_roundrobin(skb, dev); 5570 case BOND_MODE_ACTIVEBACKUP: 5571 return bond_xmit_activebackup(skb, dev); 5572 case BOND_MODE_8023AD: 5573 case BOND_MODE_XOR: 5574 return bond_3ad_xor_xmit(skb, dev); 5575 case BOND_MODE_BROADCAST: 5576 return bond_xmit_broadcast(skb, dev); 5577 case BOND_MODE_ALB: 5578 return bond_alb_xmit(skb, dev); 5579 case BOND_MODE_TLB: 5580 return bond_tlb_xmit(skb, dev); 5581 default: 5582 /* Should never happen, mode already checked */ 5583 netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond)); 5584 WARN_ON_ONCE(1); 5585 return bond_tx_drop(dev, skb); 5586 } 5587 } 5588 5589 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 5590 { 5591 struct bonding *bond = netdev_priv(dev); 5592 netdev_tx_t ret = NETDEV_TX_OK; 5593 5594 /* If we risk deadlock from transmitting this in the 5595 * netpoll path, tell netpoll to queue the frame for later tx 5596 */ 5597 if (unlikely(is_netpoll_tx_blocked(dev))) 5598 return NETDEV_TX_BUSY; 5599 5600 rcu_read_lock(); 5601 if (bond_has_slaves(bond)) 5602 ret = __bond_start_xmit(skb, dev); 5603 else 5604 ret = bond_tx_drop(dev, skb); 5605 rcu_read_unlock(); 5606 5607 return ret; 5608 } 5609 5610 static struct net_device * 5611 bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) 5612 { 5613 struct bonding *bond = netdev_priv(bond_dev); 5614 struct slave *slave; 5615 5616 /* Caller needs to hold rcu_read_lock() */ 5617 5618 switch (BOND_MODE(bond)) { 5619 case BOND_MODE_ROUNDROBIN: 5620 slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); 5621 break; 5622 5623 case BOND_MODE_ACTIVEBACKUP: 5624 slave = bond_xmit_activebackup_slave_get(bond); 5625 break; 5626 5627 case BOND_MODE_8023AD: 5628 case BOND_MODE_XOR: 5629 slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); 5630 break; 5631 5632 default: 5633 if (net_ratelimit()) 5634 
netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", 5635 BOND_MODE(bond)); 5636 return NULL; 5637 } 5638 5639 if (slave) 5640 return slave->dev; 5641 5642 return NULL; 5643 } 5644 5645 static int bond_xdp_xmit(struct net_device *bond_dev, 5646 int n, struct xdp_frame **frames, u32 flags) 5647 { 5648 int nxmit, err = -ENXIO; 5649 5650 rcu_read_lock(); 5651 5652 for (nxmit = 0; nxmit < n; nxmit++) { 5653 struct xdp_frame *frame = frames[nxmit]; 5654 struct xdp_frame *frames1[] = {frame}; 5655 struct net_device *slave_dev; 5656 struct xdp_buff xdp; 5657 5658 xdp_convert_frame_to_buff(frame, &xdp); 5659 5660 slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); 5661 if (!slave_dev) { 5662 err = -ENXIO; 5663 break; 5664 } 5665 5666 err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); 5667 if (err < 1) 5668 break; 5669 } 5670 5671 rcu_read_unlock(); 5672 5673 /* If error happened on the first frame then we can pass the error up, otherwise 5674 * report the number of frames that were xmitted. 5675 */ 5676 if (err < 0) 5677 return (nxmit == 0 ? err : nxmit); 5678 5679 return nxmit; 5680 } 5681 5682 static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5683 struct netlink_ext_ack *extack) 5684 { 5685 struct bonding *bond = netdev_priv(dev); 5686 struct list_head *iter; 5687 struct slave *slave, *rollback_slave; 5688 struct bpf_prog *old_prog; 5689 struct netdev_bpf xdp = { 5690 .command = XDP_SETUP_PROG, 5691 .flags = 0, 5692 .prog = prog, 5693 .extack = extack, 5694 }; 5695 int err; 5696 5697 ASSERT_RTNL(); 5698 5699 if (!bond_xdp_check(bond)) { 5700 BOND_NL_ERR(dev, extack, 5701 "No native XDP support for the current bonding mode"); 5702 return -EOPNOTSUPP; 5703 } 5704 5705 old_prog = bond->xdp_prog; 5706 bond->xdp_prog = prog; 5707 5708 bond_for_each_slave(bond, slave, iter) { 5709 struct net_device *slave_dev = slave->dev; 5710 5711 if (!slave_dev->netdev_ops->ndo_bpf || 5712 !slave_dev->netdev_ops->ndo_xdp_xmit) { 5713 SLAVE_NL_ERR(dev, slave_dev, extack, 5714 "Slave device does not support XDP"); 5715 err = -EOPNOTSUPP; 5716 goto err; 5717 } 5718 5719 if (dev_xdp_prog_count(slave_dev) > 0) { 5720 SLAVE_NL_ERR(dev, slave_dev, extack, 5721 "Slave has XDP program loaded, please unload before enslaving"); 5722 err = -EOPNOTSUPP; 5723 goto err; 5724 } 5725 5726 err = dev_xdp_propagate(slave_dev, &xdp); 5727 if (err < 0) { 5728 /* ndo_bpf() sets extack error message */ 5729 slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); 5730 goto err; 5731 } 5732 if (prog) 5733 bpf_prog_inc(prog); 5734 } 5735 5736 if (prog) { 5737 static_branch_inc(&bpf_master_redirect_enabled_key); 5738 } else if (old_prog) { 5739 bpf_prog_put(old_prog); 5740 static_branch_dec(&bpf_master_redirect_enabled_key); 5741 } 5742 5743 return 0; 5744 5745 err: 5746 /* unwind the program changes */ 5747 bond->xdp_prog = old_prog; 5748 xdp.prog = old_prog; 5749 xdp.extack = NULL; /* do not overwrite original error */ 5750 5751 bond_for_each_slave(bond, rollback_slave, iter) { 5752 struct net_device *slave_dev = rollback_slave->dev; 5753 int err_unwind; 5754 5755 if (slave == rollback_slave) 5756 break; 5757 5758 err_unwind = dev_xdp_propagate(slave_dev, &xdp); 5759 if (err_unwind < 0) 5760 slave_err(dev, slave_dev, 5761 "Error %d when unwinding XDP program change\n", err_unwind); 5762 else if (xdp.prog) 5763 bpf_prog_inc(xdp.prog); 5764 } 5765 return err; 5766 } 5767 5768 static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5769 { 5770 switch (xdp->command) { 5771 
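	/* Only program (un)installation is routed to the driver; queries
	 * are answered by the networking core from its cached XDP state,
	 * and anything else (e.g. XSK pool setup) is rejected below.
	 */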
case XDP_SETUP_PROG: 5772 return bond_xdp_set(dev, xdp->prog, xdp->extack); 5773 default: 5774 return -EINVAL; 5775 } 5776 } 5777 5778 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) 5779 { 5780 if (speed == 0 || speed == SPEED_UNKNOWN) 5781 speed = slave->speed; 5782 else 5783 speed = min(speed, slave->speed); 5784 5785 return speed; 5786 } 5787 5788 /* Set the BOND_PHC_INDEX flag to notify user space */ 5789 static int bond_set_phc_index_flag(struct kernel_hwtstamp_config *kernel_cfg) 5790 { 5791 struct ifreq *ifr = kernel_cfg->ifr; 5792 struct hwtstamp_config cfg; 5793 5794 if (kernel_cfg->copied_to_user) { 5795 /* Lower device has a legacy implementation */ 5796 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) 5797 return -EFAULT; 5798 5799 cfg.flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; 5800 if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) 5801 return -EFAULT; 5802 } else { 5803 kernel_cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; 5804 } 5805 5806 return 0; 5807 } 5808 5809 static int bond_hwtstamp_get(struct net_device *dev, 5810 struct kernel_hwtstamp_config *cfg) 5811 { 5812 struct bonding *bond = netdev_priv(dev); 5813 struct net_device *real_dev; 5814 int err; 5815 5816 real_dev = bond_option_active_slave_get_rcu(bond); 5817 if (!real_dev) 5818 return -EOPNOTSUPP; 5819 5820 err = generic_hwtstamp_get_lower(real_dev, cfg); 5821 if (err) 5822 return err; 5823 5824 return bond_set_phc_index_flag(cfg); 5825 } 5826 5827 static int bond_hwtstamp_set(struct net_device *dev, 5828 struct kernel_hwtstamp_config *cfg, 5829 struct netlink_ext_ack *extack) 5830 { 5831 struct bonding *bond = netdev_priv(dev); 5832 struct net_device *real_dev; 5833 int err; 5834 5835 if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX)) 5836 return -EOPNOTSUPP; 5837 5838 real_dev = bond_option_active_slave_get_rcu(bond); 5839 if (!real_dev) 5840 return -EOPNOTSUPP; 5841 5842 err = generic_hwtstamp_set_lower(real_dev, cfg, extack); 5843 if (err) 5844 return err; 5845 5846 return bond_set_phc_index_flag(cfg); 5847 } 5848 5849 static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev, 5850 struct ethtool_link_ksettings *cmd) 5851 { 5852 struct bonding *bond = netdev_priv(bond_dev); 5853 struct list_head *iter; 5854 struct slave *slave; 5855 u32 speed = 0; 5856 5857 cmd->base.duplex = DUPLEX_UNKNOWN; 5858 cmd->base.port = PORT_OTHER; 5859 5860 /* Since bond_slave_can_tx returns false for all inactive or down slaves, we 5861 * do not need to check mode. Though link speed might not represent 5862 * the true receive or transmit bandwidth (not all modes are symmetric) 5863 * this is an accurate maximum. 5864 */ 5865 bond_for_each_slave(bond, slave, iter) { 5866 if (bond_slave_can_tx(slave)) { 5867 bond_update_speed_duplex(slave); 5868 if (slave->speed != SPEED_UNKNOWN) { 5869 if (BOND_MODE(bond) == BOND_MODE_BROADCAST) 5870 speed = bond_mode_bcast_speed(slave, 5871 speed); 5872 else 5873 speed += slave->speed; 5874 } 5875 if (cmd->base.duplex == DUPLEX_UNKNOWN && 5876 slave->duplex != DUPLEX_UNKNOWN) 5877 cmd->base.duplex = slave->duplex; 5878 } 5879 } 5880 cmd->base.speed = speed ? 
: SPEED_UNKNOWN; 5881 5882 return 0; 5883 } 5884 5885 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, 5886 struct ethtool_drvinfo *drvinfo) 5887 { 5888 strscpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); 5889 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d", 5890 BOND_ABI_VERSION); 5891 } 5892 5893 static int bond_ethtool_get_ts_info(struct net_device *bond_dev, 5894 struct kernel_ethtool_ts_info *info) 5895 { 5896 struct bonding *bond = netdev_priv(bond_dev); 5897 struct kernel_ethtool_ts_info ts_info; 5898 struct net_device *real_dev; 5899 bool sw_tx_support = false; 5900 struct list_head *iter; 5901 struct slave *slave; 5902 int ret = 0; 5903 5904 rcu_read_lock(); 5905 real_dev = bond_option_active_slave_get_rcu(bond); 5906 dev_hold(real_dev); 5907 rcu_read_unlock(); 5908 5909 if (real_dev) { 5910 ret = ethtool_get_ts_info_by_layer(real_dev, info); 5911 } else { 5912 /* Check if all slaves support software tx timestamping */ 5913 rcu_read_lock(); 5914 bond_for_each_slave_rcu(bond, slave, iter) { 5915 ret = ethtool_get_ts_info_by_layer(slave->dev, &ts_info); 5916 if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) { 5917 sw_tx_support = true; 5918 continue; 5919 } 5920 5921 sw_tx_support = false; 5922 break; 5923 } 5924 rcu_read_unlock(); 5925 } 5926 5927 if (sw_tx_support) 5928 info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; 5929 5930 dev_put(real_dev); 5931 return ret; 5932 } 5933 5934 static const struct ethtool_ops bond_ethtool_ops = { 5935 .get_drvinfo = bond_ethtool_get_drvinfo, 5936 .get_link = ethtool_op_get_link, 5937 .get_link_ksettings = bond_ethtool_get_link_ksettings, 5938 .get_ts_info = bond_ethtool_get_ts_info, 5939 }; 5940 5941 static const struct net_device_ops bond_netdev_ops = { 5942 .ndo_init = bond_init, 5943 .ndo_uninit = bond_uninit, 5944 .ndo_open = bond_open, 5945 .ndo_stop = bond_close, 5946 .ndo_start_xmit = bond_start_xmit, 5947 .ndo_select_queue = bond_select_queue, 5948 .ndo_get_stats64 = bond_get_stats, 5949 .ndo_eth_ioctl = bond_eth_ioctl, 5950 .ndo_siocbond = bond_do_ioctl, 5951 .ndo_siocdevprivate = bond_siocdevprivate, 5952 .ndo_change_rx_flags = bond_change_rx_flags, 5953 .ndo_set_rx_mode = bond_set_rx_mode, 5954 .ndo_change_mtu = bond_change_mtu, 5955 .ndo_set_mac_address = bond_set_mac_address, 5956 .ndo_neigh_setup = bond_neigh_setup, 5957 .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, 5958 .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, 5959 #ifdef CONFIG_NET_POLL_CONTROLLER 5960 .ndo_netpoll_setup = bond_netpoll_setup, 5961 .ndo_netpoll_cleanup = bond_netpoll_cleanup, 5962 .ndo_poll_controller = bond_poll_controller, 5963 #endif 5964 .ndo_add_slave = bond_enslave, 5965 .ndo_del_slave = bond_release, 5966 .ndo_fix_features = bond_fix_features, 5967 .ndo_features_check = passthru_features_check, 5968 .ndo_get_xmit_slave = bond_xmit_get_slave, 5969 .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, 5970 .ndo_bpf = bond_xdp, 5971 .ndo_xdp_xmit = bond_xdp_xmit, 5972 .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, 5973 .ndo_hwtstamp_get = bond_hwtstamp_get, 5974 .ndo_hwtstamp_set = bond_hwtstamp_set, 5975 }; 5976 5977 static const struct device_type bond_type = { 5978 .name = "bond", 5979 }; 5980 5981 static void bond_destructor(struct net_device *bond_dev) 5982 { 5983 struct bonding *bond = netdev_priv(bond_dev); 5984 5985 if (bond->wq) 5986 destroy_workqueue(bond->wq); 5987 5988 free_percpu(bond->rr_tx_counter); 5989 } 5990 5991 void bond_setup(struct net_device *bond_dev) 5992 { 5993 
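	/* Runs before register_netdevice(): this is the setup callback
	 * passed to alloc_netdev_mq() in bond_create() and also used by
	 * the bonding rtnl_link_ops, so no slaves exist yet at this point.
	 */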
	struct bonding *bond = netdev_priv(bond_dev);

	spin_lock_init(&bond->mode_lock);
	bond->params = bonding_defaults;

	/* Initialize pointers */
	bond->dev = bond_dev;

	/* Initialize the device entry points */
	ether_setup(bond_dev);
	bond_dev->max_mtu = ETH_MAX_MTU;
	bond_dev->netdev_ops = &bond_netdev_ops;
	bond_dev->ethtool_ops = &bond_ethtool_ops;

	bond_dev->needs_free_netdev = true;
	bond_dev->priv_destructor = bond_destructor;

	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);

	/* Initialize the device options */
	bond_dev->flags |= IFF_MASTER;
	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);

#ifdef CONFIG_XFRM_OFFLOAD
	/* set up xfrm device ops (only supported in active-backup right now) */
	bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
	INIT_LIST_HEAD(&bond->ipsec_list);
	mutex_init(&bond->ipsec_lock);
#endif /* CONFIG_XFRM_OFFLOAD */

	/* don't acquire bond device's netif_tx_lock when transmitting */
	bond_dev->lltx = true;

	/* Don't allow bond devices to change network namespaces. */
	bond_dev->netns_local = true;

	/* By default, we declare the bond to be fully capable of VLAN
	 * hardware acceleration. Special care is taken in the various
	 * xmit functions when there are slaves that are not hw accel
	 * capable.
	 */

	bond_dev->hw_features = BOND_VLAN_FEATURES |
				NETIF_F_HW_VLAN_CTAG_RX |
				NETIF_F_HW_VLAN_CTAG_FILTER |
				NETIF_F_HW_VLAN_STAG_RX |
				NETIF_F_HW_VLAN_STAG_FILTER;

	bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
	bond_dev->features |= bond_dev->hw_features;
	bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	bond_dev->features |= NETIF_F_GSO_PARTIAL;
#ifdef CONFIG_XFRM_OFFLOAD
	bond_dev->hw_features |= BOND_XFRM_FEATURES;
	/* Only enable XFRM features if this is an active-backup config */
	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
		bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
}

/* Destroy a bonding device.
 * Must be under rtnl_lock when this function is called.
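 * (As the ndo_uninit hook, it is invoked by the core from
 * unregister_netdevice(), which already holds RTNL.)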
6057 */ 6058 static void bond_uninit(struct net_device *bond_dev) 6059 { 6060 struct bonding *bond = netdev_priv(bond_dev); 6061 struct list_head *iter; 6062 struct slave *slave; 6063 6064 bond_netpoll_cleanup(bond_dev); 6065 6066 /* Release the bonded slaves */ 6067 bond_for_each_slave(bond, slave, iter) 6068 __bond_release_one(bond_dev, slave->dev, true, true); 6069 netdev_info(bond_dev, "Released all slaves\n"); 6070 6071 #ifdef CONFIG_XFRM_OFFLOAD 6072 mutex_destroy(&bond->ipsec_lock); 6073 #endif /* CONFIG_XFRM_OFFLOAD */ 6074 6075 bond_set_slave_arr(bond, NULL, NULL); 6076 6077 list_del_rcu(&bond->bond_list); 6078 6079 bond_debug_unregister(bond); 6080 } 6081 6082 /*------------------------- Module initialization ---------------------------*/ 6083 6084 static int __init bond_check_params(struct bond_params *params) 6085 { 6086 int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; 6087 struct bond_opt_value newval; 6088 const struct bond_opt_value *valptr; 6089 int arp_all_targets_value = 0; 6090 u16 ad_actor_sys_prio = 0; 6091 u16 ad_user_port_key = 0; 6092 __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 }; 6093 int arp_ip_count; 6094 int bond_mode = BOND_MODE_ROUNDROBIN; 6095 int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; 6096 int lacp_fast = 0; 6097 int tlb_dynamic_lb; 6098 6099 /* Convert string parameters. */ 6100 if (mode) { 6101 bond_opt_initstr(&newval, mode); 6102 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); 6103 if (!valptr) { 6104 pr_err("Error: Invalid bonding mode \"%s\"\n", mode); 6105 return -EINVAL; 6106 } 6107 bond_mode = valptr->value; 6108 } 6109 6110 if (xmit_hash_policy) { 6111 if (bond_mode == BOND_MODE_ROUNDROBIN || 6112 bond_mode == BOND_MODE_ACTIVEBACKUP || 6113 bond_mode == BOND_MODE_BROADCAST) { 6114 pr_info("xmit_hash_policy param is irrelevant in mode %s\n", 6115 bond_mode_name(bond_mode)); 6116 } else { 6117 bond_opt_initstr(&newval, xmit_hash_policy); 6118 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), 6119 &newval); 6120 if (!valptr) { 6121 pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", 6122 xmit_hash_policy); 6123 return -EINVAL; 6124 } 6125 xmit_hashtype = valptr->value; 6126 } 6127 } 6128 6129 if (lacp_rate) { 6130 if (bond_mode != BOND_MODE_8023AD) { 6131 pr_info("lacp_rate param is irrelevant in mode %s\n", 6132 bond_mode_name(bond_mode)); 6133 } else { 6134 bond_opt_initstr(&newval, lacp_rate); 6135 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), 6136 &newval); 6137 if (!valptr) { 6138 pr_err("Error: Invalid lacp rate \"%s\"\n", 6139 lacp_rate); 6140 return -EINVAL; 6141 } 6142 lacp_fast = valptr->value; 6143 } 6144 } 6145 6146 if (ad_select) { 6147 bond_opt_initstr(&newval, ad_select); 6148 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), 6149 &newval); 6150 if (!valptr) { 6151 pr_err("Error: Invalid ad_select \"%s\"\n", ad_select); 6152 return -EINVAL; 6153 } 6154 params->ad_select = valptr->value; 6155 if (bond_mode != BOND_MODE_8023AD) 6156 pr_warn("ad_select param only affects 802.3ad mode\n"); 6157 } else { 6158 params->ad_select = BOND_AD_STABLE; 6159 } 6160 6161 if (max_bonds < 0) { 6162 pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 6163 max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); 6164 max_bonds = BOND_DEFAULT_MAX_BONDS; 6165 } 6166 6167 if (miimon < 0) { 6168 pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", 6169 miimon, INT_MAX); 6170 miimon = 0; 6171 } 6172 6173 if 
(updelay < 0) {
		pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			updelay, INT_MAX);
		updelay = 0;
	}

	if (downdelay < 0) {
		pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			downdelay, INT_MAX);
		downdelay = 0;
	}

	if ((use_carrier != 0) && (use_carrier != 1)) {
		pr_warn("Warning: use_carrier module parameter (%d), not a valid value (0/1), so it was set to 1\n",
			use_carrier);
		use_carrier = 1;
	}

	if (num_peer_notif < 0 || num_peer_notif > 255) {
		pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255, so it was reset to 1\n",
			num_peer_notif);
		num_peer_notif = 1;
	}

	/* reset values for 802.3ad/TLB/ALB */
	if (!bond_mode_uses_arp(bond_mode)) {
		if (!miimon) {
			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed, or duplex, which are essential for 802.3ad operation\n");
			pr_warn("Forcing miimon to 100 msec\n");
			miimon = BOND_DEFAULT_MIIMON;
		}
	}

	if (tx_queues < 1 || tx_queues > 255) {
		pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n",
			tx_queues, BOND_DEFAULT_TX_QUEUES);
		tx_queues = BOND_DEFAULT_TX_QUEUES;
	}

	if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
		pr_warn("Warning: all_slaves_active module parameter (%d), not a valid value (0/1), so it was set to 0\n",
			all_slaves_active);
		all_slaves_active = 0;
	}

	if (resend_igmp < 0 || resend_igmp > 255) {
		pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n",
			resend_igmp, BOND_DEFAULT_RESEND_IGMP);
		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
	}

	bond_opt_initval(&newval, packets_per_slave);
	if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) {
		pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u, resetting to 1\n",
			packets_per_slave, USHRT_MAX);
		packets_per_slave = 1;
	}

	if (bond_mode == BOND_MODE_ALB) {
		pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n",
			  updelay);
	}

	if (!miimon) {
		if (updelay || downdelay) {
			/* just warn the user the up/down delay will have
			 * no effect since miimon is zero...
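			 * (the delays are consumed only by the MII monitor,
			 * which never runs when miimon is 0)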
6240 */ 6241 pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", 6242 updelay, downdelay); 6243 } 6244 } else { 6245 /* don't allow arp monitoring */ 6246 if (arp_interval) { 6247 pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", 6248 miimon, arp_interval); 6249 arp_interval = 0; 6250 } 6251 6252 if ((updelay % miimon) != 0) { 6253 pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", 6254 updelay, miimon, (updelay / miimon) * miimon); 6255 } 6256 6257 updelay /= miimon; 6258 6259 if ((downdelay % miimon) != 0) { 6260 pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", 6261 downdelay, miimon, 6262 (downdelay / miimon) * miimon); 6263 } 6264 6265 downdelay /= miimon; 6266 } 6267 6268 if (arp_interval < 0) { 6269 pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n", 6270 arp_interval, INT_MAX); 6271 arp_interval = 0; 6272 } 6273 6274 for (arp_ip_count = 0, i = 0; 6275 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { 6276 __be32 ip; 6277 6278 /* not a complete check, but good enough to catch mistakes */ 6279 if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || 6280 !bond_is_ip_target_ok(ip)) { 6281 pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", 6282 arp_ip_target[i]); 6283 arp_interval = 0; 6284 } else { 6285 if (bond_get_targets_ip(arp_target, ip) == -1) 6286 arp_target[arp_ip_count++] = ip; 6287 else 6288 pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", 6289 &ip); 6290 } 6291 } 6292 6293 if (arp_interval && !arp_ip_count) { 6294 /* don't allow arping if no arp_ip_target given... 
		 */
		pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n",
			arp_interval);
		arp_interval = 0;
	}

	if (arp_validate) {
		if (!arp_interval) {
			pr_err("arp_validate requires arp_interval\n");
			return -EINVAL;
		}

		bond_opt_initstr(&newval, arp_validate);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid arp_validate \"%s\"\n",
			       arp_validate);
			return -EINVAL;
		}
		arp_validate_value = valptr->value;
	} else {
		arp_validate_value = 0;
	}

	if (arp_all_targets) {
		bond_opt_initstr(&newval, arp_all_targets);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid arp_all_targets \"%s\"\n",
			       arp_all_targets);
			arp_all_targets_value = 0;
		} else {
			arp_all_targets_value = valptr->value;
		}
	}

	if (miimon) {
		pr_info("MII link monitoring set to %d ms\n", miimon);
	} else if (arp_interval) {
		valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE,
					  arp_validate_value);
		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):",
			arp_interval, valptr->string, arp_ip_count);

		for (i = 0; i < arp_ip_count; i++)
			pr_cont(" %s", arp_ip_target[i]);

		pr_cont("\n");

	} else if (max_bonds) {
		/* miimon and arp_interval not set, we need one so things
		 * work as expected, see bonding.txt for details
		 */
		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! See bonding.txt for details\n");
	}

	if (primary && !bond_mode_uses_primary(bond_mode)) {
		/* currently, using a primary only makes sense
		 * in active backup, TLB or ALB modes
		 */
		pr_warn("Warning: %s primary device specified but has no effect in %s mode\n",
			primary, bond_mode_name(bond_mode));
		primary = NULL;
	}

	if (primary && primary_reselect) {
		bond_opt_initstr(&newval, primary_reselect);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT),
					&newval);
		if (!valptr) {
			pr_err("Error: Invalid primary_reselect \"%s\"\n",
			       primary_reselect);
			return -EINVAL;
		}
		primary_reselect_value = valptr->value;
	} else {
		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
	}

	if (fail_over_mac) {
		bond_opt_initstr(&newval, fail_over_mac);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid fail_over_mac \"%s\"\n",
			       fail_over_mac);
			return -EINVAL;
		}
		fail_over_mac_value = valptr->value;
		if (bond_mode != BOND_MODE_ACTIVEBACKUP)
			pr_warn("Warning: fail_over_mac only affects active-backup mode\n");
	} else {
		fail_over_mac_value = BOND_FOM_NONE;
	}

	bond_opt_initstr(&newval, "default");
	valptr = bond_opt_parse(
			bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
			&newval);
	if (!valptr) {
		pr_err("Error: No ad_actor_sys_prio default value\n");
		return -EINVAL;
	}
	ad_actor_sys_prio = valptr->value;

	valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
				&newval);
	if (!valptr) {
		pr_err("Error: No ad_user_port_key default value\n");
		return -EINVAL;
	}
	ad_user_port_key = valptr->value;

	bond_opt_initstr(&newval, "default");
	valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval);
	if (!valptr) {
		pr_err("Error: No tlb_dynamic_lb default value\n");
		return -EINVAL;
	}
	tlb_dynamic_lb = valptr->value;

	if (lp_interval == 0) {
		pr_warn("Warning: lp_interval must be between 1 and %d, so it was reset to %d\n",
			INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
		lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;
	}

	/* fill params struct with the proper values */
	params->mode = bond_mode;
	params->xmit_policy = xmit_hashtype;
	params->miimon = miimon;
	params->num_peer_notif = num_peer_notif;
	params->arp_interval = arp_interval;
	params->arp_validate = arp_validate_value;
	params->arp_all_targets = arp_all_targets_value;
	params->missed_max = 2;
	params->updelay = updelay;
	params->downdelay = downdelay;
	params->peer_notif_delay = 0;
	params->use_carrier = use_carrier;
	params->lacp_active = 1;
	params->lacp_fast = lacp_fast;
	params->primary[0] = 0;
	params->primary_reselect = primary_reselect_value;
	params->fail_over_mac = fail_over_mac_value;
	params->tx_queues = tx_queues;
	params->all_slaves_active = all_slaves_active;
	params->resend_igmp = resend_igmp;
	params->min_links = min_links;
	params->lp_interval = lp_interval;
	params->packets_per_slave = packets_per_slave;
	params->tlb_dynamic_lb = tlb_dynamic_lb;
	params->ad_actor_sys_prio = ad_actor_sys_prio;
	eth_zero_addr(params->ad_actor_system);
	params->ad_user_port_key = ad_user_port_key;
	params->coupled_control = 1;
	if (packets_per_slave > 0) {
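		/* precompute the reciprocal so the balance-rr xmit path can
		 * use reciprocal_divide() instead of a division per packet
		 */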
params->reciprocal_packets_per_slave = 6454 reciprocal_value(packets_per_slave); 6455 } else { 6456 /* reciprocal_packets_per_slave is unused if 6457 * packets_per_slave is 0 or 1, just initialize it 6458 */ 6459 params->reciprocal_packets_per_slave = 6460 (struct reciprocal_value) { 0 }; 6461 } 6462 6463 if (primary) 6464 strscpy_pad(params->primary, primary, sizeof(params->primary)); 6465 6466 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 6467 #if IS_ENABLED(CONFIG_IPV6) 6468 memset(params->ns_targets, 0, sizeof(struct in6_addr) * BOND_MAX_NS_TARGETS); 6469 #endif 6470 6471 return 0; 6472 } 6473 6474 /* Called from registration process */ 6475 static int bond_init(struct net_device *bond_dev) 6476 { 6477 struct bonding *bond = netdev_priv(bond_dev); 6478 struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); 6479 6480 netdev_dbg(bond_dev, "Begin bond_init\n"); 6481 6482 bond->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, 6483 bond_dev->name); 6484 if (!bond->wq) 6485 return -ENOMEM; 6486 6487 bond->notifier_ctx = false; 6488 6489 spin_lock_init(&bond->stats_lock); 6490 netdev_lockdep_set_classes(bond_dev); 6491 6492 list_add_tail_rcu(&bond->bond_list, &bn->dev_list); 6493 6494 bond_prepare_sysfs_group(bond); 6495 6496 bond_debug_register(bond); 6497 6498 /* Ensure valid dev_addr */ 6499 if (is_zero_ether_addr(bond_dev->dev_addr) && 6500 bond_dev->addr_assign_type == NET_ADDR_PERM) 6501 eth_hw_addr_random(bond_dev); 6502 6503 return 0; 6504 } 6505 6506 unsigned int bond_get_num_tx_queues(void) 6507 { 6508 return tx_queues; 6509 } 6510 6511 /* Create a new bond based on the specified name and bonding parameters. 6512 * If name is NULL, obtain a suitable "bond%d" name for us. 6513 * Caller must NOT hold rtnl_lock; we need to release it here before we 6514 * set up our sysfs entries. 6515 */ 6516 int bond_create(struct net *net, const char *name) 6517 { 6518 struct net_device *bond_dev; 6519 struct bonding *bond; 6520 int res = -ENOMEM; 6521 6522 rtnl_lock(); 6523 6524 bond_dev = alloc_netdev_mq(sizeof(struct bonding), 6525 name ? 
name : "bond%d", NET_NAME_UNKNOWN, 6526 bond_setup, tx_queues); 6527 if (!bond_dev) 6528 goto out; 6529 6530 bond = netdev_priv(bond_dev); 6531 dev_net_set(bond_dev, net); 6532 bond_dev->rtnl_link_ops = &bond_link_ops; 6533 6534 res = register_netdevice(bond_dev); 6535 if (res < 0) { 6536 free_netdev(bond_dev); 6537 goto out; 6538 } 6539 6540 netif_carrier_off(bond_dev); 6541 6542 bond_work_init_all(bond); 6543 6544 out: 6545 rtnl_unlock(); 6546 return res; 6547 } 6548 6549 static int __net_init bond_net_init(struct net *net) 6550 { 6551 struct bond_net *bn = net_generic(net, bond_net_id); 6552 6553 bn->net = net; 6554 INIT_LIST_HEAD(&bn->dev_list); 6555 6556 bond_create_proc_dir(bn); 6557 bond_create_sysfs(bn); 6558 6559 return 0; 6560 } 6561 6562 /* According to commit 69b0216ac255 ("bonding: fix bonding_masters 6563 * race condition in bond unloading") we need to remove sysfs files 6564 * before we remove our devices (done later in bond_net_exit_batch_rtnl()) 6565 */ 6566 static void __net_exit bond_net_pre_exit(struct net *net) 6567 { 6568 struct bond_net *bn = net_generic(net, bond_net_id); 6569 6570 bond_destroy_sysfs(bn); 6571 } 6572 6573 static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list, 6574 struct list_head *dev_kill_list) 6575 { 6576 struct bond_net *bn; 6577 struct net *net; 6578 6579 /* Kill off any bonds created after unregistering bond rtnl ops */ 6580 list_for_each_entry(net, net_list, exit_list) { 6581 struct bonding *bond, *tmp_bond; 6582 6583 bn = net_generic(net, bond_net_id); 6584 list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) 6585 unregister_netdevice_queue(bond->dev, dev_kill_list); 6586 } 6587 } 6588 6589 /* According to commit 23fa5c2caae0 ("bonding: destroy proc directory 6590 * only after all bonds are gone") bond_destroy_proc_dir() is called 6591 * after bond_net_exit_batch_rtnl() has completed. 
6592 */ 6593 static void __net_exit bond_net_exit_batch(struct list_head *net_list) 6594 { 6595 struct bond_net *bn; 6596 struct net *net; 6597 6598 list_for_each_entry(net, net_list, exit_list) { 6599 bn = net_generic(net, bond_net_id); 6600 bond_destroy_proc_dir(bn); 6601 } 6602 } 6603 6604 static struct pernet_operations bond_net_ops = { 6605 .init = bond_net_init, 6606 .pre_exit = bond_net_pre_exit, 6607 .exit_batch_rtnl = bond_net_exit_batch_rtnl, 6608 .exit_batch = bond_net_exit_batch, 6609 .id = &bond_net_id, 6610 .size = sizeof(struct bond_net), 6611 }; 6612 6613 static int __init bonding_init(void) 6614 { 6615 int i; 6616 int res; 6617 6618 res = bond_check_params(&bonding_defaults); 6619 if (res) 6620 goto out; 6621 6622 bond_create_debugfs(); 6623 6624 res = register_pernet_subsys(&bond_net_ops); 6625 if (res) 6626 goto err_net_ops; 6627 6628 res = bond_netlink_init(); 6629 if (res) 6630 goto err_link; 6631 6632 for (i = 0; i < max_bonds; i++) { 6633 res = bond_create(&init_net, NULL); 6634 if (res) 6635 goto err; 6636 } 6637 6638 skb_flow_dissector_init(&flow_keys_bonding, 6639 flow_keys_bonding_keys, 6640 ARRAY_SIZE(flow_keys_bonding_keys)); 6641 6642 register_netdevice_notifier(&bond_netdev_notifier); 6643 out: 6644 return res; 6645 err: 6646 bond_netlink_fini(); 6647 err_link: 6648 unregister_pernet_subsys(&bond_net_ops); 6649 err_net_ops: 6650 bond_destroy_debugfs(); 6651 goto out; 6652 6653 } 6654 6655 static void __exit bonding_exit(void) 6656 { 6657 unregister_netdevice_notifier(&bond_netdev_notifier); 6658 6659 bond_netlink_fini(); 6660 unregister_pernet_subsys(&bond_net_ops); 6661 6662 bond_destroy_debugfs(); 6663 6664 #ifdef CONFIG_NET_POLL_CONTROLLER 6665 /* Make sure we don't have an imbalance on our netpoll blocking */ 6666 WARN_ON(atomic_read(&netpoll_block_tx)); 6667 #endif 6668 } 6669 6670 module_init(bonding_init); 6671 module_exit(bonding_exit); 6672 MODULE_LICENSE("GPL"); 6673 MODULE_DESCRIPTION(DRV_DESCRIPTION); 6674 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 6675