1 /* 2 * net/switchdev/switchdev.c - Switch device API 3 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> 4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/types.h> 14 #include <linux/init.h> 15 #include <linux/mutex.h> 16 #include <linux/notifier.h> 17 #include <linux/netdevice.h> 18 #include <linux/if_bridge.h> 19 #include <linux/if_vlan.h> 20 #include <net/ip_fib.h> 21 #include <net/switchdev.h> 22 23 /** 24 * switchdev_port_attr_get - Get port attribute 25 * 26 * @dev: port device 27 * @attr: attribute to get 28 */ 29 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) 30 { 31 const struct switchdev_ops *ops = dev->switchdev_ops; 32 struct net_device *lower_dev; 33 struct list_head *iter; 34 struct switchdev_attr first = { 35 .id = SWITCHDEV_ATTR_UNDEFINED 36 }; 37 int err = -EOPNOTSUPP; 38 39 if (ops && ops->switchdev_port_attr_get) 40 return ops->switchdev_port_attr_get(dev, attr); 41 42 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 43 return err; 44 45 /* Switch device port(s) may be stacked under 46 * bond/team/vlan dev, so recurse down to get attr on 47 * each port. Return -ENODATA if attr values don't 48 * compare across ports. 49 */ 50 51 netdev_for_each_lower_dev(dev, lower_dev, iter) { 52 err = switchdev_port_attr_get(lower_dev, attr); 53 if (err) 54 break; 55 if (first.id == SWITCHDEV_ATTR_UNDEFINED) 56 first = *attr; 57 else if (memcmp(&first, attr, sizeof(*attr))) 58 return -ENODATA; 59 } 60 61 return err; 62 } 63 EXPORT_SYMBOL_GPL(switchdev_port_attr_get); 64 65 static int __switchdev_port_attr_set(struct net_device *dev, 66 struct switchdev_attr *attr) 67 { 68 const struct switchdev_ops *ops = dev->switchdev_ops; 69 struct net_device *lower_dev; 70 struct list_head *iter; 71 int err = -EOPNOTSUPP; 72 73 if (ops && ops->switchdev_port_attr_set) 74 return ops->switchdev_port_attr_set(dev, attr); 75 76 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 77 return err; 78 79 /* Switch device port(s) may be stacked under 80 * bond/team/vlan dev, so recurse down to set attr on 81 * each port. 82 */ 83 84 netdev_for_each_lower_dev(dev, lower_dev, iter) { 85 err = __switchdev_port_attr_set(lower_dev, attr); 86 if (err) 87 break; 88 } 89 90 return err; 91 } 92 93 struct switchdev_attr_set_work { 94 struct work_struct work; 95 struct net_device *dev; 96 struct switchdev_attr attr; 97 }; 98 99 static void switchdev_port_attr_set_work(struct work_struct *work) 100 { 101 struct switchdev_attr_set_work *asw = 102 container_of(work, struct switchdev_attr_set_work, work); 103 int err; 104 105 rtnl_lock(); 106 err = switchdev_port_attr_set(asw->dev, &asw->attr); 107 if (err && err != -EOPNOTSUPP) 108 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n", 109 err, asw->attr.id); 110 rtnl_unlock(); 111 112 dev_put(asw->dev); 113 kfree(work); 114 } 115 116 static int switchdev_port_attr_set_defer(struct net_device *dev, 117 struct switchdev_attr *attr) 118 { 119 struct switchdev_attr_set_work *asw; 120 121 asw = kmalloc(sizeof(*asw), GFP_ATOMIC); 122 if (!asw) 123 return -ENOMEM; 124 125 INIT_WORK(&asw->work, switchdev_port_attr_set_work); 126 127 dev_hold(dev); 128 asw->dev = dev; 129 memcpy(&asw->attr, attr, sizeof(asw->attr)); 130 131 schedule_work(&asw->work); 132 133 return 0; 134 } 135 136 /** 137 * switchdev_port_attr_set - Set port attribute 138 * 139 * @dev: port device 140 * @attr: attribute to set 141 * 142 * Use a 2-phase prepare-commit transaction model to ensure 143 * system is not left in a partially updated state due to 144 * failure from driver/device. 145 */ 146 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) 147 { 148 int err; 149 150 if (!rtnl_is_locked()) { 151 /* Running prepare-commit transaction across stacked 152 * devices requires nothing moves, so if rtnl_lock is 153 * not held, schedule a worker thread to hold rtnl_lock 154 * while setting attr. 155 */ 156 157 return switchdev_port_attr_set_defer(dev, attr); 158 } 159 160 /* Phase I: prepare for attr set. Driver/device should fail 161 * here if there are going to be issues in the commit phase, 162 * such as lack of resources or support. The driver/device 163 * should reserve resources needed for the commit phase here, 164 * but should not commit the attr. 165 */ 166 167 attr->trans = SWITCHDEV_TRANS_PREPARE; 168 err = __switchdev_port_attr_set(dev, attr); 169 if (err) { 170 /* Prepare phase failed: abort the transaction. Any 171 * resources reserved in the prepare phase are 172 * released. 173 */ 174 175 if (err != -EOPNOTSUPP) { 176 attr->trans = SWITCHDEV_TRANS_ABORT; 177 __switchdev_port_attr_set(dev, attr); 178 } 179 180 return err; 181 } 182 183 /* Phase II: commit attr set. This cannot fail as a fault 184 * of driver/device. If it does, it's a bug in the driver/device 185 * because the driver said everythings was OK in phase I. 186 */ 187 188 attr->trans = SWITCHDEV_TRANS_COMMIT; 189 err = __switchdev_port_attr_set(dev, attr); 190 WARN(err, "%s: Commit of attribute (id=%d) failed.\n", 191 dev->name, attr->id); 192 193 return err; 194 } 195 EXPORT_SYMBOL_GPL(switchdev_port_attr_set); 196 197 static int __switchdev_port_obj_add(struct net_device *dev, 198 struct switchdev_obj *obj) 199 { 200 const struct switchdev_ops *ops = dev->switchdev_ops; 201 struct net_device *lower_dev; 202 struct list_head *iter; 203 int err = -EOPNOTSUPP; 204 205 if (ops && ops->switchdev_port_obj_add) 206 return ops->switchdev_port_obj_add(dev, obj); 207 208 /* Switch device port(s) may be stacked under 209 * bond/team/vlan dev, so recurse down to add object on 210 * each port. 211 */ 212 213 netdev_for_each_lower_dev(dev, lower_dev, iter) { 214 err = __switchdev_port_obj_add(lower_dev, obj); 215 if (err) 216 break; 217 } 218 219 return err; 220 } 221 222 /** 223 * switchdev_port_obj_add - Add port object 224 * 225 * @dev: port device 226 * @obj: object to add 227 * 228 * Use a 2-phase prepare-commit transaction model to ensure 229 * system is not left in a partially updated state due to 230 * failure from driver/device. 231 * 232 * rtnl_lock must be held. 233 */ 234 int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) 235 { 236 int err; 237 238 ASSERT_RTNL(); 239 240 /* Phase I: prepare for obj add. Driver/device should fail 241 * here if there are going to be issues in the commit phase, 242 * such as lack of resources or support. The driver/device 243 * should reserve resources needed for the commit phase here, 244 * but should not commit the obj. 245 */ 246 247 obj->trans = SWITCHDEV_TRANS_PREPARE; 248 err = __switchdev_port_obj_add(dev, obj); 249 if (err) { 250 /* Prepare phase failed: abort the transaction. Any 251 * resources reserved in the prepare phase are 252 * released. 253 */ 254 255 if (err != -EOPNOTSUPP) { 256 obj->trans = SWITCHDEV_TRANS_ABORT; 257 __switchdev_port_obj_add(dev, obj); 258 } 259 260 return err; 261 } 262 263 /* Phase II: commit obj add. This cannot fail as a fault 264 * of driver/device. If it does, it's a bug in the driver/device 265 * because the driver said everythings was OK in phase I. 266 */ 267 268 obj->trans = SWITCHDEV_TRANS_COMMIT; 269 err = __switchdev_port_obj_add(dev, obj); 270 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); 271 272 return err; 273 } 274 EXPORT_SYMBOL_GPL(switchdev_port_obj_add); 275 276 /** 277 * switchdev_port_obj_del - Delete port object 278 * 279 * @dev: port device 280 * @obj: object to delete 281 */ 282 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) 283 { 284 const struct switchdev_ops *ops = dev->switchdev_ops; 285 struct net_device *lower_dev; 286 struct list_head *iter; 287 int err = -EOPNOTSUPP; 288 289 if (ops && ops->switchdev_port_obj_del) 290 return ops->switchdev_port_obj_del(dev, obj); 291 292 /* Switch device port(s) may be stacked under 293 * bond/team/vlan dev, so recurse down to delete object on 294 * each port. 295 */ 296 297 netdev_for_each_lower_dev(dev, lower_dev, iter) { 298 err = switchdev_port_obj_del(lower_dev, obj); 299 if (err) 300 break; 301 } 302 303 return err; 304 } 305 EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 306 307 /** 308 * switchdev_port_obj_dump - Dump port objects 309 * 310 * @dev: port device 311 * @obj: object to dump 312 */ 313 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) 314 { 315 const struct switchdev_ops *ops = dev->switchdev_ops; 316 struct net_device *lower_dev; 317 struct list_head *iter; 318 int err = -EOPNOTSUPP; 319 320 if (ops && ops->switchdev_port_obj_dump) 321 return ops->switchdev_port_obj_dump(dev, obj); 322 323 /* Switch device port(s) may be stacked under 324 * bond/team/vlan dev, so recurse down to dump objects on 325 * first port at bottom of stack. 326 */ 327 328 netdev_for_each_lower_dev(dev, lower_dev, iter) { 329 err = switchdev_port_obj_dump(lower_dev, obj); 330 break; 331 } 332 333 return err; 334 } 335 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); 336 337 static DEFINE_MUTEX(switchdev_mutex); 338 static RAW_NOTIFIER_HEAD(switchdev_notif_chain); 339 340 /** 341 * register_switchdev_notifier - Register notifier 342 * @nb: notifier_block 343 * 344 * Register switch device notifier. This should be used by code 345 * which needs to monitor events happening in particular device. 346 * Return values are same as for atomic_notifier_chain_register(). 347 */ 348 int register_switchdev_notifier(struct notifier_block *nb) 349 { 350 int err; 351 352 mutex_lock(&switchdev_mutex); 353 err = raw_notifier_chain_register(&switchdev_notif_chain, nb); 354 mutex_unlock(&switchdev_mutex); 355 return err; 356 } 357 EXPORT_SYMBOL_GPL(register_switchdev_notifier); 358 359 /** 360 * unregister_switchdev_notifier - Unregister notifier 361 * @nb: notifier_block 362 * 363 * Unregister switch device notifier. 364 * Return values are same as for atomic_notifier_chain_unregister(). 365 */ 366 int unregister_switchdev_notifier(struct notifier_block *nb) 367 { 368 int err; 369 370 mutex_lock(&switchdev_mutex); 371 err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); 372 mutex_unlock(&switchdev_mutex); 373 return err; 374 } 375 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); 376 377 /** 378 * call_switchdev_notifiers - Call notifiers 379 * @val: value passed unmodified to notifier function 380 * @dev: port device 381 * @info: notifier information data 382 * 383 * Call all network notifier blocks. This should be called by driver 384 * when it needs to propagate hardware event. 385 * Return values are same as for atomic_notifier_call_chain(). 386 */ 387 int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 388 struct switchdev_notifier_info *info) 389 { 390 int err; 391 392 info->dev = dev; 393 mutex_lock(&switchdev_mutex); 394 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); 395 mutex_unlock(&switchdev_mutex); 396 return err; 397 } 398 EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 399 400 struct switchdev_vlan_dump { 401 struct switchdev_obj obj; 402 struct sk_buff *skb; 403 u32 filter_mask; 404 u16 flags; 405 u16 begin; 406 u16 end; 407 }; 408 409 static int switchdev_port_vlan_dump_put(struct net_device *dev, 410 struct switchdev_vlan_dump *dump) 411 { 412 struct bridge_vlan_info vinfo; 413 414 vinfo.flags = dump->flags; 415 416 if (dump->begin == 0 && dump->end == 0) { 417 return 0; 418 } else if (dump->begin == dump->end) { 419 vinfo.vid = dump->begin; 420 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 421 sizeof(vinfo), &vinfo)) 422 return -EMSGSIZE; 423 } else { 424 vinfo.vid = dump->begin; 425 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; 426 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 427 sizeof(vinfo), &vinfo)) 428 return -EMSGSIZE; 429 vinfo.vid = dump->end; 430 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; 431 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; 432 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 433 sizeof(vinfo), &vinfo)) 434 return -EMSGSIZE; 435 } 436 437 return 0; 438 } 439 440 static int switchdev_port_vlan_dump_cb(struct net_device *dev, 441 struct switchdev_obj *obj) 442 { 443 struct switchdev_vlan_dump *dump = 444 container_of(obj, struct switchdev_vlan_dump, obj); 445 struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan; 446 int err = 0; 447 448 if (vlan->vid_begin > vlan->vid_end) 449 return -EINVAL; 450 451 if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { 452 dump->flags = vlan->flags; 453 for (dump->begin = dump->end = vlan->vid_begin; 454 dump->begin <= vlan->vid_end; 455 dump->begin++, dump->end++) { 456 err = switchdev_port_vlan_dump_put(dev, dump); 457 if (err) 458 return err; 459 } 460 } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { 461 if (dump->begin > vlan->vid_begin && 462 dump->begin >= vlan->vid_end) { 463 if ((dump->begin - 1) == vlan->vid_end && 464 dump->flags == vlan->flags) { 465 /* prepend */ 466 dump->begin = vlan->vid_begin; 467 } else { 468 err = switchdev_port_vlan_dump_put(dev, dump); 469 dump->flags = vlan->flags; 470 dump->begin = vlan->vid_begin; 471 dump->end = vlan->vid_end; 472 } 473 } else if (dump->end <= vlan->vid_begin && 474 dump->end < vlan->vid_end) { 475 if ((dump->end + 1) == vlan->vid_begin && 476 dump->flags == vlan->flags) { 477 /* append */ 478 dump->end = vlan->vid_end; 479 } else { 480 err = switchdev_port_vlan_dump_put(dev, dump); 481 dump->flags = vlan->flags; 482 dump->begin = vlan->vid_begin; 483 dump->end = vlan->vid_end; 484 } 485 } else { 486 err = -EINVAL; 487 } 488 } 489 490 return err; 491 } 492 493 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, 494 u32 filter_mask) 495 { 496 struct switchdev_vlan_dump dump = { 497 .obj = { 498 .id = SWITCHDEV_OBJ_PORT_VLAN, 499 .cb = switchdev_port_vlan_dump_cb, 500 }, 501 .skb = skb, 502 .filter_mask = filter_mask, 503 }; 504 int err = 0; 505 506 if ((filter_mask & RTEXT_FILTER_BRVLAN) || 507 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { 508 err = switchdev_port_obj_dump(dev, &dump.obj); 509 if (err) 510 goto err_out; 511 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) 512 /* last one */ 513 err = switchdev_port_vlan_dump_put(dev, &dump); 514 } 515 516 err_out: 517 return err == -EOPNOTSUPP ? 0 : err; 518 } 519 520 /** 521 * switchdev_port_bridge_getlink - Get bridge port attributes 522 * 523 * @dev: port device 524 * 525 * Called for SELF on rtnl_bridge_getlink to get bridge port 526 * attributes. 527 */ 528 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 529 struct net_device *dev, u32 filter_mask, 530 int nlflags) 531 { 532 struct switchdev_attr attr = { 533 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, 534 }; 535 u16 mode = BRIDGE_MODE_UNDEF; 536 u32 mask = BR_LEARNING | BR_LEARNING_SYNC; 537 int err; 538 539 err = switchdev_port_attr_get(dev, &attr); 540 if (err && err != -EOPNOTSUPP) 541 return err; 542 543 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 544 attr.u.brport_flags, mask, nlflags, 545 filter_mask, switchdev_port_vlan_fill); 546 } 547 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); 548 549 static int switchdev_port_br_setflag(struct net_device *dev, 550 struct nlattr *nlattr, 551 unsigned long brport_flag) 552 { 553 struct switchdev_attr attr = { 554 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, 555 }; 556 u8 flag = nla_get_u8(nlattr); 557 int err; 558 559 err = switchdev_port_attr_get(dev, &attr); 560 if (err) 561 return err; 562 563 if (flag) 564 attr.u.brport_flags |= brport_flag; 565 else 566 attr.u.brport_flags &= ~brport_flag; 567 568 return switchdev_port_attr_set(dev, &attr); 569 } 570 571 static const struct nla_policy 572 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { 573 [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, 574 [IFLA_BRPORT_COST] = { .type = NLA_U32 }, 575 [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, 576 [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, 577 [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, 578 [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, 579 [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, 580 [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, 581 [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, 582 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, 583 }; 584 585 static int switchdev_port_br_setlink_protinfo(struct net_device *dev, 586 struct nlattr *protinfo) 587 { 588 struct nlattr *attr; 589 int rem; 590 int err; 591 592 err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, 593 switchdev_port_bridge_policy); 594 if (err) 595 return err; 596 597 nla_for_each_nested(attr, protinfo, rem) { 598 switch (nla_type(attr)) { 599 case IFLA_BRPORT_LEARNING: 600 err = switchdev_port_br_setflag(dev, attr, 601 BR_LEARNING); 602 break; 603 case IFLA_BRPORT_LEARNING_SYNC: 604 err = switchdev_port_br_setflag(dev, attr, 605 BR_LEARNING_SYNC); 606 break; 607 default: 608 err = -EOPNOTSUPP; 609 break; 610 } 611 if (err) 612 return err; 613 } 614 615 return 0; 616 } 617 618 static int switchdev_port_br_afspec(struct net_device *dev, 619 struct nlattr *afspec, 620 int (*f)(struct net_device *dev, 621 struct switchdev_obj *obj)) 622 { 623 struct nlattr *attr; 624 struct bridge_vlan_info *vinfo; 625 struct switchdev_obj obj = { 626 .id = SWITCHDEV_OBJ_PORT_VLAN, 627 }; 628 struct switchdev_obj_vlan *vlan = &obj.u.vlan; 629 int rem; 630 int err; 631 632 nla_for_each_nested(attr, afspec, rem) { 633 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) 634 continue; 635 if (nla_len(attr) != sizeof(struct bridge_vlan_info)) 636 return -EINVAL; 637 vinfo = nla_data(attr); 638 if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) 639 return -EINVAL; 640 vlan->flags = vinfo->flags; 641 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { 642 if (vlan->vid_begin) 643 return -EINVAL; 644 vlan->vid_begin = vinfo->vid; 645 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { 646 if (!vlan->vid_begin) 647 return -EINVAL; 648 vlan->vid_end = vinfo->vid; 649 if (vlan->vid_end <= vlan->vid_begin) 650 return -EINVAL; 651 err = f(dev, &obj); 652 if (err) 653 return err; 654 memset(vlan, 0, sizeof(*vlan)); 655 } else { 656 if (vlan->vid_begin) 657 return -EINVAL; 658 vlan->vid_begin = vinfo->vid; 659 vlan->vid_end = vinfo->vid; 660 err = f(dev, &obj); 661 if (err) 662 return err; 663 memset(vlan, 0, sizeof(*vlan)); 664 } 665 } 666 667 return 0; 668 } 669 670 /** 671 * switchdev_port_bridge_setlink - Set bridge port attributes 672 * 673 * @dev: port device 674 * @nlh: netlink header 675 * @flags: netlink flags 676 * 677 * Called for SELF on rtnl_bridge_setlink to set bridge port 678 * attributes. 679 */ 680 int switchdev_port_bridge_setlink(struct net_device *dev, 681 struct nlmsghdr *nlh, u16 flags) 682 { 683 struct nlattr *protinfo; 684 struct nlattr *afspec; 685 int err = 0; 686 687 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 688 IFLA_PROTINFO); 689 if (protinfo) { 690 err = switchdev_port_br_setlink_protinfo(dev, protinfo); 691 if (err) 692 return err; 693 } 694 695 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 696 IFLA_AF_SPEC); 697 if (afspec) 698 err = switchdev_port_br_afspec(dev, afspec, 699 switchdev_port_obj_add); 700 701 return err; 702 } 703 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); 704 705 /** 706 * switchdev_port_bridge_dellink - Set bridge port attributes 707 * 708 * @dev: port device 709 * @nlh: netlink header 710 * @flags: netlink flags 711 * 712 * Called for SELF on rtnl_bridge_dellink to set bridge port 713 * attributes. 714 */ 715 int switchdev_port_bridge_dellink(struct net_device *dev, 716 struct nlmsghdr *nlh, u16 flags) 717 { 718 struct nlattr *afspec; 719 720 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 721 IFLA_AF_SPEC); 722 if (afspec) 723 return switchdev_port_br_afspec(dev, afspec, 724 switchdev_port_obj_del); 725 726 return 0; 727 } 728 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); 729 730 /** 731 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port 732 * 733 * @ndmsg: netlink hdr 734 * @nlattr: netlink attributes 735 * @dev: port device 736 * @addr: MAC address to add 737 * @vid: VLAN to add 738 * 739 * Add FDB entry to switch device. 740 */ 741 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 742 struct net_device *dev, const unsigned char *addr, 743 u16 vid, u16 nlm_flags) 744 { 745 struct switchdev_obj obj = { 746 .id = SWITCHDEV_OBJ_PORT_FDB, 747 .u.fdb = { 748 .addr = addr, 749 .vid = vid, 750 }, 751 }; 752 753 return switchdev_port_obj_add(dev, &obj); 754 } 755 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); 756 757 /** 758 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port 759 * 760 * @ndmsg: netlink hdr 761 * @nlattr: netlink attributes 762 * @dev: port device 763 * @addr: MAC address to delete 764 * @vid: VLAN to delete 765 * 766 * Delete FDB entry from switch device. 767 */ 768 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], 769 struct net_device *dev, const unsigned char *addr, 770 u16 vid) 771 { 772 struct switchdev_obj obj = { 773 .id = SWITCHDEV_OBJ_PORT_FDB, 774 .u.fdb = { 775 .addr = addr, 776 .vid = vid, 777 }, 778 }; 779 780 return switchdev_port_obj_del(dev, &obj); 781 } 782 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); 783 784 struct switchdev_fdb_dump { 785 struct switchdev_obj obj; 786 struct sk_buff *skb; 787 struct netlink_callback *cb; 788 int idx; 789 }; 790 791 static int switchdev_port_fdb_dump_cb(struct net_device *dev, 792 struct switchdev_obj *obj) 793 { 794 struct switchdev_fdb_dump *dump = 795 container_of(obj, struct switchdev_fdb_dump, obj); 796 u32 portid = NETLINK_CB(dump->cb->skb).portid; 797 u32 seq = dump->cb->nlh->nlmsg_seq; 798 struct nlmsghdr *nlh; 799 struct ndmsg *ndm; 800 801 if (dump->idx < dump->cb->args[0]) 802 goto skip; 803 804 nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, 805 sizeof(*ndm), NLM_F_MULTI); 806 if (!nlh) 807 return -EMSGSIZE; 808 809 ndm = nlmsg_data(nlh); 810 ndm->ndm_family = AF_BRIDGE; 811 ndm->ndm_pad1 = 0; 812 ndm->ndm_pad2 = 0; 813 ndm->ndm_flags = NTF_SELF; 814 ndm->ndm_type = 0; 815 ndm->ndm_ifindex = dev->ifindex; 816 ndm->ndm_state = obj->u.fdb.ndm_state; 817 818 if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) 819 goto nla_put_failure; 820 821 if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) 822 goto nla_put_failure; 823 824 nlmsg_end(dump->skb, nlh); 825 826 skip: 827 dump->idx++; 828 return 0; 829 830 nla_put_failure: 831 nlmsg_cancel(dump->skb, nlh); 832 return -EMSGSIZE; 833 } 834 835 /** 836 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries 837 * 838 * @skb: netlink skb 839 * @cb: netlink callback 840 * @dev: port device 841 * @filter_dev: filter device 842 * @idx: 843 * 844 * Delete FDB entry from switch device. 845 */ 846 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, 847 struct net_device *dev, 848 struct net_device *filter_dev, int idx) 849 { 850 struct switchdev_fdb_dump dump = { 851 .obj = { 852 .id = SWITCHDEV_OBJ_PORT_FDB, 853 .cb = switchdev_port_fdb_dump_cb, 854 }, 855 .skb = skb, 856 .cb = cb, 857 .idx = idx, 858 }; 859 860 switchdev_port_obj_dump(dev, &dump.obj); 861 return dump.idx; 862 } 863 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); 864 865 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) 866 { 867 const struct switchdev_ops *ops = dev->switchdev_ops; 868 struct net_device *lower_dev; 869 struct net_device *port_dev; 870 struct list_head *iter; 871 872 /* Recusively search down until we find a sw port dev. 873 * (A sw port dev supports switchdev_port_attr_get). 874 */ 875 876 if (ops && ops->switchdev_port_attr_get) 877 return dev; 878 879 netdev_for_each_lower_dev(dev, lower_dev, iter) { 880 port_dev = switchdev_get_lowest_dev(lower_dev); 881 if (port_dev) 882 return port_dev; 883 } 884 885 return NULL; 886 } 887 888 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) 889 { 890 struct switchdev_attr attr = { 891 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 892 }; 893 struct switchdev_attr prev_attr; 894 struct net_device *dev = NULL; 895 int nhsel; 896 897 /* For this route, all nexthop devs must be on the same switch. */ 898 899 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 900 const struct fib_nh *nh = &fi->fib_nh[nhsel]; 901 902 if (!nh->nh_dev) 903 return NULL; 904 905 dev = switchdev_get_lowest_dev(nh->nh_dev); 906 if (!dev) 907 return NULL; 908 909 if (switchdev_port_attr_get(dev, &attr)) 910 return NULL; 911 912 if (nhsel > 0 && 913 !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) 914 return NULL; 915 916 prev_attr = attr; 917 } 918 919 return dev; 920 } 921 922 /** 923 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry 924 * 925 * @dst: route's IPv4 destination address 926 * @dst_len: destination address length (prefix length) 927 * @fi: route FIB info structure 928 * @tos: route TOS 929 * @type: route type 930 * @nlflags: netlink flags passed in (NLM_F_*) 931 * @tb_id: route table ID 932 * 933 * Add/modify switch IPv4 route entry. 934 */ 935 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, 936 u8 tos, u8 type, u32 nlflags, u32 tb_id) 937 { 938 struct switchdev_obj fib_obj = { 939 .id = SWITCHDEV_OBJ_IPV4_FIB, 940 .u.ipv4_fib = { 941 .dst = dst, 942 .dst_len = dst_len, 943 .fi = fi, 944 .tos = tos, 945 .type = type, 946 .nlflags = nlflags, 947 .tb_id = tb_id, 948 }, 949 }; 950 struct net_device *dev; 951 int err = 0; 952 953 /* Don't offload route if using custom ip rules or if 954 * IPv4 FIB offloading has been disabled completely. 955 */ 956 957 #ifdef CONFIG_IP_MULTIPLE_TABLES 958 if (fi->fib_net->ipv4.fib_has_custom_rules) 959 return 0; 960 #endif 961 962 if (fi->fib_net->ipv4.fib_offload_disabled) 963 return 0; 964 965 dev = switchdev_get_dev_by_nhs(fi); 966 if (!dev) 967 return 0; 968 969 err = switchdev_port_obj_add(dev, &fib_obj); 970 if (!err) 971 fi->fib_flags |= RTNH_F_OFFLOAD; 972 973 return err == -EOPNOTSUPP ? 0 : err; 974 } 975 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); 976 977 /** 978 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch 979 * 980 * @dst: route's IPv4 destination address 981 * @dst_len: destination address length (prefix length) 982 * @fi: route FIB info structure 983 * @tos: route TOS 984 * @type: route type 985 * @tb_id: route table ID 986 * 987 * Delete IPv4 route entry from switch device. 988 */ 989 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, 990 u8 tos, u8 type, u32 tb_id) 991 { 992 struct switchdev_obj fib_obj = { 993 .id = SWITCHDEV_OBJ_IPV4_FIB, 994 .u.ipv4_fib = { 995 .dst = dst, 996 .dst_len = dst_len, 997 .fi = fi, 998 .tos = tos, 999 .type = type, 1000 .nlflags = 0, 1001 .tb_id = tb_id, 1002 }, 1003 }; 1004 struct net_device *dev; 1005 int err = 0; 1006 1007 if (!(fi->fib_flags & RTNH_F_OFFLOAD)) 1008 return 0; 1009 1010 dev = switchdev_get_dev_by_nhs(fi); 1011 if (!dev) 1012 return 0; 1013 1014 err = switchdev_port_obj_del(dev, &fib_obj); 1015 if (!err) 1016 fi->fib_flags &= ~RTNH_F_OFFLOAD; 1017 1018 return err == -EOPNOTSUPP ? 0 : err; 1019 } 1020 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); 1021 1022 /** 1023 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation 1024 * 1025 * @fi: route FIB info structure 1026 */ 1027 void switchdev_fib_ipv4_abort(struct fib_info *fi) 1028 { 1029 /* There was a problem installing this route to the offload 1030 * device. For now, until we come up with more refined 1031 * policy handling, abruptly end IPv4 fib offloading for 1032 * for entire net by flushing offload device(s) of all 1033 * IPv4 routes, and mark IPv4 fib offloading broken from 1034 * this point forward. 1035 */ 1036 1037 fib_flush_external(fi->fib_net); 1038 fi->fib_net->ipv4.fib_offload_disabled = true; 1039 } 1040 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); 1041 1042 static bool switchdev_port_same_parent_id(struct net_device *a, 1043 struct net_device *b) 1044 { 1045 struct switchdev_attr a_attr = { 1046 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 1047 .flags = SWITCHDEV_F_NO_RECURSE, 1048 }; 1049 struct switchdev_attr b_attr = { 1050 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 1051 .flags = SWITCHDEV_F_NO_RECURSE, 1052 }; 1053 1054 if (switchdev_port_attr_get(a, &a_attr) || 1055 switchdev_port_attr_get(b, &b_attr)) 1056 return false; 1057 1058 return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); 1059 } 1060 1061 static u32 switchdev_port_fwd_mark_get(struct net_device *dev, 1062 struct net_device *group_dev) 1063 { 1064 struct net_device *lower_dev; 1065 struct list_head *iter; 1066 1067 netdev_for_each_lower_dev(group_dev, lower_dev, iter) { 1068 if (lower_dev == dev) 1069 continue; 1070 if (switchdev_port_same_parent_id(dev, lower_dev)) 1071 return lower_dev->offload_fwd_mark; 1072 return switchdev_port_fwd_mark_get(dev, lower_dev); 1073 } 1074 1075 return dev->ifindex; 1076 } 1077 1078 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, 1079 u32 old_mark, u32 *reset_mark) 1080 { 1081 struct net_device *lower_dev; 1082 struct list_head *iter; 1083 1084 netdev_for_each_lower_dev(group_dev, lower_dev, iter) { 1085 if (lower_dev->offload_fwd_mark == old_mark) { 1086 if (!*reset_mark) 1087 *reset_mark = lower_dev->ifindex; 1088 lower_dev->offload_fwd_mark = *reset_mark; 1089 } 1090 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); 1091 } 1092 } 1093 1094 /** 1095 * switchdev_port_fwd_mark_set - Set port offload forwarding mark 1096 * 1097 * @dev: port device 1098 * @group_dev: containing device 1099 * @joining: true if dev is joining group; false if leaving group 1100 * 1101 * An ungrouped port's offload mark is just its ifindex. A grouped 1102 * port's (member of a bridge, for example) offload mark is the ifindex 1103 * of one of the ports in the group with the same parent (switch) ID. 1104 * Ports on the same device in the same group will have the same mark. 1105 * 1106 * Example: 1107 * 1108 * br0 ifindex=9 1109 * sw1p1 ifindex=2 mark=2 1110 * sw1p2 ifindex=3 mark=2 1111 * sw2p1 ifindex=4 mark=5 1112 * sw2p2 ifindex=5 mark=5 1113 * 1114 * If sw2p2 leaves the bridge, we'll have: 1115 * 1116 * br0 ifindex=9 1117 * sw1p1 ifindex=2 mark=2 1118 * sw1p2 ifindex=3 mark=2 1119 * sw2p1 ifindex=4 mark=4 1120 * sw2p2 ifindex=5 mark=5 1121 */ 1122 void switchdev_port_fwd_mark_set(struct net_device *dev, 1123 struct net_device *group_dev, 1124 bool joining) 1125 { 1126 u32 mark = dev->ifindex; 1127 u32 reset_mark = 0; 1128 1129 if (group_dev && joining) { 1130 mark = switchdev_port_fwd_mark_get(dev, group_dev); 1131 } else if (group_dev && !joining) { 1132 if (dev->offload_fwd_mark == mark) 1133 /* Ohoh, this port was the mark reference port, 1134 * but it's leaving the group, so reset the 1135 * mark for the remaining ports in the group. 1136 */ 1137 switchdev_port_fwd_mark_reset(group_dev, mark, 1138 &reset_mark); 1139 } 1140 1141 dev->offload_fwd_mark = mark; 1142 } 1143 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); 1144