1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * net-sysfs.c - network device class and attributes 4 * 5 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org> 6 */ 7 8 #include <linux/capability.h> 9 #include <linux/kernel.h> 10 #include <linux/netdevice.h> 11 #include <linux/if_arp.h> 12 #include <linux/slab.h> 13 #include <linux/sched/signal.h> 14 #include <linux/sched/isolation.h> 15 #include <linux/nsproxy.h> 16 #include <net/sock.h> 17 #include <net/net_namespace.h> 18 #include <linux/rtnetlink.h> 19 #include <linux/vmalloc.h> 20 #include <linux/export.h> 21 #include <linux/jiffies.h> 22 #include <linux/pm_runtime.h> 23 #include <linux/of.h> 24 #include <linux/of_net.h> 25 #include <linux/cpu.h> 26 #include <net/netdev_rx_queue.h> 27 #include <net/rps.h> 28 29 #include "dev.h" 30 #include "net-sysfs.h" 31 32 #ifdef CONFIG_SYSFS 33 static const char fmt_hex[] = "%#x\n"; 34 static const char fmt_dec[] = "%d\n"; 35 static const char fmt_uint[] = "%u\n"; 36 static const char fmt_ulong[] = "%lu\n"; 37 static const char fmt_u64[] = "%llu\n"; 38 39 /* Caller holds RTNL, netdev->lock or RCU */ 40 static inline int dev_isalive(const struct net_device *dev) 41 { 42 return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; 43 } 44 45 /* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active, 46 * when unregistering a net device and accessing associated sysfs files. The 47 * potential deadlock is as follow: 48 * 49 * CPU 0 CPU 1 50 * 51 * rtnl_lock vfs_read 52 * unregister_netdevice_many kernfs_seq_start 53 * device_del / kobject_put kernfs_get_active (kn->active++) 54 * kernfs_drain sysfs_kf_seq_show 55 * wait_event( rtnl_lock 56 * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release 57 * -> waits on CPU 1 to decrease kn->active the rtnl lock. 58 * 59 * The historical fix was to use rtnl_trylock with restart_syscall to bail out 60 * of sysfs operations when the lock couldn't be taken. This fixed the above 61 * issue as it allowed CPU 1 to bail out of the ABBA situation. 62 * 63 * But it came with performances issues, as syscalls are being restarted in 64 * loops when there was contention on the rtnl lock, with huge slow downs in 65 * specific scenarios (e.g. lots of virtual interfaces created and userspace 66 * daemons querying their attributes). 67 * 68 * The idea below is to bail out of the active kernfs_node protection 69 * (kn->active) while trying to take the rtnl lock. 70 * 71 * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The 72 * net device is guaranteed to be alive if this returns successfully. 73 */ 74 static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr, 75 struct net_device *ndev) 76 { 77 struct kernfs_node *kn; 78 int ret = 0; 79 80 /* First, we hold a reference to the net device as the unregistration 81 * path might run in parallel. This will ensure the net device and the 82 * associated sysfs objects won't be freed while we try to take the rtnl 83 * lock. 84 */ 85 dev_hold(ndev); 86 /* sysfs_break_active_protection was introduced to allow self-removal of 87 * devices and their associated sysfs files by bailing out of the 88 * sysfs/kernfs protection. We do this here to allow the unregistration 89 * path to complete in parallel. The following takes a reference on the 90 * kobject and the kernfs_node being accessed. 
91 * 92 * This works because we hold a reference onto the net device and the 93 * unregistration path will wait for us eventually in netdev_run_todo 94 * (outside an rtnl lock section). 95 */ 96 kn = sysfs_break_active_protection(kobj, attr); 97 /* We can now try to take the rtnl lock. This can't deadlock us as the 98 * unregistration path is able to drain sysfs files (kernfs_node) thanks 99 * to the above dance. 100 */ 101 if (rtnl_lock_interruptible()) { 102 ret = -ERESTARTSYS; 103 goto unbreak; 104 } 105 /* Check dismantle on the device hasn't started, otherwise deny the 106 * operation. 107 */ 108 if (!dev_isalive(ndev)) { 109 rtnl_unlock(); 110 ret = -ENODEV; 111 goto unbreak; 112 } 113 /* We are now sure the device dismantle hasn't started nor that it can 114 * start before we exit the locking section as we hold the rtnl lock. 115 * There's no need to keep unbreaking the sysfs protection nor to hold 116 * a net device reference from that point; that was only needed to take 117 * the rtnl lock. 118 */ 119 unbreak: 120 sysfs_unbreak_active_protection(kn); 121 dev_put(ndev); 122 123 return ret; 124 } 125 126 /* use same locking rules as GIF* ioctl's */ 127 static ssize_t netdev_show(const struct device *dev, 128 struct device_attribute *attr, char *buf, 129 ssize_t (*format)(const struct net_device *, char *)) 130 { 131 struct net_device *ndev = to_net_dev(dev); 132 ssize_t ret = -EINVAL; 133 134 rcu_read_lock(); 135 if (dev_isalive(ndev)) 136 ret = (*format)(ndev, buf); 137 rcu_read_unlock(); 138 139 return ret; 140 } 141 142 /* generate a show function for simple field */ 143 #define NETDEVICE_SHOW(field, format_string) \ 144 static ssize_t format_##field(const struct net_device *dev, char *buf) \ 145 { \ 146 return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \ 147 } \ 148 static ssize_t field##_show(struct device *dev, \ 149 struct device_attribute *attr, char *buf) \ 150 { \ 151 return netdev_show(dev, attr, buf, format_##field); \ 152 } \ 153 154 #define NETDEVICE_SHOW_RO(field, format_string) \ 155 NETDEVICE_SHOW(field, format_string); \ 156 static DEVICE_ATTR_RO(field) 157 158 #define NETDEVICE_SHOW_RW(field, format_string) \ 159 NETDEVICE_SHOW(field, format_string); \ 160 static DEVICE_ATTR_RW(field) 161 162 /* use same locking and permission rules as SIF* ioctl's */ 163 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, 164 const char *buf, size_t len, 165 int (*set)(struct net_device *, unsigned long)) 166 { 167 struct net_device *netdev = to_net_dev(dev); 168 struct net *net = dev_net(netdev); 169 unsigned long new; 170 int ret; 171 172 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 173 return -EPERM; 174 175 ret = kstrtoul(buf, 0, &new); 176 if (ret) 177 goto err; 178 179 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 180 if (ret) 181 goto err; 182 183 ret = (*set)(netdev, new); 184 if (ret == 0) 185 ret = len; 186 187 rtnl_unlock(); 188 err: 189 return ret; 190 } 191 192 /* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */ 193 static ssize_t 194 netdev_lock_store(struct device *dev, struct device_attribute *attr, 195 const char *buf, size_t len, 196 int (*set)(struct net_device *, unsigned long)) 197 { 198 struct net_device *netdev = to_net_dev(dev); 199 struct net *net = dev_net(netdev); 200 unsigned long new; 201 int ret; 202 203 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 204 return -EPERM; 205 206 ret = kstrtoul(buf, 0, &new); 207 if (ret) 208 return ret; 209 210 netdev_lock(netdev); 211 212 if 
(dev_isalive(netdev)) { 213 ret = (*set)(netdev, new); 214 if (ret == 0) 215 ret = len; 216 } 217 netdev_unlock(netdev); 218 219 return ret; 220 } 221 222 NETDEVICE_SHOW_RO(dev_id, fmt_hex); 223 NETDEVICE_SHOW_RO(dev_port, fmt_dec); 224 NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); 225 NETDEVICE_SHOW_RO(addr_len, fmt_dec); 226 NETDEVICE_SHOW_RO(ifindex, fmt_dec); 227 NETDEVICE_SHOW_RO(type, fmt_dec); 228 NETDEVICE_SHOW_RO(link_mode, fmt_dec); 229 230 static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, 231 char *buf) 232 { 233 struct net_device *ndev = to_net_dev(dev); 234 235 return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev)); 236 } 237 static DEVICE_ATTR_RO(iflink); 238 239 static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) 240 { 241 return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type)); 242 } 243 244 static ssize_t name_assign_type_show(struct device *dev, 245 struct device_attribute *attr, 246 char *buf) 247 { 248 struct net_device *ndev = to_net_dev(dev); 249 ssize_t ret = -EINVAL; 250 251 if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN) 252 ret = netdev_show(dev, attr, buf, format_name_assign_type); 253 254 return ret; 255 } 256 static DEVICE_ATTR_RO(name_assign_type); 257 258 /* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */ 259 static ssize_t address_show(struct device *dev, struct device_attribute *attr, 260 char *buf) 261 { 262 struct net_device *ndev = to_net_dev(dev); 263 ssize_t ret = -EINVAL; 264 265 netdev_lock(ndev); 266 if (dev_isalive(ndev)) 267 ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); 268 netdev_unlock(ndev); 269 270 return ret; 271 } 272 static DEVICE_ATTR_RO(address); 273 274 static ssize_t broadcast_show(struct device *dev, 275 struct device_attribute *attr, char *buf) 276 { 277 struct net_device *ndev = to_net_dev(dev); 278 int ret = -EINVAL; 279 280 rcu_read_lock(); 281 if (dev_isalive(ndev)) 282 ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); 283 rcu_read_unlock(); 284 return ret; 285 } 286 static DEVICE_ATTR_RO(broadcast); 287 288 static int change_carrier(struct net_device *dev, unsigned long new_carrier) 289 { 290 if (!netif_running(dev)) 291 return -EINVAL; 292 return dev_change_carrier(dev, (bool)new_carrier); 293 } 294 295 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, 296 const char *buf, size_t len) 297 { 298 struct net_device *netdev = to_net_dev(dev); 299 300 /* The check is also done in change_carrier; this helps returning early 301 * without hitting the locking section in netdev_store. 302 */ 303 if (!netdev->netdev_ops->ndo_change_carrier) 304 return -EOPNOTSUPP; 305 306 return netdev_store(dev, attr, buf, len, change_carrier); 307 } 308 309 static ssize_t carrier_show(struct device *dev, 310 struct device_attribute *attr, char *buf) 311 { 312 struct net_device *netdev = to_net_dev(dev); 313 int ret; 314 315 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 316 if (ret) 317 return ret; 318 319 ret = -EINVAL; 320 if (netif_running(netdev)) { 321 /* Synchronize carrier state with link watch, 322 * see also rtnl_getlink(). 
323 */ 324 linkwatch_sync_dev(netdev); 325 326 ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); 327 } 328 329 rtnl_unlock(); 330 return ret; 331 } 332 static DEVICE_ATTR_RW(carrier); 333 334 static ssize_t speed_show(struct device *dev, 335 struct device_attribute *attr, char *buf) 336 { 337 struct net_device *netdev = to_net_dev(dev); 338 int ret = -EINVAL; 339 340 /* The check is also done in __ethtool_get_link_ksettings; this helps 341 * returning early without hitting the locking section below. 342 */ 343 if (!netdev->ethtool_ops->get_link_ksettings) 344 return ret; 345 346 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 347 if (ret) 348 return ret; 349 350 ret = -EINVAL; 351 if (netif_running(netdev)) { 352 struct ethtool_link_ksettings cmd; 353 354 if (!__ethtool_get_link_ksettings(netdev, &cmd)) 355 ret = sysfs_emit(buf, fmt_dec, cmd.base.speed); 356 } 357 rtnl_unlock(); 358 return ret; 359 } 360 static DEVICE_ATTR_RO(speed); 361 362 static ssize_t duplex_show(struct device *dev, 363 struct device_attribute *attr, char *buf) 364 { 365 struct net_device *netdev = to_net_dev(dev); 366 int ret = -EINVAL; 367 368 /* The check is also done in __ethtool_get_link_ksettings; this helps 369 * returning early without hitting the locking section below. 370 */ 371 if (!netdev->ethtool_ops->get_link_ksettings) 372 return ret; 373 374 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 375 if (ret) 376 return ret; 377 378 ret = -EINVAL; 379 if (netif_running(netdev)) { 380 struct ethtool_link_ksettings cmd; 381 382 if (!__ethtool_get_link_ksettings(netdev, &cmd)) { 383 const char *duplex; 384 385 switch (cmd.base.duplex) { 386 case DUPLEX_HALF: 387 duplex = "half"; 388 break; 389 case DUPLEX_FULL: 390 duplex = "full"; 391 break; 392 default: 393 duplex = "unknown"; 394 break; 395 } 396 ret = sysfs_emit(buf, "%s\n", duplex); 397 } 398 } 399 rtnl_unlock(); 400 return ret; 401 } 402 static DEVICE_ATTR_RO(duplex); 403 404 static ssize_t testing_show(struct device *dev, 405 struct device_attribute *attr, char *buf) 406 { 407 struct net_device *netdev = to_net_dev(dev); 408 409 if (netif_running(netdev)) 410 return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev)); 411 412 return -EINVAL; 413 } 414 static DEVICE_ATTR_RO(testing); 415 416 static ssize_t dormant_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418 { 419 struct net_device *netdev = to_net_dev(dev); 420 421 if (netif_running(netdev)) 422 return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev)); 423 424 return -EINVAL; 425 } 426 static DEVICE_ATTR_RO(dormant); 427 428 static const char *const operstates[] = { 429 "unknown", 430 "notpresent", /* currently unused */ 431 "down", 432 "lowerlayerdown", 433 "testing", 434 "dormant", 435 "up" 436 }; 437 438 static ssize_t operstate_show(struct device *dev, 439 struct device_attribute *attr, char *buf) 440 { 441 const struct net_device *netdev = to_net_dev(dev); 442 unsigned char operstate; 443 444 operstate = READ_ONCE(netdev->operstate); 445 if (!netif_running(netdev)) 446 operstate = IF_OPER_DOWN; 447 448 if (operstate >= ARRAY_SIZE(operstates)) 449 return -EINVAL; /* should not happen */ 450 451 return sysfs_emit(buf, "%s\n", operstates[operstate]); 452 } 453 static DEVICE_ATTR_RO(operstate); 454 455 static ssize_t carrier_changes_show(struct device *dev, 456 struct device_attribute *attr, 457 char *buf) 458 { 459 struct net_device *netdev = to_net_dev(dev); 460 461 return sysfs_emit(buf, fmt_dec, 462 atomic_read(&netdev->carrier_up_count) + 463 
atomic_read(&netdev->carrier_down_count)); 464 } 465 static DEVICE_ATTR_RO(carrier_changes); 466 467 static ssize_t carrier_up_count_show(struct device *dev, 468 struct device_attribute *attr, 469 char *buf) 470 { 471 struct net_device *netdev = to_net_dev(dev); 472 473 return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); 474 } 475 static DEVICE_ATTR_RO(carrier_up_count); 476 477 static ssize_t carrier_down_count_show(struct device *dev, 478 struct device_attribute *attr, 479 char *buf) 480 { 481 struct net_device *netdev = to_net_dev(dev); 482 483 return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); 484 } 485 static DEVICE_ATTR_RO(carrier_down_count); 486 487 /* read-write attributes */ 488 489 static int change_mtu(struct net_device *dev, unsigned long new_mtu) 490 { 491 return dev_set_mtu(dev, (int)new_mtu); 492 } 493 494 static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, 495 const char *buf, size_t len) 496 { 497 return netdev_store(dev, attr, buf, len, change_mtu); 498 } 499 NETDEVICE_SHOW_RW(mtu, fmt_dec); 500 501 static int change_flags(struct net_device *dev, unsigned long new_flags) 502 { 503 return dev_change_flags(dev, (unsigned int)new_flags, NULL); 504 } 505 506 static ssize_t flags_store(struct device *dev, struct device_attribute *attr, 507 const char *buf, size_t len) 508 { 509 return netdev_store(dev, attr, buf, len, change_flags); 510 } 511 NETDEVICE_SHOW_RW(flags, fmt_hex); 512 513 static ssize_t tx_queue_len_store(struct device *dev, 514 struct device_attribute *attr, 515 const char *buf, size_t len) 516 { 517 if (!capable(CAP_NET_ADMIN)) 518 return -EPERM; 519 520 return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len); 521 } 522 NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); 523 524 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) 525 { 526 netdev_set_gro_flush_timeout(dev, val); 527 return 0; 528 } 529 530 static ssize_t gro_flush_timeout_store(struct device *dev, 531 struct device_attribute *attr, 532 const char *buf, size_t len) 533 { 534 if (!capable(CAP_NET_ADMIN)) 535 return -EPERM; 536 537 return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout); 538 } 539 NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); 540 541 static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val) 542 { 543 if (val > S32_MAX) 544 return -ERANGE; 545 546 netdev_set_defer_hard_irqs(dev, (u32)val); 547 return 0; 548 } 549 550 static ssize_t napi_defer_hard_irqs_store(struct device *dev, 551 struct device_attribute *attr, 552 const char *buf, size_t len) 553 { 554 if (!capable(CAP_NET_ADMIN)) 555 return -EPERM; 556 557 return netdev_lock_store(dev, attr, buf, len, 558 change_napi_defer_hard_irqs); 559 } 560 NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint); 561 562 static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, 563 const char *buf, size_t len) 564 { 565 struct net_device *netdev = to_net_dev(dev); 566 struct net *net = dev_net(netdev); 567 size_t count = len; 568 ssize_t ret; 569 570 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 571 return -EPERM; 572 573 /* ignore trailing newline */ 574 if (len > 0 && buf[len - 1] == '\n') 575 --count; 576 577 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 578 if (ret) 579 return ret; 580 581 ret = dev_set_alias(netdev, buf, count); 582 if (ret < 0) 583 goto err; 584 ret = len; 585 netdev_state_change(netdev); 586 err: 587 rtnl_unlock(); 588 589 return ret; 590 } 591 592 static 
ssize_t ifalias_show(struct device *dev, 593 struct device_attribute *attr, char *buf) 594 { 595 const struct net_device *netdev = to_net_dev(dev); 596 char tmp[IFALIASZ]; 597 ssize_t ret; 598 599 ret = dev_get_alias(netdev, tmp, sizeof(tmp)); 600 if (ret > 0) 601 ret = sysfs_emit(buf, "%s\n", tmp); 602 return ret; 603 } 604 static DEVICE_ATTR_RW(ifalias); 605 606 static int change_group(struct net_device *dev, unsigned long new_group) 607 { 608 dev_set_group(dev, (int)new_group); 609 return 0; 610 } 611 612 static ssize_t group_store(struct device *dev, struct device_attribute *attr, 613 const char *buf, size_t len) 614 { 615 return netdev_store(dev, attr, buf, len, change_group); 616 } 617 NETDEVICE_SHOW(group, fmt_dec); 618 static DEVICE_ATTR(netdev_group, 0644, group_show, group_store); 619 620 static int change_proto_down(struct net_device *dev, unsigned long proto_down) 621 { 622 return dev_change_proto_down(dev, (bool)proto_down); 623 } 624 625 static ssize_t proto_down_store(struct device *dev, 626 struct device_attribute *attr, 627 const char *buf, size_t len) 628 { 629 return netdev_store(dev, attr, buf, len, change_proto_down); 630 } 631 NETDEVICE_SHOW_RW(proto_down, fmt_dec); 632 633 static ssize_t phys_port_id_show(struct device *dev, 634 struct device_attribute *attr, char *buf) 635 { 636 struct net_device *netdev = to_net_dev(dev); 637 struct netdev_phys_item_id ppid; 638 ssize_t ret; 639 640 /* The check is also done in dev_get_phys_port_id; this helps returning 641 * early without hitting the locking section below. 642 */ 643 if (!netdev->netdev_ops->ndo_get_phys_port_id) 644 return -EOPNOTSUPP; 645 646 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 647 if (ret) 648 return ret; 649 650 ret = dev_get_phys_port_id(netdev, &ppid); 651 if (!ret) 652 ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); 653 654 rtnl_unlock(); 655 656 return ret; 657 } 658 static DEVICE_ATTR_RO(phys_port_id); 659 660 static ssize_t phys_port_name_show(struct device *dev, 661 struct device_attribute *attr, char *buf) 662 { 663 struct net_device *netdev = to_net_dev(dev); 664 char name[IFNAMSIZ]; 665 ssize_t ret; 666 667 /* The checks are also done in dev_get_phys_port_name; this helps 668 * returning early without hitting the locking section below. 669 */ 670 if (!netdev->netdev_ops->ndo_get_phys_port_name && 671 !netdev->devlink_port) 672 return -EOPNOTSUPP; 673 674 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 675 if (ret) 676 return ret; 677 678 ret = dev_get_phys_port_name(netdev, name, sizeof(name)); 679 if (!ret) 680 ret = sysfs_emit(buf, "%s\n", name); 681 682 rtnl_unlock(); 683 684 return ret; 685 } 686 static DEVICE_ATTR_RO(phys_port_name); 687 688 static ssize_t phys_switch_id_show(struct device *dev, 689 struct device_attribute *attr, char *buf) 690 { 691 struct net_device *netdev = to_net_dev(dev); 692 struct netdev_phys_item_id ppid = { }; 693 ssize_t ret; 694 695 /* The checks are also done in dev_get_phys_port_name; this helps 696 * returning early without hitting the locking section below. This works 697 * because recurse is false when calling dev_get_port_parent_id. 
698 */ 699 if (!netdev->netdev_ops->ndo_get_port_parent_id && 700 !netdev->devlink_port) 701 return -EOPNOTSUPP; 702 703 ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); 704 if (ret) 705 return ret; 706 707 ret = dev_get_port_parent_id(netdev, &ppid, false); 708 if (!ret) 709 ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); 710 711 rtnl_unlock(); 712 713 return ret; 714 } 715 static DEVICE_ATTR_RO(phys_switch_id); 716 717 static ssize_t threaded_show(struct device *dev, 718 struct device_attribute *attr, char *buf) 719 { 720 struct net_device *netdev = to_net_dev(dev); 721 ssize_t ret = -EINVAL; 722 723 rcu_read_lock(); 724 725 if (dev_isalive(netdev)) 726 ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded)); 727 728 rcu_read_unlock(); 729 730 return ret; 731 } 732 733 static int modify_napi_threaded(struct net_device *dev, unsigned long val) 734 { 735 int ret; 736 737 if (list_empty(&dev->napi_list)) 738 return -EOPNOTSUPP; 739 740 if (val != 0 && val != 1) 741 return -EOPNOTSUPP; 742 743 ret = dev_set_threaded(dev, val); 744 745 return ret; 746 } 747 748 static ssize_t threaded_store(struct device *dev, 749 struct device_attribute *attr, 750 const char *buf, size_t len) 751 { 752 return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded); 753 } 754 static DEVICE_ATTR_RW(threaded); 755 756 static struct attribute *net_class_attrs[] __ro_after_init = { 757 &dev_attr_netdev_group.attr, 758 &dev_attr_type.attr, 759 &dev_attr_dev_id.attr, 760 &dev_attr_dev_port.attr, 761 &dev_attr_iflink.attr, 762 &dev_attr_ifindex.attr, 763 &dev_attr_name_assign_type.attr, 764 &dev_attr_addr_assign_type.attr, 765 &dev_attr_addr_len.attr, 766 &dev_attr_link_mode.attr, 767 &dev_attr_address.attr, 768 &dev_attr_broadcast.attr, 769 &dev_attr_speed.attr, 770 &dev_attr_duplex.attr, 771 &dev_attr_dormant.attr, 772 &dev_attr_testing.attr, 773 &dev_attr_operstate.attr, 774 &dev_attr_carrier_changes.attr, 775 &dev_attr_ifalias.attr, 776 &dev_attr_carrier.attr, 777 &dev_attr_mtu.attr, 778 &dev_attr_flags.attr, 779 &dev_attr_tx_queue_len.attr, 780 &dev_attr_gro_flush_timeout.attr, 781 &dev_attr_napi_defer_hard_irqs.attr, 782 &dev_attr_phys_port_id.attr, 783 &dev_attr_phys_port_name.attr, 784 &dev_attr_phys_switch_id.attr, 785 &dev_attr_proto_down.attr, 786 &dev_attr_carrier_up_count.attr, 787 &dev_attr_carrier_down_count.attr, 788 &dev_attr_threaded.attr, 789 NULL, 790 }; 791 ATTRIBUTE_GROUPS(net_class); 792 793 /* Show a given an attribute in the statistics group */ 794 static ssize_t netstat_show(const struct device *d, 795 struct device_attribute *attr, char *buf, 796 unsigned long offset) 797 { 798 struct net_device *dev = to_net_dev(d); 799 ssize_t ret = -EINVAL; 800 801 WARN_ON(offset > sizeof(struct rtnl_link_stats64) || 802 offset % sizeof(u64) != 0); 803 804 rcu_read_lock(); 805 if (dev_isalive(dev)) { 806 struct rtnl_link_stats64 temp; 807 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); 808 809 ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset)); 810 } 811 rcu_read_unlock(); 812 return ret; 813 } 814 815 /* generate a read-only statistics attribute */ 816 #define NETSTAT_ENTRY(name) \ 817 static ssize_t name##_show(struct device *d, \ 818 struct device_attribute *attr, char *buf) \ 819 { \ 820 return netstat_show(d, attr, buf, \ 821 offsetof(struct rtnl_link_stats64, name)); \ 822 } \ 823 static DEVICE_ATTR_RO(name) 824 825 NETSTAT_ENTRY(rx_packets); 826 NETSTAT_ENTRY(tx_packets); 827 NETSTAT_ENTRY(rx_bytes); 828 NETSTAT_ENTRY(tx_bytes); 829 
NETSTAT_ENTRY(rx_errors); 830 NETSTAT_ENTRY(tx_errors); 831 NETSTAT_ENTRY(rx_dropped); 832 NETSTAT_ENTRY(tx_dropped); 833 NETSTAT_ENTRY(multicast); 834 NETSTAT_ENTRY(collisions); 835 NETSTAT_ENTRY(rx_length_errors); 836 NETSTAT_ENTRY(rx_over_errors); 837 NETSTAT_ENTRY(rx_crc_errors); 838 NETSTAT_ENTRY(rx_frame_errors); 839 NETSTAT_ENTRY(rx_fifo_errors); 840 NETSTAT_ENTRY(rx_missed_errors); 841 NETSTAT_ENTRY(tx_aborted_errors); 842 NETSTAT_ENTRY(tx_carrier_errors); 843 NETSTAT_ENTRY(tx_fifo_errors); 844 NETSTAT_ENTRY(tx_heartbeat_errors); 845 NETSTAT_ENTRY(tx_window_errors); 846 NETSTAT_ENTRY(rx_compressed); 847 NETSTAT_ENTRY(tx_compressed); 848 NETSTAT_ENTRY(rx_nohandler); 849 850 static struct attribute *netstat_attrs[] __ro_after_init = { 851 &dev_attr_rx_packets.attr, 852 &dev_attr_tx_packets.attr, 853 &dev_attr_rx_bytes.attr, 854 &dev_attr_tx_bytes.attr, 855 &dev_attr_rx_errors.attr, 856 &dev_attr_tx_errors.attr, 857 &dev_attr_rx_dropped.attr, 858 &dev_attr_tx_dropped.attr, 859 &dev_attr_multicast.attr, 860 &dev_attr_collisions.attr, 861 &dev_attr_rx_length_errors.attr, 862 &dev_attr_rx_over_errors.attr, 863 &dev_attr_rx_crc_errors.attr, 864 &dev_attr_rx_frame_errors.attr, 865 &dev_attr_rx_fifo_errors.attr, 866 &dev_attr_rx_missed_errors.attr, 867 &dev_attr_tx_aborted_errors.attr, 868 &dev_attr_tx_carrier_errors.attr, 869 &dev_attr_tx_fifo_errors.attr, 870 &dev_attr_tx_heartbeat_errors.attr, 871 &dev_attr_tx_window_errors.attr, 872 &dev_attr_rx_compressed.attr, 873 &dev_attr_tx_compressed.attr, 874 &dev_attr_rx_nohandler.attr, 875 NULL 876 }; 877 878 static const struct attribute_group netstat_group = { 879 .name = "statistics", 880 .attrs = netstat_attrs, 881 }; 882 883 static struct attribute *wireless_attrs[] = { 884 NULL 885 }; 886 887 static const struct attribute_group wireless_group = { 888 .name = "wireless", 889 .attrs = wireless_attrs, 890 }; 891 892 static bool wireless_group_needed(struct net_device *ndev) 893 { 894 #if IS_ENABLED(CONFIG_CFG80211) 895 if (ndev->ieee80211_ptr) 896 return true; 897 #endif 898 #if IS_ENABLED(CONFIG_WIRELESS_EXT) 899 if (ndev->wireless_handlers) 900 return true; 901 #endif 902 return false; 903 } 904 905 #else /* CONFIG_SYSFS */ 906 #define net_class_groups NULL 907 #endif /* CONFIG_SYSFS */ 908 909 #ifdef CONFIG_SYSFS 910 #define to_rx_queue_attr(_attr) \ 911 container_of(_attr, struct rx_queue_attribute, attr) 912 913 #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) 914 915 static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, 916 char *buf) 917 { 918 const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); 919 struct netdev_rx_queue *queue = to_rx_queue(kobj); 920 921 if (!attribute->show) 922 return -EIO; 923 924 return attribute->show(queue, buf); 925 } 926 927 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, 928 const char *buf, size_t count) 929 { 930 const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); 931 struct netdev_rx_queue *queue = to_rx_queue(kobj); 932 933 if (!attribute->store) 934 return -EIO; 935 936 return attribute->store(queue, buf, count); 937 } 938 939 static const struct sysfs_ops rx_queue_sysfs_ops = { 940 .show = rx_queue_attr_show, 941 .store = rx_queue_attr_store, 942 }; 943 944 #ifdef CONFIG_RPS 945 static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf) 946 { 947 struct rps_map *map; 948 cpumask_var_t mask; 949 int i, len; 950 951 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 952 return 
-ENOMEM; 953 954 rcu_read_lock(); 955 map = rcu_dereference(queue->rps_map); 956 if (map) 957 for (i = 0; i < map->len; i++) 958 cpumask_set_cpu(map->cpus[i], mask); 959 960 len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask)); 961 rcu_read_unlock(); 962 free_cpumask_var(mask); 963 964 return len < PAGE_SIZE ? len : -EINVAL; 965 } 966 967 static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue, 968 cpumask_var_t mask) 969 { 970 static DEFINE_MUTEX(rps_map_mutex); 971 struct rps_map *old_map, *map; 972 int cpu, i; 973 974 map = kzalloc(max_t(unsigned int, 975 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), 976 GFP_KERNEL); 977 if (!map) 978 return -ENOMEM; 979 980 i = 0; 981 for_each_cpu_and(cpu, mask, cpu_online_mask) 982 map->cpus[i++] = cpu; 983 984 if (i) { 985 map->len = i; 986 } else { 987 kfree(map); 988 map = NULL; 989 } 990 991 mutex_lock(&rps_map_mutex); 992 old_map = rcu_dereference_protected(queue->rps_map, 993 mutex_is_locked(&rps_map_mutex)); 994 rcu_assign_pointer(queue->rps_map, map); 995 996 if (map) 997 static_branch_inc(&rps_needed); 998 if (old_map) 999 static_branch_dec(&rps_needed); 1000 1001 mutex_unlock(&rps_map_mutex); 1002 1003 if (old_map) 1004 kfree_rcu(old_map, rcu); 1005 return 0; 1006 } 1007 1008 int rps_cpumask_housekeeping(struct cpumask *mask) 1009 { 1010 if (!cpumask_empty(mask)) { 1011 cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); 1012 cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ)); 1013 if (cpumask_empty(mask)) 1014 return -EINVAL; 1015 } 1016 return 0; 1017 } 1018 1019 static ssize_t store_rps_map(struct netdev_rx_queue *queue, 1020 const char *buf, size_t len) 1021 { 1022 cpumask_var_t mask; 1023 int err; 1024 1025 if (!capable(CAP_NET_ADMIN)) 1026 return -EPERM; 1027 1028 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1029 return -ENOMEM; 1030 1031 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 1032 if (err) 1033 goto out; 1034 1035 err = rps_cpumask_housekeeping(mask); 1036 if (err) 1037 goto out; 1038 1039 err = netdev_rx_queue_set_rps_mask(queue, mask); 1040 1041 out: 1042 free_cpumask_var(mask); 1043 return err ? : len; 1044 } 1045 1046 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 1047 char *buf) 1048 { 1049 struct rps_dev_flow_table *flow_table; 1050 unsigned long val = 0; 1051 1052 rcu_read_lock(); 1053 flow_table = rcu_dereference(queue->rps_flow_table); 1054 if (flow_table) 1055 val = (unsigned long)flow_table->mask + 1; 1056 rcu_read_unlock(); 1057 1058 return sysfs_emit(buf, "%lu\n", val); 1059 } 1060 1061 static void rps_dev_flow_table_release(struct rcu_head *rcu) 1062 { 1063 struct rps_dev_flow_table *table = container_of(rcu, 1064 struct rps_dev_flow_table, rcu); 1065 vfree(table); 1066 } 1067 1068 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 1069 const char *buf, size_t len) 1070 { 1071 unsigned long mask, count; 1072 struct rps_dev_flow_table *table, *old_table; 1073 static DEFINE_SPINLOCK(rps_dev_flow_lock); 1074 int rc; 1075 1076 if (!capable(CAP_NET_ADMIN)) 1077 return -EPERM; 1078 1079 rc = kstrtoul(buf, 0, &count); 1080 if (rc < 0) 1081 return rc; 1082 1083 if (count) { 1084 mask = count - 1; 1085 /* mask = roundup_pow_of_two(count) - 1; 1086 * without overflows... 1087 */ 1088 while ((mask | (mask >> 1)) != mask) 1089 mask |= (mask >> 1); 1090 /* On 64 bit arches, must check mask fits in table->mask (u32), 1091 * and on 32bit arches, must check 1092 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. 
1093 */ 1094 #if BITS_PER_LONG > 32 1095 if (mask > (unsigned long)(u32)mask) 1096 return -EINVAL; 1097 #else 1098 if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) 1099 / sizeof(struct rps_dev_flow)) { 1100 /* Enforce a limit to prevent overflow */ 1101 return -EINVAL; 1102 } 1103 #endif 1104 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); 1105 if (!table) 1106 return -ENOMEM; 1107 1108 table->mask = mask; 1109 for (count = 0; count <= mask; count++) 1110 table->flows[count].cpu = RPS_NO_CPU; 1111 } else { 1112 table = NULL; 1113 } 1114 1115 spin_lock(&rps_dev_flow_lock); 1116 old_table = rcu_dereference_protected(queue->rps_flow_table, 1117 lockdep_is_held(&rps_dev_flow_lock)); 1118 rcu_assign_pointer(queue->rps_flow_table, table); 1119 spin_unlock(&rps_dev_flow_lock); 1120 1121 if (old_table) 1122 call_rcu(&old_table->rcu, rps_dev_flow_table_release); 1123 1124 return len; 1125 } 1126 1127 static struct rx_queue_attribute rps_cpus_attribute __ro_after_init 1128 = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map); 1129 1130 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init 1131 = __ATTR(rps_flow_cnt, 0644, 1132 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); 1133 #endif /* CONFIG_RPS */ 1134 1135 static struct attribute *rx_queue_default_attrs[] __ro_after_init = { 1136 #ifdef CONFIG_RPS 1137 &rps_cpus_attribute.attr, 1138 &rps_dev_flow_table_cnt_attribute.attr, 1139 #endif 1140 NULL 1141 }; 1142 ATTRIBUTE_GROUPS(rx_queue_default); 1143 1144 static void rx_queue_release(struct kobject *kobj) 1145 { 1146 struct netdev_rx_queue *queue = to_rx_queue(kobj); 1147 #ifdef CONFIG_RPS 1148 struct rps_map *map; 1149 struct rps_dev_flow_table *flow_table; 1150 1151 map = rcu_dereference_protected(queue->rps_map, 1); 1152 if (map) { 1153 RCU_INIT_POINTER(queue->rps_map, NULL); 1154 kfree_rcu(map, rcu); 1155 } 1156 1157 flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); 1158 if (flow_table) { 1159 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 1160 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 1161 } 1162 #endif 1163 1164 memset(kobj, 0, sizeof(*kobj)); 1165 netdev_put(queue->dev, &queue->dev_tracker); 1166 } 1167 1168 static const void *rx_queue_namespace(const struct kobject *kobj) 1169 { 1170 struct netdev_rx_queue *queue = to_rx_queue(kobj); 1171 struct device *dev = &queue->dev->dev; 1172 const void *ns = NULL; 1173 1174 if (dev->class && dev->class->namespace) 1175 ns = dev->class->namespace(dev); 1176 1177 return ns; 1178 } 1179 1180 static void rx_queue_get_ownership(const struct kobject *kobj, 1181 kuid_t *uid, kgid_t *gid) 1182 { 1183 const struct net *net = rx_queue_namespace(kobj); 1184 1185 net_ns_get_ownership(net, uid, gid); 1186 } 1187 1188 static const struct kobj_type rx_queue_ktype = { 1189 .sysfs_ops = &rx_queue_sysfs_ops, 1190 .release = rx_queue_release, 1191 .namespace = rx_queue_namespace, 1192 .get_ownership = rx_queue_get_ownership, 1193 }; 1194 1195 static int rx_queue_default_mask(struct net_device *dev, 1196 struct netdev_rx_queue *queue) 1197 { 1198 #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) 1199 struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask); 1200 1201 if (rps_default_mask && !cpumask_empty(rps_default_mask)) 1202 return netdev_rx_queue_set_rps_mask(queue, rps_default_mask); 1203 #endif 1204 return 0; 1205 } 1206 1207 static int rx_queue_add_kobject(struct net_device *dev, int index) 1208 { 1209 struct netdev_rx_queue *queue = dev->_rx + index; 
1210 struct kobject *kobj = &queue->kobj; 1211 int error = 0; 1212 1213 /* Rx queues are cleared in rx_queue_release to allow later 1214 * re-registration. This is triggered when their kobj refcount is 1215 * dropped. 1216 * 1217 * If a queue is removed while both a read (or write) operation and a 1218 * the re-addition of the same queue are pending (waiting on rntl_lock) 1219 * it might happen that the re-addition will execute before the read, 1220 * making the initial removal to never happen (queue's kobj refcount 1221 * won't drop enough because of the pending read). In such rare case, 1222 * return to allow the removal operation to complete. 1223 */ 1224 if (unlikely(kobj->state_initialized)) { 1225 netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed"); 1226 return -EAGAIN; 1227 } 1228 1229 /* Kobject_put later will trigger rx_queue_release call which 1230 * decreases dev refcount: Take that reference here 1231 */ 1232 netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); 1233 1234 kobj->kset = dev->queues_kset; 1235 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 1236 "rx-%u", index); 1237 if (error) 1238 goto err; 1239 1240 queue->groups = rx_queue_default_groups; 1241 error = sysfs_create_groups(kobj, queue->groups); 1242 if (error) 1243 goto err; 1244 1245 if (dev->sysfs_rx_queue_group) { 1246 error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); 1247 if (error) 1248 goto err_default_groups; 1249 } 1250 1251 error = rx_queue_default_mask(dev, queue); 1252 if (error) 1253 goto err_default_groups; 1254 1255 kobject_uevent(kobj, KOBJ_ADD); 1256 1257 return error; 1258 1259 err_default_groups: 1260 sysfs_remove_groups(kobj, queue->groups); 1261 err: 1262 kobject_put(kobj); 1263 return error; 1264 } 1265 1266 static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid, 1267 kgid_t kgid) 1268 { 1269 struct netdev_rx_queue *queue = dev->_rx + index; 1270 struct kobject *kobj = &queue->kobj; 1271 int error; 1272 1273 error = sysfs_change_owner(kobj, kuid, kgid); 1274 if (error) 1275 return error; 1276 1277 if (dev->sysfs_rx_queue_group) 1278 error = sysfs_group_change_owner( 1279 kobj, dev->sysfs_rx_queue_group, kuid, kgid); 1280 1281 return error; 1282 } 1283 #endif /* CONFIG_SYSFS */ 1284 1285 int 1286 net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) 1287 { 1288 #ifdef CONFIG_SYSFS 1289 int i; 1290 int error = 0; 1291 1292 #ifndef CONFIG_RPS 1293 if (!dev->sysfs_rx_queue_group) 1294 return 0; 1295 #endif 1296 for (i = old_num; i < new_num; i++) { 1297 error = rx_queue_add_kobject(dev, i); 1298 if (error) { 1299 new_num = old_num; 1300 break; 1301 } 1302 } 1303 1304 while (--i >= new_num) { 1305 struct netdev_rx_queue *queue = &dev->_rx[i]; 1306 struct kobject *kobj = &queue->kobj; 1307 1308 if (!refcount_read(&dev_net(dev)->ns.count)) 1309 kobj->uevent_suppress = 1; 1310 if (dev->sysfs_rx_queue_group) 1311 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); 1312 sysfs_remove_groups(kobj, queue->groups); 1313 kobject_put(kobj); 1314 } 1315 1316 return error; 1317 #else 1318 return 0; 1319 #endif 1320 } 1321 1322 static int net_rx_queue_change_owner(struct net_device *dev, int num, 1323 kuid_t kuid, kgid_t kgid) 1324 { 1325 #ifdef CONFIG_SYSFS 1326 int error = 0; 1327 int i; 1328 1329 #ifndef CONFIG_RPS 1330 if (!dev->sysfs_rx_queue_group) 1331 return 0; 1332 #endif 1333 for (i = 0; i < num; i++) { 1334 error = rx_queue_change_owner(dev, i, kuid, kgid); 1335 if (error) 1336 break; 1337 } 1338 
1339 return error; 1340 #else 1341 return 0; 1342 #endif 1343 } 1344 1345 #ifdef CONFIG_SYSFS 1346 /* 1347 * netdev_queue sysfs structures and functions. 1348 */ 1349 struct netdev_queue_attribute { 1350 struct attribute attr; 1351 ssize_t (*show)(struct kobject *kobj, struct attribute *attr, 1352 struct netdev_queue *queue, char *buf); 1353 ssize_t (*store)(struct kobject *kobj, struct attribute *attr, 1354 struct netdev_queue *queue, const char *buf, 1355 size_t len); 1356 }; 1357 #define to_netdev_queue_attr(_attr) \ 1358 container_of(_attr, struct netdev_queue_attribute, attr) 1359 1360 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) 1361 1362 static ssize_t netdev_queue_attr_show(struct kobject *kobj, 1363 struct attribute *attr, char *buf) 1364 { 1365 const struct netdev_queue_attribute *attribute 1366 = to_netdev_queue_attr(attr); 1367 struct netdev_queue *queue = to_netdev_queue(kobj); 1368 1369 if (!attribute->show) 1370 return -EIO; 1371 1372 return attribute->show(kobj, attr, queue, buf); 1373 } 1374 1375 static ssize_t netdev_queue_attr_store(struct kobject *kobj, 1376 struct attribute *attr, 1377 const char *buf, size_t count) 1378 { 1379 const struct netdev_queue_attribute *attribute 1380 = to_netdev_queue_attr(attr); 1381 struct netdev_queue *queue = to_netdev_queue(kobj); 1382 1383 if (!attribute->store) 1384 return -EIO; 1385 1386 return attribute->store(kobj, attr, queue, buf, count); 1387 } 1388 1389 static const struct sysfs_ops netdev_queue_sysfs_ops = { 1390 .show = netdev_queue_attr_show, 1391 .store = netdev_queue_attr_store, 1392 }; 1393 1394 static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr, 1395 struct netdev_queue *queue, char *buf) 1396 { 1397 unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); 1398 1399 return sysfs_emit(buf, fmt_ulong, trans_timeout); 1400 } 1401 1402 static unsigned int get_netdev_queue_index(struct netdev_queue *queue) 1403 { 1404 struct net_device *dev = queue->dev; 1405 unsigned int i; 1406 1407 i = queue - dev->_tx; 1408 BUG_ON(i >= dev->num_tx_queues); 1409 1410 return i; 1411 } 1412 1413 static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr, 1414 struct netdev_queue *queue, char *buf) 1415 { 1416 struct net_device *dev = queue->dev; 1417 int num_tc, tc, index, ret; 1418 1419 if (!netif_is_multiqueue(dev)) 1420 return -ENOENT; 1421 1422 ret = sysfs_rtnl_lock(kobj, attr, queue->dev); 1423 if (ret) 1424 return ret; 1425 1426 index = get_netdev_queue_index(queue); 1427 1428 /* If queue belongs to subordinate dev use its TC mapping */ 1429 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; 1430 1431 num_tc = dev->num_tc; 1432 tc = netdev_txq_to_tc(dev, index); 1433 1434 rtnl_unlock(); 1435 1436 if (tc < 0) 1437 return -EINVAL; 1438 1439 /* We can report the traffic class one of two ways: 1440 * Subordinate device traffic classes are reported with the traffic 1441 * class first, and then the subordinate class so for example TC0 on 1442 * subordinate device 2 will be reported as "0-2". If the queue 1443 * belongs to the root device it will be reported with just the 1444 * traffic class, so just "0" for TC 0 for example. 1445 */ 1446 return num_tc < 0 ? 
sysfs_emit(buf, "%d%d\n", tc, num_tc) : 1447 sysfs_emit(buf, "%d\n", tc); 1448 } 1449 1450 #ifdef CONFIG_XPS 1451 static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr, 1452 struct netdev_queue *queue, char *buf) 1453 { 1454 return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); 1455 } 1456 1457 static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, 1458 struct netdev_queue *queue, const char *buf, 1459 size_t len) 1460 { 1461 int err, index = get_netdev_queue_index(queue); 1462 struct net_device *dev = queue->dev; 1463 u32 rate = 0; 1464 1465 if (!capable(CAP_NET_ADMIN)) 1466 return -EPERM; 1467 1468 /* The check is also done later; this helps returning early without 1469 * hitting the locking section below. 1470 */ 1471 if (!dev->netdev_ops->ndo_set_tx_maxrate) 1472 return -EOPNOTSUPP; 1473 1474 err = kstrtou32(buf, 10, &rate); 1475 if (err < 0) 1476 return err; 1477 1478 err = sysfs_rtnl_lock(kobj, attr, dev); 1479 if (err) 1480 return err; 1481 1482 err = -EOPNOTSUPP; 1483 netdev_lock_ops(dev); 1484 if (dev->netdev_ops->ndo_set_tx_maxrate) 1485 err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); 1486 netdev_unlock_ops(dev); 1487 1488 if (!err) { 1489 queue->tx_maxrate = rate; 1490 rtnl_unlock(); 1491 return len; 1492 } 1493 1494 rtnl_unlock(); 1495 return err; 1496 } 1497 1498 static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init 1499 = __ATTR_RW(tx_maxrate); 1500 #endif 1501 1502 static struct netdev_queue_attribute queue_trans_timeout __ro_after_init 1503 = __ATTR_RO(tx_timeout); 1504 1505 static struct netdev_queue_attribute queue_traffic_class __ro_after_init 1506 = __ATTR_RO(traffic_class); 1507 1508 #ifdef CONFIG_BQL 1509 /* 1510 * Byte queue limits sysfs structures and functions. 
1511 */ 1512 static ssize_t bql_show(char *buf, unsigned int value) 1513 { 1514 return sysfs_emit(buf, "%u\n", value); 1515 } 1516 1517 static ssize_t bql_set(const char *buf, const size_t count, 1518 unsigned int *pvalue) 1519 { 1520 unsigned int value; 1521 int err; 1522 1523 if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) { 1524 value = DQL_MAX_LIMIT; 1525 } else { 1526 err = kstrtouint(buf, 10, &value); 1527 if (err < 0) 1528 return err; 1529 if (value > DQL_MAX_LIMIT) 1530 return -EINVAL; 1531 } 1532 1533 *pvalue = value; 1534 1535 return count; 1536 } 1537 1538 static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr, 1539 struct netdev_queue *queue, char *buf) 1540 { 1541 struct dql *dql = &queue->dql; 1542 1543 return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); 1544 } 1545 1546 static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr, 1547 struct netdev_queue *queue, const char *buf, 1548 size_t len) 1549 { 1550 struct dql *dql = &queue->dql; 1551 unsigned int value; 1552 int err; 1553 1554 err = kstrtouint(buf, 10, &value); 1555 if (err < 0) 1556 return err; 1557 1558 dql->slack_hold_time = msecs_to_jiffies(value); 1559 1560 return len; 1561 } 1562 1563 static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init 1564 = __ATTR(hold_time, 0644, 1565 bql_show_hold_time, bql_set_hold_time); 1566 1567 static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr, 1568 struct netdev_queue *queue, char *buf) 1569 { 1570 struct dql *dql = &queue->dql; 1571 1572 return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); 1573 } 1574 1575 static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr, 1576 struct netdev_queue *queue, const char *buf, 1577 size_t len) 1578 { 1579 struct dql *dql = &queue->dql; 1580 unsigned int value; 1581 int err; 1582 1583 err = kstrtouint(buf, 10, &value); 1584 if (err < 0) 1585 return err; 1586 1587 value = msecs_to_jiffies(value); 1588 if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG)) 1589 return -ERANGE; 1590 1591 if (!dql->stall_thrs && value) 1592 dql->last_reap = jiffies; 1593 /* Force last_reap to be live */ 1594 smp_wmb(); 1595 dql->stall_thrs = value; 1596 1597 return len; 1598 } 1599 1600 static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = 1601 __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); 1602 1603 static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr, 1604 struct netdev_queue *queue, char *buf) 1605 { 1606 return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); 1607 } 1608 1609 static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr, 1610 struct netdev_queue *queue, const char *buf, 1611 size_t len) 1612 { 1613 WRITE_ONCE(queue->dql.stall_max, 0); 1614 return len; 1615 } 1616 1617 static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = 1618 __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); 1619 1620 static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr, 1621 struct netdev_queue *queue, char *buf) 1622 { 1623 struct dql *dql = &queue->dql; 1624 1625 return sysfs_emit(buf, "%lu\n", dql->stall_cnt); 1626 } 1627 1628 static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = 1629 __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); 1630 1631 static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr, 1632 struct 
netdev_queue *queue, char *buf) 1633 { 1634 struct dql *dql = &queue->dql; 1635 1636 return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed); 1637 } 1638 1639 static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = 1640 __ATTR(inflight, 0444, bql_show_inflight, NULL); 1641 1642 #define BQL_ATTR(NAME, FIELD) \ 1643 static ssize_t bql_show_ ## NAME(struct kobject *kobj, \ 1644 struct attribute *attr, \ 1645 struct netdev_queue *queue, char *buf) \ 1646 { \ 1647 return bql_show(buf, queue->dql.FIELD); \ 1648 } \ 1649 \ 1650 static ssize_t bql_set_ ## NAME(struct kobject *kobj, \ 1651 struct attribute *attr, \ 1652 struct netdev_queue *queue, \ 1653 const char *buf, size_t len) \ 1654 { \ 1655 return bql_set(buf, len, &queue->dql.FIELD); \ 1656 } \ 1657 \ 1658 static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \ 1659 = __ATTR(NAME, 0644, \ 1660 bql_show_ ## NAME, bql_set_ ## NAME) 1661 1662 BQL_ATTR(limit, limit); 1663 BQL_ATTR(limit_max, max_limit); 1664 BQL_ATTR(limit_min, min_limit); 1665 1666 static struct attribute *dql_attrs[] __ro_after_init = { 1667 &bql_limit_attribute.attr, 1668 &bql_limit_max_attribute.attr, 1669 &bql_limit_min_attribute.attr, 1670 &bql_hold_time_attribute.attr, 1671 &bql_inflight_attribute.attr, 1672 &bql_stall_thrs_attribute.attr, 1673 &bql_stall_cnt_attribute.attr, 1674 &bql_stall_max_attribute.attr, 1675 NULL 1676 }; 1677 1678 static const struct attribute_group dql_group = { 1679 .name = "byte_queue_limits", 1680 .attrs = dql_attrs, 1681 }; 1682 #else 1683 /* Fake declaration, all the code using it should be dead */ 1684 static const struct attribute_group dql_group = {}; 1685 #endif /* CONFIG_BQL */ 1686 1687 #ifdef CONFIG_XPS 1688 static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, 1689 int tc, char *buf, enum xps_map_type type) 1690 { 1691 struct xps_dev_maps *dev_maps; 1692 unsigned long *mask; 1693 unsigned int nr_ids; 1694 int j, len; 1695 1696 rcu_read_lock(); 1697 dev_maps = rcu_dereference(dev->xps_maps[type]); 1698 1699 /* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0 1700 * when dev_maps hasn't been allocated yet, to be backward compatible. 1701 */ 1702 nr_ids = dev_maps ? dev_maps->nr_ids : 1703 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues); 1704 1705 mask = bitmap_zalloc(nr_ids, GFP_NOWAIT); 1706 if (!mask) { 1707 rcu_read_unlock(); 1708 return -ENOMEM; 1709 } 1710 1711 if (!dev_maps || tc >= dev_maps->num_tc) 1712 goto out_no_maps; 1713 1714 for (j = 0; j < nr_ids; j++) { 1715 int i, tci = j * dev_maps->num_tc + tc; 1716 struct xps_map *map; 1717 1718 map = rcu_dereference(dev_maps->attr_map[tci]); 1719 if (!map) 1720 continue; 1721 1722 for (i = map->len; i--;) { 1723 if (map->queues[i] == index) { 1724 __set_bit(j, mask); 1725 break; 1726 } 1727 } 1728 } 1729 out_no_maps: 1730 rcu_read_unlock(); 1731 1732 len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids); 1733 bitmap_free(mask); 1734 1735 return len < PAGE_SIZE ? 
len : -EINVAL; 1736 } 1737 1738 static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr, 1739 struct netdev_queue *queue, char *buf) 1740 { 1741 struct net_device *dev = queue->dev; 1742 unsigned int index; 1743 int len, tc, ret; 1744 1745 if (!netif_is_multiqueue(dev)) 1746 return -ENOENT; 1747 1748 index = get_netdev_queue_index(queue); 1749 1750 ret = sysfs_rtnl_lock(kobj, attr, queue->dev); 1751 if (ret) 1752 return ret; 1753 1754 /* If queue belongs to subordinate dev use its map */ 1755 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; 1756 1757 tc = netdev_txq_to_tc(dev, index); 1758 if (tc < 0) { 1759 rtnl_unlock(); 1760 return -EINVAL; 1761 } 1762 1763 /* Increase the net device refcnt to make sure it won't be freed while 1764 * xps_queue_show is running. 1765 */ 1766 dev_hold(dev); 1767 rtnl_unlock(); 1768 1769 len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); 1770 1771 dev_put(dev); 1772 return len; 1773 } 1774 1775 static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr, 1776 struct netdev_queue *queue, const char *buf, 1777 size_t len) 1778 { 1779 struct net_device *dev = queue->dev; 1780 unsigned int index; 1781 cpumask_var_t mask; 1782 int err; 1783 1784 if (!netif_is_multiqueue(dev)) 1785 return -ENOENT; 1786 1787 if (!capable(CAP_NET_ADMIN)) 1788 return -EPERM; 1789 1790 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1791 return -ENOMEM; 1792 1793 index = get_netdev_queue_index(queue); 1794 1795 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 1796 if (err) { 1797 free_cpumask_var(mask); 1798 return err; 1799 } 1800 1801 err = sysfs_rtnl_lock(kobj, attr, dev); 1802 if (err) { 1803 free_cpumask_var(mask); 1804 return err; 1805 } 1806 1807 err = netif_set_xps_queue(dev, mask, index); 1808 rtnl_unlock(); 1809 1810 free_cpumask_var(mask); 1811 1812 return err ? : len; 1813 } 1814 1815 static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init 1816 = __ATTR_RW(xps_cpus); 1817 1818 static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr, 1819 struct netdev_queue *queue, char *buf) 1820 { 1821 struct net_device *dev = queue->dev; 1822 unsigned int index; 1823 int tc, ret; 1824 1825 index = get_netdev_queue_index(queue); 1826 1827 ret = sysfs_rtnl_lock(kobj, attr, dev); 1828 if (ret) 1829 return ret; 1830 1831 tc = netdev_txq_to_tc(dev, index); 1832 1833 /* Increase the net device refcnt to make sure it won't be freed while 1834 * xps_queue_show is running. 1835 */ 1836 dev_hold(dev); 1837 rtnl_unlock(); 1838 1839 ret = tc >= 0 ? 
xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL; 1840 dev_put(dev); 1841 return ret; 1842 } 1843 1844 static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr, 1845 struct netdev_queue *queue, const char *buf, 1846 size_t len) 1847 { 1848 struct net_device *dev = queue->dev; 1849 struct net *net = dev_net(dev); 1850 unsigned long *mask; 1851 unsigned int index; 1852 int err; 1853 1854 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1855 return -EPERM; 1856 1857 mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); 1858 if (!mask) 1859 return -ENOMEM; 1860 1861 index = get_netdev_queue_index(queue); 1862 1863 err = bitmap_parse(buf, len, mask, dev->num_rx_queues); 1864 if (err) { 1865 bitmap_free(mask); 1866 return err; 1867 } 1868 1869 err = sysfs_rtnl_lock(kobj, attr, dev); 1870 if (err) { 1871 bitmap_free(mask); 1872 return err; 1873 } 1874 1875 cpus_read_lock(); 1876 err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS); 1877 cpus_read_unlock(); 1878 1879 rtnl_unlock(); 1880 1881 bitmap_free(mask); 1882 return err ? : len; 1883 } 1884 1885 static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init 1886 = __ATTR_RW(xps_rxqs); 1887 #endif /* CONFIG_XPS */ 1888 1889 static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { 1890 &queue_trans_timeout.attr, 1891 &queue_traffic_class.attr, 1892 #ifdef CONFIG_XPS 1893 &xps_cpus_attribute.attr, 1894 &xps_rxqs_attribute.attr, 1895 &queue_tx_maxrate.attr, 1896 #endif 1897 NULL 1898 }; 1899 ATTRIBUTE_GROUPS(netdev_queue_default); 1900 1901 static void netdev_queue_release(struct kobject *kobj) 1902 { 1903 struct netdev_queue *queue = to_netdev_queue(kobj); 1904 1905 memset(kobj, 0, sizeof(*kobj)); 1906 netdev_put(queue->dev, &queue->dev_tracker); 1907 } 1908 1909 static const void *netdev_queue_namespace(const struct kobject *kobj) 1910 { 1911 struct netdev_queue *queue = to_netdev_queue(kobj); 1912 struct device *dev = &queue->dev->dev; 1913 const void *ns = NULL; 1914 1915 if (dev->class && dev->class->namespace) 1916 ns = dev->class->namespace(dev); 1917 1918 return ns; 1919 } 1920 1921 static void netdev_queue_get_ownership(const struct kobject *kobj, 1922 kuid_t *uid, kgid_t *gid) 1923 { 1924 const struct net *net = netdev_queue_namespace(kobj); 1925 1926 net_ns_get_ownership(net, uid, gid); 1927 } 1928 1929 static const struct kobj_type netdev_queue_ktype = { 1930 .sysfs_ops = &netdev_queue_sysfs_ops, 1931 .release = netdev_queue_release, 1932 .namespace = netdev_queue_namespace, 1933 .get_ownership = netdev_queue_get_ownership, 1934 }; 1935 1936 static bool netdev_uses_bql(const struct net_device *dev) 1937 { 1938 if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE)) 1939 return false; 1940 1941 return IS_ENABLED(CONFIG_BQL); 1942 } 1943 1944 static int netdev_queue_add_kobject(struct net_device *dev, int index) 1945 { 1946 struct netdev_queue *queue = dev->_tx + index; 1947 struct kobject *kobj = &queue->kobj; 1948 int error = 0; 1949 1950 /* Tx queues are cleared in netdev_queue_release to allow later 1951 * re-registration. This is triggered when their kobj refcount is 1952 * dropped. 1953 * 1954 * If a queue is removed while both a read (or write) operation and a 1955 * the re-addition of the same queue are pending (waiting on rntl_lock) 1956 * it might happen that the re-addition will execute before the read, 1957 * making the initial removal to never happen (queue's kobj refcount 1958 * won't drop enough because of the pending read). 
In such rare case, 1959 * return to allow the removal operation to complete. 1960 */ 1961 if (unlikely(kobj->state_initialized)) { 1962 netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed"); 1963 return -EAGAIN; 1964 } 1965 1966 /* Kobject_put later will trigger netdev_queue_release call 1967 * which decreases dev refcount: Take that reference here 1968 */ 1969 netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); 1970 1971 kobj->kset = dev->queues_kset; 1972 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, 1973 "tx-%u", index); 1974 if (error) 1975 goto err; 1976 1977 queue->groups = netdev_queue_default_groups; 1978 error = sysfs_create_groups(kobj, queue->groups); 1979 if (error) 1980 goto err; 1981 1982 if (netdev_uses_bql(dev)) { 1983 error = sysfs_create_group(kobj, &dql_group); 1984 if (error) 1985 goto err_default_groups; 1986 } 1987 1988 kobject_uevent(kobj, KOBJ_ADD); 1989 return 0; 1990 1991 err_default_groups: 1992 sysfs_remove_groups(kobj, queue->groups); 1993 err: 1994 kobject_put(kobj); 1995 return error; 1996 } 1997 1998 static int tx_queue_change_owner(struct net_device *ndev, int index, 1999 kuid_t kuid, kgid_t kgid) 2000 { 2001 struct netdev_queue *queue = ndev->_tx + index; 2002 struct kobject *kobj = &queue->kobj; 2003 int error; 2004 2005 error = sysfs_change_owner(kobj, kuid, kgid); 2006 if (error) 2007 return error; 2008 2009 if (netdev_uses_bql(ndev)) 2010 error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid); 2011 2012 return error; 2013 } 2014 #endif /* CONFIG_SYSFS */ 2015 2016 int 2017 netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) 2018 { 2019 #ifdef CONFIG_SYSFS 2020 int i; 2021 int error = 0; 2022 2023 /* Tx queue kobjects are allowed to be updated when a device is being 2024 * unregistered, but solely to remove queues from qdiscs. Any path 2025 * adding queues should be fixed. 
int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!refcount_read(&dev_net(dev)->ns.count))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		sysfs_remove_groups(&queue->kobj, queue->groups);
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

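/* The callbacks below implement the KOBJ_NS_TYPE_NET kobject namespace type
 * (net_ns_type_operations), which sysfs uses to filter "net" class devices so
 * that each network namespace only sees its own interfaces.
 */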
static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static void *net_grab_current_ns(void)
{
	struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (ns)
		refcount_inc(&ns->passive);
#endif
	return ns;
}

static const void *net_initial_ns(void)
{
	return &init_net;
}

static const void *net_netlink_ns(struct sock *sk)
{
	return sock_net(sk);
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what RtNetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}

/*
 * netdev_release -- destroy and free a dead device.
 * Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	kvfree(dev);
}

static const void *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return dev_net(dev);
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

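/* The "net" device class: every net_device registered through
 * netdev_register_kobject() becomes a member, which is what provides the
 * /sys/class/net/<iface> directory and its attribute groups.
 */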
static const struct class net_class = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding with the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
#endif

/* Delete sysfs entries but hold kobject reference until after all
 * netdev references are gone.
 */
void netdev_unregister_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;

	if (!refcount_read(&dev_net(ndev)->ns.count))
		dev_set_uevent_suppress(dev, 1);

	kobject_get(&dev->kobj);

	remove_queue_kobjects(ndev);

	pm_runtime_set_memalloc_noio(dev, false);

	device_del(dev);
}

/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const void *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const void *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);

int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}