// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
	return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}

/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
 * when unregistering a net device and accessing associated sysfs files. The
 * potential deadlock is as follows:
 *
 * CPU 0                                          CPU 1
 *
 * rtnl_lock                                      vfs_read
 * unregister_netdevice_many                      kernfs_seq_start
 *   device_del / kobject_put                       kernfs_get_active (kn->active++)
 *     kernfs_drain                                 sysfs_kf_seq_show
 *       wait_event(                                  rtnl_lock
 *          kn->active == KN_DEACTIVATED_BIAS)          -> waits on CPU 0 to release
 *            -> waits on CPU 1 to decrease kn->active     the rtnl lock.
 *
 * The historical fix was to use rtnl_trylock with restart_syscall to bail out
 * of sysfs operations when the lock couldn't be taken. This fixed the above
 * issue as it allowed CPU 1 to bail out of the ABBA situation.
 *
 * But it came with performance issues, as syscalls are being restarted in
 * loops when there was contention on the rtnl lock, with huge slowdowns in
 * specific scenarios (e.g. lots of virtual interfaces created and userspace
 * daemons querying their attributes).
 *
 * The idea below is to bail out of the active kernfs_node protection
 * (kn->active) while trying to take the rtnl lock.
 *
 * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
 * net device is guaranteed to be alive if this returns successfully.
 */
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
			   struct net_device *ndev)
{
	struct kernfs_node *kn;
	int ret = 0;

	/* First, we hold a reference to the net device as the unregistration
	 * path might run in parallel. This will ensure the net device and the
	 * associated sysfs objects won't be freed while we try to take the rtnl
	 * lock.
	 */
	dev_hold(ndev);
	/* sysfs_break_active_protection was introduced to allow self-removal of
	 * devices and their associated sysfs files by bailing out of the
	 * sysfs/kernfs protection. We do this here to allow the unregistration
	 * path to complete in parallel. The following takes a reference on the
	 * kobject and the kernfs_node being accessed.
	 *
	 * This works because we hold a reference onto the net device and the
	 * unregistration path will wait for us eventually in netdev_run_todo
	 * (outside an rtnl lock section).
	 */
	kn = sysfs_break_active_protection(kobj, attr);
	/* We can now try to take the rtnl lock. This can't deadlock us as the
	 * unregistration path is able to drain sysfs files (kernfs_node) thanks
	 * to the above dance.
	 */
	if (rtnl_lock_interruptible()) {
		ret = -ERESTARTSYS;
		goto unbreak;
	}
	/* Check dismantle on the device hasn't started, otherwise deny the
	 * operation.
	 */
	if (!dev_isalive(ndev)) {
		rtnl_unlock();
		ret = -ENODEV;
		goto unbreak;
	}
	/* We are now sure the device dismantle hasn't started nor that it can
	 * start before we exit the locking section as we hold the rtnl lock.
	 * There's no need to keep unbreaking the sysfs protection nor to hold
	 * a net device reference from that point; that was only needed to take
	 * the rtnl lock.
	 */
unbreak:
	sysfs_unbreak_active_protection(kn);
	dev_put(ndev);

	return ret;
}

/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = (*format)(ndev, buf);
	rcu_read_unlock();

	return ret;
}

/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, READ_ONCE(dev->field));	\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}									\

#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)

/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len,
			    int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		goto err;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		goto err;

	ret = (*set)(netdev, new);
	if (ret == 0)
		ret = len;

	rtnl_unlock();
err:
	return ret;
}

/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */
static ssize_t
netdev_lock_store(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t len,
		  int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		return ret;

	netdev_lock(netdev);
	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	netdev_unlock(netdev);

	return ret;
}

NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);

static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);

static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
	return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}

static ssize_t name_assign_type_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
		ret = netdev_show(dev, attr, buf, format_name_assign_type);

	return ret;
}
static DEVICE_ATTR_RO(name_assign_type);

/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	down_read(&dev_addr_sem);

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
	rcu_read_unlock();

	up_read(&dev_addr_sem);
	return ret;
}
static DEVICE_ATTR_RO(address);

static ssize_t broadcast_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	int ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
	rcu_read_unlock();
	return ret;
}
static DEVICE_ATTR_RO(broadcast);

static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
	if (!netif_running(dev))
		return -EINVAL;
	return dev_change_carrier(dev, (bool)new_carrier);
}

static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);

	/* The check is also done in change_carrier; this helps returning early
	 * without hitting the locking section in netdev_store.
	 */
	if (!netdev->netdev_ops->ndo_change_carrier)
		return -EOPNOTSUPP;

	return netdev_store(dev, attr, buf, len, change_carrier);
}

static ssize_t carrier_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	if (netif_running(netdev)) {
		/* Synchronize carrier state with link watch,
		 * see also rtnl_getlink().
		 */
		linkwatch_sync_dev(netdev);

		ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
	}

	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RW(carrier);

static ssize_t speed_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd))
			ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(speed);

static ssize_t duplex_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
			const char *duplex;

			switch (cmd.base.duplex) {
			case DUPLEX_HALF:
				duplex = "half";
				break;
			case DUPLEX_FULL:
				duplex = "full";
				break;
			default:
				duplex = "unknown";
				break;
			}
			ret = sysfs_emit(buf, "%s\n", duplex);
		}
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(duplex);

static ssize_t testing_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(testing);

static ssize_t dormant_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(dormant);

static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};

static ssize_t operstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	unsigned char operstate;

	operstate = READ_ONCE(netdev->operstate);
	if (!netif_running(netdev))
		operstate = IF_OPER_DOWN;

	if (operstate >= ARRAY_SIZE(operstates))
		return -EINVAL; /* should not happen */

	return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);

static ssize_t carrier_changes_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec,
			  atomic_read(&netdev->carrier_up_count) +
			  atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);

static ssize_t carrier_up_count_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);

static ssize_t carrier_down_count_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);

/* read-write attributes */

static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}

static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);

static int change_flags(struct net_device *dev, unsigned long new_flags)
{
	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}

static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW_RW(flags, fmt_hex);

static ssize_t tx_queue_len_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}

static ssize_t gro_flush_timeout_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);

static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
	if (val > S32_MAX)
		return -ERANGE;

	netdev_set_defer_hard_irqs(dev, (u32)val);
	return 0;
}

static ssize_t napi_defer_hard_irqs_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len,
				 change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);

static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	size_t count = len;
	ssize_t ret = 0;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ignore trailing newline */
	if (len > 0 && buf[len - 1] == '\n')
		--count;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_set_alias(netdev, buf, count);
	if (ret < 0)
		goto err;
	ret = len;
	netdev_state_change(netdev);
err:
	rtnl_unlock();

	return ret;
}

static ssize_t ifalias_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	char tmp[IFALIASZ];
	ssize_t ret = 0;

	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
	if (ret > 0)
		ret = sysfs_emit(buf, "%s\n", tmp);
	return ret;
}
static DEVICE_ATTR_RW(ifalias);

static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);

static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
	return dev_change_proto_down(dev, (bool)proto_down);
}

static ssize_t proto_down_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);

static ssize_t phys_port_id_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid;
	ssize_t ret = -EINVAL;

	/* The check is also done in dev_get_phys_port_id; this helps returning
	 * early without hitting the locking section below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_id)
		return -EOPNOTSUPP;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_id(netdev, &ppid);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static ssize_t phys_port_name_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;
	char name[IFNAMSIZ];

	/* The checks are also done in dev_get_phys_port_name; this helps
	 * returning early without hitting the locking section below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_name &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_name(netdev, name, sizeof(name));
	if (!ret)
		ret = sysfs_emit(buf, "%s\n", name);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid = { };
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_port_parent_id; this helps
	 * returning early without hitting the locking section below. This works
	 * because recurse is false when calling dev_get_port_parent_id.
	 */
	if (!netdev->netdev_ops->ndo_get_port_parent_id &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_port_parent_id(netdev, &ppid, false);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);

static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));

	rcu_read_unlock();

	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = dev_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	rcu_read_lock();
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	rcu_read_unlock();
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)

NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);

static struct attribute *netstat_attrs[] __ro_after_init = {
	&dev_attr_rx_packets.attr,
	&dev_attr_tx_packets.attr,
	&dev_attr_rx_bytes.attr,
	&dev_attr_tx_bytes.attr,
	&dev_attr_rx_errors.attr,
	&dev_attr_tx_errors.attr,
	&dev_attr_rx_dropped.attr,
	&dev_attr_tx_dropped.attr,
	&dev_attr_multicast.attr,
	&dev_attr_collisions.attr,
	&dev_attr_rx_length_errors.attr,
	&dev_attr_rx_over_errors.attr,
	&dev_attr_rx_crc_errors.attr,
	&dev_attr_rx_frame_errors.attr,
	&dev_attr_rx_fifo_errors.attr,
	&dev_attr_rx_missed_errors.attr,
	&dev_attr_tx_aborted_errors.attr,
	&dev_attr_tx_carrier_errors.attr,
	&dev_attr_tx_fifo_errors.attr,
	&dev_attr_tx_heartbeat_errors.attr,
	&dev_attr_tx_window_errors.attr,
	&dev_attr_rx_compressed.attr,
	&dev_attr_tx_compressed.attr,
	&dev_attr_rx_nohandler.attr,
	NULL
};

static const struct attribute_group netstat_group = {
	.name = "statistics",
	.attrs = netstat_attrs,
};

static struct attribute *wireless_attrs[] = {
	NULL
};

static const struct attribute_group wireless_group = {
	.name = "wireless",
	.attrs = wireless_attrs,
};

static bool wireless_group_needed(struct net_device *ndev)
{
#if IS_ENABLED(CONFIG_CFG80211)
	if (ndev->ieee80211_ptr)
		return true;
#endif
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
	if (ndev->wireless_handlers)
		return true;
#endif
	return false;
}

#else /* CONFIG_SYSFS */
#define net_class_groups	NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_SYSFS
#define to_rx_queue_attr(_attr) \
	container_of(_attr, struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
				  char *buf)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
				   const char *buf, size_t count)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
	.show = rx_queue_attr_show,
	.store = rx_queue_attr_store,
};

#ifdef CONFIG_RPS
static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
	struct rps_map *map;
	cpumask_var_t mask;
	int i, len;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	map = rcu_dereference(queue->rps_map);
	if (map)
		for (i = 0; i < map->len; i++)
			cpumask_set_cpu(map->cpus[i], mask);

	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	rcu_read_unlock();
	free_cpumask_var(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
					cpumask_var_t mask)
{
	static DEFINE_MUTEX(rps_map_mutex);
	struct rps_map *old_map, *map;
	int cpu, i;

	map = kzalloc(max_t(unsigned int,
			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	i = 0;
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		map->cpus[i++] = cpu;

	if (i) {
		map->len = i;
	} else {
		kfree(map);
		map = NULL;
	}

	mutex_lock(&rps_map_mutex);
	old_map = rcu_dereference_protected(queue->rps_map,
					    mutex_is_locked(&rps_map_mutex));
	rcu_assign_pointer(queue->rps_map, map);

	if (map)
		static_branch_inc(&rps_needed);
	if (old_map)
		static_branch_dec(&rps_needed);

	mutex_unlock(&rps_map_mutex);

	if (old_map)
		kfree_rcu(old_map, rcu);
	return 0;
}

int rps_cpumask_housekeeping(struct cpumask *mask)
{
	if (!cpumask_empty(mask)) {
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
		if (cpumask_empty(mask))
			return -EINVAL;
	}
	return 0;
}

static ssize_t store_rps_map(struct netdev_rx_queue *queue,
			     const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err)
		goto out;

	err = rps_cpumask_housekeeping(mask);
	if (err)
		goto out;

	err = netdev_rx_queue_set_rps_mask(queue, mask);

out:
	free_cpumask_var(mask);
	return err ? : len;
}

static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					   char *buf)
{
	struct rps_dev_flow_table *flow_table;
	unsigned long val = 0;

	rcu_read_lock();
	flow_table = rcu_dereference(queue->rps_flow_table);
	if (flow_table)
		val = (unsigned long)flow_table->mask + 1;
	rcu_read_unlock();

	return sysfs_emit(buf, "%lu\n", val);
}

static void rps_dev_flow_table_release(struct rcu_head *rcu)
{
	struct rps_dev_flow_table *table = container_of(rcu,
	    struct rps_dev_flow_table, rcu);
	vfree(table);
}

static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					    const char *buf, size_t len)
{
	unsigned long mask, count;
	struct rps_dev_flow_table *table, *old_table;
	static DEFINE_SPINLOCK(rps_dev_flow_lock);
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &count);
	if (rc < 0)
		return rc;

	if (count) {
		mask = count - 1;
		/* mask = roundup_pow_of_two(count) - 1;
		 * without overflows...
		 */
		while ((mask | (mask >> 1)) != mask)
			mask |= (mask >> 1);
		/* On 64 bit arches, must check mask fits in table->mask (u32),
		 * and on 32bit arches, must check
		 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
		 */
#if BITS_PER_LONG > 32
		if (mask > (unsigned long)(u32)mask)
			return -EINVAL;
#else
		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
				/ sizeof(struct rps_dev_flow)) {
			/* Enforce a limit to prevent overflow */
			return -EINVAL;
		}
#endif
		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
		if (!table)
			return -ENOMEM;

		table->mask = mask;
		for (count = 0; count <= mask; count++)
			table->flows[count].cpu = RPS_NO_CPU;
	} else {
		table = NULL;
	}

	spin_lock(&rps_dev_flow_lock);
	old_table = rcu_dereference_protected(queue->rps_flow_table,
					      lockdep_is_held(&rps_dev_flow_lock));
	rcu_assign_pointer(queue->rps_flow_table, table);
	spin_unlock(&rps_dev_flow_lock);

	if (old_table)
		call_rcu(&old_table->rcu, rps_dev_flow_table_release);

	return len;
}

static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);

static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
	= __ATTR(rps_flow_cnt, 0644,
		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */

static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
	&rps_cpus_attribute.attr,
	&rps_dev_flow_table_cnt_attribute.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(rx_queue_default);

static void rx_queue_release(struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
#ifdef CONFIG_RPS
	struct rps_map *map;
	struct rps_dev_flow_table *flow_table;

	map = rcu_dereference_protected(queue->rps_map, 1);
	if (map) {
		RCU_INIT_POINTER(queue->rps_map, NULL);
		kfree_rcu(map, rcu);
	}

	flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
	if (flow_table) {
		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
	}
#endif

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *rx_queue_namespace(const struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->namespace)
		ns = dev->class->namespace(dev);

	return ns;
}

static void rx_queue_get_ownership(const struct kobject *kobj,
				   kuid_t *uid, kgid_t *gid)
{
	const struct net *net = rx_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type rx_queue_ktype = {
	.sysfs_ops = &rx_queue_sysfs_ops,
	.release = rx_queue_release,
	.namespace = rx_queue_namespace,
	.get_ownership = rx_queue_get_ownership,
};

static int rx_queue_default_mask(struct net_device *dev,
				 struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
	struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);

	if (rps_default_mask && !cpumask_empty(rps_default_mask))
		return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
#endif
	return 0;
}

static int rx_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Rx queues are cleared in rx_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and the
	 * re-addition of the same queue are pending (waiting on rtnl_lock), it
	 * might happen that the re-addition will execute before the read,
	 * making the initial removal never happen (the queue's kobj refcount
	 * won't drop enough because of the pending read). In such a rare case,
	 * return to allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
		return -EAGAIN;
	}

	/* Kobject_put later will trigger rx_queue_release call which
	 * decreases dev refcount: Take that reference here
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
				     "rx-%u", index);
	if (error)
		goto err;

	queue->groups = rx_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (dev->sysfs_rx_queue_group) {
		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
		if (error)
			goto err_default_groups;
	}

	error = rx_queue_default_mask(dev, queue);
	if (error)
		goto err_default_groups;

	kobject_uevent(kobj, KOBJ_ADD);

	return error;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}

static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
				 kgid_t kgid)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (dev->sysfs_rx_queue_group)
		error = sysfs_group_change_owner(
			kobj, dev->sysfs_rx_queue_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_rx_queue *queue = &dev->_rx[i];
		struct kobject *kobj = &queue->kobj;

		if (!refcount_read(&dev_net(dev)->ns.count))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		sysfs_remove_groups(kobj, queue->groups);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}

static int net_rx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = 0; i < num; i++) {
		error = rx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif
}

#ifdef CONFIG_SYSFS
/*
 * netdev_queue sysfs structures and functions.
 */
struct netdev_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
			struct netdev_queue *queue, char *buf);
	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
			 struct netdev_queue *queue, const char *buf,
			 size_t len);
};
#define to_netdev_queue_attr(_attr) \
	container_of(_attr, struct netdev_queue_attribute, attr)

#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)

static ssize_t netdev_queue_attr_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(kobj, attr, queue, buf);
}

static ssize_t netdev_queue_attr_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(kobj, attr, queue, buf, count);
}

static const struct sysfs_ops netdev_queue_sysfs_ops = {
	.show = netdev_queue_attr_show,
	.store = netdev_queue_attr_store,
};

static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);

	return sysfs_emit(buf, fmt_ulong, trans_timeout);
}

static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
	struct net_device *dev = queue->dev;
	unsigned int i;

	i = queue - dev->_tx;
	BUG_ON(i >= dev->num_tx_queues);

	return i;
}

static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	int num_tc, tc, index, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	index = get_netdev_queue_index(queue);

	/* If queue belongs to subordinate dev use its TC mapping */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	num_tc = dev->num_tc;
	tc = netdev_txq_to_tc(dev, index);

	rtnl_unlock();

	if (tc < 0)
		return -EINVAL;

	/* We can report the traffic class one of two ways:
	 * Subordinate device traffic classes are reported with the traffic
	 * class first, and then the subordinate class so for example TC0 on
	 * subordinate device 2 will be reported as "0-2". If the queue
	 * belongs to the root device it will be reported with just the
	 * traffic class, so just "0" for TC 0 for example.
	 */
	return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
			    sysfs_emit(buf, "%d\n", tc);
}

#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}

static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
				struct netdev_queue *queue, const char *buf,
				size_t len)
{
	int err, index = get_netdev_queue_index(queue);
	struct net_device *dev = queue->dev;
	u32 rate = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The check is also done later; this helps returning early without
	 * hitting the locking section below.
	 */
	if (!dev->netdev_ops->ndo_set_tx_maxrate)
		return -EOPNOTSUPP;

	err = kstrtou32(buf, 10, &rate);
	if (err < 0)
		return err;

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err)
		return err;

	err = -EOPNOTSUPP;
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);

	if (!err) {
		queue->tx_maxrate = rate;
		rtnl_unlock();
		return len;
	}

	rtnl_unlock();
	return err;
}

static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
	= __ATTR_RW(tx_maxrate);
#endif

static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
	= __ATTR_RO(tx_timeout);

static struct netdev_queue_attribute queue_traffic_class __ro_after_init
	= __ATTR_RO(traffic_class);

#ifdef CONFIG_BQL
/*
 * Byte queue limits sysfs structures and functions.
 */
static ssize_t bql_show(char *buf, unsigned int value)
{
	return sysfs_emit(buf, "%u\n", value);
}

static ssize_t bql_set(const char *buf, const size_t count,
		       unsigned int *pvalue)
{
	unsigned int value;
	int err;

	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
		value = DQL_MAX_LIMIT;
	} else {
		err = kstrtouint(buf, 10, &value);
		if (err < 0)
			return err;
		if (value > DQL_MAX_LIMIT)
			return -EINVAL;
	}

	*pvalue = value;

	return count;
}

static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}

static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	dql->slack_hold_time = msecs_to_jiffies(value);

	return len;
}

static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
	= __ATTR(hold_time, 0644,
		 bql_show_hold_time, bql_set_hold_time);

static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
				   struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}

static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, const char *buf,
				  size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	value = msecs_to_jiffies(value);
	if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
		return -ERANGE;

	if (!dql->stall_thrs && value)
		dql->last_reap = jiffies;
	/* Force last_reap to be live */
	smp_wmb();
	dql->stall_thrs = value;

	return len;
}

static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
	__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);

static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}

static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	WRITE_ONCE(queue->dql.stall_max, 0);
	return len;
}

static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
	__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);

static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%lu\n", dql->stall_cnt);
}

static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
	__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);

static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}

static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
	__ATTR(inflight, 0444, bql_show_inflight, NULL);

#define BQL_ATTR(NAME, FIELD)						\
static ssize_t bql_show_ ## NAME(struct kobject *kobj,			\
				 struct attribute *attr,		\
				 struct netdev_queue *queue, char *buf)	\
{									\
	return bql_show(buf, queue->dql.FIELD);				\
}									\
									\
static ssize_t bql_set_ ## NAME(struct kobject *kobj,			\
				struct attribute *attr,			\
				struct netdev_queue *queue,		\
				const char *buf, size_t len)		\
{									\
	return bql_set(buf, len, &queue->dql.FIELD);			\
}									\
									\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
	= __ATTR(NAME, 0644,						\
		 bql_show_ ## NAME, bql_set_ ## NAME)

BQL_ATTR(limit, limit);
BQL_ATTR(limit_max, max_limit);
BQL_ATTR(limit_min, min_limit);

static struct attribute *dql_attrs[] __ro_after_init = {
	&bql_limit_attribute.attr,
	&bql_limit_max_attribute.attr,
	&bql_limit_min_attribute.attr,
	&bql_hold_time_attribute.attr,
	&bql_inflight_attribute.attr,
	&bql_stall_thrs_attribute.attr,
	&bql_stall_cnt_attribute.attr,
	&bql_stall_max_attribute.attr,
	NULL
};

static const struct attribute_group dql_group = {
	.name = "byte_queue_limits",
	.attrs = dql_attrs,
};
#else
/* Fake declaration, all the code using it should be dead */
static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */

#ifdef CONFIG_XPS
static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
			      int tc, char *buf, enum xps_map_type type)
{
	struct xps_dev_maps *dev_maps;
	unsigned long *mask;
	unsigned int nr_ids;
	int j, len;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps[type]);

	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
	 * when dev_maps hasn't been allocated yet, to be backward compatible.
	 */
	nr_ids = dev_maps ? dev_maps->nr_ids :
		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);

	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
	if (!mask) {
		rcu_read_unlock();
		return -ENOMEM;
	}

	if (!dev_maps || tc >= dev_maps->num_tc)
		goto out_no_maps;

	for (j = 0; j < nr_ids; j++) {
		int i, tci = j * dev_maps->num_tc + tc;
		struct xps_map *map;

		map = rcu_dereference(dev_maps->attr_map[tci]);
		if (!map)
			continue;

		for (i = map->len; i--;) {
			if (map->queues[i] == index) {
				__set_bit(j, mask);
				break;
			}
		}
	}
out_no_maps:
	rcu_read_unlock();

	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
	bitmap_free(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int len, tc, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	/* If queue belongs to subordinate dev use its map */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	tc = netdev_txq_to_tc(dev, index);
	if (tc < 0) {
		rtnl_unlock();
		return -EINVAL;
	}

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);

	dev_put(dev);
	return len;
}

static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	cpumask_var_t mask;
	int err;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = netif_set_xps_queue(dev, mask, index);
	rtnl_unlock();

	free_cpumask_var(mask);

	return err ? : len;
}

static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
	= __ATTR_RW(xps_cpus);

static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int tc, ret;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, dev);
	if (ret)
		return ret;

	tc = netdev_txq_to_tc(dev, index);

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
	dev_put(dev);
	return ret;
}

static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	struct net *net = dev_net(dev);
	unsigned long *mask;
	unsigned int index;
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	cpus_read_lock();
	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
	cpus_read_unlock();

	rtnl_unlock();

	bitmap_free(mask);
	return err ? : len;
}

static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
	= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */

static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
	&queue_trans_timeout.attr,
	&queue_traffic_class.attr,
#ifdef CONFIG_XPS
	&xps_cpus_attribute.attr,
	&xps_rxqs_attribute.attr,
	&queue_tx_maxrate.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(netdev_queue_default);

static void netdev_queue_release(struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *netdev_queue_namespace(const struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->namespace)
		ns = dev->class->namespace(dev);

	return ns;
}

static void netdev_queue_get_ownership(const struct kobject *kobj,
				       kuid_t *uid, kgid_t *gid)
{
	const struct net *net = netdev_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type netdev_queue_ktype = {
	.sysfs_ops = &netdev_queue_sysfs_ops,
	.release = netdev_queue_release,
	.namespace = netdev_queue_namespace,
	.get_ownership = netdev_queue_get_ownership,
};

static bool netdev_uses_bql(const struct net_device *dev)
{
	if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
		return false;

	return IS_ENABLED(CONFIG_BQL);
}

static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_queue *queue = dev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Tx queues are cleared in netdev_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and the
	 * re-addition of the same queue are pending (waiting on rtnl_lock), it
	 * might happen that the re-addition will execute before the read,
	 * making the initial removal never happen (the queue's kobj refcount
	 * won't drop enough because of the pending read). In such a rare case,
	 * return to allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
		return -EAGAIN;
	}

	/* Kobject_put later will trigger netdev_queue_release call
	 * which decreases dev refcount: Take that reference here
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
				     "tx-%u", index);
	if (error)
		goto err;

	queue->groups = netdev_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (netdev_uses_bql(dev)) {
		error = sysfs_create_group(kobj, &dql_group);
		if (error)
			goto err_default_groups;
	}

	kobject_uevent(kobj, KOBJ_ADD);
	return 0;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}

static int tx_queue_change_owner(struct net_device *ndev, int index,
				 kuid_t kuid, kgid_t kgid)
{
	struct netdev_queue *queue = ndev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (netdev_uses_bql(ndev))
		error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
static int tx_queue_change_owner(struct net_device *ndev, int index,
				 kuid_t kuid, kgid_t kgid)
{
	struct netdev_queue *queue = ndev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (netdev_uses_bql(ndev))
		error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!refcount_read(&dev_net(dev)->ns.count))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		sysfs_remove_groups(&queue->kobj, queue->groups);
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static void *net_grab_current_ns(void)
{
	struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (ns)
		refcount_inc(&ns->passive);
#endif
	return ns;
}

static const void *net_initial_ns(void)
{
	return &init_net;
}

static const void *net_netlink_ns(struct sock *sk)
{
	return sock_net(sk);
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what RtNetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}
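
/* Example environment added by netdev_uevent() for a KOBJ_ADD event on a
 * device named "eth0" with ifindex 2 (illustrative values only):
 *
 *	INTERFACE=eth0
 *	IFINDEX=2
 */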
/*
 * netdev_release -- destroy and free a dead device.
 * Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	kvfree(dev);
}

static const void *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return dev_net(dev);
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

static const struct class net_class = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding to the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
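
/* Sketch of a typical caller (hypothetical @port_np), showing the required
 * reference drop once the looked-up device is no longer needed:
 *
 *	struct net_device *ndev = of_find_net_device_by_node(port_np);
 *
 *	if (ndev) {
 *		... use ndev ...
 *		put_device(&ndev->dev);
 *	}
 */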
2286 */ 2287 void netdev_unregister_kobject(struct net_device *ndev) 2288 { 2289 struct device *dev = &ndev->dev; 2290 2291 if (!refcount_read(&dev_net(ndev)->ns.count)) 2292 dev_set_uevent_suppress(dev, 1); 2293 2294 kobject_get(&dev->kobj); 2295 2296 remove_queue_kobjects(ndev); 2297 2298 pm_runtime_set_memalloc_noio(dev, false); 2299 2300 device_del(dev); 2301 } 2302 2303 /* Create sysfs entries for network device. */ 2304 int netdev_register_kobject(struct net_device *ndev) 2305 { 2306 struct device *dev = &ndev->dev; 2307 const struct attribute_group **groups = ndev->sysfs_groups; 2308 int error = 0; 2309 2310 device_initialize(dev); 2311 dev->class = &net_class; 2312 dev->platform_data = ndev; 2313 dev->groups = groups; 2314 2315 dev_set_name(dev, "%s", ndev->name); 2316 2317 #ifdef CONFIG_SYSFS 2318 /* Allow for a device specific group */ 2319 if (*groups) 2320 groups++; 2321 2322 *groups++ = &netstat_group; 2323 2324 if (wireless_group_needed(ndev)) 2325 *groups++ = &wireless_group; 2326 #endif /* CONFIG_SYSFS */ 2327 2328 error = device_add(dev); 2329 if (error) 2330 return error; 2331 2332 error = register_queue_kobjects(ndev); 2333 if (error) { 2334 device_del(dev); 2335 return error; 2336 } 2337 2338 pm_runtime_set_memalloc_noio(dev, true); 2339 2340 return error; 2341 } 2342 2343 /* Change owner for sysfs entries when moving network devices across network 2344 * namespaces owned by different user namespaces. 2345 */ 2346 int netdev_change_owner(struct net_device *ndev, const struct net *net_old, 2347 const struct net *net_new) 2348 { 2349 kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID; 2350 kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID; 2351 struct device *dev = &ndev->dev; 2352 int error; 2353 2354 net_ns_get_ownership(net_old, &old_uid, &old_gid); 2355 net_ns_get_ownership(net_new, &new_uid, &new_gid); 2356 2357 /* The network namespace was changed but the owning user namespace is 2358 * identical so there's no need to change the owner of sysfs entries. 2359 */ 2360 if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid)) 2361 return 0; 2362 2363 error = device_change_owner(dev, new_uid, new_gid); 2364 if (error) 2365 return error; 2366 2367 error = queue_change_owner(ndev, new_uid, new_gid); 2368 if (error) 2369 return error; 2370 2371 return 0; 2372 } 2373 2374 int netdev_class_create_file_ns(const struct class_attribute *class_attr, 2375 const void *ns) 2376 { 2377 return class_create_file_ns(&net_class, class_attr, ns); 2378 } 2379 EXPORT_SYMBOL(netdev_class_create_file_ns); 2380 2381 void netdev_class_remove_file_ns(const struct class_attribute *class_attr, 2382 const void *ns) 2383 { 2384 class_remove_file_ns(&net_class, class_attr, ns); 2385 } 2386 EXPORT_SYMBOL(netdev_class_remove_file_ns); 2387 2388 int __init netdev_kobject_init(void) 2389 { 2390 kobj_ns_type_register(&net_ns_type_operations); 2391 return class_register(&net_class); 2392 } 2393