1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/netdevice.h> 4 #include <linux/notifier.h> 5 #include <linux/rtnetlink.h> 6 #include <net/busy_poll.h> 7 #include <net/net_namespace.h> 8 #include <net/netdev_queues.h> 9 #include <net/netdev_rx_queue.h> 10 #include <net/sock.h> 11 #include <net/xdp.h> 12 #include <net/xdp_sock.h> 13 #include <net/page_pool/memory_provider.h> 14 15 #include "dev.h" 16 #include "devmem.h" 17 #include "netdev-genl-gen.h" 18 19 struct netdev_nl_dump_ctx { 20 unsigned long ifindex; 21 unsigned int rxq_idx; 22 unsigned int txq_idx; 23 unsigned int napi_id; 24 }; 25 26 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) 27 { 28 NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); 29 30 return (struct netdev_nl_dump_ctx *)cb->ctx; 31 } 32 33 static int 34 netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, 35 const struct genl_info *info) 36 { 37 u64 xsk_features = 0; 38 u64 xdp_rx_meta = 0; 39 void *hdr; 40 41 netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */ 42 43 hdr = genlmsg_iput(rsp, info); 44 if (!hdr) 45 return -EMSGSIZE; 46 47 #define XDP_METADATA_KFUNC(_, flag, __, xmo) \ 48 if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ 49 xdp_rx_meta |= flag; 50 XDP_METADATA_KFUNC_xxx 51 #undef XDP_METADATA_KFUNC 52 53 if (netdev->xsk_tx_metadata_ops) { 54 if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) 55 xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; 56 if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) 57 xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; 58 if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time) 59 xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO; 60 } 61 62 if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || 63 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, 64 netdev->xdp_features, NETDEV_A_DEV_PAD) || 65 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, 66 xdp_rx_meta, NETDEV_A_DEV_PAD) || 67 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, 68 xsk_features, NETDEV_A_DEV_PAD)) 69 goto err_cancel_msg; 70 71 if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { 72 if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, 73 netdev->xdp_zc_max_segs)) 74 goto err_cancel_msg; 75 } 76 77 genlmsg_end(rsp, hdr); 78 79 return 0; 80 81 err_cancel_msg: 82 genlmsg_cancel(rsp, hdr); 83 return -EMSGSIZE; 84 } 85 86 static void 87 netdev_genl_dev_notify(struct net_device *netdev, int cmd) 88 { 89 struct genl_info info; 90 struct sk_buff *ntf; 91 92 if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), 93 NETDEV_NLGRP_MGMT)) 94 return; 95 96 genl_info_init_ntf(&info, &netdev_nl_family, cmd); 97 98 ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 99 if (!ntf) 100 return; 101 102 if (netdev_nl_dev_fill(netdev, ntf, &info)) { 103 nlmsg_free(ntf); 104 return; 105 } 106 107 genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, 108 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); 109 } 110 111 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) 112 { 113 struct net_device *netdev; 114 struct sk_buff *rsp; 115 u32 ifindex; 116 int err; 117 118 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) 119 return -EINVAL; 120 121 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 122 123 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 124 if (!rsp) 125 return -ENOMEM; 126 127 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 128 if (!netdev) { 129 err = -ENODEV; 130 goto err_free_msg; 131 } 132 133 err = netdev_nl_dev_fill(netdev, rsp, info); 134 netdev_unlock(netdev); 135 136 if (err) 137 goto err_free_msg; 138 139 return genlmsg_reply(rsp, info); 140 141 err_free_msg: 142 nlmsg_free(rsp); 143 return err; 144 } 145 146 int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 147 { 148 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 149 struct net *net = sock_net(skb->sk); 150 int err; 151 152 for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { 153 err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); 154 if (err < 0) 155 return err; 156 } 157 158 return 0; 159 } 160 161 static int 162 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, 163 const struct genl_info *info) 164 { 165 unsigned long irq_suspend_timeout; 166 unsigned long gro_flush_timeout; 167 u32 napi_defer_hard_irqs; 168 void *hdr; 169 pid_t pid; 170 171 if (!napi->dev->up) 172 return 0; 173 174 hdr = genlmsg_iput(rsp, info); 175 if (!hdr) 176 return -EMSGSIZE; 177 178 if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) 179 goto nla_put_failure; 180 181 if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) 182 goto nla_put_failure; 183 184 if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) 185 goto nla_put_failure; 186 187 if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, 188 napi_get_threaded(napi))) 189 goto nla_put_failure; 190 191 if (napi->thread) { 192 pid = task_pid_nr(napi->thread); 193 if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) 194 goto nla_put_failure; 195 } 196 197 napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); 198 if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, 199 napi_defer_hard_irqs)) 200 goto nla_put_failure; 201 202 irq_suspend_timeout = napi_get_irq_suspend_timeout(napi); 203 if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, 204 irq_suspend_timeout)) 205 goto nla_put_failure; 206 207 gro_flush_timeout = napi_get_gro_flush_timeout(napi); 208 if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, 209 gro_flush_timeout)) 210 goto nla_put_failure; 211 212 genlmsg_end(rsp, hdr); 213 214 return 0; 215 216 nla_put_failure: 217 genlmsg_cancel(rsp, hdr); 218 return -EMSGSIZE; 219 } 220 221 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) 222 { 223 struct napi_struct *napi; 224 struct sk_buff *rsp; 225 u32 napi_id; 226 int err; 227 228 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) 229 return -EINVAL; 230 231 napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); 232 233 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 234 if (!rsp) 235 return -ENOMEM; 236 237 napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); 238 if (napi) { 239 err = netdev_nl_napi_fill_one(rsp, napi, info); 240 netdev_unlock(napi->dev); 241 } else { 242 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); 243 err = -ENOENT; 244 } 245 246 if (err) { 247 goto err_free_msg; 248 } else if (!rsp->len) { 249 err = -ENOENT; 250 goto err_free_msg; 251 } 252 253 return genlmsg_reply(rsp, info); 254 255 err_free_msg: 256 nlmsg_free(rsp); 257 return err; 258 } 259 260 static int 261 netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, 262 const struct genl_info *info, 263 struct netdev_nl_dump_ctx *ctx) 264 { 265 struct napi_struct *napi; 266 unsigned int prev_id; 267 int err = 0; 268 269 if (!netdev->up) 270 return err; 271 272 prev_id = UINT_MAX; 273 list_for_each_entry(napi, &netdev->napi_list, dev_list) { 274 if (!napi_id_valid(napi->napi_id)) 275 continue; 276 277 /* Dump continuation below depends on the list being sorted */ 278 WARN_ON_ONCE(napi->napi_id >= prev_id); 279 prev_id = napi->napi_id; 280 281 if (ctx->napi_id && napi->napi_id >= ctx->napi_id) 282 continue; 283 284 err = netdev_nl_napi_fill_one(rsp, napi, info); 285 if (err) 286 return err; 287 ctx->napi_id = napi->napi_id; 288 } 289 return err; 290 } 291 292 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 293 { 294 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 295 const struct genl_info *info = genl_info_dump(cb); 296 struct net *net = sock_net(skb->sk); 297 struct net_device *netdev; 298 u32 ifindex = 0; 299 int err = 0; 300 301 if (info->attrs[NETDEV_A_NAPI_IFINDEX]) 302 ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); 303 304 if (ifindex) { 305 netdev = netdev_get_by_index_lock(net, ifindex); 306 if (netdev) { 307 err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); 308 netdev_unlock(netdev); 309 } else { 310 err = -ENODEV; 311 } 312 } else { 313 for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { 314 err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); 315 if (err < 0) 316 break; 317 ctx->napi_id = 0; 318 } 319 } 320 321 return err; 322 } 323 324 static int 325 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) 326 { 327 u64 irq_suspend_timeout = 0; 328 u64 gro_flush_timeout = 0; 329 u8 threaded = 0; 330 u32 defer = 0; 331 332 if (info->attrs[NETDEV_A_NAPI_THREADED]) { 333 int ret; 334 335 threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); 336 ret = napi_set_threaded(napi, threaded); 337 if (ret) 338 return ret; 339 } 340 341 if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { 342 defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); 343 napi_set_defer_hard_irqs(napi, defer); 344 } 345 346 if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { 347 irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); 348 napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); 349 } 350 351 if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { 352 gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); 353 napi_set_gro_flush_timeout(napi, gro_flush_timeout); 354 } 355 356 return 0; 357 } 358 359 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) 360 { 361 struct napi_struct *napi; 362 unsigned int napi_id; 363 int err; 364 365 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) 366 return -EINVAL; 367 368 napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); 369 370 napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); 371 if (napi) { 372 err = netdev_nl_napi_set_config(napi, info); 373 netdev_unlock(napi->dev); 374 } else { 375 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); 376 err = -ENOENT; 377 } 378 379 return err; 380 } 381 382 static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) 383 { 384 if (napi && napi_id_valid(napi->napi_id)) 385 return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); 386 return 0; 387 } 388 389 static int 390 netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev, 391 u32 q_idx, u32 q_type) 392 { 393 struct net_device *orig_netdev = netdev; 394 struct nlattr *nest_lease, *nest_queue; 395 struct netdev_rx_queue *rxq; 396 struct net *net, *peer_net; 397 398 rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT); 399 if (!rxq || orig_netdev == netdev) 400 return 0; 401 402 nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); 403 if (!nest_lease) 404 goto nla_put_failure; 405 406 nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); 407 if (!nest_queue) 408 goto nla_put_failure; 409 if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx)) 410 goto nla_put_failure; 411 if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) 412 goto nla_put_failure; 413 nla_nest_end(rsp, nest_queue); 414 415 if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, 416 READ_ONCE(netdev->ifindex))) 417 goto nla_put_failure; 418 419 rcu_read_lock(); 420 peer_net = dev_net_rcu(netdev); 421 net = dev_net_rcu(orig_netdev); 422 if (!net_eq(net, peer_net)) { 423 s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); 424 425 if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) 426 goto nla_put_failure_unlock; 427 } 428 rcu_read_unlock(); 429 nla_nest_end(rsp, nest_lease); 430 return 0; 431 432 nla_put_failure_unlock: 433 rcu_read_unlock(); 434 nla_put_failure: 435 return -ENOMEM; 436 } 437 438 static int 439 __netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq) 440 { 441 struct pp_memory_provider_params *params = &rxq->mp_params; 442 443 if (params->mp_ops && 444 params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) 445 return -EMSGSIZE; 446 447 #ifdef CONFIG_XDP_SOCKETS 448 if (rxq->pool) 449 if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) 450 return -EMSGSIZE; 451 #endif 452 return 0; 453 } 454 455 static int 456 netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev, 457 struct netdev_rx_queue *rxq) 458 { 459 struct netdev_rx_queue *hw_rxq; 460 int ret; 461 462 hw_rxq = rxq->lease; 463 if (!hw_rxq || !netif_is_queue_leasee(netdev)) 464 return __netdev_nl_queue_fill_mp(rsp, rxq); 465 466 netdev_lock(hw_rxq->dev); 467 ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq); 468 netdev_unlock(hw_rxq->dev); 469 return ret; 470 } 471 472 static int 473 netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, 474 u32 q_idx, u32 q_type, const struct genl_info *info) 475 { 476 struct netdev_rx_queue *rxq; 477 struct netdev_queue *txq; 478 void *hdr; 479 480 hdr = genlmsg_iput(rsp, info); 481 if (!hdr) 482 return -EMSGSIZE; 483 484 if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || 485 nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || 486 nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) 487 goto nla_put_failure; 488 489 switch (q_type) { 490 case NETDEV_QUEUE_TYPE_RX: 491 rxq = __netif_get_rx_queue(netdev, q_idx); 492 if (nla_put_napi_id(rsp, rxq->napi)) 493 goto nla_put_failure; 494 if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type)) 495 goto nla_put_failure; 496 if (netdev_nl_queue_fill_mp(rsp, netdev, rxq)) 497 goto nla_put_failure; 498 break; 499 case NETDEV_QUEUE_TYPE_TX: 500 txq = netdev_get_tx_queue(netdev, q_idx); 501 if (nla_put_napi_id(rsp, txq->napi)) 502 goto nla_put_failure; 503 #ifdef CONFIG_XDP_SOCKETS 504 if (txq->pool) 505 if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) 506 goto nla_put_failure; 507 #endif 508 break; 509 } 510 511 genlmsg_end(rsp, hdr); 512 513 return 0; 514 515 nla_put_failure: 516 genlmsg_cancel(rsp, hdr); 517 return -EMSGSIZE; 518 } 519 520 static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, 521 u32 q_type) 522 { 523 switch (q_type) { 524 case NETDEV_QUEUE_TYPE_RX: 525 if (q_id >= netdev->real_num_rx_queues) 526 return -EINVAL; 527 return 0; 528 case NETDEV_QUEUE_TYPE_TX: 529 if (q_id >= netdev->real_num_tx_queues) 530 return -EINVAL; 531 } 532 return 0; 533 } 534 535 static int 536 netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, 537 u32 q_type, const struct genl_info *info) 538 { 539 int err; 540 541 if (!netdev->up) 542 return -ENOENT; 543 544 err = netdev_nl_queue_validate(netdev, q_idx, q_type); 545 if (err) 546 return err; 547 548 return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); 549 } 550 551 int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) 552 { 553 u32 q_id, q_type, ifindex; 554 struct net_device *netdev; 555 struct sk_buff *rsp; 556 int err; 557 558 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || 559 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || 560 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) 561 return -EINVAL; 562 563 q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); 564 q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); 565 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 566 567 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 568 if (!rsp) 569 return -ENOMEM; 570 571 netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info), 572 ifindex); 573 if (netdev) { 574 err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); 575 netdev_unlock_ops_compat(netdev); 576 } else { 577 err = -ENODEV; 578 } 579 580 if (err) 581 goto err_free_msg; 582 583 return genlmsg_reply(rsp, info); 584 585 err_free_msg: 586 nlmsg_free(rsp); 587 return err; 588 } 589 590 static int 591 netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, 592 const struct genl_info *info, 593 struct netdev_nl_dump_ctx *ctx) 594 { 595 int err = 0; 596 597 if (!netdev->up) 598 return err; 599 600 for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { 601 err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx, 602 NETDEV_QUEUE_TYPE_RX, info); 603 if (err) 604 return err; 605 } 606 for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) { 607 err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx, 608 NETDEV_QUEUE_TYPE_TX, info); 609 if (err) 610 return err; 611 } 612 613 return err; 614 } 615 616 int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 617 { 618 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 619 const struct genl_info *info = genl_info_dump(cb); 620 struct net *net = sock_net(skb->sk); 621 struct net_device *netdev; 622 u32 ifindex = 0; 623 int err = 0; 624 625 if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) 626 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 627 628 if (ifindex) { 629 netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); 630 if (netdev) { 631 err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); 632 netdev_unlock_ops_compat(netdev); 633 } else { 634 err = -ENODEV; 635 } 636 } else { 637 for_each_netdev_lock_ops_compat_scoped(net, netdev, 638 ctx->ifindex) { 639 err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); 640 if (err < 0) 641 break; 642 ctx->rxq_idx = 0; 643 ctx->txq_idx = 0; 644 } 645 } 646 647 return err; 648 } 649 650 #define NETDEV_STAT_NOT_SET (~0ULL) 651 652 static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) 653 { 654 const u64 *add = _add; 655 u64 *sum = _sum; 656 657 while (size) { 658 if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) 659 *sum += *add; 660 sum++; 661 add++; 662 size -= 8; 663 } 664 } 665 666 static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) 667 { 668 if (value == NETDEV_STAT_NOT_SET) 669 return 0; 670 return nla_put_uint(rsp, attr_id, value); 671 } 672 673 static int 674 netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) 675 { 676 if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || 677 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || 678 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || 679 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || 680 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || 681 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || 682 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || 683 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || 684 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || 685 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || 686 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || 687 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || 688 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || 689 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) 690 return -EMSGSIZE; 691 return 0; 692 } 693 694 static int 695 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) 696 { 697 if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || 698 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || 699 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || 700 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || 701 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || 702 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || 703 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || 704 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || 705 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || 706 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || 707 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || 708 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) || 709 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) 710 return -EMSGSIZE; 711 return 0; 712 } 713 714 static int 715 netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, 716 u32 q_type, int i, const struct genl_info *info) 717 { 718 const struct netdev_stat_ops *ops = netdev->stat_ops; 719 struct netdev_queue_stats_rx rx; 720 struct netdev_queue_stats_tx tx; 721 void *hdr; 722 723 hdr = genlmsg_iput(rsp, info); 724 if (!hdr) 725 return -EMSGSIZE; 726 if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || 727 nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || 728 nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) 729 goto nla_put_failure; 730 731 switch (q_type) { 732 case NETDEV_QUEUE_TYPE_RX: 733 memset(&rx, 0xff, sizeof(rx)); 734 ops->get_queue_stats_rx(netdev, i, &rx); 735 if (!memchr_inv(&rx, 0xff, sizeof(rx))) 736 goto nla_cancel; 737 if (netdev_nl_stats_write_rx(rsp, &rx)) 738 goto nla_put_failure; 739 break; 740 case NETDEV_QUEUE_TYPE_TX: 741 memset(&tx, 0xff, sizeof(tx)); 742 ops->get_queue_stats_tx(netdev, i, &tx); 743 if (!memchr_inv(&tx, 0xff, sizeof(tx))) 744 goto nla_cancel; 745 if (netdev_nl_stats_write_tx(rsp, &tx)) 746 goto nla_put_failure; 747 break; 748 } 749 750 genlmsg_end(rsp, hdr); 751 return 0; 752 753 nla_cancel: 754 genlmsg_cancel(rsp, hdr); 755 return 0; 756 nla_put_failure: 757 genlmsg_cancel(rsp, hdr); 758 return -EMSGSIZE; 759 } 760 761 static int 762 netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, 763 const struct genl_info *info, 764 struct netdev_nl_dump_ctx *ctx) 765 { 766 const struct netdev_stat_ops *ops = netdev->stat_ops; 767 int i, err; 768 769 if (!(netdev->flags & IFF_UP)) 770 return 0; 771 772 i = ctx->rxq_idx; 773 while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { 774 err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, 775 i, info); 776 if (err) 777 return err; 778 ctx->rxq_idx = ++i; 779 } 780 i = ctx->txq_idx; 781 while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { 782 err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, 783 i, info); 784 if (err) 785 return err; 786 ctx->txq_idx = ++i; 787 } 788 789 ctx->rxq_idx = 0; 790 ctx->txq_idx = 0; 791 return 0; 792 } 793 794 /** 795 * netdev_stat_queue_sum() - add up queue stats from range of queues 796 * @netdev: net_device 797 * @rx_start: index of the first Rx queue to query 798 * @rx_end: index after the last Rx queue (first *not* to query) 799 * @rx_sum: output Rx stats, should be already initialized 800 * @tx_start: index of the first Tx queue to query 801 * @tx_end: index after the last Tx queue (first *not* to query) 802 * @tx_sum: output Tx stats, should be already initialized 803 * 804 * Add stats from [start, end) range of queue IDs to *x_sum structs. 805 * The sum structs must be already initialized. Usually this 806 * helper is invoked from the .get_base_stats callbacks of drivers 807 * to account for stats of disabled queues. In that case the ranges 808 * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). 809 */ 810 void netdev_stat_queue_sum(struct net_device *netdev, 811 int rx_start, int rx_end, 812 struct netdev_queue_stats_rx *rx_sum, 813 int tx_start, int tx_end, 814 struct netdev_queue_stats_tx *tx_sum) 815 { 816 const struct netdev_stat_ops *ops; 817 struct netdev_queue_stats_rx rx; 818 struct netdev_queue_stats_tx tx; 819 int i; 820 821 ops = netdev->stat_ops; 822 823 for (i = rx_start; i < rx_end; i++) { 824 memset(&rx, 0xff, sizeof(rx)); 825 if (ops->get_queue_stats_rx) 826 ops->get_queue_stats_rx(netdev, i, &rx); 827 netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); 828 } 829 for (i = tx_start; i < tx_end; i++) { 830 memset(&tx, 0xff, sizeof(tx)); 831 if (ops->get_queue_stats_tx) 832 ops->get_queue_stats_tx(netdev, i, &tx); 833 netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); 834 } 835 } 836 EXPORT_SYMBOL(netdev_stat_queue_sum); 837 838 static int 839 netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, 840 const struct genl_info *info) 841 { 842 struct netdev_queue_stats_rx rx_sum; 843 struct netdev_queue_stats_tx tx_sum; 844 void *hdr; 845 846 /* Netdev can't guarantee any complete counters */ 847 if (!netdev->stat_ops->get_base_stats) 848 return 0; 849 850 memset(&rx_sum, 0xff, sizeof(rx_sum)); 851 memset(&tx_sum, 0xff, sizeof(tx_sum)); 852 853 netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); 854 855 /* The op was there, but nothing reported, don't bother */ 856 if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && 857 !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) 858 return 0; 859 860 hdr = genlmsg_iput(rsp, info); 861 if (!hdr) 862 return -EMSGSIZE; 863 if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) 864 goto nla_put_failure; 865 866 netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, 867 0, netdev->real_num_tx_queues, &tx_sum); 868 869 if (netdev_nl_stats_write_rx(rsp, &rx_sum) || 870 netdev_nl_stats_write_tx(rsp, &tx_sum)) 871 goto nla_put_failure; 872 873 genlmsg_end(rsp, hdr); 874 return 0; 875 876 nla_put_failure: 877 genlmsg_cancel(rsp, hdr); 878 return -EMSGSIZE; 879 } 880 881 static int 882 netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope, 883 struct sk_buff *skb, const struct genl_info *info, 884 struct netdev_nl_dump_ctx *ctx) 885 { 886 if (!netdev->stat_ops) 887 return 0; 888 889 switch (scope) { 890 case 0: 891 return netdev_nl_stats_by_netdev(netdev, skb, info); 892 case NETDEV_QSTATS_SCOPE_QUEUE: 893 return netdev_nl_stats_by_queue(netdev, skb, info, ctx); 894 } 895 896 return -EINVAL; /* Should not happen, per netlink policy */ 897 } 898 899 int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, 900 struct netlink_callback *cb) 901 { 902 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 903 const struct genl_info *info = genl_info_dump(cb); 904 struct net *net = sock_net(skb->sk); 905 struct net_device *netdev; 906 unsigned int ifindex; 907 unsigned int scope; 908 int err = 0; 909 910 scope = 0; 911 if (info->attrs[NETDEV_A_QSTATS_SCOPE]) 912 scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); 913 914 ifindex = 0; 915 if (info->attrs[NETDEV_A_QSTATS_IFINDEX]) 916 ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]); 917 918 if (ifindex) { 919 netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); 920 if (!netdev) { 921 NL_SET_BAD_ATTR(info->extack, 922 info->attrs[NETDEV_A_QSTATS_IFINDEX]); 923 return -ENODEV; 924 } 925 if (netdev->stat_ops) { 926 err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, 927 info, ctx); 928 } else { 929 NL_SET_BAD_ATTR(info->extack, 930 info->attrs[NETDEV_A_QSTATS_IFINDEX]); 931 err = -EOPNOTSUPP; 932 } 933 netdev_unlock_ops_compat(netdev); 934 return err; 935 } 936 937 for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) { 938 err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, 939 info, ctx); 940 if (err < 0) 941 break; 942 } 943 944 return err; 945 } 946 947 static int netdev_nl_read_rxq_bitmap(struct genl_info *info, 948 u32 rxq_bitmap_len, 949 unsigned long *rxq_bitmap) 950 { 951 const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; 952 struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; 953 struct nlattr *attr; 954 int rem, err = 0; 955 u32 rxq_idx; 956 957 nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, 958 genlmsg_data(info->genlhdr), 959 genlmsg_len(info->genlhdr), rem) { 960 err = nla_parse_nested(tb, maxtype, attr, 961 netdev_queue_id_nl_policy, info->extack); 962 if (err < 0) 963 return err; 964 965 if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || 966 NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) 967 return -EINVAL; 968 969 if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { 970 NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); 971 return -EINVAL; 972 } 973 974 rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); 975 if (rxq_idx >= rxq_bitmap_len) { 976 NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]); 977 return -EINVAL; 978 } 979 980 bitmap_set(rxq_bitmap, rxq_idx, 1); 981 } 982 983 return 0; 984 } 985 986 static struct device * 987 netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap, 988 struct netlink_ext_ack *extack) 989 { 990 struct device *dma_dev = NULL; 991 u32 rxq_idx, prev_rxq_idx; 992 993 for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { 994 struct device *rxq_dma_dev; 995 996 rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx, 997 NETDEV_QUEUE_TYPE_RX); 998 if (dma_dev && rxq_dma_dev != dma_dev) { 999 NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)", 1000 rxq_idx, prev_rxq_idx); 1001 return ERR_PTR(-EOPNOTSUPP); 1002 } 1003 1004 dma_dev = rxq_dma_dev; 1005 prev_rxq_idx = rxq_idx; 1006 } 1007 1008 return dma_dev; 1009 } 1010 1011 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) 1012 { 1013 struct net_devmem_dmabuf_binding *binding; 1014 u32 ifindex, dmabuf_fd, rxq_idx; 1015 struct netdev_nl_sock *priv; 1016 struct net_device *netdev; 1017 unsigned long *rxq_bitmap; 1018 struct device *dma_dev; 1019 struct sk_buff *rsp; 1020 int err = 0; 1021 void *hdr; 1022 1023 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || 1024 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) || 1025 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES)) 1026 return -EINVAL; 1027 1028 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 1029 dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); 1030 1031 priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); 1032 if (IS_ERR(priv)) 1033 return PTR_ERR(priv); 1034 1035 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1036 if (!rsp) 1037 return -ENOMEM; 1038 1039 hdr = genlmsg_iput(rsp, info); 1040 if (!hdr) { 1041 err = -EMSGSIZE; 1042 goto err_genlmsg_free; 1043 } 1044 1045 mutex_lock(&priv->lock); 1046 1047 err = 0; 1048 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1049 if (!netdev) { 1050 err = -ENODEV; 1051 goto err_unlock_sock; 1052 } 1053 if (!netif_device_present(netdev)) 1054 err = -ENODEV; 1055 else if (!netdev_need_ops_lock(netdev)) 1056 err = -EOPNOTSUPP; 1057 if (err) { 1058 NL_SET_BAD_ATTR(info->extack, 1059 info->attrs[NETDEV_A_DEV_IFINDEX]); 1060 goto err_unlock; 1061 } 1062 1063 rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL); 1064 if (!rxq_bitmap) { 1065 err = -ENOMEM; 1066 goto err_unlock; 1067 } 1068 1069 err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues, 1070 rxq_bitmap); 1071 if (err) 1072 goto err_rxq_bitmap; 1073 1074 dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack); 1075 if (IS_ERR(dma_dev)) { 1076 err = PTR_ERR(dma_dev); 1077 goto err_rxq_bitmap; 1078 } 1079 1080 binding = net_devmem_bind_dmabuf(netdev, NULL, dma_dev, DMA_FROM_DEVICE, 1081 dmabuf_fd, priv, info->extack); 1082 if (IS_ERR(binding)) { 1083 err = PTR_ERR(binding); 1084 goto err_rxq_bitmap; 1085 } 1086 1087 for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { 1088 err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, 1089 info->extack); 1090 if (err) 1091 goto err_unbind; 1092 } 1093 1094 nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); 1095 genlmsg_end(rsp, hdr); 1096 1097 err = genlmsg_reply(rsp, info); 1098 1099 bitmap_free(rxq_bitmap); 1100 1101 netdev_unlock(netdev); 1102 1103 mutex_unlock(&priv->lock); 1104 1105 return err < 0 ? err : 0; 1106 1107 err_unbind: 1108 net_devmem_unbind_dmabuf(binding); 1109 err_rxq_bitmap: 1110 bitmap_free(rxq_bitmap); 1111 err_unlock: 1112 netdev_unlock(netdev); 1113 err_unlock_sock: 1114 mutex_unlock(&priv->lock); 1115 err_genlmsg_free: 1116 nlmsg_free(rsp); 1117 return err; 1118 } 1119 1120 /* Find the DMA-capable device for a netmem TX binding. 1121 * 1122 * For NETMEM_TX_DMA devices, return the device itself. 1123 * For NETMEM_TX_NO_DMA devices, walk leased RX queues to find the underlying 1124 * physical device and return it. 1125 */ 1126 static struct net_device * 1127 netdev_find_netmem_tx_dev(struct net_device *dev) 1128 { 1129 struct netdev_rx_queue *lease_rxq; 1130 struct net_device *phys_dev; 1131 int i; 1132 1133 if (dev->netmem_tx == NETMEM_TX_DMA) 1134 return dev; 1135 1136 if (dev->netmem_tx != NETMEM_TX_NO_DMA) 1137 return NULL; 1138 1139 for (i = 0; i < dev->real_num_rx_queues; i++) { 1140 lease_rxq = READ_ONCE(__netif_get_rx_queue(dev, i)->lease); 1141 if (!lease_rxq) 1142 continue; 1143 1144 phys_dev = lease_rxq->dev; 1145 if (netif_device_present(phys_dev) && 1146 phys_dev->netmem_tx == NETMEM_TX_DMA) 1147 return phys_dev; 1148 } 1149 1150 return NULL; 1151 } 1152 1153 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) 1154 { 1155 struct net_devmem_dmabuf_binding *binding; 1156 struct net_device *bind_dev; 1157 struct netdev_nl_sock *priv; 1158 struct net_device *netdev; 1159 struct device *dma_dev; 1160 u32 ifindex, dmabuf_fd; 1161 struct sk_buff *rsp; 1162 int err = 0; 1163 void *hdr; 1164 1165 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || 1166 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD)) 1167 return -EINVAL; 1168 1169 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 1170 dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); 1171 1172 priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); 1173 if (IS_ERR(priv)) 1174 return PTR_ERR(priv); 1175 1176 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1177 if (!rsp) 1178 return -ENOMEM; 1179 1180 hdr = genlmsg_iput(rsp, info); 1181 if (!hdr) { 1182 err = -EMSGSIZE; 1183 goto err_genlmsg_free; 1184 } 1185 1186 mutex_lock(&priv->lock); 1187 1188 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1189 if (!netdev) { 1190 err = -ENODEV; 1191 goto err_unlock_sock; 1192 } 1193 1194 if (!netif_device_present(netdev)) { 1195 err = -ENODEV; 1196 goto err_unlock_netdev; 1197 } 1198 1199 if (netdev->netmem_tx == NETMEM_TX_NONE) { 1200 err = -EOPNOTSUPP; 1201 NL_SET_ERR_MSG(info->extack, 1202 "Driver does not support netmem TX"); 1203 goto err_unlock_netdev; 1204 } 1205 1206 bind_dev = netdev_find_netmem_tx_dev(netdev); 1207 if (!bind_dev) { 1208 err = -EOPNOTSUPP; 1209 NL_SET_ERR_MSG(info->extack, 1210 "No DMA-capable device found for netmem TX"); 1211 goto err_unlock_netdev; 1212 } 1213 1214 if (bind_dev != netdev) 1215 netdev_lock(bind_dev); 1216 1217 dma_dev = netdev_queue_get_dma_dev(bind_dev, 0, NETDEV_QUEUE_TYPE_TX); 1218 1219 binding = net_devmem_bind_dmabuf(bind_dev, 1220 bind_dev != netdev ? netdev : NULL, 1221 dma_dev, DMA_TO_DEVICE, dmabuf_fd, 1222 priv, info->extack); 1223 if (IS_ERR(binding)) { 1224 err = PTR_ERR(binding); 1225 goto err_unlock_bind_dev; 1226 } 1227 1228 nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); 1229 genlmsg_end(rsp, hdr); 1230 1231 if (bind_dev != netdev) 1232 netdev_unlock(bind_dev); 1233 netdev_unlock(netdev); 1234 mutex_unlock(&priv->lock); 1235 1236 return genlmsg_reply(rsp, info); 1237 1238 err_unlock_bind_dev: 1239 if (bind_dev != netdev) 1240 netdev_unlock(bind_dev); 1241 err_unlock_netdev: 1242 netdev_unlock(netdev); 1243 err_unlock_sock: 1244 mutex_unlock(&priv->lock); 1245 err_genlmsg_free: 1246 nlmsg_free(rsp); 1247 return err; 1248 } 1249 1250 int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) 1251 { 1252 const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; 1253 const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; 1254 int err, ifindex, ifindex_lease, queue_id, queue_id_lease; 1255 struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; 1256 struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; 1257 struct netdev_rx_queue *rxq, *rxq_lease; 1258 struct net_device *dev, *dev_lease; 1259 netdevice_tracker dev_tracker; 1260 s32 netns_lease = -1; 1261 struct nlattr *nest; 1262 struct sk_buff *rsp; 1263 struct net *net; 1264 void *hdr; 1265 1266 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || 1267 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || 1268 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) 1269 return -EINVAL; 1270 if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != 1271 NETDEV_QUEUE_TYPE_RX) { 1272 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); 1273 return -EINVAL; 1274 } 1275 1276 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 1277 1278 nest = info->attrs[NETDEV_A_QUEUE_LEASE]; 1279 err = nla_parse_nested(ltb, lmaxtype, nest, 1280 netdev_lease_nl_policy, info->extack); 1281 if (err < 0) 1282 return err; 1283 if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || 1284 NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) 1285 return -EINVAL; 1286 if (ltb[NETDEV_A_LEASE_NETNS_ID]) { 1287 if (!capable(CAP_NET_ADMIN)) 1288 return -EPERM; 1289 netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]); 1290 } 1291 1292 ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); 1293 1294 nest = ltb[NETDEV_A_LEASE_QUEUE]; 1295 err = nla_parse_nested(qtb, qmaxtype, nest, 1296 netdev_queue_id_nl_policy, info->extack); 1297 if (err < 0) 1298 return err; 1299 if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || 1300 NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE)) 1301 return -EINVAL; 1302 if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { 1303 NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); 1304 return -EINVAL; 1305 } 1306 1307 queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); 1308 1309 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1310 if (!rsp) 1311 return -ENOMEM; 1312 1313 hdr = genlmsg_iput(rsp, info); 1314 if (!hdr) { 1315 err = -EMSGSIZE; 1316 goto err_genlmsg_free; 1317 } 1318 1319 /* Locking order is always from the virtual to the physical device 1320 * since this is also the same order when applications open the 1321 * memory provider later on. 1322 */ 1323 dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1324 if (!dev) { 1325 err = -ENODEV; 1326 goto err_genlmsg_free; 1327 } 1328 if (!netdev_can_create_queue(dev, info->extack)) { 1329 err = -EINVAL; 1330 goto err_unlock_dev; 1331 } 1332 1333 net = genl_info_net(info); 1334 if (netns_lease >= 0) { 1335 net = get_net_ns_by_id(net, netns_lease); 1336 if (!net) { 1337 err = -ENONET; 1338 goto err_unlock_dev; 1339 } 1340 } 1341 1342 dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker, 1343 GFP_KERNEL); 1344 if (!dev_lease) { 1345 err = -ENODEV; 1346 goto err_put_netns; 1347 } 1348 if (!netdev_can_lease_queue(dev_lease, info->extack)) { 1349 netdev_put(dev_lease, &dev_tracker); 1350 err = -EINVAL; 1351 goto err_put_netns; 1352 } 1353 1354 dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker); 1355 if (!dev_lease) { 1356 err = -ENODEV; 1357 goto err_put_netns; 1358 } 1359 if (queue_id_lease >= dev_lease->real_num_rx_queues) { 1360 err = -ERANGE; 1361 NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); 1362 goto err_unlock_dev_lease; 1363 } 1364 if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX, 1365 info->extack)) { 1366 err = -EBUSY; 1367 goto err_unlock_dev_lease; 1368 } 1369 1370 rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); 1371 rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); 1372 1373 /* Leasing queues from different physical devices is currently 1374 * not supported. Capabilities such as XDP features and DMA 1375 * device may differ between physical devices, and computing 1376 * a correct intersection for the virtual device is not yet 1377 * implemented. 1378 */ 1379 if (rxq->lease && rxq->lease->dev != dev_lease) { 1380 err = -EOPNOTSUPP; 1381 NL_SET_ERR_MSG(info->extack, 1382 "Leasing queues from different devices not supported"); 1383 goto err_unlock_dev_lease; 1384 } 1385 1386 queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack); 1387 if (queue_id < 0) { 1388 err = queue_id; 1389 goto err_unlock_dev_lease; 1390 } 1391 rxq = __netif_get_rx_queue(dev, queue_id); 1392 1393 netdev_rx_queue_lease(rxq, rxq_lease); 1394 1395 nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); 1396 genlmsg_end(rsp, hdr); 1397 1398 netdev_unlock(dev_lease); 1399 netdev_unlock(dev); 1400 if (netns_lease >= 0) 1401 put_net(net); 1402 1403 return genlmsg_reply(rsp, info); 1404 1405 err_unlock_dev_lease: 1406 netdev_unlock(dev_lease); 1407 err_put_netns: 1408 if (netns_lease >= 0) 1409 put_net(net); 1410 err_unlock_dev: 1411 netdev_unlock(dev); 1412 err_genlmsg_free: 1413 nlmsg_free(rsp); 1414 return err; 1415 } 1416 1417 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) 1418 { 1419 INIT_LIST_HEAD(&priv->bindings); 1420 mutex_init(&priv->lock); 1421 } 1422 1423 void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) 1424 { 1425 struct net_devmem_dmabuf_binding *binding; 1426 struct net_devmem_dmabuf_binding *temp; 1427 netdevice_tracker dev_tracker; 1428 struct net_device *dev; 1429 1430 mutex_lock(&priv->lock); 1431 list_for_each_entry_safe(binding, temp, &priv->bindings, list) { 1432 mutex_lock(&binding->lock); 1433 dev = binding->dev; 1434 if (!dev) { 1435 mutex_unlock(&binding->lock); 1436 net_devmem_unbind_dmabuf(binding); 1437 continue; 1438 } 1439 netdev_hold(dev, &dev_tracker, GFP_KERNEL); 1440 mutex_unlock(&binding->lock); 1441 1442 netdev_lock(dev); 1443 net_devmem_unbind_dmabuf(binding); 1444 netdev_unlock(dev); 1445 netdev_put(dev, &dev_tracker); 1446 } 1447 mutex_unlock(&priv->lock); 1448 } 1449 1450 static int netdev_genl_netdevice_event(struct notifier_block *nb, 1451 unsigned long event, void *ptr) 1452 { 1453 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 1454 1455 switch (event) { 1456 case NETDEV_REGISTER: 1457 netdev_lock_ops_to_full(netdev); 1458 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); 1459 netdev_unlock_full_to_ops(netdev); 1460 break; 1461 case NETDEV_UNREGISTER: 1462 netdev_lock(netdev); 1463 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); 1464 netdev_unlock(netdev); 1465 break; 1466 case NETDEV_XDP_FEAT_CHANGE: 1467 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); 1468 break; 1469 } 1470 1471 return NOTIFY_OK; 1472 } 1473 1474 static struct notifier_block netdev_genl_nb = { 1475 .notifier_call = netdev_genl_netdevice_event, 1476 }; 1477 1478 static int __init netdev_genl_init(void) 1479 { 1480 int err; 1481 1482 err = register_netdevice_notifier(&netdev_genl_nb); 1483 if (err) 1484 return err; 1485 1486 err = genl_register_family(&netdev_nl_family); 1487 if (err) 1488 goto err_unreg_ntf; 1489 1490 return 0; 1491 1492 err_unreg_ntf: 1493 unregister_netdevice_notifier(&netdev_genl_nb); 1494 return err; 1495 } 1496 1497 subsys_initcall(netdev_genl_init); 1498