1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/netdevice.h> 4 #include <linux/notifier.h> 5 #include <linux/pid_namespace.h> 6 #include <linux/rtnetlink.h> 7 #include <net/busy_poll.h> 8 #include <net/net_namespace.h> 9 #include <net/netdev_queues.h> 10 #include <net/netdev_rx_queue.h> 11 #include <net/sock.h> 12 #include <net/xdp.h> 13 #include <net/xdp_sock.h> 14 #include <net/page_pool/memory_provider.h> 15 16 #include "dev.h" 17 #include "devmem.h" 18 #include "netdev-genl-gen.h" 19 20 struct netdev_nl_dump_ctx { 21 unsigned long ifindex; 22 unsigned int rxq_idx; 23 unsigned int txq_idx; 24 unsigned int napi_id; 25 }; 26 27 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) 28 { 29 NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); 30 31 return (struct netdev_nl_dump_ctx *)cb->ctx; 32 } 33 34 static int 35 netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, 36 const struct genl_info *info) 37 { 38 u64 xsk_features = 0; 39 u64 xdp_rx_meta = 0; 40 void *hdr; 41 42 netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */ 43 44 hdr = genlmsg_iput(rsp, info); 45 if (!hdr) 46 return -EMSGSIZE; 47 48 #define XDP_METADATA_KFUNC(_, flag, __, xmo) \ 49 if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ 50 xdp_rx_meta |= flag; 51 XDP_METADATA_KFUNC_xxx 52 #undef XDP_METADATA_KFUNC 53 54 if (netdev->xsk_tx_metadata_ops) { 55 if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) 56 xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; 57 if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) 58 xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; 59 if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time) 60 xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO; 61 } 62 63 if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || 64 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, 65 netdev->xdp_features, NETDEV_A_DEV_PAD) || 66 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, 67 xdp_rx_meta, NETDEV_A_DEV_PAD) || 68 nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, 69 xsk_features, NETDEV_A_DEV_PAD)) 70 goto err_cancel_msg; 71 72 if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { 73 if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, 74 netdev->xdp_zc_max_segs)) 75 goto err_cancel_msg; 76 } 77 78 genlmsg_end(rsp, hdr); 79 80 return 0; 81 82 err_cancel_msg: 83 genlmsg_cancel(rsp, hdr); 84 return -EMSGSIZE; 85 } 86 87 static void 88 netdev_genl_dev_notify(struct net_device *netdev, int cmd) 89 { 90 struct genl_info info; 91 struct sk_buff *ntf; 92 93 if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), 94 NETDEV_NLGRP_MGMT)) 95 return; 96 97 genl_info_init_ntf(&info, &netdev_nl_family, cmd); 98 99 ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 100 if (!ntf) 101 return; 102 103 if (netdev_nl_dev_fill(netdev, ntf, &info)) { 104 nlmsg_free(ntf); 105 return; 106 } 107 108 genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, 109 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); 110 } 111 112 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) 113 { 114 struct net_device *netdev; 115 struct sk_buff *rsp; 116 u32 ifindex; 117 int err; 118 119 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) 120 return -EINVAL; 121 122 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 123 124 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 125 if (!rsp) 126 return -ENOMEM; 127 128 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 129 if (!netdev) { 130 err = -ENODEV; 131 goto err_free_msg; 132 } 133 134 err = netdev_nl_dev_fill(netdev, rsp, info); 135 netdev_unlock(netdev); 136 137 if (err) 138 goto err_free_msg; 139 140 return genlmsg_reply(rsp, info); 141 142 err_free_msg: 143 nlmsg_free(rsp); 144 return err; 145 } 146 147 int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 148 { 149 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 150 struct net *net = sock_net(skb->sk); 151 int err; 152 153 for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { 154 err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); 155 if (err < 0) 156 return err; 157 } 158 159 return 0; 160 } 161 162 static int 163 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, 164 const struct genl_info *info) 165 { 166 unsigned long irq_suspend_timeout; 167 unsigned long gro_flush_timeout; 168 u32 napi_defer_hard_irqs; 169 void *hdr; 170 pid_t pid; 171 172 if (!napi->dev->up) 173 return 0; 174 175 hdr = genlmsg_iput(rsp, info); 176 if (!hdr) 177 return -EMSGSIZE; 178 179 if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) 180 goto nla_put_failure; 181 182 if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) 183 goto nla_put_failure; 184 185 if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) 186 goto nla_put_failure; 187 188 if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, 189 napi_get_threaded(napi))) 190 goto nla_put_failure; 191 192 if (napi->thread) { 193 pid = task_pid_nr_ns(napi->thread, 194 task_active_pid_ns(current)); 195 if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) 196 goto nla_put_failure; 197 } 198 199 napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); 200 if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, 201 napi_defer_hard_irqs)) 202 goto nla_put_failure; 203 204 irq_suspend_timeout = napi_get_irq_suspend_timeout(napi); 205 if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, 206 irq_suspend_timeout)) 207 goto nla_put_failure; 208 209 gro_flush_timeout = napi_get_gro_flush_timeout(napi); 210 if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, 211 gro_flush_timeout)) 212 goto nla_put_failure; 213 214 genlmsg_end(rsp, hdr); 215 216 return 0; 217 218 nla_put_failure: 219 genlmsg_cancel(rsp, hdr); 220 return -EMSGSIZE; 221 } 222 223 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) 224 { 225 struct napi_struct *napi; 226 struct sk_buff *rsp; 227 u32 napi_id; 228 int err; 229 230 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) 231 return -EINVAL; 232 233 napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); 234 235 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 236 if (!rsp) 237 return -ENOMEM; 238 239 napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); 240 if (napi) { 241 err = netdev_nl_napi_fill_one(rsp, napi, info); 242 netdev_unlock(napi->dev); 243 } else { 244 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); 245 err = -ENOENT; 246 } 247 248 if (err) { 249 goto err_free_msg; 250 } else if (!rsp->len) { 251 err = -ENOENT; 252 goto err_free_msg; 253 } 254 255 return genlmsg_reply(rsp, info); 256 257 err_free_msg: 258 nlmsg_free(rsp); 259 return err; 260 } 261 262 static int 263 netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, 264 const struct genl_info *info, 265 struct netdev_nl_dump_ctx *ctx) 266 { 267 struct napi_struct *napi; 268 unsigned int prev_id; 269 int err = 0; 270 271 if (!netdev->up) 272 return err; 273 274 prev_id = UINT_MAX; 275 list_for_each_entry(napi, &netdev->napi_list, dev_list) { 276 if (!napi_id_valid(napi->napi_id)) 277 continue; 278 279 /* Dump continuation below depends on the list being sorted */ 280 WARN_ON_ONCE(napi->napi_id >= prev_id); 281 prev_id = napi->napi_id; 282 283 if (ctx->napi_id && napi->napi_id >= ctx->napi_id) 284 continue; 285 286 err = netdev_nl_napi_fill_one(rsp, napi, info); 287 if (err) 288 return err; 289 ctx->napi_id = napi->napi_id; 290 } 291 return err; 292 } 293 294 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 295 { 296 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 297 const struct genl_info *info = genl_info_dump(cb); 298 struct net *net = sock_net(skb->sk); 299 struct net_device *netdev; 300 u32 ifindex = 0; 301 int err = 0; 302 303 if (info->attrs[NETDEV_A_NAPI_IFINDEX]) 304 ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); 305 306 if (ifindex) { 307 netdev = netdev_get_by_index_lock(net, ifindex); 308 if (netdev) { 309 err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); 310 netdev_unlock(netdev); 311 } else { 312 err = -ENODEV; 313 } 314 } else { 315 for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { 316 err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); 317 if (err < 0) 318 break; 319 ctx->napi_id = 0; 320 } 321 } 322 323 return err; 324 } 325 326 static int 327 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) 328 { 329 u64 irq_suspend_timeout = 0; 330 u64 gro_flush_timeout = 0; 331 u8 threaded = 0; 332 u32 defer = 0; 333 334 if (info->attrs[NETDEV_A_NAPI_THREADED]) { 335 int ret; 336 337 threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); 338 ret = napi_set_threaded(napi, threaded); 339 if (ret) 340 return ret; 341 } 342 343 if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { 344 defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); 345 napi_set_defer_hard_irqs(napi, defer); 346 } 347 348 if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { 349 irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); 350 napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); 351 } 352 353 if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { 354 gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); 355 napi_set_gro_flush_timeout(napi, gro_flush_timeout); 356 } 357 358 return 0; 359 } 360 361 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) 362 { 363 struct napi_struct *napi; 364 unsigned int napi_id; 365 int err; 366 367 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) 368 return -EINVAL; 369 370 napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); 371 372 napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); 373 if (napi) { 374 err = netdev_nl_napi_set_config(napi, info); 375 netdev_unlock(napi->dev); 376 } else { 377 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); 378 err = -ENOENT; 379 } 380 381 return err; 382 } 383 384 static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) 385 { 386 if (napi && napi_id_valid(napi->napi_id)) 387 return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); 388 return 0; 389 } 390 391 static int 392 netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev, 393 u32 q_idx, u32 q_type) 394 { 395 struct net_device *orig_netdev = netdev; 396 struct nlattr *nest_lease, *nest_queue; 397 struct netdev_rx_queue *rxq; 398 struct net *net, *peer_net; 399 400 rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT); 401 if (!rxq || orig_netdev == netdev) 402 return 0; 403 404 nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); 405 if (!nest_lease) 406 goto nla_put_failure; 407 408 nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); 409 if (!nest_queue) 410 goto nla_put_failure; 411 if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx)) 412 goto nla_put_failure; 413 if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) 414 goto nla_put_failure; 415 nla_nest_end(rsp, nest_queue); 416 417 if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, 418 READ_ONCE(netdev->ifindex))) 419 goto nla_put_failure; 420 421 rcu_read_lock(); 422 peer_net = dev_net_rcu(netdev); 423 net = dev_net_rcu(orig_netdev); 424 if (!net_eq(net, peer_net)) { 425 s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); 426 427 if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) 428 goto nla_put_failure_unlock; 429 } 430 rcu_read_unlock(); 431 nla_nest_end(rsp, nest_lease); 432 return 0; 433 434 nla_put_failure_unlock: 435 rcu_read_unlock(); 436 nla_put_failure: 437 return -ENOMEM; 438 } 439 440 static int 441 __netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq) 442 { 443 struct pp_memory_provider_params *params = &rxq->mp_params; 444 445 if (params->mp_ops && 446 params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) 447 return -EMSGSIZE; 448 449 #ifdef CONFIG_XDP_SOCKETS 450 if (rxq->pool) 451 if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) 452 return -EMSGSIZE; 453 #endif 454 return 0; 455 } 456 457 static int 458 netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev, 459 struct netdev_rx_queue *rxq) 460 { 461 struct netdev_rx_queue *hw_rxq; 462 int ret; 463 464 hw_rxq = rxq->lease; 465 if (!hw_rxq || !netif_is_queue_leasee(netdev)) 466 return __netdev_nl_queue_fill_mp(rsp, rxq); 467 468 netdev_lock(hw_rxq->dev); 469 ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq); 470 netdev_unlock(hw_rxq->dev); 471 return ret; 472 } 473 474 static int 475 netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, 476 u32 q_idx, u32 q_type, const struct genl_info *info) 477 { 478 struct netdev_rx_queue *rxq; 479 struct netdev_queue *txq; 480 void *hdr; 481 482 hdr = genlmsg_iput(rsp, info); 483 if (!hdr) 484 return -EMSGSIZE; 485 486 if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || 487 nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || 488 nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) 489 goto nla_put_failure; 490 491 switch (q_type) { 492 case NETDEV_QUEUE_TYPE_RX: 493 rxq = __netif_get_rx_queue(netdev, q_idx); 494 if (nla_put_napi_id(rsp, rxq->napi)) 495 goto nla_put_failure; 496 if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type)) 497 goto nla_put_failure; 498 if (netdev_nl_queue_fill_mp(rsp, netdev, rxq)) 499 goto nla_put_failure; 500 break; 501 case NETDEV_QUEUE_TYPE_TX: 502 txq = netdev_get_tx_queue(netdev, q_idx); 503 if (nla_put_napi_id(rsp, txq->napi)) 504 goto nla_put_failure; 505 #ifdef CONFIG_XDP_SOCKETS 506 if (txq->pool) 507 if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) 508 goto nla_put_failure; 509 #endif 510 break; 511 } 512 513 genlmsg_end(rsp, hdr); 514 515 return 0; 516 517 nla_put_failure: 518 genlmsg_cancel(rsp, hdr); 519 return -EMSGSIZE; 520 } 521 522 static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, 523 u32 q_type) 524 { 525 switch (q_type) { 526 case NETDEV_QUEUE_TYPE_RX: 527 if (q_id >= netdev->real_num_rx_queues) 528 return -EINVAL; 529 return 0; 530 case NETDEV_QUEUE_TYPE_TX: 531 if (q_id >= netdev->real_num_tx_queues) 532 return -EINVAL; 533 } 534 return 0; 535 } 536 537 static int 538 netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, 539 u32 q_type, const struct genl_info *info) 540 { 541 int err; 542 543 if (!netdev->up) 544 return -ENOENT; 545 546 err = netdev_nl_queue_validate(netdev, q_idx, q_type); 547 if (err) 548 return err; 549 550 return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); 551 } 552 553 int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) 554 { 555 u32 q_id, q_type, ifindex; 556 struct net_device *netdev; 557 struct sk_buff *rsp; 558 int err; 559 560 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || 561 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || 562 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) 563 return -EINVAL; 564 565 q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); 566 q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); 567 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 568 569 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 570 if (!rsp) 571 return -ENOMEM; 572 573 netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info), 574 ifindex); 575 if (netdev) { 576 err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); 577 netdev_unlock_ops_compat(netdev); 578 } else { 579 err = -ENODEV; 580 } 581 582 if (err) 583 goto err_free_msg; 584 585 return genlmsg_reply(rsp, info); 586 587 err_free_msg: 588 nlmsg_free(rsp); 589 return err; 590 } 591 592 static int 593 netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, 594 const struct genl_info *info, 595 struct netdev_nl_dump_ctx *ctx) 596 { 597 int err = 0; 598 599 if (!netdev->up) 600 return err; 601 602 for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { 603 err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx, 604 NETDEV_QUEUE_TYPE_RX, info); 605 if (err) 606 return err; 607 } 608 for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) { 609 err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx, 610 NETDEV_QUEUE_TYPE_TX, info); 611 if (err) 612 return err; 613 } 614 615 return err; 616 } 617 618 int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 619 { 620 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 621 const struct genl_info *info = genl_info_dump(cb); 622 struct net *net = sock_net(skb->sk); 623 struct net_device *netdev; 624 u32 ifindex = 0; 625 int err = 0; 626 627 if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) 628 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 629 630 if (ifindex) { 631 netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); 632 if (netdev) { 633 err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); 634 netdev_unlock_ops_compat(netdev); 635 } else { 636 err = -ENODEV; 637 } 638 } else { 639 for_each_netdev_lock_ops_compat_scoped(net, netdev, 640 ctx->ifindex) { 641 err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); 642 if (err < 0) 643 break; 644 ctx->rxq_idx = 0; 645 ctx->txq_idx = 0; 646 } 647 } 648 649 return err; 650 } 651 652 #define NETDEV_STAT_NOT_SET (~0ULL) 653 654 static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) 655 { 656 const u64 *add = _add; 657 u64 *sum = _sum; 658 659 while (size) { 660 if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) 661 *sum += *add; 662 sum++; 663 add++; 664 size -= 8; 665 } 666 } 667 668 static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) 669 { 670 if (value == NETDEV_STAT_NOT_SET) 671 return 0; 672 return nla_put_uint(rsp, attr_id, value); 673 } 674 675 static int 676 netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) 677 { 678 if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || 679 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || 680 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || 681 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || 682 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || 683 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || 684 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || 685 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || 686 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || 687 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || 688 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || 689 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || 690 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || 691 netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) 692 return -EMSGSIZE; 693 return 0; 694 } 695 696 static int 697 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) 698 { 699 if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || 700 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || 701 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || 702 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || 703 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || 704 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || 705 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || 706 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || 707 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || 708 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || 709 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || 710 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) || 711 netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) 712 return -EMSGSIZE; 713 return 0; 714 } 715 716 static int 717 netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, 718 u32 q_type, int i, const struct genl_info *info) 719 { 720 const struct netdev_stat_ops *ops = netdev->stat_ops; 721 struct netdev_queue_stats_rx rx; 722 struct netdev_queue_stats_tx tx; 723 void *hdr; 724 725 hdr = genlmsg_iput(rsp, info); 726 if (!hdr) 727 return -EMSGSIZE; 728 if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || 729 nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || 730 nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) 731 goto nla_put_failure; 732 733 switch (q_type) { 734 case NETDEV_QUEUE_TYPE_RX: 735 memset(&rx, 0xff, sizeof(rx)); 736 ops->get_queue_stats_rx(netdev, i, &rx); 737 if (!memchr_inv(&rx, 0xff, sizeof(rx))) 738 goto nla_cancel; 739 if (netdev_nl_stats_write_rx(rsp, &rx)) 740 goto nla_put_failure; 741 break; 742 case NETDEV_QUEUE_TYPE_TX: 743 memset(&tx, 0xff, sizeof(tx)); 744 ops->get_queue_stats_tx(netdev, i, &tx); 745 if (!memchr_inv(&tx, 0xff, sizeof(tx))) 746 goto nla_cancel; 747 if (netdev_nl_stats_write_tx(rsp, &tx)) 748 goto nla_put_failure; 749 break; 750 } 751 752 genlmsg_end(rsp, hdr); 753 return 0; 754 755 nla_cancel: 756 genlmsg_cancel(rsp, hdr); 757 return 0; 758 nla_put_failure: 759 genlmsg_cancel(rsp, hdr); 760 return -EMSGSIZE; 761 } 762 763 static int 764 netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, 765 const struct genl_info *info, 766 struct netdev_nl_dump_ctx *ctx) 767 { 768 const struct netdev_stat_ops *ops = netdev->stat_ops; 769 int i, err; 770 771 if (!(netdev->flags & IFF_UP)) 772 return 0; 773 774 i = ctx->rxq_idx; 775 while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { 776 err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, 777 i, info); 778 if (err) 779 return err; 780 ctx->rxq_idx = ++i; 781 } 782 i = ctx->txq_idx; 783 while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { 784 err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, 785 i, info); 786 if (err) 787 return err; 788 ctx->txq_idx = ++i; 789 } 790 791 ctx->rxq_idx = 0; 792 ctx->txq_idx = 0; 793 return 0; 794 } 795 796 /** 797 * netdev_stat_queue_sum() - add up queue stats from range of queues 798 * @netdev: net_device 799 * @rx_start: index of the first Rx queue to query 800 * @rx_end: index after the last Rx queue (first *not* to query) 801 * @rx_sum: output Rx stats, should be already initialized 802 * @tx_start: index of the first Tx queue to query 803 * @tx_end: index after the last Tx queue (first *not* to query) 804 * @tx_sum: output Tx stats, should be already initialized 805 * 806 * Add stats from [start, end) range of queue IDs to *x_sum structs. 807 * The sum structs must be already initialized. Usually this 808 * helper is invoked from the .get_base_stats callbacks of drivers 809 * to account for stats of disabled queues. In that case the ranges 810 * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). 811 */ 812 void netdev_stat_queue_sum(struct net_device *netdev, 813 int rx_start, int rx_end, 814 struct netdev_queue_stats_rx *rx_sum, 815 int tx_start, int tx_end, 816 struct netdev_queue_stats_tx *tx_sum) 817 { 818 const struct netdev_stat_ops *ops; 819 struct netdev_queue_stats_rx rx; 820 struct netdev_queue_stats_tx tx; 821 int i; 822 823 ops = netdev->stat_ops; 824 825 for (i = rx_start; i < rx_end; i++) { 826 memset(&rx, 0xff, sizeof(rx)); 827 if (ops->get_queue_stats_rx) 828 ops->get_queue_stats_rx(netdev, i, &rx); 829 netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); 830 } 831 for (i = tx_start; i < tx_end; i++) { 832 memset(&tx, 0xff, sizeof(tx)); 833 if (ops->get_queue_stats_tx) 834 ops->get_queue_stats_tx(netdev, i, &tx); 835 netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); 836 } 837 } 838 EXPORT_SYMBOL(netdev_stat_queue_sum); 839 840 static int 841 netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, 842 const struct genl_info *info) 843 { 844 struct netdev_queue_stats_rx rx_sum; 845 struct netdev_queue_stats_tx tx_sum; 846 void *hdr; 847 848 /* Netdev can't guarantee any complete counters */ 849 if (!netdev->stat_ops->get_base_stats) 850 return 0; 851 852 memset(&rx_sum, 0xff, sizeof(rx_sum)); 853 memset(&tx_sum, 0xff, sizeof(tx_sum)); 854 855 netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); 856 857 /* The op was there, but nothing reported, don't bother */ 858 if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && 859 !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) 860 return 0; 861 862 hdr = genlmsg_iput(rsp, info); 863 if (!hdr) 864 return -EMSGSIZE; 865 if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) 866 goto nla_put_failure; 867 868 netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, 869 0, netdev->real_num_tx_queues, &tx_sum); 870 871 if (netdev_nl_stats_write_rx(rsp, &rx_sum) || 872 netdev_nl_stats_write_tx(rsp, &tx_sum)) 873 goto nla_put_failure; 874 875 genlmsg_end(rsp, hdr); 876 return 0; 877 878 nla_put_failure: 879 genlmsg_cancel(rsp, hdr); 880 return -EMSGSIZE; 881 } 882 883 static int 884 netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope, 885 struct sk_buff *skb, const struct genl_info *info, 886 struct netdev_nl_dump_ctx *ctx) 887 { 888 if (!netdev->stat_ops) 889 return 0; 890 891 switch (scope) { 892 case 0: 893 return netdev_nl_stats_by_netdev(netdev, skb, info); 894 case NETDEV_QSTATS_SCOPE_QUEUE: 895 return netdev_nl_stats_by_queue(netdev, skb, info, ctx); 896 } 897 898 return -EINVAL; /* Should not happen, per netlink policy */ 899 } 900 901 int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, 902 struct netlink_callback *cb) 903 { 904 struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); 905 const struct genl_info *info = genl_info_dump(cb); 906 struct net *net = sock_net(skb->sk); 907 struct net_device *netdev; 908 unsigned int ifindex; 909 unsigned int scope; 910 int err = 0; 911 912 scope = 0; 913 if (info->attrs[NETDEV_A_QSTATS_SCOPE]) 914 scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); 915 916 ifindex = 0; 917 if (info->attrs[NETDEV_A_QSTATS_IFINDEX]) 918 ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]); 919 920 if (ifindex) { 921 netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); 922 if (!netdev) { 923 NL_SET_BAD_ATTR(info->extack, 924 info->attrs[NETDEV_A_QSTATS_IFINDEX]); 925 return -ENODEV; 926 } 927 if (netdev->stat_ops) { 928 err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, 929 info, ctx); 930 } else { 931 NL_SET_BAD_ATTR(info->extack, 932 info->attrs[NETDEV_A_QSTATS_IFINDEX]); 933 err = -EOPNOTSUPP; 934 } 935 netdev_unlock_ops_compat(netdev); 936 return err; 937 } 938 939 for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) { 940 err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, 941 info, ctx); 942 if (err < 0) 943 break; 944 } 945 946 return err; 947 } 948 949 static int netdev_nl_read_rxq_bitmap(struct genl_info *info, 950 u32 rxq_bitmap_len, 951 unsigned long *rxq_bitmap) 952 { 953 const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; 954 struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; 955 struct nlattr *attr; 956 int rem, err = 0; 957 u32 rxq_idx; 958 959 nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, 960 genlmsg_data(info->genlhdr), 961 genlmsg_len(info->genlhdr), rem) { 962 err = nla_parse_nested(tb, maxtype, attr, 963 netdev_queue_id_nl_policy, info->extack); 964 if (err < 0) 965 return err; 966 967 if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || 968 NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) 969 return -EINVAL; 970 971 if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { 972 NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); 973 return -EINVAL; 974 } 975 976 rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); 977 if (rxq_idx >= rxq_bitmap_len) { 978 NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]); 979 return -EINVAL; 980 } 981 982 bitmap_set(rxq_bitmap, rxq_idx, 1); 983 } 984 985 return 0; 986 } 987 988 static struct device * 989 netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap, 990 struct netlink_ext_ack *extack) 991 { 992 struct device *dma_dev = NULL; 993 u32 rxq_idx, prev_rxq_idx; 994 995 for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { 996 struct device *rxq_dma_dev; 997 998 rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx, 999 NETDEV_QUEUE_TYPE_RX); 1000 if (dma_dev && rxq_dma_dev != dma_dev) { 1001 NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)", 1002 rxq_idx, prev_rxq_idx); 1003 return ERR_PTR(-EOPNOTSUPP); 1004 } 1005 1006 dma_dev = rxq_dma_dev; 1007 prev_rxq_idx = rxq_idx; 1008 } 1009 1010 return dma_dev; 1011 } 1012 1013 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) 1014 { 1015 struct net_devmem_dmabuf_binding *binding; 1016 u32 ifindex, dmabuf_fd, rxq_idx; 1017 struct netdev_nl_sock *priv; 1018 struct net_device *netdev; 1019 unsigned long *rxq_bitmap; 1020 struct device *dma_dev; 1021 struct sk_buff *rsp; 1022 int err = 0; 1023 void *hdr; 1024 1025 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || 1026 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) || 1027 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES)) 1028 return -EINVAL; 1029 1030 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 1031 dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); 1032 1033 priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); 1034 if (IS_ERR(priv)) 1035 return PTR_ERR(priv); 1036 1037 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1038 if (!rsp) 1039 return -ENOMEM; 1040 1041 hdr = genlmsg_iput(rsp, info); 1042 if (!hdr) { 1043 err = -EMSGSIZE; 1044 goto err_genlmsg_free; 1045 } 1046 1047 mutex_lock(&priv->lock); 1048 1049 err = 0; 1050 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1051 if (!netdev) { 1052 err = -ENODEV; 1053 goto err_unlock_sock; 1054 } 1055 if (!netif_device_present(netdev)) 1056 err = -ENODEV; 1057 else if (!netdev_need_ops_lock(netdev)) 1058 err = -EOPNOTSUPP; 1059 if (err) { 1060 NL_SET_BAD_ATTR(info->extack, 1061 info->attrs[NETDEV_A_DEV_IFINDEX]); 1062 goto err_unlock; 1063 } 1064 1065 rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL); 1066 if (!rxq_bitmap) { 1067 err = -ENOMEM; 1068 goto err_unlock; 1069 } 1070 1071 err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues, 1072 rxq_bitmap); 1073 if (err) 1074 goto err_rxq_bitmap; 1075 1076 dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack); 1077 if (IS_ERR(dma_dev)) { 1078 err = PTR_ERR(dma_dev); 1079 goto err_rxq_bitmap; 1080 } 1081 1082 binding = net_devmem_bind_dmabuf(netdev, NULL, dma_dev, DMA_FROM_DEVICE, 1083 dmabuf_fd, priv, info->extack); 1084 if (IS_ERR(binding)) { 1085 err = PTR_ERR(binding); 1086 goto err_rxq_bitmap; 1087 } 1088 1089 for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { 1090 err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, 1091 info->extack); 1092 if (err) 1093 goto err_unbind; 1094 } 1095 1096 nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); 1097 genlmsg_end(rsp, hdr); 1098 1099 err = genlmsg_reply(rsp, info); 1100 1101 bitmap_free(rxq_bitmap); 1102 1103 netdev_unlock(netdev); 1104 1105 mutex_unlock(&priv->lock); 1106 1107 return err < 0 ? err : 0; 1108 1109 err_unbind: 1110 net_devmem_unbind_dmabuf(binding); 1111 err_rxq_bitmap: 1112 bitmap_free(rxq_bitmap); 1113 err_unlock: 1114 netdev_unlock(netdev); 1115 err_unlock_sock: 1116 mutex_unlock(&priv->lock); 1117 err_genlmsg_free: 1118 nlmsg_free(rsp); 1119 return err; 1120 } 1121 1122 /* Find the DMA-capable device for a netmem TX binding. 1123 * 1124 * For NETMEM_TX_DMA devices, return the device itself. 1125 * For NETMEM_TX_NO_DMA devices, walk leased RX queues to find the underlying 1126 * physical device and return it. 1127 */ 1128 static struct net_device * 1129 netdev_find_netmem_tx_dev(struct net_device *dev) 1130 { 1131 struct netdev_rx_queue *lease_rxq; 1132 struct net_device *phys_dev; 1133 int i; 1134 1135 if (dev->netmem_tx == NETMEM_TX_DMA) 1136 return dev; 1137 1138 if (dev->netmem_tx != NETMEM_TX_NO_DMA) 1139 return NULL; 1140 1141 for (i = 0; i < dev->real_num_rx_queues; i++) { 1142 lease_rxq = READ_ONCE(__netif_get_rx_queue(dev, i)->lease); 1143 if (!lease_rxq) 1144 continue; 1145 1146 phys_dev = lease_rxq->dev; 1147 if (netif_device_present(phys_dev) && 1148 phys_dev->netmem_tx == NETMEM_TX_DMA) 1149 return phys_dev; 1150 } 1151 1152 return NULL; 1153 } 1154 1155 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) 1156 { 1157 struct net_devmem_dmabuf_binding *binding; 1158 struct net_device *bind_dev; 1159 struct netdev_nl_sock *priv; 1160 struct net_device *netdev; 1161 struct device *dma_dev; 1162 u32 ifindex, dmabuf_fd; 1163 struct sk_buff *rsp; 1164 int err = 0; 1165 void *hdr; 1166 1167 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || 1168 GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD)) 1169 return -EINVAL; 1170 1171 ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); 1172 dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); 1173 1174 priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); 1175 if (IS_ERR(priv)) 1176 return PTR_ERR(priv); 1177 1178 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1179 if (!rsp) 1180 return -ENOMEM; 1181 1182 hdr = genlmsg_iput(rsp, info); 1183 if (!hdr) { 1184 err = -EMSGSIZE; 1185 goto err_genlmsg_free; 1186 } 1187 1188 mutex_lock(&priv->lock); 1189 1190 netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1191 if (!netdev) { 1192 err = -ENODEV; 1193 goto err_unlock_sock; 1194 } 1195 1196 if (!netif_device_present(netdev)) { 1197 err = -ENODEV; 1198 goto err_unlock_netdev; 1199 } 1200 1201 if (netdev->netmem_tx == NETMEM_TX_NONE) { 1202 err = -EOPNOTSUPP; 1203 NL_SET_ERR_MSG(info->extack, 1204 "Driver does not support netmem TX"); 1205 goto err_unlock_netdev; 1206 } 1207 1208 bind_dev = netdev_find_netmem_tx_dev(netdev); 1209 if (!bind_dev) { 1210 err = -EOPNOTSUPP; 1211 NL_SET_ERR_MSG(info->extack, 1212 "No DMA-capable device found for netmem TX"); 1213 goto err_unlock_netdev; 1214 } 1215 1216 if (bind_dev != netdev) 1217 netdev_lock(bind_dev); 1218 1219 dma_dev = netdev_queue_get_dma_dev(bind_dev, 0, NETDEV_QUEUE_TYPE_TX); 1220 1221 binding = net_devmem_bind_dmabuf(bind_dev, 1222 bind_dev != netdev ? netdev : NULL, 1223 dma_dev, DMA_TO_DEVICE, dmabuf_fd, 1224 priv, info->extack); 1225 if (IS_ERR(binding)) { 1226 err = PTR_ERR(binding); 1227 goto err_unlock_bind_dev; 1228 } 1229 1230 nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); 1231 genlmsg_end(rsp, hdr); 1232 1233 if (bind_dev != netdev) 1234 netdev_unlock(bind_dev); 1235 netdev_unlock(netdev); 1236 mutex_unlock(&priv->lock); 1237 1238 return genlmsg_reply(rsp, info); 1239 1240 err_unlock_bind_dev: 1241 if (bind_dev != netdev) 1242 netdev_unlock(bind_dev); 1243 err_unlock_netdev: 1244 netdev_unlock(netdev); 1245 err_unlock_sock: 1246 mutex_unlock(&priv->lock); 1247 err_genlmsg_free: 1248 nlmsg_free(rsp); 1249 return err; 1250 } 1251 1252 int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) 1253 { 1254 const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; 1255 const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; 1256 int err, ifindex, ifindex_lease, queue_id, queue_id_lease; 1257 struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; 1258 struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; 1259 struct netdev_rx_queue *rxq, *rxq_lease; 1260 struct net_device *dev, *dev_lease; 1261 netdevice_tracker dev_tracker; 1262 s32 netns_lease = -1; 1263 struct nlattr *nest; 1264 struct sk_buff *rsp; 1265 struct net *net; 1266 void *hdr; 1267 1268 if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || 1269 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || 1270 GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) 1271 return -EINVAL; 1272 if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != 1273 NETDEV_QUEUE_TYPE_RX) { 1274 NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); 1275 return -EINVAL; 1276 } 1277 1278 ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 1279 1280 nest = info->attrs[NETDEV_A_QUEUE_LEASE]; 1281 err = nla_parse_nested(ltb, lmaxtype, nest, 1282 netdev_lease_nl_policy, info->extack); 1283 if (err < 0) 1284 return err; 1285 if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || 1286 NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) 1287 return -EINVAL; 1288 if (ltb[NETDEV_A_LEASE_NETNS_ID]) { 1289 if (!capable(CAP_NET_ADMIN)) 1290 return -EPERM; 1291 netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]); 1292 } 1293 1294 ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); 1295 1296 nest = ltb[NETDEV_A_LEASE_QUEUE]; 1297 err = nla_parse_nested(qtb, qmaxtype, nest, 1298 netdev_queue_id_nl_policy, info->extack); 1299 if (err < 0) 1300 return err; 1301 if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || 1302 NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE)) 1303 return -EINVAL; 1304 if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { 1305 NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); 1306 return -EINVAL; 1307 } 1308 1309 queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); 1310 1311 rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1312 if (!rsp) 1313 return -ENOMEM; 1314 1315 hdr = genlmsg_iput(rsp, info); 1316 if (!hdr) { 1317 err = -EMSGSIZE; 1318 goto err_genlmsg_free; 1319 } 1320 1321 /* Locking order is always from the virtual to the physical device 1322 * since this is also the same order when applications open the 1323 * memory provider later on. 1324 */ 1325 dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1326 if (!dev) { 1327 err = -ENODEV; 1328 goto err_genlmsg_free; 1329 } 1330 if (!netdev_can_create_queue(dev, info->extack)) { 1331 err = -EINVAL; 1332 goto err_unlock_dev; 1333 } 1334 1335 net = genl_info_net(info); 1336 if (netns_lease >= 0) { 1337 net = get_net_ns_by_id(net, netns_lease); 1338 if (!net) { 1339 err = -ENONET; 1340 goto err_unlock_dev; 1341 } 1342 } 1343 1344 dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker, 1345 GFP_KERNEL); 1346 if (!dev_lease) { 1347 err = -ENODEV; 1348 goto err_put_netns; 1349 } 1350 if (!netdev_can_lease_queue(dev_lease, info->extack)) { 1351 netdev_put(dev_lease, &dev_tracker); 1352 err = -EINVAL; 1353 goto err_put_netns; 1354 } 1355 1356 dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker); 1357 if (!dev_lease) { 1358 err = -ENODEV; 1359 goto err_put_netns; 1360 } 1361 if (queue_id_lease >= dev_lease->real_num_rx_queues) { 1362 err = -ERANGE; 1363 NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); 1364 goto err_unlock_dev_lease; 1365 } 1366 if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX, 1367 info->extack)) { 1368 err = -EBUSY; 1369 goto err_unlock_dev_lease; 1370 } 1371 1372 rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); 1373 rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); 1374 1375 /* Leasing queues from different physical devices is currently 1376 * not supported. Capabilities such as XDP features and DMA 1377 * device may differ between physical devices, and computing 1378 * a correct intersection for the virtual device is not yet 1379 * implemented. 1380 */ 1381 if (rxq->lease && rxq->lease->dev != dev_lease) { 1382 err = -EOPNOTSUPP; 1383 NL_SET_ERR_MSG(info->extack, 1384 "Leasing queues from different devices not supported"); 1385 goto err_unlock_dev_lease; 1386 } 1387 1388 queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack); 1389 if (queue_id < 0) { 1390 err = queue_id; 1391 goto err_unlock_dev_lease; 1392 } 1393 rxq = __netif_get_rx_queue(dev, queue_id); 1394 1395 netdev_rx_queue_lease(rxq, rxq_lease); 1396 1397 nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); 1398 genlmsg_end(rsp, hdr); 1399 1400 netdev_unlock(dev_lease); 1401 netdev_unlock(dev); 1402 if (netns_lease >= 0) 1403 put_net(net); 1404 1405 return genlmsg_reply(rsp, info); 1406 1407 err_unlock_dev_lease: 1408 netdev_unlock(dev_lease); 1409 err_put_netns: 1410 if (netns_lease >= 0) 1411 put_net(net); 1412 err_unlock_dev: 1413 netdev_unlock(dev); 1414 err_genlmsg_free: 1415 nlmsg_free(rsp); 1416 return err; 1417 } 1418 1419 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) 1420 { 1421 INIT_LIST_HEAD(&priv->bindings); 1422 mutex_init(&priv->lock); 1423 } 1424 1425 void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) 1426 { 1427 struct net_devmem_dmabuf_binding *binding; 1428 struct net_devmem_dmabuf_binding *temp; 1429 netdevice_tracker dev_tracker; 1430 struct net_device *dev; 1431 1432 mutex_lock(&priv->lock); 1433 list_for_each_entry_safe(binding, temp, &priv->bindings, list) { 1434 mutex_lock(&binding->lock); 1435 dev = binding->dev; 1436 if (!dev) { 1437 mutex_unlock(&binding->lock); 1438 net_devmem_unbind_dmabuf(binding); 1439 continue; 1440 } 1441 netdev_hold(dev, &dev_tracker, GFP_KERNEL); 1442 mutex_unlock(&binding->lock); 1443 1444 netdev_lock(dev); 1445 net_devmem_unbind_dmabuf(binding); 1446 netdev_unlock(dev); 1447 netdev_put(dev, &dev_tracker); 1448 } 1449 mutex_unlock(&priv->lock); 1450 } 1451 1452 static int netdev_genl_netdevice_event(struct notifier_block *nb, 1453 unsigned long event, void *ptr) 1454 { 1455 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 1456 1457 switch (event) { 1458 case NETDEV_REGISTER: 1459 netdev_lock_ops_to_full(netdev); 1460 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); 1461 netdev_unlock_full_to_ops(netdev); 1462 break; 1463 case NETDEV_UNREGISTER: 1464 netdev_lock(netdev); 1465 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); 1466 netdev_unlock(netdev); 1467 break; 1468 case NETDEV_XDP_FEAT_CHANGE: 1469 netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); 1470 break; 1471 } 1472 1473 return NOTIFY_OK; 1474 } 1475 1476 static struct notifier_block netdev_genl_nb = { 1477 .notifier_call = netdev_genl_netdevice_event, 1478 }; 1479 1480 static int __init netdev_genl_init(void) 1481 { 1482 int err; 1483 1484 err = register_netdevice_notifier(&netdev_genl_nb); 1485 if (err) 1486 return err; 1487 1488 err = genl_register_family(&netdev_nl_family); 1489 if (err) 1490 goto err_unreg_ntf; 1491 1492 return 0; 1493 1494 err_unreg_ntf: 1495 unregister_netdevice_notifier(&netdev_genl_nb); 1496 return err; 1497 } 1498 1499 subsys_initcall(netdev_genl_init); 1500