// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure IRQ ACK is visible before we check pending work.
		 * If queue had issued updates, it would be truly visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
		block->irq_db_index = &priv->irq_db_indices[i].index;
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
			 int (*gve_poll)(struct napi_struct *, int))
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_poll);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}

static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_create_xdp_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_create_tx_queues(priv,
					  gve_xdp_tx_start_queue_id(priv),
					  priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
			  priv->num_xdp_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
		  priv->num_xdp_queues);

	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
					 int (*napi_poll)(struct napi_struct *napi,
							  int budget))
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Add xdp tx napi & init sync stats*/
	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int i;

	/* Add tx napi & init sync stats*/
	for (i = 0; i < gve_num_tx_queues(priv); i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->rx[i].statss);
		priv->rx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, start_id, num_rings);
	} else {
		gve_tx_free_rings_dqo(priv);
	}
}

static int gve_alloc_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err = 0;

	if (!priv->num_xdp_queues)
		return 0;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
	if (err)
		return err;
	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);

	return 0;
}

static int gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	/* Setup tx rings */
	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
	else
		err = gve_tx_alloc_rings_dqo(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings(priv);
	else
		err = gve_rx_alloc_rings_dqo(priv);
	if (err)
		goto free_rx;

	if (gve_is_gqi(priv))
		add_napi_init_sync_stats(priv, gve_napi_poll);
	else
		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}

static int gve_destroy_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_adminq_destroy_tx_queues(priv,
					   start_id,
					   priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy XDP queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");

	return 0;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv))
		gve_rx_free_rings_gqi(priv);
	else
		gve_rx_free_rings_dqo(priv);
}

static void gve_free_xdp_rings(struct gve_priv *priv)
{
	int ntfy_idx, start_id;
	int i;

	start_id = gve_xdp_tx_start_queue_id(priv);
	if (priv->tx) {
		for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
	}
}

static void gve_free_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < num_tx_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, 0, num_tx_queues);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
	priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i, j;
	int err;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = start_id; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	return err;
}

static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;

	if (!gve_is_qpl(priv))
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	page_count = priv->tx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);

	/* For GQI_QPL number of pages allocated have 1:1 relationship with
	 * number of descriptors. For DQO, number of pages required are
	 * more than descriptors (because of out of order completions).
	 */
	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	priv->qpls = NULL;
	return err;
}

static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
		gve_free_queue_page_list(priv, i);
}

static void gve_free_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int i;

	if (!priv->qpls)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);
	priv->qpl_cfg.qpl_id_map = NULL;

	for (i = 0; i < max_queues; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
	priv->qpls = NULL;
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->num_xdp_queues)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	struct page_frag_cache *nc;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		nc = &priv->rx[i].page_cache;
		if (nc->va) {
			__page_frag_cache_drain(virt_to_page(nc->va),
						nc->pagecnt_bias);
			nc->va = NULL;
		}
	}
}

static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	if (priv->xdp_prog)
		priv->num_xdp_queues = priv->rx_cfg.num_queues;
	else
		priv->num_xdp_queues = 0;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;

	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	if (!gve_is_gqi(priv)) {
		/* Hard code this for now. This may be tuned in the future for
		 * performance.
		 */
		priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
	}
	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);
	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

static int gve_remove_xdp_queues(struct gve_priv *priv)
{
	int err;

	err = gve_destroy_xdp_rings(priv);
	if (err)
		return err;

	err = gve_unregister_xdp_qpls(priv);
	if (err)
		return err;

	gve_unreg_xdp_info(priv);
	gve_free_xdp_rings(priv);
	gve_free_xdp_qpls(priv);
	priv->num_xdp_queues = 0;
	return 0;
}

static int gve_add_xdp_queues(struct gve_priv *priv)
{
	int err;

	priv->num_xdp_queues = priv->tx_cfg.num_queues;

	err = gve_alloc_xdp_qpls(priv);
	if (err)
		goto err;

	err = gve_alloc_xdp_rings(priv);
	if (err)
		goto free_xdp_qpls;

	err = gve_reg_xdp_info(priv, priv->dev);
	if (err)
		goto free_xdp_rings;

	err = gve_register_xdp_qpls(priv);
	if (err)
		goto free_xdp_rings;

	err = gve_create_xdp_rings(priv);
	if (err)
		goto free_xdp_rings;

	return 0;

free_xdp_rings:
	gve_free_xdp_rings(priv);
free_xdp_qpls:
	gve_free_xdp_qpls(priv);
err:
	priv->num_xdp_queues = 0;
	return err;
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}

static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
		       struct netlink_ext_ack *extack)
{
	struct bpf_prog *old_prog;
	int err = 0;
	u32 status;

	old_prog = READ_ONCE(priv->xdp_prog);
	if (!netif_carrier_ok(priv->dev)) {
		WRITE_ONCE(priv->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
		return 0;
	}

	gve_turndown(priv);
	if (!old_prog && prog) {
		// Allocate XDP TX queues if an XDP program is
		// being installed
		err = gve_add_xdp_queues(priv);
		if (err)
			goto out;
	} else if (old_prog && !prog) {
		// Remove XDP TX queues if an XDP program is
		// being uninstalled
		err = gve_remove_xdp_queues(priv);
		if (err)
			goto out;
	}
	WRITE_ONCE(priv->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

out:
	gve_turnup(priv);
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
	return err;
}

static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int tx_qid;
	int err;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (err)
		return err;

	/* If XDP prog is not installed, return */
	if (!priv->xdp_prog)
		return 0;

	rx = &priv->rx[qid];
	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
	if (err)
		goto err;

	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, NULL);
	if (err)
		goto err;

	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
	rx->xsk_pool = pool;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[tx_qid].xsk_pool = pool;

	return 0;
err:
	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
		xdp_rxq_info_unreg(&rx->xsk_rxq);

	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return err;
}

static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi_rx;
	struct napi_struct *napi_tx;
	struct xsk_buff_pool *pool;
	int tx_qid;

	pool = xsk_get_pool_from_qid(dev, qid);
	if (!pool)
		return -EINVAL;
	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* If XDP prog is not installed, unmap DMA and return */
	if (!priv->xdp_prog)
		goto done;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	if (!netif_running(dev)) {
		priv->rx[qid].xsk_pool = NULL;
		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
		priv->tx[tx_qid].xsk_pool = NULL;
		goto done;
	}

	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
	napi_disable(napi_rx); /* make sure current rx poll is done */

	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
	napi_disable(napi_tx); /* make sure current tx poll is done */

	priv->rx[qid].xsk_pool = NULL;
	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
	priv->tx[tx_qid].xsk_pool = NULL;
	smp_mb(); /* Make sure it is visible to the workers on datapath */

	napi_enable(napi_rx);
	if (gve_rx_work_pending(&priv->rx[qid]))
		napi_schedule(napi_rx);

	napi_enable(napi_tx);
	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
		napi_schedule(napi_tx);

done:
	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return 0;
}

static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);

	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
		return -EINVAL;

	if (flags & XDP_WAKEUP_TX) {
		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
		struct napi_struct *napi =
			&priv->ntfy_blocks[tx->ntfy_id].napi;

		if (!napi_if_scheduled_mark_missed(napi)) {
			/* Call local_bh_enable to trigger SoftIRQ processing */
			local_bh_disable();
			napi_schedule(napi);
			local_bh_enable();
		}

		tx->xdp_xsk_wakeup++;
	}

	return 0;
}

static int verify_xdp_configuration(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (dev->features & NETIF_F_LRO) {
		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
		return -EOPNOTSUPP;
	}

	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
		netdev_warn(dev, "XDP is not supported in mode %d.\n",
			    priv->queue_format);
		return -EOPNOTSUPP;
	}

	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}

static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	gve_set_napi_enabled(priv);
}

static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_tx_ring *tx = NULL;
	struct gve_priv *priv;
	u32 last_nic_done;
	u32 current_time;
	u32 ntfy_idx;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);
	if (txqueue > priv->tx_cfg.num_queues)
		goto reset;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		goto reset;

	block = &priv->ntfy_blocks[ntfy_idx];
	tx = block->tx;

	current_time = jiffies_to_msecs(jiffies);
	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		goto reset;

	/* Check to see if there are missed completions, which will allow us to
	 * kick the queue.
	 */
	last_nic_done = gve_tx_load_event_counter(priv, tx);
	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d", txqueue);
		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
		napi_schedule(&block->napi);
		tx->last_kick_msec = current_time;
		goto out;
	} // Else reset.

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			/* To make this process as simple as possible we
			 * teardown the device, set the new configuration,
			 * and then bring the device up again.
			 */
			err = gve_close(netdev);
			/* We have already tried to reset in close, just fail
			 * at this point.
			 */
			if (err)
				goto err;

			err = gve_open(netdev);
			if (err)
				goto err;
		}
	}

	return 0;
err:
	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
*/ 1951 if (gve_is_gqi(priv)) { 1952 last_completion = priv->tx[idx].done; 1953 tx_frames = priv->tx[idx].req; 1954 } 1955 1956 do { 1957 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1958 tx_bytes = priv->tx[idx].bytes_done; 1959 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1960 stats[stats_idx++] = (struct stats) { 1961 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1962 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1963 .queue_id = cpu_to_be32(idx), 1964 }; 1965 stats[stats_idx++] = (struct stats) { 1966 .stat_name = cpu_to_be32(TX_STOP_CNT), 1967 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1968 .queue_id = cpu_to_be32(idx), 1969 }; 1970 stats[stats_idx++] = (struct stats) { 1971 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1972 .value = cpu_to_be64(tx_frames), 1973 .queue_id = cpu_to_be32(idx), 1974 }; 1975 stats[stats_idx++] = (struct stats) { 1976 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1977 .value = cpu_to_be64(tx_bytes), 1978 .queue_id = cpu_to_be32(idx), 1979 }; 1980 stats[stats_idx++] = (struct stats) { 1981 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1982 .value = cpu_to_be64(last_completion), 1983 .queue_id = cpu_to_be32(idx), 1984 }; 1985 stats[stats_idx++] = (struct stats) { 1986 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1987 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1988 .queue_id = cpu_to_be32(idx), 1989 }; 1990 } 1991 } 1992 /* rx stats */ 1993 if (priv->rx) { 1994 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1995 stats[stats_idx++] = (struct stats) { 1996 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1997 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1998 .queue_id = cpu_to_be32(idx), 1999 }; 2000 stats[stats_idx++] = (struct stats) { 2001 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2002 .value = cpu_to_be64(priv->rx[0].fill_cnt), 2003 .queue_id = cpu_to_be32(idx), 2004 }; 2005 } 2006 } 2007 } 2008 2009 /* Handle NIC status register changes, reset requests and report stats */ 2010 static void gve_service_task(struct work_struct *work) 2011 { 2012 struct gve_priv *priv = container_of(work, struct gve_priv, 2013 service_task); 2014 u32 status = ioread32be(&priv->reg_bar0->device_status); 2015 2016 gve_handle_status(priv, status); 2017 2018 gve_handle_reset(priv); 2019 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2020 } 2021 2022 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2023 { 2024 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2025 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 2026 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2027 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2028 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2029 } else { 2030 priv->dev->xdp_features = 0; 2031 } 2032 } 2033 2034 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2035 { 2036 int num_ntfy; 2037 int err; 2038 2039 /* Set up the adminq */ 2040 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2041 if (err) { 2042 dev_err(&priv->pdev->dev, 2043 "Failed to alloc admin queue: err=%d\n", err); 2044 return err; 2045 } 2046 2047 err = gve_verify_driver_compatibility(priv); 2048 if (err) { 2049 dev_err(&priv->pdev->dev, 2050 "Could not verify driver compatibility: err=%d\n", err); 2051 goto err; 2052 } 2053 2054 if (skip_describe_device) 2055 goto setup_device; 2056 2057 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2058 /* Get the initial information we need from the device */ 2059 err = gve_adminq_describe_device(priv); 2060 if (err) 
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
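
/* Perform a device reset: tear down the existing state (attempting a normal
 * close first when requested, otherwise turning the queues down and releasing
 * the admin queue right away), then set everything back up via
 * gve_reset_recovery().
 */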
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);