1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2021 Google, Inc. 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/sched.h> 15 #include <linux/timer.h> 16 #include <linux/workqueue.h> 17 #include <linux/utsname.h> 18 #include <linux/version.h> 19 #include <net/sch_generic.h> 20 #include <net/xdp_sock_drv.h> 21 #include "gve.h" 22 #include "gve_dqo.h" 23 #include "gve_adminq.h" 24 #include "gve_register.h" 25 26 #define GVE_DEFAULT_RX_COPYBREAK (256) 27 28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 29 #define GVE_VERSION "1.0.0" 30 #define GVE_VERSION_PREFIX "GVE-" 31 32 // Minimum amount of time between queue kicks in msec (10 seconds) 33 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 34 35 const char gve_version_str[] = GVE_VERSION; 36 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 37 38 static int gve_verify_driver_compatibility(struct gve_priv *priv) 39 { 40 int err; 41 struct gve_driver_info *driver_info; 42 dma_addr_t driver_info_bus; 43 44 driver_info = dma_alloc_coherent(&priv->pdev->dev, 45 sizeof(struct gve_driver_info), 46 &driver_info_bus, GFP_KERNEL); 47 if (!driver_info) 48 return -ENOMEM; 49 50 *driver_info = (struct gve_driver_info) { 51 .os_type = 1, /* Linux */ 52 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 53 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 54 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 55 .driver_capability_flags = { 56 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 60 }, 61 }; 62 strscpy(driver_info->os_version_str1, utsname()->release, 63 sizeof(driver_info->os_version_str1)); 64 strscpy(driver_info->os_version_str2, utsname()->version, 65 sizeof(driver_info->os_version_str2)); 66 67 err = gve_adminq_verify_driver_compatibility(priv, 68 sizeof(struct gve_driver_info), 69 driver_info_bus); 70 71 /* It's ok if the device doesn't support this */ 72 if (err == -EOPNOTSUPP) 73 err = 0; 74 75 dma_free_coherent(&priv->pdev->dev, 76 sizeof(struct gve_driver_info), 77 driver_info, driver_info_bus); 78 return err; 79 } 80 81 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 82 { 83 struct gve_priv *priv = netdev_priv(dev); 84 85 if (gve_is_gqi(priv)) 86 return gve_tx(skb, dev); 87 else 88 return gve_tx_dqo(skb, dev); 89 } 90 91 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 92 { 93 struct gve_priv *priv = netdev_priv(dev); 94 unsigned int start; 95 u64 packets, bytes; 96 int num_tx_queues; 97 int ring; 98 99 num_tx_queues = gve_num_tx_queues(priv); 100 if (priv->rx) { 101 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 102 do { 103 start = 104 u64_stats_fetch_begin(&priv->rx[ring].statss); 105 packets = priv->rx[ring].rpackets; 106 bytes = priv->rx[ring].rbytes; 107 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 108 start)); 109 s->rx_packets += packets; 110 s->rx_bytes += bytes; 111 } 112 } 113 if (priv->tx) { 114 for (ring = 0; ring < num_tx_queues; ring++) { 115 do { 116 start = 117 u64_stats_fetch_begin(&priv->tx[ring].statss); 118 packets = priv->tx[ring].pkt_done; 119 bytes = priv->tx[ring].bytes_done; 120 } while 
(u64_stats_fetch_retry(&priv->tx[ring].statss, 121 start)); 122 s->tx_packets += packets; 123 s->tx_bytes += bytes; 124 } 125 } 126 } 127 128 static int gve_alloc_counter_array(struct gve_priv *priv) 129 { 130 priv->counter_array = 131 dma_alloc_coherent(&priv->pdev->dev, 132 priv->num_event_counters * 133 sizeof(*priv->counter_array), 134 &priv->counter_array_bus, GFP_KERNEL); 135 if (!priv->counter_array) 136 return -ENOMEM; 137 138 return 0; 139 } 140 141 static void gve_free_counter_array(struct gve_priv *priv) 142 { 143 if (!priv->counter_array) 144 return; 145 146 dma_free_coherent(&priv->pdev->dev, 147 priv->num_event_counters * 148 sizeof(*priv->counter_array), 149 priv->counter_array, priv->counter_array_bus); 150 priv->counter_array = NULL; 151 } 152 153 /* NIC requests to report stats */ 154 static void gve_stats_report_task(struct work_struct *work) 155 { 156 struct gve_priv *priv = container_of(work, struct gve_priv, 157 stats_report_task); 158 if (gve_get_do_report_stats(priv)) { 159 gve_handle_report_stats(priv); 160 gve_clear_do_report_stats(priv); 161 } 162 } 163 164 static void gve_stats_report_schedule(struct gve_priv *priv) 165 { 166 if (!gve_get_probe_in_progress(priv) && 167 !gve_get_reset_in_progress(priv)) { 168 gve_set_do_report_stats(priv); 169 queue_work(priv->gve_wq, &priv->stats_report_task); 170 } 171 } 172 173 static void gve_stats_report_timer(struct timer_list *t) 174 { 175 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 176 177 mod_timer(&priv->stats_report_timer, 178 round_jiffies(jiffies + 179 msecs_to_jiffies(priv->stats_report_timer_period))); 180 gve_stats_report_schedule(priv); 181 } 182 183 static int gve_alloc_stats_report(struct gve_priv *priv) 184 { 185 int tx_stats_num, rx_stats_num; 186 187 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 188 gve_num_tx_queues(priv); 189 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 190 priv->rx_cfg.num_queues; 191 priv->stats_report_len = struct_size(priv->stats_report, stats, 192 tx_stats_num + rx_stats_num); 193 priv->stats_report = 194 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 195 &priv->stats_report_bus, GFP_KERNEL); 196 if (!priv->stats_report) 197 return -ENOMEM; 198 /* Set up timer for the report-stats task */ 199 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 200 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 201 return 0; 202 } 203 204 static void gve_free_stats_report(struct gve_priv *priv) 205 { 206 if (!priv->stats_report) 207 return; 208 209 del_timer_sync(&priv->stats_report_timer); 210 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 211 priv->stats_report, priv->stats_report_bus); 212 priv->stats_report = NULL; 213 } 214 215 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 216 { 217 struct gve_priv *priv = arg; 218 219 queue_work(priv->gve_wq, &priv->service_task); 220 return IRQ_HANDLED; 221 } 222 223 static irqreturn_t gve_intr(int irq, void *arg) 224 { 225 struct gve_notify_block *block = arg; 226 struct gve_priv *priv = block->priv; 227 228 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 229 napi_schedule_irqoff(&block->napi); 230 return IRQ_HANDLED; 231 } 232 233 static irqreturn_t gve_intr_dqo(int irq, void *arg) 234 { 235 struct gve_notify_block *block = arg; 236 237 /* Interrupts are automatically masked */ 238 napi_schedule_irqoff(&block->napi); 239 return IRQ_HANDLED; 240 } 241 242 static int gve_napi_poll(struct napi_struct *napi, int budget) 
243 { 244 struct gve_notify_block *block; 245 __be32 __iomem *irq_doorbell; 246 bool reschedule = false; 247 struct gve_priv *priv; 248 int work_done = 0; 249 250 block = container_of(napi, struct gve_notify_block, napi); 251 priv = block->priv; 252 253 if (block->tx) { 254 if (block->tx->q_num < priv->tx_cfg.num_queues) 255 reschedule |= gve_tx_poll(block, budget); 256 else 257 reschedule |= gve_xdp_poll(block, budget); 258 } 259 260 if (block->rx) { 261 work_done = gve_rx_poll(block, budget); 262 reschedule |= work_done == budget; 263 } 264 265 if (reschedule) 266 return budget; 267 268 /* Complete processing - don't unmask irq if busy polling is enabled */ 269 if (likely(napi_complete_done(napi, work_done))) { 270 irq_doorbell = gve_irq_doorbell(priv, block); 271 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 272 273 /* Ensure IRQ ACK is visible before we check pending work. 274 * If queue had issued updates, it would be truly visible. 275 */ 276 mb(); 277 278 if (block->tx) 279 reschedule |= gve_tx_clean_pending(priv, block->tx); 280 if (block->rx) 281 reschedule |= gve_rx_work_pending(block->rx); 282 283 if (reschedule && napi_reschedule(napi)) 284 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 285 } 286 return work_done; 287 } 288 289 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 290 { 291 struct gve_notify_block *block = 292 container_of(napi, struct gve_notify_block, napi); 293 struct gve_priv *priv = block->priv; 294 bool reschedule = false; 295 int work_done = 0; 296 297 /* Clear PCI MSI-X Pending Bit Array (PBA) 298 * 299 * This bit is set if an interrupt event occurs while the vector is 300 * masked. If this bit is set and we reenable the interrupt, it will 301 * fire again. Since we're just about to poll the queue state, we don't 302 * need it to fire again. 303 * 304 * Under high softirq load, it's possible that the interrupt condition 305 * is triggered twice before we got the chance to process it. 306 */ 307 gve_write_irq_doorbell_dqo(priv, block, 308 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); 309 310 if (block->tx) 311 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 312 313 if (block->rx) { 314 work_done = gve_rx_poll_dqo(block, budget); 315 reschedule |= work_done == budget; 316 } 317 318 if (reschedule) 319 return budget; 320 321 if (likely(napi_complete_done(napi, work_done))) { 322 /* Enable interrupts again. 323 * 324 * We don't need to repoll afterwards because HW supports the 325 * PCI MSI-X PBA feature. 326 * 327 * Another interrupt would be triggered if a new event came in 328 * since the last one. 
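 * The doorbell write below only sets the enable bit;
 * GVE_ITR_NO_UPDATE_DQO leaves the configured interrupt
 * moderation interval unchanged.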
329 */ 330 gve_write_irq_doorbell_dqo(priv, block, 331 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 332 } 333 334 return work_done; 335 } 336 337 static int gve_alloc_notify_blocks(struct gve_priv *priv) 338 { 339 int num_vecs_requested = priv->num_ntfy_blks + 1; 340 unsigned int active_cpus; 341 int vecs_enabled; 342 int i, j; 343 int err; 344 345 priv->msix_vectors = kvcalloc(num_vecs_requested, 346 sizeof(*priv->msix_vectors), GFP_KERNEL); 347 if (!priv->msix_vectors) 348 return -ENOMEM; 349 for (i = 0; i < num_vecs_requested; i++) 350 priv->msix_vectors[i].entry = i; 351 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 352 GVE_MIN_MSIX, num_vecs_requested); 353 if (vecs_enabled < 0) { 354 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 355 GVE_MIN_MSIX, vecs_enabled); 356 err = vecs_enabled; 357 goto abort_with_msix_vectors; 358 } 359 if (vecs_enabled != num_vecs_requested) { 360 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 361 int vecs_per_type = new_num_ntfy_blks / 2; 362 int vecs_left = new_num_ntfy_blks % 2; 363 364 priv->num_ntfy_blks = new_num_ntfy_blks; 365 priv->mgmt_msix_idx = priv->num_ntfy_blks; 366 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 367 vecs_per_type); 368 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 369 vecs_per_type + vecs_left); 370 dev_err(&priv->pdev->dev, 371 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 372 vecs_enabled, priv->tx_cfg.max_queues, 373 priv->rx_cfg.max_queues); 374 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 375 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 376 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 377 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 378 } 379 /* Half the notification blocks go to TX and half to RX */ 380 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 381 382 /* Setup Management Vector - the last vector */ 383 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 384 pci_name(priv->pdev)); 385 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 386 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 387 if (err) { 388 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 389 goto abort_with_msix_enabled; 390 } 391 priv->irq_db_indices = 392 dma_alloc_coherent(&priv->pdev->dev, 393 priv->num_ntfy_blks * 394 sizeof(*priv->irq_db_indices), 395 &priv->irq_db_indices_bus, GFP_KERNEL); 396 if (!priv->irq_db_indices) { 397 err = -ENOMEM; 398 goto abort_with_mgmt_vector; 399 } 400 401 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 402 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 403 if (!priv->ntfy_blocks) { 404 err = -ENOMEM; 405 goto abort_with_irq_db_indices; 406 } 407 408 /* Setup the other blocks - the first n-1 vectors */ 409 for (i = 0; i < priv->num_ntfy_blks; i++) { 410 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 411 int msix_idx = i; 412 413 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 414 i, pci_name(priv->pdev)); 415 block->priv = priv; 416 err = request_irq(priv->msix_vectors[msix_idx].vector, 417 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 418 0, block->name, block); 419 if (err) { 420 dev_err(&priv->pdev->dev, 421 "Failed to receive msix vector %d\n", i); 422 goto abort_with_some_ntfy_blocks; 423 } 424 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 425 get_cpu_mask(i % active_cpus)); 426 block->irq_db_index = &priv->irq_db_indices[i].index; 427 } 428 return 0; 429 abort_with_some_ntfy_blocks: 430 for (j = 0; j < i; j++) { 431 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 432 int msix_idx = j; 433 434 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 435 NULL); 436 free_irq(priv->msix_vectors[msix_idx].vector, block); 437 } 438 kvfree(priv->ntfy_blocks); 439 priv->ntfy_blocks = NULL; 440 abort_with_irq_db_indices: 441 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 442 sizeof(*priv->irq_db_indices), 443 priv->irq_db_indices, priv->irq_db_indices_bus); 444 priv->irq_db_indices = NULL; 445 abort_with_mgmt_vector: 446 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 447 abort_with_msix_enabled: 448 pci_disable_msix(priv->pdev); 449 abort_with_msix_vectors: 450 kvfree(priv->msix_vectors); 451 priv->msix_vectors = NULL; 452 return err; 453 } 454 455 static void gve_free_notify_blocks(struct gve_priv *priv) 456 { 457 int i; 458 459 if (!priv->msix_vectors) 460 return; 461 462 /* Free the irqs */ 463 for (i = 0; i < priv->num_ntfy_blks; i++) { 464 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 465 int msix_idx = i; 466 467 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 468 NULL); 469 free_irq(priv->msix_vectors[msix_idx].vector, block); 470 } 471 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 472 kvfree(priv->ntfy_blocks); 473 priv->ntfy_blocks = NULL; 474 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 475 sizeof(*priv->irq_db_indices), 476 priv->irq_db_indices, priv->irq_db_indices_bus); 477 priv->irq_db_indices = NULL; 478 pci_disable_msix(priv->pdev); 479 kvfree(priv->msix_vectors); 480 priv->msix_vectors = NULL; 481 } 482 483 static int gve_setup_device_resources(struct gve_priv *priv) 484 { 485 int err; 486 487 err = gve_alloc_counter_array(priv); 488 if (err) 489 return err; 490 err = gve_alloc_notify_blocks(priv); 491 if (err) 492 goto abort_with_counter; 493 err = gve_alloc_stats_report(priv); 494 if (err) 495 goto abort_with_ntfy_blocks; 496 err = gve_adminq_configure_device_resources(priv, 497 priv->counter_array_bus, 498 priv->num_event_counters, 499 priv->irq_db_indices_bus, 500 priv->num_ntfy_blks); 501 if (unlikely(err)) { 502 dev_err(&priv->pdev->dev, 503 "could not setup device_resources: err=%d\n", err); 504 err = -ENXIO; 505 goto abort_with_stats_report; 506 } 507 508 if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 509 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 510 GFP_KERNEL); 511 if (!priv->ptype_lut_dqo) { 512 err = -ENOMEM; 513 goto abort_with_stats_report; 514 } 515 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 516 if (err) { 517 dev_err(&priv->pdev->dev, 518 "Failed to get ptype map: err=%d\n", err); 519 goto abort_with_ptype_lut; 520 } 521 } 522 523 err = gve_adminq_report_stats(priv, priv->stats_report_len, 524 priv->stats_report_bus, 525 GVE_STATS_REPORT_TIMER_PERIOD); 526 if (err) 527 dev_err(&priv->pdev->dev, 528 "Failed to report stats: err=%d\n", err); 529 gve_set_device_resources_ok(priv); 530 return 0; 531 532 abort_with_ptype_lut: 533 kvfree(priv->ptype_lut_dqo); 534 priv->ptype_lut_dqo = NULL; 535 abort_with_stats_report: 536 
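/* Unwind the remaining setup in reverse order: the stats report,
 * then the notification blocks, then the counter array.
 */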
gve_free_stats_report(priv); 537 abort_with_ntfy_blocks: 538 gve_free_notify_blocks(priv); 539 abort_with_counter: 540 gve_free_counter_array(priv); 541 542 return err; 543 } 544 545 static void gve_trigger_reset(struct gve_priv *priv); 546 547 static void gve_teardown_device_resources(struct gve_priv *priv) 548 { 549 int err; 550 551 /* Tell device its resources are being freed */ 552 if (gve_get_device_resources_ok(priv)) { 553 /* detach the stats report */ 554 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 555 if (err) { 556 dev_err(&priv->pdev->dev, 557 "Failed to detach stats report: err=%d\n", err); 558 gve_trigger_reset(priv); 559 } 560 err = gve_adminq_deconfigure_device_resources(priv); 561 if (err) { 562 dev_err(&priv->pdev->dev, 563 "Could not deconfigure device resources: err=%d\n", 564 err); 565 gve_trigger_reset(priv); 566 } 567 } 568 569 kvfree(priv->ptype_lut_dqo); 570 priv->ptype_lut_dqo = NULL; 571 572 gve_free_counter_array(priv); 573 gve_free_notify_blocks(priv); 574 gve_free_stats_report(priv); 575 gve_clear_device_resources_ok(priv); 576 } 577 578 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, 579 int (*gve_poll)(struct napi_struct *, int)) 580 { 581 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 582 583 netif_napi_add(priv->dev, &block->napi, gve_poll); 584 } 585 586 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 587 { 588 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 589 590 netif_napi_del(&block->napi); 591 } 592 593 static int gve_register_xdp_qpls(struct gve_priv *priv) 594 { 595 int start_id; 596 int err; 597 int i; 598 599 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 600 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 601 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 602 if (err) { 603 netif_err(priv, drv, priv->dev, 604 "failed to register queue page list %d\n", 605 priv->qpls[i].id); 606 /* This failure will trigger a reset - no need to clean 607 * up 608 */ 609 return err; 610 } 611 } 612 return 0; 613 } 614 615 static int gve_register_qpls(struct gve_priv *priv) 616 { 617 int start_id; 618 int err; 619 int i; 620 621 start_id = gve_tx_start_qpl_id(priv); 622 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 623 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 624 if (err) { 625 netif_err(priv, drv, priv->dev, 626 "failed to register queue page list %d\n", 627 priv->qpls[i].id); 628 /* This failure will trigger a reset - no need to clean 629 * up 630 */ 631 return err; 632 } 633 } 634 635 start_id = gve_rx_start_qpl_id(priv); 636 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 637 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 638 if (err) { 639 netif_err(priv, drv, priv->dev, 640 "failed to register queue page list %d\n", 641 priv->qpls[i].id); 642 /* This failure will trigger a reset - no need to clean 643 * up 644 */ 645 return err; 646 } 647 } 648 return 0; 649 } 650 651 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 652 { 653 int start_id; 654 int err; 655 int i; 656 657 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 658 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 659 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 660 /* This failure will trigger a reset - no need to clean up */ 661 if (err) { 662 netif_err(priv, drv, priv->dev, 663 "Failed to unregister queue page list %d\n", 664 
priv->qpls[i].id); 665 return err; 666 } 667 } 668 return 0; 669 } 670 671 static int gve_unregister_qpls(struct gve_priv *priv) 672 { 673 int start_id; 674 int err; 675 int i; 676 677 start_id = gve_tx_start_qpl_id(priv); 678 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 679 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 680 /* This failure will trigger a reset - no need to clean up */ 681 if (err) { 682 netif_err(priv, drv, priv->dev, 683 "Failed to unregister queue page list %d\n", 684 priv->qpls[i].id); 685 return err; 686 } 687 } 688 689 start_id = gve_rx_start_qpl_id(priv); 690 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 691 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 692 /* This failure will trigger a reset - no need to clean up */ 693 if (err) { 694 netif_err(priv, drv, priv->dev, 695 "Failed to unregister queue page list %d\n", 696 priv->qpls[i].id); 697 return err; 698 } 699 } 700 return 0; 701 } 702 703 static int gve_create_xdp_rings(struct gve_priv *priv) 704 { 705 int err; 706 707 err = gve_adminq_create_tx_queues(priv, 708 gve_xdp_tx_start_queue_id(priv), 709 priv->num_xdp_queues); 710 if (err) { 711 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 712 priv->num_xdp_queues); 713 /* This failure will trigger a reset - no need to clean 714 * up 715 */ 716 return err; 717 } 718 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 719 priv->num_xdp_queues); 720 721 return 0; 722 } 723 724 static int gve_create_rings(struct gve_priv *priv) 725 { 726 int num_tx_queues = gve_num_tx_queues(priv); 727 int err; 728 int i; 729 730 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 731 if (err) { 732 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 733 num_tx_queues); 734 /* This failure will trigger a reset - no need to clean 735 * up 736 */ 737 return err; 738 } 739 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 740 num_tx_queues); 741 742 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 743 if (err) { 744 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 745 priv->rx_cfg.num_queues); 746 /* This failure will trigger a reset - no need to clean 747 * up 748 */ 749 return err; 750 } 751 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 752 priv->rx_cfg.num_queues); 753 754 if (gve_is_gqi(priv)) { 755 /* Rx data ring has been prefilled with packet buffers at queue 756 * allocation time. 757 * 758 * Write the doorbell to provide descriptor slots and packet 759 * buffers to the NIC. 760 */ 761 for (i = 0; i < priv->rx_cfg.num_queues; i++) 762 gve_rx_write_doorbell(priv, &priv->rx[i]); 763 } else { 764 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 765 /* Post buffers and ring doorbell. 
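 * Unlike the GQI path above, DQO buffers are not prefilled at ring
 * allocation time, so the buffer queue is filled here.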
*/ 766 gve_rx_post_buffers_dqo(&priv->rx[i]); 767 } 768 } 769 770 return 0; 771 } 772 773 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, 774 int (*napi_poll)(struct napi_struct *napi, 775 int budget)) 776 { 777 int start_id = gve_xdp_tx_start_queue_id(priv); 778 int i; 779 780 /* Add xdp tx napi & init sync stats*/ 781 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 782 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 783 784 u64_stats_init(&priv->tx[i].statss); 785 priv->tx[i].ntfy_id = ntfy_idx; 786 gve_add_napi(priv, ntfy_idx, napi_poll); 787 } 788 } 789 790 static void add_napi_init_sync_stats(struct gve_priv *priv, 791 int (*napi_poll)(struct napi_struct *napi, 792 int budget)) 793 { 794 int i; 795 796 /* Add tx napi & init sync stats*/ 797 for (i = 0; i < gve_num_tx_queues(priv); i++) { 798 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 799 800 u64_stats_init(&priv->tx[i].statss); 801 priv->tx[i].ntfy_id = ntfy_idx; 802 gve_add_napi(priv, ntfy_idx, napi_poll); 803 } 804 /* Add rx napi & init sync stats*/ 805 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 806 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 807 808 u64_stats_init(&priv->rx[i].statss); 809 priv->rx[i].ntfy_id = ntfy_idx; 810 gve_add_napi(priv, ntfy_idx, napi_poll); 811 } 812 } 813 814 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) 815 { 816 if (gve_is_gqi(priv)) { 817 gve_tx_free_rings_gqi(priv, start_id, num_rings); 818 } else { 819 gve_tx_free_rings_dqo(priv); 820 } 821 } 822 823 static int gve_alloc_xdp_rings(struct gve_priv *priv) 824 { 825 int start_id; 826 int err = 0; 827 828 if (!priv->num_xdp_queues) 829 return 0; 830 831 start_id = gve_xdp_tx_start_queue_id(priv); 832 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); 833 if (err) 834 return err; 835 add_napi_init_xdp_sync_stats(priv, gve_napi_poll); 836 837 return 0; 838 } 839 840 static int gve_alloc_rings(struct gve_priv *priv) 841 { 842 int err; 843 844 /* Setup tx rings */ 845 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), 846 GFP_KERNEL); 847 if (!priv->tx) 848 return -ENOMEM; 849 850 if (gve_is_gqi(priv)) 851 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); 852 else 853 err = gve_tx_alloc_rings_dqo(priv); 854 if (err) 855 goto free_tx; 856 857 /* Setup rx rings */ 858 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), 859 GFP_KERNEL); 860 if (!priv->rx) { 861 err = -ENOMEM; 862 goto free_tx_queue; 863 } 864 865 if (gve_is_gqi(priv)) 866 err = gve_rx_alloc_rings(priv); 867 else 868 err = gve_rx_alloc_rings_dqo(priv); 869 if (err) 870 goto free_rx; 871 872 if (gve_is_gqi(priv)) 873 add_napi_init_sync_stats(priv, gve_napi_poll); 874 else 875 add_napi_init_sync_stats(priv, gve_napi_poll_dqo); 876 877 return 0; 878 879 free_rx: 880 kvfree(priv->rx); 881 priv->rx = NULL; 882 free_tx_queue: 883 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); 884 free_tx: 885 kvfree(priv->tx); 886 priv->tx = NULL; 887 return err; 888 } 889 890 static int gve_destroy_xdp_rings(struct gve_priv *priv) 891 { 892 int start_id; 893 int err; 894 895 start_id = gve_xdp_tx_start_queue_id(priv); 896 err = gve_adminq_destroy_tx_queues(priv, 897 start_id, 898 priv->num_xdp_queues); 899 if (err) { 900 netif_err(priv, drv, priv->dev, 901 "failed to destroy XDP queues\n"); 902 /* This failure will trigger a reset - no need to clean up */ 903 return err; 904 } 905 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 906 907 return 0; 908 } 909 910 static int 
gve_destroy_rings(struct gve_priv *priv) 911 { 912 int num_tx_queues = gve_num_tx_queues(priv); 913 int err; 914 915 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 916 if (err) { 917 netif_err(priv, drv, priv->dev, 918 "failed to destroy tx queues\n"); 919 /* This failure will trigger a reset - no need to clean up */ 920 return err; 921 } 922 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 923 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 924 if (err) { 925 netif_err(priv, drv, priv->dev, 926 "failed to destroy rx queues\n"); 927 /* This failure will trigger a reset - no need to clean up */ 928 return err; 929 } 930 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 931 return 0; 932 } 933 934 static void gve_rx_free_rings(struct gve_priv *priv) 935 { 936 if (gve_is_gqi(priv)) 937 gve_rx_free_rings_gqi(priv); 938 else 939 gve_rx_free_rings_dqo(priv); 940 } 941 942 static void gve_free_xdp_rings(struct gve_priv *priv) 943 { 944 int ntfy_idx, start_id; 945 int i; 946 947 start_id = gve_xdp_tx_start_queue_id(priv); 948 if (priv->tx) { 949 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 950 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 951 gve_remove_napi(priv, ntfy_idx); 952 } 953 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); 954 } 955 } 956 957 static void gve_free_rings(struct gve_priv *priv) 958 { 959 int num_tx_queues = gve_num_tx_queues(priv); 960 int ntfy_idx; 961 int i; 962 963 if (priv->tx) { 964 for (i = 0; i < num_tx_queues; i++) { 965 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 966 gve_remove_napi(priv, ntfy_idx); 967 } 968 gve_tx_free_rings(priv, 0, num_tx_queues); 969 kvfree(priv->tx); 970 priv->tx = NULL; 971 } 972 if (priv->rx) { 973 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 974 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 975 gve_remove_napi(priv, ntfy_idx); 976 } 977 gve_rx_free_rings(priv); 978 kvfree(priv->rx); 979 priv->rx = NULL; 980 } 981 } 982 983 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 984 struct page **page, dma_addr_t *dma, 985 enum dma_data_direction dir, gfp_t gfp_flags) 986 { 987 *page = alloc_page(gfp_flags); 988 if (!*page) { 989 priv->page_alloc_fail++; 990 return -ENOMEM; 991 } 992 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 993 if (dma_mapping_error(dev, *dma)) { 994 priv->dma_mapping_error++; 995 put_page(*page); 996 return -ENOMEM; 997 } 998 return 0; 999 } 1000 1001 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 1002 int pages) 1003 { 1004 struct gve_queue_page_list *qpl = &priv->qpls[id]; 1005 int err; 1006 int i; 1007 1008 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 1009 netif_err(priv, drv, priv->dev, 1010 "Reached max number of registered pages %llu > %llu\n", 1011 pages + priv->num_registered_pages, 1012 priv->max_registered_pages); 1013 return -EINVAL; 1014 } 1015 1016 qpl->id = id; 1017 qpl->num_entries = 0; 1018 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1019 /* caller handles clean up */ 1020 if (!qpl->pages) 1021 return -ENOMEM; 1022 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1023 /* caller handles clean up */ 1024 if (!qpl->page_buses) 1025 return -ENOMEM; 1026 1027 for (i = 0; i < pages; i++) { 1028 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1029 &qpl->page_buses[i], 1030 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1031 /* caller handles clean up */ 1032 if (err) 1033 return -ENOMEM; 1034 qpl->num_entries++; 1035 } 1036 
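/* All pages for this QPL are now allocated and DMA mapped; charge them
 * against the registered-page limit checked at the top of this function.
 */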
priv->num_registered_pages += pages; 1037 1038 return 0; 1039 } 1040 1041 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1042 enum dma_data_direction dir) 1043 { 1044 if (!dma_mapping_error(dev, dma)) 1045 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1046 if (page) 1047 put_page(page); 1048 } 1049 1050 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) 1051 { 1052 struct gve_queue_page_list *qpl = &priv->qpls[id]; 1053 int i; 1054 1055 if (!qpl->pages) 1056 return; 1057 if (!qpl->page_buses) 1058 goto free_pages; 1059 1060 for (i = 0; i < qpl->num_entries; i++) 1061 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1062 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1063 1064 kvfree(qpl->page_buses); 1065 qpl->page_buses = NULL; 1066 free_pages: 1067 kvfree(qpl->pages); 1068 qpl->pages = NULL; 1069 priv->num_registered_pages -= qpl->num_entries; 1070 } 1071 1072 static int gve_alloc_xdp_qpls(struct gve_priv *priv) 1073 { 1074 int start_id; 1075 int i, j; 1076 int err; 1077 1078 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1079 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 1080 err = gve_alloc_queue_page_list(priv, i, 1081 priv->tx_pages_per_qpl); 1082 if (err) 1083 goto free_qpls; 1084 } 1085 1086 return 0; 1087 1088 free_qpls: 1089 for (j = start_id; j <= i; j++) 1090 gve_free_queue_page_list(priv, j); 1091 return err; 1092 } 1093 1094 static int gve_alloc_qpls(struct gve_priv *priv) 1095 { 1096 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1097 int start_id; 1098 int i, j; 1099 int err; 1100 1101 if (priv->queue_format != GVE_GQI_QPL_FORMAT) 1102 return 0; 1103 1104 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); 1105 if (!priv->qpls) 1106 return -ENOMEM; 1107 1108 start_id = gve_tx_start_qpl_id(priv); 1109 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 1110 err = gve_alloc_queue_page_list(priv, i, 1111 priv->tx_pages_per_qpl); 1112 if (err) 1113 goto free_qpls; 1114 } 1115 1116 start_id = gve_rx_start_qpl_id(priv); 1117 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 1118 err = gve_alloc_queue_page_list(priv, i, 1119 priv->rx_data_slot_cnt); 1120 if (err) 1121 goto free_qpls; 1122 } 1123 1124 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * 1125 sizeof(unsigned long) * BITS_PER_BYTE; 1126 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), 1127 sizeof(unsigned long), GFP_KERNEL); 1128 if (!priv->qpl_cfg.qpl_id_map) { 1129 err = -ENOMEM; 1130 goto free_qpls; 1131 } 1132 1133 return 0; 1134 1135 free_qpls: 1136 for (j = 0; j <= i; j++) 1137 gve_free_queue_page_list(priv, j); 1138 kvfree(priv->qpls); 1139 priv->qpls = NULL; 1140 return err; 1141 } 1142 1143 static void gve_free_xdp_qpls(struct gve_priv *priv) 1144 { 1145 int start_id; 1146 int i; 1147 1148 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1149 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) 1150 gve_free_queue_page_list(priv, i); 1151 } 1152 1153 static void gve_free_qpls(struct gve_priv *priv) 1154 { 1155 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1156 int i; 1157 1158 if (!priv->qpls) 1159 return; 1160 1161 kvfree(priv->qpl_cfg.qpl_id_map); 1162 priv->qpl_cfg.qpl_id_map = NULL; 1163 1164 for (i = 0; i < max_queues; i++) 1165 gve_free_queue_page_list(priv, i); 1166 1167 kvfree(priv->qpls); 1168 priv->qpls = NULL; 1169 } 1170 1171 /* Use this to schedule a reset when the device is capable of 
continuing 1172 * to handle other requests in its current state. If it is not, do a reset 1173 * in thread instead. 1174 */ 1175 void gve_schedule_reset(struct gve_priv *priv) 1176 { 1177 gve_set_do_reset(priv); 1178 queue_work(priv->gve_wq, &priv->service_task); 1179 } 1180 1181 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1182 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1183 static void gve_turndown(struct gve_priv *priv); 1184 static void gve_turnup(struct gve_priv *priv); 1185 1186 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1187 { 1188 struct napi_struct *napi; 1189 struct gve_rx_ring *rx; 1190 int err = 0; 1191 int i, j; 1192 u32 tx_qid; 1193 1194 if (!priv->num_xdp_queues) 1195 return 0; 1196 1197 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1198 rx = &priv->rx[i]; 1199 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1200 1201 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1202 napi->napi_id); 1203 if (err) 1204 goto err; 1205 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1206 MEM_TYPE_PAGE_SHARED, NULL); 1207 if (err) 1208 goto err; 1209 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1210 if (rx->xsk_pool) { 1211 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1212 napi->napi_id); 1213 if (err) 1214 goto err; 1215 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1216 MEM_TYPE_XSK_BUFF_POOL, NULL); 1217 if (err) 1218 goto err; 1219 xsk_pool_set_rxq_info(rx->xsk_pool, 1220 &rx->xsk_rxq); 1221 } 1222 } 1223 1224 for (i = 0; i < priv->num_xdp_queues; i++) { 1225 tx_qid = gve_xdp_tx_queue_id(priv, i); 1226 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1227 } 1228 return 0; 1229 1230 err: 1231 for (j = i; j >= 0; j--) { 1232 rx = &priv->rx[j]; 1233 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1234 xdp_rxq_info_unreg(&rx->xdp_rxq); 1235 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1236 xdp_rxq_info_unreg(&rx->xsk_rxq); 1237 } 1238 return err; 1239 } 1240 1241 static void gve_unreg_xdp_info(struct gve_priv *priv) 1242 { 1243 int i, tx_qid; 1244 1245 if (!priv->num_xdp_queues) 1246 return; 1247 1248 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1249 struct gve_rx_ring *rx = &priv->rx[i]; 1250 1251 xdp_rxq_info_unreg(&rx->xdp_rxq); 1252 if (rx->xsk_pool) { 1253 xdp_rxq_info_unreg(&rx->xsk_rxq); 1254 rx->xsk_pool = NULL; 1255 } 1256 } 1257 1258 for (i = 0; i < priv->num_xdp_queues; i++) { 1259 tx_qid = gve_xdp_tx_queue_id(priv, i); 1260 priv->tx[tx_qid].xsk_pool = NULL; 1261 } 1262 } 1263 1264 static void gve_drain_page_cache(struct gve_priv *priv) 1265 { 1266 struct page_frag_cache *nc; 1267 int i; 1268 1269 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1270 nc = &priv->rx[i].page_cache; 1271 if (nc->va) { 1272 __page_frag_cache_drain(virt_to_page(nc->va), 1273 nc->pagecnt_bias); 1274 nc->va = NULL; 1275 } 1276 } 1277 } 1278 1279 static int gve_open(struct net_device *dev) 1280 { 1281 struct gve_priv *priv = netdev_priv(dev); 1282 int err; 1283 1284 if (priv->xdp_prog) 1285 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1286 else 1287 priv->num_xdp_queues = 0; 1288 1289 err = gve_alloc_qpls(priv); 1290 if (err) 1291 return err; 1292 1293 err = gve_alloc_rings(priv); 1294 if (err) 1295 goto free_qpls; 1296 1297 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1298 if (err) 1299 goto free_rings; 1300 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1301 if (err) 1302 goto free_rings; 1303 1304 err = gve_reg_xdp_info(priv, dev); 1305 if (err) 1306 goto free_rings; 1307 1308 err = 
gve_register_qpls(priv); 1309 if (err) 1310 goto reset; 1311 1312 if (!gve_is_gqi(priv)) { 1313 /* Hard code this for now. This may be tuned in the future for 1314 * performance. 1315 */ 1316 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1317 } 1318 err = gve_create_rings(priv); 1319 if (err) 1320 goto reset; 1321 1322 gve_set_device_rings_ok(priv); 1323 1324 if (gve_get_report_stats(priv)) 1325 mod_timer(&priv->stats_report_timer, 1326 round_jiffies(jiffies + 1327 msecs_to_jiffies(priv->stats_report_timer_period))); 1328 1329 gve_turnup(priv); 1330 queue_work(priv->gve_wq, &priv->service_task); 1331 priv->interface_up_cnt++; 1332 return 0; 1333 1334 free_rings: 1335 gve_free_rings(priv); 1336 free_qpls: 1337 gve_free_qpls(priv); 1338 return err; 1339 1340 reset: 1341 /* This must have been called from a reset due to the rtnl lock 1342 * so just return at this point. 1343 */ 1344 if (gve_get_reset_in_progress(priv)) 1345 return err; 1346 /* Otherwise reset before returning */ 1347 gve_reset_and_teardown(priv, true); 1348 /* if this fails there is nothing we can do so just ignore the return */ 1349 gve_reset_recovery(priv, false); 1350 /* return the original error */ 1351 return err; 1352 } 1353 1354 static int gve_close(struct net_device *dev) 1355 { 1356 struct gve_priv *priv = netdev_priv(dev); 1357 int err; 1358 1359 netif_carrier_off(dev); 1360 if (gve_get_device_rings_ok(priv)) { 1361 gve_turndown(priv); 1362 gve_drain_page_cache(priv); 1363 err = gve_destroy_rings(priv); 1364 if (err) 1365 goto err; 1366 err = gve_unregister_qpls(priv); 1367 if (err) 1368 goto err; 1369 gve_clear_device_rings_ok(priv); 1370 } 1371 del_timer_sync(&priv->stats_report_timer); 1372 1373 gve_unreg_xdp_info(priv); 1374 gve_free_rings(priv); 1375 gve_free_qpls(priv); 1376 priv->interface_down_cnt++; 1377 return 0; 1378 1379 err: 1380 /* This must have been called from a reset due to the rtnl lock 1381 * so just return at this point. 
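 * The reset path that invoked gve_close() performs its own teardown
 * and recovery.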
1382 */ 1383 if (gve_get_reset_in_progress(priv)) 1384 return err; 1385 /* Otherwise reset before returning */ 1386 gve_reset_and_teardown(priv, true); 1387 return gve_reset_recovery(priv, false); 1388 } 1389 1390 static int gve_remove_xdp_queues(struct gve_priv *priv) 1391 { 1392 int err; 1393 1394 err = gve_destroy_xdp_rings(priv); 1395 if (err) 1396 return err; 1397 1398 err = gve_unregister_xdp_qpls(priv); 1399 if (err) 1400 return err; 1401 1402 gve_unreg_xdp_info(priv); 1403 gve_free_xdp_rings(priv); 1404 gve_free_xdp_qpls(priv); 1405 priv->num_xdp_queues = 0; 1406 return 0; 1407 } 1408 1409 static int gve_add_xdp_queues(struct gve_priv *priv) 1410 { 1411 int err; 1412 1413 priv->num_xdp_queues = priv->tx_cfg.num_queues; 1414 1415 err = gve_alloc_xdp_qpls(priv); 1416 if (err) 1417 goto err; 1418 1419 err = gve_alloc_xdp_rings(priv); 1420 if (err) 1421 goto free_xdp_qpls; 1422 1423 err = gve_reg_xdp_info(priv, priv->dev); 1424 if (err) 1425 goto free_xdp_rings; 1426 1427 err = gve_register_xdp_qpls(priv); 1428 if (err) 1429 goto free_xdp_rings; 1430 1431 err = gve_create_xdp_rings(priv); 1432 if (err) 1433 goto free_xdp_rings; 1434 1435 return 0; 1436 1437 free_xdp_rings: 1438 gve_free_xdp_rings(priv); 1439 free_xdp_qpls: 1440 gve_free_xdp_qpls(priv); 1441 err: 1442 priv->num_xdp_queues = 0; 1443 return err; 1444 } 1445 1446 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1447 { 1448 if (!gve_get_napi_enabled(priv)) 1449 return; 1450 1451 if (link_status == netif_carrier_ok(priv->dev)) 1452 return; 1453 1454 if (link_status) { 1455 netdev_info(priv->dev, "Device link is up.\n"); 1456 netif_carrier_on(priv->dev); 1457 } else { 1458 netdev_info(priv->dev, "Device link is down.\n"); 1459 netif_carrier_off(priv->dev); 1460 } 1461 } 1462 1463 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1464 struct netlink_ext_ack *extack) 1465 { 1466 struct bpf_prog *old_prog; 1467 int err = 0; 1468 u32 status; 1469 1470 old_prog = READ_ONCE(priv->xdp_prog); 1471 if (!netif_carrier_ok(priv->dev)) { 1472 WRITE_ONCE(priv->xdp_prog, prog); 1473 if (old_prog) 1474 bpf_prog_put(old_prog); 1475 return 0; 1476 } 1477 1478 gve_turndown(priv); 1479 if (!old_prog && prog) { 1480 // Allocate XDP TX queues if an XDP program is 1481 // being installed 1482 err = gve_add_xdp_queues(priv); 1483 if (err) 1484 goto out; 1485 } else if (old_prog && !prog) { 1486 // Remove XDP TX queues if an XDP program is 1487 // being uninstalled 1488 err = gve_remove_xdp_queues(priv); 1489 if (err) 1490 goto out; 1491 } 1492 WRITE_ONCE(priv->xdp_prog, prog); 1493 if (old_prog) 1494 bpf_prog_put(old_prog); 1495 1496 out: 1497 gve_turnup(priv); 1498 status = ioread32be(&priv->reg_bar0->device_status); 1499 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1500 return err; 1501 } 1502 1503 static int gve_xsk_pool_enable(struct net_device *dev, 1504 struct xsk_buff_pool *pool, 1505 u16 qid) 1506 { 1507 struct gve_priv *priv = netdev_priv(dev); 1508 struct napi_struct *napi; 1509 struct gve_rx_ring *rx; 1510 int tx_qid; 1511 int err; 1512 1513 if (qid >= priv->rx_cfg.num_queues) { 1514 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1515 return -EINVAL; 1516 } 1517 if (xsk_pool_get_rx_frame_size(pool) < 1518 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1519 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1520 return -EINVAL; 1521 } 1522 1523 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1524 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 
1525 if (err) 1526 return err; 1527 1528 /* If XDP prog is not installed, return */ 1529 if (!priv->xdp_prog) 1530 return 0; 1531 1532 rx = &priv->rx[qid]; 1533 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1534 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1535 if (err) 1536 goto err; 1537 1538 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1539 MEM_TYPE_XSK_BUFF_POOL, NULL); 1540 if (err) 1541 goto err; 1542 1543 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1544 rx->xsk_pool = pool; 1545 1546 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1547 priv->tx[tx_qid].xsk_pool = pool; 1548 1549 return 0; 1550 err: 1551 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1552 xdp_rxq_info_unreg(&rx->xsk_rxq); 1553 1554 xsk_pool_dma_unmap(pool, 1555 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1556 return err; 1557 } 1558 1559 static int gve_xsk_pool_disable(struct net_device *dev, 1560 u16 qid) 1561 { 1562 struct gve_priv *priv = netdev_priv(dev); 1563 struct napi_struct *napi_rx; 1564 struct napi_struct *napi_tx; 1565 struct xsk_buff_pool *pool; 1566 int tx_qid; 1567 1568 pool = xsk_get_pool_from_qid(dev, qid); 1569 if (!pool) 1570 return -EINVAL; 1571 if (qid >= priv->rx_cfg.num_queues) 1572 return -EINVAL; 1573 1574 /* If XDP prog is not installed, unmap DMA and return */ 1575 if (!priv->xdp_prog) 1576 goto done; 1577 1578 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1579 if (!netif_running(dev)) { 1580 priv->rx[qid].xsk_pool = NULL; 1581 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1582 priv->tx[tx_qid].xsk_pool = NULL; 1583 goto done; 1584 } 1585 1586 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1587 napi_disable(napi_rx); /* make sure current rx poll is done */ 1588 1589 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1590 napi_disable(napi_tx); /* make sure current tx poll is done */ 1591 1592 priv->rx[qid].xsk_pool = NULL; 1593 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1594 priv->tx[tx_qid].xsk_pool = NULL; 1595 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1596 1597 napi_enable(napi_rx); 1598 if (gve_rx_work_pending(&priv->rx[qid])) 1599 napi_schedule(napi_rx); 1600 1601 napi_enable(napi_tx); 1602 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1603 napi_schedule(napi_tx); 1604 1605 done: 1606 xsk_pool_dma_unmap(pool, 1607 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1608 return 0; 1609 } 1610 1611 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1612 { 1613 struct gve_priv *priv = netdev_priv(dev); 1614 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1615 1616 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1617 return -EINVAL; 1618 1619 if (flags & XDP_WAKEUP_TX) { 1620 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1621 struct napi_struct *napi = 1622 &priv->ntfy_blocks[tx->ntfy_id].napi; 1623 1624 if (!napi_if_scheduled_mark_missed(napi)) { 1625 /* Call local_bh_enable to trigger SoftIRQ processing */ 1626 local_bh_disable(); 1627 napi_schedule(napi); 1628 local_bh_enable(); 1629 } 1630 1631 tx->xdp_xsk_wakeup++; 1632 } 1633 1634 return 0; 1635 } 1636 1637 static int verify_xdp_configuration(struct net_device *dev) 1638 { 1639 struct gve_priv *priv = netdev_priv(dev); 1640 1641 if (dev->features & NETIF_F_LRO) { 1642 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1643 return -EOPNOTSUPP; 1644 } 1645 1646 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1647 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1648 priv->queue_format); 1649 return -EOPNOTSUPP; 1650 } 1651 
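/* For XDP in GQI-QPL mode the whole frame, plus the Ethernet header and
 * GVE_RX_PAD headroom, must fit in a single half-page rx buffer.
 */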
1652 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { 1653 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1654 dev->mtu); 1655 return -EOPNOTSUPP; 1656 } 1657 1658 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1659 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1660 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1661 priv->rx_cfg.num_queues, 1662 priv->tx_cfg.num_queues, 1663 priv->tx_cfg.max_queues); 1664 return -EINVAL; 1665 } 1666 return 0; 1667 } 1668 1669 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1670 { 1671 struct gve_priv *priv = netdev_priv(dev); 1672 int err; 1673 1674 err = verify_xdp_configuration(dev); 1675 if (err) 1676 return err; 1677 switch (xdp->command) { 1678 case XDP_SETUP_PROG: 1679 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1680 case XDP_SETUP_XSK_POOL: 1681 if (xdp->xsk.pool) 1682 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1683 else 1684 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1685 default: 1686 return -EINVAL; 1687 } 1688 } 1689 1690 int gve_adjust_queues(struct gve_priv *priv, 1691 struct gve_queue_config new_rx_config, 1692 struct gve_queue_config new_tx_config) 1693 { 1694 int err; 1695 1696 if (netif_carrier_ok(priv->dev)) { 1697 /* To make this process as simple as possible we teardown the 1698 * device, set the new configuration, and then bring the device 1699 * up again. 1700 */ 1701 err = gve_close(priv->dev); 1702 /* we have already tried to reset in close, 1703 * just fail at this point 1704 */ 1705 if (err) 1706 return err; 1707 priv->tx_cfg = new_tx_config; 1708 priv->rx_cfg = new_rx_config; 1709 1710 err = gve_open(priv->dev); 1711 if (err) 1712 goto err; 1713 1714 return 0; 1715 } 1716 /* Set the config for the next up. */ 1717 priv->tx_cfg = new_tx_config; 1718 priv->rx_cfg = new_rx_config; 1719 1720 return 0; 1721 err: 1722 netif_err(priv, drv, priv->dev, 1723 "Adjust queues failed! !!! 
DISABLING ALL QUEUES !!!\n"); 1724 gve_turndown(priv); 1725 return err; 1726 } 1727 1728 static void gve_turndown(struct gve_priv *priv) 1729 { 1730 int idx; 1731 1732 if (netif_carrier_ok(priv->dev)) 1733 netif_carrier_off(priv->dev); 1734 1735 if (!gve_get_napi_enabled(priv)) 1736 return; 1737 1738 /* Disable napi to prevent more work from coming in */ 1739 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1740 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1741 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1742 1743 napi_disable(&block->napi); 1744 } 1745 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1746 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1747 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1748 1749 napi_disable(&block->napi); 1750 } 1751 1752 /* Stop tx queues */ 1753 netif_tx_disable(priv->dev); 1754 1755 gve_clear_napi_enabled(priv); 1756 gve_clear_report_stats(priv); 1757 } 1758 1759 static void gve_turnup(struct gve_priv *priv) 1760 { 1761 int idx; 1762 1763 /* Start the tx queues */ 1764 netif_tx_start_all_queues(priv->dev); 1765 1766 /* Enable napi and unmask interrupts for all queues */ 1767 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1768 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1769 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1770 1771 napi_enable(&block->napi); 1772 if (gve_is_gqi(priv)) { 1773 iowrite32be(0, gve_irq_doorbell(priv, block)); 1774 } else { 1775 gve_set_itr_coalesce_usecs_dqo(priv, block, 1776 priv->tx_coalesce_usecs); 1777 } 1778 } 1779 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1780 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1781 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1782 1783 napi_enable(&block->napi); 1784 if (gve_is_gqi(priv)) { 1785 iowrite32be(0, gve_irq_doorbell(priv, block)); 1786 } else { 1787 gve_set_itr_coalesce_usecs_dqo(priv, block, 1788 priv->rx_coalesce_usecs); 1789 } 1790 } 1791 1792 gve_set_napi_enabled(priv); 1793 } 1794 1795 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1796 { 1797 struct gve_notify_block *block; 1798 struct gve_tx_ring *tx = NULL; 1799 struct gve_priv *priv; 1800 u32 last_nic_done; 1801 u32 current_time; 1802 u32 ntfy_idx; 1803 1804 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1805 priv = netdev_priv(dev); 1806 if (txqueue > priv->tx_cfg.num_queues) 1807 goto reset; 1808 1809 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1810 if (ntfy_idx >= priv->num_ntfy_blks) 1811 goto reset; 1812 1813 block = &priv->ntfy_blocks[ntfy_idx]; 1814 tx = block->tx; 1815 1816 current_time = jiffies_to_msecs(jiffies); 1817 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1818 goto reset; 1819 1820 /* Check to see if there are missed completions, which will allow us to 1821 * kick the queue. 1822 */ 1823 last_nic_done = gve_tx_load_event_counter(priv, tx); 1824 if (last_nic_done - tx->done) { 1825 netdev_info(dev, "Kicking queue %d", txqueue); 1826 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1827 napi_schedule(&block->napi); 1828 tx->last_kick_msec = current_time; 1829 goto out; 1830 } // Else reset. 
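/* Recovery by kicking the queue was not possible; fall back to
 * scheduling a full device reset.
 */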
1831 1832 reset: 1833 gve_schedule_reset(priv); 1834 1835 out: 1836 if (tx) 1837 tx->queue_timeout++; 1838 priv->tx_timeo_cnt++; 1839 } 1840 1841 static int gve_set_features(struct net_device *netdev, 1842 netdev_features_t features) 1843 { 1844 const netdev_features_t orig_features = netdev->features; 1845 struct gve_priv *priv = netdev_priv(netdev); 1846 int err; 1847 1848 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 1849 netdev->features ^= NETIF_F_LRO; 1850 if (netif_carrier_ok(netdev)) { 1851 /* To make this process as simple as possible we 1852 * teardown the device, set the new configuration, 1853 * and then bring the device up again. 1854 */ 1855 err = gve_close(netdev); 1856 /* We have already tried to reset in close, just fail 1857 * at this point. 1858 */ 1859 if (err) 1860 goto err; 1861 1862 err = gve_open(netdev); 1863 if (err) 1864 goto err; 1865 } 1866 } 1867 1868 return 0; 1869 err: 1870 /* Reverts the change on error. */ 1871 netdev->features = orig_features; 1872 netif_err(priv, drv, netdev, 1873 "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); 1874 return err; 1875 } 1876 1877 static const struct net_device_ops gve_netdev_ops = { 1878 .ndo_start_xmit = gve_start_xmit, 1879 .ndo_open = gve_open, 1880 .ndo_stop = gve_close, 1881 .ndo_get_stats64 = gve_get_stats, 1882 .ndo_tx_timeout = gve_tx_timeout, 1883 .ndo_set_features = gve_set_features, 1884 .ndo_bpf = gve_xdp, 1885 .ndo_xdp_xmit = gve_xdp_xmit, 1886 .ndo_xsk_wakeup = gve_xsk_wakeup, 1887 }; 1888 1889 static void gve_handle_status(struct gve_priv *priv, u32 status) 1890 { 1891 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 1892 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 1893 gve_set_do_reset(priv); 1894 } 1895 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 1896 priv->stats_report_trigger_cnt++; 1897 gve_set_do_report_stats(priv); 1898 } 1899 } 1900 1901 static void gve_handle_reset(struct gve_priv *priv) 1902 { 1903 /* A service task will be scheduled at the end of probe to catch any 1904 * resets that need to happen, and we don't want to reset until 1905 * probe is done. 1906 */ 1907 if (gve_get_probe_in_progress(priv)) 1908 return; 1909 1910 if (gve_get_do_reset(priv)) { 1911 rtnl_lock(); 1912 gve_reset(priv, false); 1913 rtnl_unlock(); 1914 } 1915 } 1916 1917 void gve_handle_report_stats(struct gve_priv *priv) 1918 { 1919 struct stats *stats = priv->stats_report->stats; 1920 int idx, stats_idx = 0; 1921 unsigned int start = 0; 1922 u64 tx_bytes; 1923 1924 if (!gve_get_report_stats(priv)) 1925 return; 1926 1927 be64_add_cpu(&priv->stats_report->written_count, 1); 1928 /* tx stats */ 1929 if (priv->tx) { 1930 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1931 u32 last_completion = 0; 1932 u32 tx_frames = 0; 1933 1934 /* DQO doesn't currently support these metrics. 
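 * They are reported as zero for DQO queue formats below.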
*/ 1935 if (gve_is_gqi(priv)) { 1936 last_completion = priv->tx[idx].done; 1937 tx_frames = priv->tx[idx].req; 1938 } 1939 1940 do { 1941 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1942 tx_bytes = priv->tx[idx].bytes_done; 1943 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1944 stats[stats_idx++] = (struct stats) { 1945 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1946 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1947 .queue_id = cpu_to_be32(idx), 1948 }; 1949 stats[stats_idx++] = (struct stats) { 1950 .stat_name = cpu_to_be32(TX_STOP_CNT), 1951 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1952 .queue_id = cpu_to_be32(idx), 1953 }; 1954 stats[stats_idx++] = (struct stats) { 1955 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1956 .value = cpu_to_be64(tx_frames), 1957 .queue_id = cpu_to_be32(idx), 1958 }; 1959 stats[stats_idx++] = (struct stats) { 1960 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1961 .value = cpu_to_be64(tx_bytes), 1962 .queue_id = cpu_to_be32(idx), 1963 }; 1964 stats[stats_idx++] = (struct stats) { 1965 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1966 .value = cpu_to_be64(last_completion), 1967 .queue_id = cpu_to_be32(idx), 1968 }; 1969 stats[stats_idx++] = (struct stats) { 1970 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1971 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1972 .queue_id = cpu_to_be32(idx), 1973 }; 1974 } 1975 } 1976 /* rx stats */ 1977 if (priv->rx) { 1978 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1979 stats[stats_idx++] = (struct stats) { 1980 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1981 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1982 .queue_id = cpu_to_be32(idx), 1983 }; 1984 stats[stats_idx++] = (struct stats) { 1985 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 1986 .value = cpu_to_be64(priv->rx[0].fill_cnt), 1987 .queue_id = cpu_to_be32(idx), 1988 }; 1989 } 1990 } 1991 } 1992 1993 /* Handle NIC status register changes, reset requests and report stats */ 1994 static void gve_service_task(struct work_struct *work) 1995 { 1996 struct gve_priv *priv = container_of(work, struct gve_priv, 1997 service_task); 1998 u32 status = ioread32be(&priv->reg_bar0->device_status); 1999 2000 gve_handle_status(priv, status); 2001 2002 gve_handle_reset(priv); 2003 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2004 } 2005 2006 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2007 { 2008 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2009 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 2010 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2011 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2012 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2013 } else { 2014 priv->dev->xdp_features = 0; 2015 } 2016 } 2017 2018 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2019 { 2020 int num_ntfy; 2021 int err; 2022 2023 /* Set up the adminq */ 2024 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2025 if (err) { 2026 dev_err(&priv->pdev->dev, 2027 "Failed to alloc admin queue: err=%d\n", err); 2028 return err; 2029 } 2030 2031 err = gve_verify_driver_compatibility(priv); 2032 if (err) { 2033 dev_err(&priv->pdev->dev, 2034 "Could not verify driver compatibility: err=%d\n", err); 2035 goto err; 2036 } 2037 2038 if (skip_describe_device) 2039 goto setup_device; 2040 2041 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2042 /* Get the initial information we need from the device */ 2043 err = gve_adminq_describe_device(priv); 2044 if (err) 
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

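/* Full device reset: tear down the existing state (gracefully via gve_close()
 * when attempt_teardown is set, otherwise by turning the queues down and
 * releasing the admin queue first), then rebuild it through
 * gve_reset_recovery(). Since gve_close()/gve_open() are invoked from here,
 * callers are expected to hold the rtnl lock, as the shutdown and suspend
 * paths below do explicitly around the same teardown helpers.
 */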
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

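/* Stream the driver version into the version register one byte at a time;
 * the device sees the prefix, the version string and a trailing newline,
 * e.g. "GVE-1.0.0\n" with the defaults defined above.
 */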
2257 */ 2258 dev->hw_features = NETIF_F_HIGHDMA; 2259 dev->hw_features |= NETIF_F_SG; 2260 dev->hw_features |= NETIF_F_HW_CSUM; 2261 dev->hw_features |= NETIF_F_TSO; 2262 dev->hw_features |= NETIF_F_TSO6; 2263 dev->hw_features |= NETIF_F_TSO_ECN; 2264 dev->hw_features |= NETIF_F_RXCSUM; 2265 dev->hw_features |= NETIF_F_RXHASH; 2266 dev->features = dev->hw_features; 2267 dev->watchdog_timeo = 5 * HZ; 2268 dev->min_mtu = ETH_MIN_MTU; 2269 netif_carrier_off(dev); 2270 2271 priv = netdev_priv(dev); 2272 priv->dev = dev; 2273 priv->pdev = pdev; 2274 priv->msg_enable = DEFAULT_MSG_LEVEL; 2275 priv->reg_bar0 = reg_bar; 2276 priv->db_bar2 = db_bar; 2277 priv->service_task_flags = 0x0; 2278 priv->state_flags = 0x0; 2279 priv->ethtool_flags = 0x0; 2280 2281 gve_set_probe_in_progress(priv); 2282 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2283 if (!priv->gve_wq) { 2284 dev_err(&pdev->dev, "Could not allocate workqueue"); 2285 err = -ENOMEM; 2286 goto abort_with_netdev; 2287 } 2288 INIT_WORK(&priv->service_task, gve_service_task); 2289 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2290 priv->tx_cfg.max_queues = max_tx_queues; 2291 priv->rx_cfg.max_queues = max_rx_queues; 2292 2293 err = gve_init_priv(priv, false); 2294 if (err) 2295 goto abort_with_wq; 2296 2297 err = register_netdev(dev); 2298 if (err) 2299 goto abort_with_gve_init; 2300 2301 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2302 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2303 gve_clear_probe_in_progress(priv); 2304 queue_work(priv->gve_wq, &priv->service_task); 2305 return 0; 2306 2307 abort_with_gve_init: 2308 gve_teardown_priv_resources(priv); 2309 2310 abort_with_wq: 2311 destroy_workqueue(priv->gve_wq); 2312 2313 abort_with_netdev: 2314 free_netdev(dev); 2315 2316 abort_with_db_bar: 2317 pci_iounmap(pdev, db_bar); 2318 2319 abort_with_reg_bar: 2320 pci_iounmap(pdev, reg_bar); 2321 2322 abort_with_pci_region: 2323 pci_release_regions(pdev); 2324 2325 abort_with_enabled: 2326 pci_disable_device(pdev); 2327 return err; 2328 } 2329 2330 static void gve_remove(struct pci_dev *pdev) 2331 { 2332 struct net_device *netdev = pci_get_drvdata(pdev); 2333 struct gve_priv *priv = netdev_priv(netdev); 2334 __be32 __iomem *db_bar = priv->db_bar2; 2335 void __iomem *reg_bar = priv->reg_bar0; 2336 2337 unregister_netdev(netdev); 2338 gve_teardown_priv_resources(priv); 2339 destroy_workqueue(priv->gve_wq); 2340 free_netdev(netdev); 2341 pci_iounmap(pdev, db_bar); 2342 pci_iounmap(pdev, reg_bar); 2343 pci_release_regions(pdev); 2344 pci_disable_device(pdev); 2345 } 2346 2347 static void gve_shutdown(struct pci_dev *pdev) 2348 { 2349 struct net_device *netdev = pci_get_drvdata(pdev); 2350 struct gve_priv *priv = netdev_priv(netdev); 2351 bool was_up = netif_carrier_ok(priv->dev); 2352 2353 rtnl_lock(); 2354 if (was_up && gve_close(priv->dev)) { 2355 /* If the dev was up, attempt to close, if close fails, reset */ 2356 gve_reset_and_teardown(priv, was_up); 2357 } else { 2358 /* If the dev wasn't up or close worked, finish tearing down */ 2359 gve_teardown_priv_resources(priv); 2360 } 2361 rtnl_unlock(); 2362 } 2363 2364 #ifdef CONFIG_PM 2365 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2366 { 2367 struct net_device *netdev = pci_get_drvdata(pdev); 2368 struct gve_priv *priv = netdev_priv(netdev); 2369 bool was_up = netif_carrier_ok(priv->dev); 2370 2371 priv->suspend_cnt++; 2372 rtnl_lock(); 2373 if (was_up && gve_close(priv->dev)) { 2374 /* If the dev was up, attempt to 
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
	.shutdown	= gve_shutdown,
#ifdef CONFIG_PM
	.suspend	= gve_suspend,
	.resume		= gve_resume,
#endif
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);