// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

/* Describe this driver/OS to the device and ask whether it is compatible.
 *
 * Fills a struct gve_driver_info (OS type, kernel version numbers, driver
 * capability flags and the utsname release/version strings) in a
 * DMA-coherent buffer, passes its bus address to
 * gve_adminq_verify_driver_compatibility() and frees the buffer again.
 *
 * Return: 0 on success or when the device answers -EOPNOTSUPP, -ENOMEM if
 * the buffer cannot be allocated, otherwise the admin queue error.
 */
static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	/* Multi-byte fields are converted to big endian before being
	 * shared with the device.
	 *
	 * NOTE(review): LINUX_VERSION_SUBLEVEL is sent as the "minor" field
	 * and LINUX_VERSION_PATCHLEVEL as the "sub" field - confirm this is
	 * the ordering the device side expects before changing it.
	 */
	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	/* The buffer is only needed for the duration of the admin command */
	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

/* Feature check hook: defers to gve_features_check_dqo() unless the device
 * uses the GQI queue format, in which case @features is returned unchanged.
 */
static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

/* Transmit entry point: dispatches @skb to gve_tx() for GQI devices and to
 * gve_tx_dqo() otherwise, returning whatever the chosen path returns.
 */
static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

/* Add the per-ring packet and byte counters to @s.
 *
 * RX rings contribute rpackets/rbytes, TX rings contribute
 * pkt_done/bytes_done. Each pair is read inside a
 * u64_stats_fetch_begin()/u64_stats_fetch_retry() loop so that both values
 * come from the same snapshot. The totals are accumulated with "+=";
 * this function does not zero @s first. Ring arrays that are NULL are
 * skipped.
 */
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

/* Allocate the two flow rule caches in priv->flow_rules_cache.
 *
 * Nothing is allocated when priv->max_flow_rules is zero. rules_cache gets
 * GVE_FLOW_RULES_CACHE_SIZE zeroed entries and rule_ids_cache gets
 * GVE_FLOW_RULE_IDS_CACHE_SIZE zeroed entries. If the second allocation
 * fails the first one is released again.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

/* Free both flow rule caches and reset the pointers to NULL. */
static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

/* Allocate the DMA-coherent event counter array with
 * priv->num_event_counters entries; the bus address is stored in
 * priv->counter_array_bus.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

/* Release the event counter array, if one is allocated, and clear the
 * pointer so that a second call is a no-op.
 */
static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	/* Size must match the one used in gve_alloc_counter_array() */
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
/* Work handler for priv->stats_report_task: when the do_report_stats flag
 * is set, runs gve_handle_report_stats() and then clears the flag.
 */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

/* Set the do_report_stats flag and queue the stats report work on
 * priv->gve_wq. Does nothing while a probe or a reset is in progress.
 */
static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

/* Timer callback: re-arms the timer priv->stats_report_timer_period
 * milliseconds from now (passed through round_jiffies()) and schedules a
 * stats report.
 */
static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

/* Allocate the DMA-coherent stats report buffer and set up its timer.
 *
 * The buffer holds (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM)
 * entries per TX queue plus (GVE_RX_STATS_REPORT_NUM +
 * NIC_RX_STATS_REPORT_NUM) entries per RX queue; its byte length is kept
 * in priv->stats_report_len. The timer is initialised here but not armed.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

/* Stop the stats report timer and release the stats report buffer.
 * Does nothing when no buffer is allocated.
 */
static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	/* Stop the timer before the buffer goes away */
	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
priv->stats_report = NULL;
}

/* Management interrupt handler: queues priv->service_task on priv->gve_wq. */
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

/* Notification block interrupt handler: writes GVE_IRQ_MASK to the block's
 * irq doorbell and schedules the block's NAPI instance.
 */
static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

/* DQO notification block interrupt handler: only schedules NAPI. */
static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

/* Return non-zero when the current CPU is part of the effective affinity
 * mask of @irq. If no mask can be obtained, 1 is returned.
 */
static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
{
	int cpu_curr = smp_processor_id();
	const struct cpumask *aff_mask;

	aff_mask = irq_get_effective_affinity_mask(irq);
	if (unlikely(!aff_mask))
		return 1;

	return cpumask_test_cpu(cpu_curr, aff_mask);
}

/* NAPI poll callback for a notification block.
 *
 * TX work is polled first: gve_tx_poll() for rings whose q_num is below
 * priv->tx_cfg.num_queues, gve_xdp_poll() for the others (only with a
 * non-zero budget). With a zero budget the function returns 0 after the TX
 * step. RX is then polled with @budget. If any step asked for a reschedule,
 * @budget is returned; otherwise NAPI is completed, the interrupt is
 * acknowledged through the irq doorbell and pending work is re-checked.
 *
 * Return: @budget to be polled again, 0 for a zero budget, otherwise the
 * amount of RX work done.
 */
int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

/* Ensure IRQ ACK is visible before we check pending work.
		 * If queue had issued updates, it would be truly visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		/* Work showed up after the ack: poll again and mask the irq
		 * if this call is the one that scheduled NAPI.
		 */
		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

/* NAPI poll callback for a DQO notification block.
 *
 * TX completions are processed with gve_tx_poll_dqo(); with a zero budget
 * the function then returns 0. RX is polled with @budget. When more work
 * remains, @budget is returned only if the current CPU belongs to the irq's
 * effective affinity mask; otherwise NAPI is completed (reporting at most
 * budget - 1) and the interrupt is re-enabled instead.
 *
 * Return: @budget to be polled again, 0 for a zero budget, otherwise the
 * (possibly decremented) amount of RX work done.
 */
int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule) {
		/* Reschedule by returning budget only if already on the correct
		 * cpu.
		 */
		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
			return budget;

		/* If not on the cpu with which this queue's irq has affinity
		 * with, we avoid rescheduling napi and arm the irq instead so
		 * that napi gets rescheduled back eventually onto the right
		 * cpu.
		 */
		if (work_done == budget)
			work_done--;
	}

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
*/
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

/* Enable MSI-X and set up the management vector and notification blocks.
 *
 * priv->num_ntfy_blks + 1 vectors are requested (GVE_MIN_MSIX at least).
 * When fewer are granted, num_ntfy_blks is lowered to an even number and
 * the TX/RX max_queues and num_queues are clamped to match. The management
 * irq uses the vector at priv->mgmt_msix_idx; every notification block gets
 * its own irq, an affinity hint and a slot in the DMA-coherent
 * irq_db_indices array.
 *
 * Return: 0 on success or a negative errno, with everything acquired so
 * far released again.
 */
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		/* One vector stays reserved for management; the remainder is
		 * rounded down to an even count.
		 *
		 * NOTE(review): new_num_ntfy_blks is even after the mask, so
		 * vecs_left below is always 0.
		 */
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	/* One doorbell index entry per notification block */
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		/* The handler is chosen by queue format */
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ?
gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		block->irq = priv->msix_vectors[msix_idx].vector;
		/* Spread the hints over the first active_cpus CPUs */
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
		block->irq_db_index = &priv->irq_db_indices[i].index;
	}
	return 0;
	/* Error unwinding: each label releases what was acquired before the
	 * failing step and falls through to the labels below it.
	 */
abort_with_some_ntfy_blocks:
	/* Only blocks [0, i) own an irq at this point */
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
		block->irq = 0;
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

/* Release everything set up by gve_alloc_notify_blocks(): the per-block
 * irqs and affinity hints, the management irq, the ntfy_blocks array, the
 * irq doorbell index array, MSI-X itself and the msix_vectors table.
 * Does nothing when priv->msix_vectors is NULL.
 */
static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
		block->irq = 0;
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices,
priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

/* Allocate host-side resources and configure them on the device.
 *
 * Order: flow rule caches, event counter array, notification blocks, stats
 * report buffer, then gve_adminq_configure_device_resources(). For
 * non-GQI devices the packet type lookup table is allocated and fetched
 * with gve_adminq_get_ptype_map_dqo(). Finally the stats report buffer is
 * handed to the device; a failure of that last step is only logged.
 *
 * Return: 0 on success (the device_resources_ok flag is then set), or a
 * negative errno with all earlier steps undone. A failure to configure the
 * device resources is reported as -ENXIO.
 */
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_flow_rule_caches(priv);
	if (err)
		return err;
	err = gve_alloc_counter_array(priv);
	if (err)
		goto abort_with_flow_rule_caches;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		/* The admin queue error is logged above; callers see -ENXIO */
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	/* Failure to attach the stats report is logged but not fatal */
	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
abort_with_flow_rule_caches:
	gve_free_flow_rule_caches(priv);

	return err;
}

static
void gve_trigger_reset(struct gve_priv *priv);

/* Undo gve_setup_device_resources().
 *
 * When the device_resources_ok flag is set, the device is first told to
 * let go of its resources: flow rules are reset, the stats report is
 * detached (length 0, address 0) and the device resources are
 * deconfigured. Each failing step is logged and followed by
 * gve_trigger_reset(). Afterwards the host-side allocations are freed and
 * the flag is cleared, whether or not the device steps ran.
 */
static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		err = gve_flow_rules_reset(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to reset flow rules: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_flow_rule_caches(priv);
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

/* Unregister @qpl from the device and subtract its entries from
 * priv->num_registered_pages. A NULL @qpl is accepted and is a no-op.
 *
 * Return: 0 on success, otherwise the admin queue error (the page count
 * is left untouched in that case).
 */
static int gve_unregister_qpl(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl)
{
	int err;

	if (!qpl)
		return 0;

	err = gve_adminq_unregister_page_list(priv, qpl->id);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Failed to unregister queue page list %d\n",
			  qpl->id);
		return err;
	}

	priv->num_registered_pages -= qpl->num_entries;
	return 0;
}

/* Register @qpl with the device and add its entries to
 * priv->num_registered_pages. A NULL @qpl is accepted and is a no-op.
 *
 * Return: 0 on success, -EINVAL when the registration would exceed
 * priv->max_registered_pages, otherwise the admin queue error.
 */
static int gve_register_qpl(struct gve_priv *priv,
			    struct gve_queue_page_list *qpl)
{
	int pages;
	int err;

	if (!qpl)
		return 0;

	pages = qpl->num_entries;

	/* Refuse before talking to the device if the limit would be exceeded */
	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return
-EINVAL; 707 } 708 709 err = gve_adminq_register_page_list(priv, qpl); 710 if (err) { 711 netif_err(priv, drv, priv->dev, 712 "failed to register queue page list %d\n", 713 qpl->id); 714 return err; 715 } 716 717 priv->num_registered_pages += pages; 718 return 0; 719 } 720 721 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 722 { 723 struct gve_tx_ring *tx = &priv->tx[idx]; 724 725 if (gve_is_gqi(priv)) 726 return tx->tx_fifo.qpl; 727 else 728 return tx->dqo.qpl; 729 } 730 731 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 732 { 733 struct gve_rx_ring *rx = &priv->rx[idx]; 734 735 if (gve_is_gqi(priv)) 736 return rx->data.qpl; 737 else 738 return rx->dqo.qpl; 739 } 740 741 static int gve_register_xdp_qpls(struct gve_priv *priv) 742 { 743 int start_id; 744 int err; 745 int i; 746 747 start_id = gve_xdp_tx_start_queue_id(priv); 748 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 749 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 750 /* This failure will trigger a reset - no need to clean up */ 751 if (err) 752 return err; 753 } 754 return 0; 755 } 756 757 static int gve_register_qpls(struct gve_priv *priv) 758 { 759 int num_tx_qpls, num_rx_qpls; 760 int err; 761 int i; 762 763 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 764 gve_is_qpl(priv)); 765 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 766 767 for (i = 0; i < num_tx_qpls; i++) { 768 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 769 if (err) 770 return err; 771 } 772 773 for (i = 0; i < num_rx_qpls; i++) { 774 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 775 if (err) 776 return err; 777 } 778 779 return 0; 780 } 781 782 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 783 { 784 int start_id; 785 int err; 786 int i; 787 788 start_id = gve_xdp_tx_start_queue_id(priv); 789 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 790 err = 
gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 791 /* This failure will trigger a reset - no need to clean */ 792 if (err) 793 return err; 794 } 795 return 0; 796 } 797 798 static int gve_unregister_qpls(struct gve_priv *priv) 799 { 800 int num_tx_qpls, num_rx_qpls; 801 int err; 802 int i; 803 804 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 805 gve_is_qpl(priv)); 806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 807 808 for (i = 0; i < num_tx_qpls; i++) { 809 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 810 /* This failure will trigger a reset - no need to clean */ 811 if (err) 812 return err; 813 } 814 815 for (i = 0; i < num_rx_qpls; i++) { 816 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 817 /* This failure will trigger a reset - no need to clean */ 818 if (err) 819 return err; 820 } 821 return 0; 822 } 823 824 static int gve_create_xdp_rings(struct gve_priv *priv) 825 { 826 int err; 827 828 err = gve_adminq_create_tx_queues(priv, 829 gve_xdp_tx_start_queue_id(priv), 830 priv->num_xdp_queues); 831 if (err) { 832 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 833 priv->num_xdp_queues); 834 /* This failure will trigger a reset - no need to clean 835 * up 836 */ 837 return err; 838 } 839 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 840 priv->num_xdp_queues); 841 842 return 0; 843 } 844 845 static int gve_create_rings(struct gve_priv *priv) 846 { 847 int num_tx_queues = gve_num_tx_queues(priv); 848 int err; 849 int i; 850 851 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 852 if (err) { 853 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 854 num_tx_queues); 855 /* This failure will trigger a reset - no need to clean 856 * up 857 */ 858 return err; 859 } 860 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 861 num_tx_queues); 862 863 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 864 if (err) { 865 
netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

/* Initialise the u64_stats sync object of every XDP TX ring and record the
 * notification block index of each ring in its ntfy_id field.
 */
static void init_xdp_sync_stats(struct gve_priv *priv)
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Init stats */
	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
	}
}

/* Initialise the u64_stats sync objects of the regular TX rings, the XDP
 * TX rings and the RX rings.
 */
static void gve_init_sync_stats(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		u64_stats_init(&priv->tx[i].statss);

	/* Init stats for XDP TX queues */
	init_xdp_sync_stats(priv);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		u64_stats_init(&priv->rx[i].statss);
}

/* Fill @cfg with the current TX ring configuration: all
 * gve_num_tx_queues() rings starting at index 0, using the existing
 * priv->tx array. raw_addressing is set when queue page lists are not in
 * use.
 */
static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_tx_alloc_rings_cfg *cfg)
{
	cfg->qcfg = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->ring_size = priv->tx_desc_cnt;
	cfg->start_idx = 0;
	cfg->num_rings = gve_num_tx_queues(priv);
	cfg->tx = priv->tx;
}

/* Stop @num_rings TX rings starting at @start_id, using the GQI or DQO
 * stop routine as appropriate. Does nothing when priv->tx is NULL.
 */
static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	int i;

	if (!priv->tx)
		return;

	for (i
= start_id; i < start_id + num_rings; i++) { 941 if (gve_is_gqi(priv)) 942 gve_tx_stop_ring_gqi(priv, i); 943 else 944 gve_tx_stop_ring_dqo(priv, i); 945 } 946 } 947 948 static void gve_tx_start_rings(struct gve_priv *priv, int start_id, 949 int num_rings) 950 { 951 int i; 952 953 for (i = start_id; i < start_id + num_rings; i++) { 954 if (gve_is_gqi(priv)) 955 gve_tx_start_ring_gqi(priv, i); 956 else 957 gve_tx_start_ring_dqo(priv, i); 958 } 959 } 960 961 static int gve_alloc_xdp_rings(struct gve_priv *priv) 962 { 963 struct gve_tx_alloc_rings_cfg cfg = {0}; 964 int err = 0; 965 966 if (!priv->num_xdp_queues) 967 return 0; 968 969 gve_tx_get_curr_alloc_cfg(priv, &cfg); 970 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 971 cfg.num_rings = priv->num_xdp_queues; 972 973 err = gve_tx_alloc_rings_gqi(priv, &cfg); 974 if (err) 975 return err; 976 977 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); 978 init_xdp_sync_stats(priv); 979 980 return 0; 981 } 982 983 static int gve_queues_mem_alloc(struct gve_priv *priv, 984 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 985 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 986 { 987 int err; 988 989 if (gve_is_gqi(priv)) 990 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 991 else 992 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 993 if (err) 994 return err; 995 996 if (gve_is_gqi(priv)) 997 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 998 else 999 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1000 if (err) 1001 goto free_tx; 1002 1003 return 0; 1004 1005 free_tx: 1006 if (gve_is_gqi(priv)) 1007 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1008 else 1009 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1010 return err; 1011 } 1012 1013 static int gve_destroy_xdp_rings(struct gve_priv *priv) 1014 { 1015 int start_id; 1016 int err; 1017 1018 start_id = gve_xdp_tx_start_queue_id(priv); 1019 err = gve_adminq_destroy_tx_queues(priv, 1020 start_id, 1021 priv->num_xdp_queues); 1022 if (err) { 1023 netif_err(priv, drv, 
priv->dev, 1024 "failed to destroy XDP queues\n"); 1025 /* This failure will trigger a reset - no need to clean up */ 1026 return err; 1027 } 1028 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 1029 1030 return 0; 1031 } 1032 1033 static int gve_destroy_rings(struct gve_priv *priv) 1034 { 1035 int num_tx_queues = gve_num_tx_queues(priv); 1036 int err; 1037 1038 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1039 if (err) { 1040 netif_err(priv, drv, priv->dev, 1041 "failed to destroy tx queues\n"); 1042 /* This failure will trigger a reset - no need to clean up */ 1043 return err; 1044 } 1045 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1046 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1047 if (err) { 1048 netif_err(priv, drv, priv->dev, 1049 "failed to destroy rx queues\n"); 1050 /* This failure will trigger a reset - no need to clean up */ 1051 return err; 1052 } 1053 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1054 return 0; 1055 } 1056 1057 static void gve_free_xdp_rings(struct gve_priv *priv) 1058 { 1059 struct gve_tx_alloc_rings_cfg cfg = {0}; 1060 1061 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1062 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1063 cfg.num_rings = priv->num_xdp_queues; 1064 1065 if (priv->tx) { 1066 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); 1067 gve_tx_free_rings_gqi(priv, &cfg); 1068 } 1069 } 1070 1071 static void gve_queues_mem_free(struct gve_priv *priv, 1072 struct gve_tx_alloc_rings_cfg *tx_cfg, 1073 struct gve_rx_alloc_rings_cfg *rx_cfg) 1074 { 1075 if (gve_is_gqi(priv)) { 1076 gve_tx_free_rings_gqi(priv, tx_cfg); 1077 gve_rx_free_rings_gqi(priv, rx_cfg); 1078 } else { 1079 gve_tx_free_rings_dqo(priv, tx_cfg); 1080 gve_rx_free_rings_dqo(priv, rx_cfg); 1081 } 1082 } 1083 1084 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1085 struct page **page, dma_addr_t *dma, 1086 enum dma_data_direction dir, gfp_t gfp_flags) 1087 { 1088 *page = 
alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		/* Drop the page again; *page is left pointing at it */
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

/* Allocate a queue page list with id @id holding @pages DMA-mapped pages.
 *
 * num_entries counts the pages that were allocated successfully, so that
 * a partially filled list can be released by gve_free_queue_page_list().
 *
 * Return: the new list, or NULL on any allocation or mapping failure
 * (everything allocated so far is freed).
 */
struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
						      u32 id, int pages)
{
	struct gve_queue_page_list *qpl;
	int err;
	int i;

	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
	if (!qpl)
		return NULL;

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	if (!qpl->pages)
		goto abort;

	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	if (!qpl->page_buses)
		goto abort;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		if (err)
			goto abort;
		qpl->num_entries++;
	}

	return qpl;

abort:
	gve_free_queue_page_list(priv, qpl, id);
	return NULL;
}

/* Undo gve_alloc_page(): unmap @dma unless it is a mapping error value and
 * drop the reference on @page unless it is NULL.
 */
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

/* Free a queue page list, including a partially constructed one: the
 * first num_entries pages are unmapped and released, then the page_buses
 * and pages arrays and the list itself are freed. A NULL @qpl is a no-op.
 */
void gve_free_queue_page_list(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl,
			      u32 id)
{
	int i;

	if (!qpl)
		return;
	/* Skip the steps whose backing arrays were never allocated */
	if (!qpl->pages)
		goto free_qpl;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
1170 free_qpl: 1171 kvfree(qpl); 1172 } 1173 1174 /* Use this to schedule a reset when the device is capable of continuing 1175 * to handle other requests in its current state. If it is not, do a reset 1176 * in thread instead. 1177 */ 1178 void gve_schedule_reset(struct gve_priv *priv) 1179 { 1180 gve_set_do_reset(priv); 1181 queue_work(priv->gve_wq, &priv->service_task); 1182 } 1183 1184 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1185 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1186 static void gve_turndown(struct gve_priv *priv); 1187 static void gve_turnup(struct gve_priv *priv); 1188 1189 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1190 { 1191 struct napi_struct *napi; 1192 struct gve_rx_ring *rx; 1193 int err = 0; 1194 int i, j; 1195 u32 tx_qid; 1196 1197 if (!priv->num_xdp_queues) 1198 return 0; 1199 1200 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1201 rx = &priv->rx[i]; 1202 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1203 1204 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1205 napi->napi_id); 1206 if (err) 1207 goto err; 1208 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1209 MEM_TYPE_PAGE_SHARED, NULL); 1210 if (err) 1211 goto err; 1212 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1213 if (rx->xsk_pool) { 1214 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1215 napi->napi_id); 1216 if (err) 1217 goto err; 1218 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1219 MEM_TYPE_XSK_BUFF_POOL, NULL); 1220 if (err) 1221 goto err; 1222 xsk_pool_set_rxq_info(rx->xsk_pool, 1223 &rx->xsk_rxq); 1224 } 1225 } 1226 1227 for (i = 0; i < priv->num_xdp_queues; i++) { 1228 tx_qid = gve_xdp_tx_queue_id(priv, i); 1229 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1230 } 1231 return 0; 1232 1233 err: 1234 for (j = i; j >= 0; j--) { 1235 rx = &priv->rx[j]; 1236 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1237 xdp_rxq_info_unreg(&rx->xdp_rxq); 1238 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 
1239 xdp_rxq_info_unreg(&rx->xsk_rxq); 1240 } 1241 return err; 1242 } 1243 1244 static void gve_unreg_xdp_info(struct gve_priv *priv) 1245 { 1246 int i, tx_qid; 1247 1248 if (!priv->num_xdp_queues) 1249 return; 1250 1251 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1252 struct gve_rx_ring *rx = &priv->rx[i]; 1253 1254 xdp_rxq_info_unreg(&rx->xdp_rxq); 1255 if (rx->xsk_pool) { 1256 xdp_rxq_info_unreg(&rx->xsk_rxq); 1257 rx->xsk_pool = NULL; 1258 } 1259 } 1260 1261 for (i = 0; i < priv->num_xdp_queues; i++) { 1262 tx_qid = gve_xdp_tx_queue_id(priv, i); 1263 priv->tx[tx_qid].xsk_pool = NULL; 1264 } 1265 } 1266 1267 static void gve_drain_page_cache(struct gve_priv *priv) 1268 { 1269 int i; 1270 1271 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1272 page_frag_cache_drain(&priv->rx[i].page_cache); 1273 } 1274 1275 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1276 struct gve_rx_alloc_rings_cfg *cfg) 1277 { 1278 cfg->qcfg = &priv->rx_cfg; 1279 cfg->qcfg_tx = &priv->tx_cfg; 1280 cfg->raw_addressing = !gve_is_qpl(priv); 1281 cfg->enable_header_split = priv->header_split_enabled; 1282 cfg->ring_size = priv->rx_desc_cnt; 1283 cfg->packet_buffer_size = gve_is_gqi(priv) ? 
1284 GVE_DEFAULT_RX_BUFFER_SIZE : 1285 priv->data_buffer_size_dqo; 1286 cfg->rx = priv->rx; 1287 } 1288 1289 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1290 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1291 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1292 { 1293 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1294 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1295 } 1296 1297 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1298 { 1299 if (gve_is_gqi(priv)) 1300 gve_rx_start_ring_gqi(priv, i); 1301 else 1302 gve_rx_start_ring_dqo(priv, i); 1303 } 1304 1305 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1306 { 1307 int i; 1308 1309 for (i = 0; i < num_rings; i++) 1310 gve_rx_start_ring(priv, i); 1311 } 1312 1313 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1314 { 1315 if (gve_is_gqi(priv)) 1316 gve_rx_stop_ring_gqi(priv, i); 1317 else 1318 gve_rx_stop_ring_dqo(priv, i); 1319 } 1320 1321 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1322 { 1323 int i; 1324 1325 if (!priv->rx) 1326 return; 1327 1328 for (i = 0; i < num_rings; i++) 1329 gve_rx_stop_ring(priv, i); 1330 } 1331 1332 static void gve_queues_mem_remove(struct gve_priv *priv) 1333 { 1334 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1335 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1336 1337 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1338 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1339 priv->tx = NULL; 1340 priv->rx = NULL; 1341 } 1342 1343 /* The passed-in queue memory is stored into priv and the queues are made live. 1344 * No memory is allocated. Passed-in memory is freed on errors. 
1345 */ 1346 static int gve_queues_start(struct gve_priv *priv, 1347 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1348 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1349 { 1350 struct net_device *dev = priv->dev; 1351 int err; 1352 1353 /* Record new resources into priv */ 1354 priv->tx = tx_alloc_cfg->tx; 1355 priv->rx = rx_alloc_cfg->rx; 1356 1357 /* Record new configs into priv */ 1358 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1359 priv->rx_cfg = *rx_alloc_cfg->qcfg; 1360 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1361 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1362 1363 if (priv->xdp_prog) 1364 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1365 else 1366 priv->num_xdp_queues = 0; 1367 1368 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); 1369 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); 1370 gve_init_sync_stats(priv); 1371 1372 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1373 if (err) 1374 goto stop_and_free_rings; 1375 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1376 if (err) 1377 goto stop_and_free_rings; 1378 1379 err = gve_reg_xdp_info(priv, dev); 1380 if (err) 1381 goto stop_and_free_rings; 1382 1383 err = gve_register_qpls(priv); 1384 if (err) 1385 goto reset; 1386 1387 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1388 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; 1389 1390 err = gve_create_rings(priv); 1391 if (err) 1392 goto reset; 1393 1394 gve_set_device_rings_ok(priv); 1395 1396 if (gve_get_report_stats(priv)) 1397 mod_timer(&priv->stats_report_timer, 1398 round_jiffies(jiffies + 1399 msecs_to_jiffies(priv->stats_report_timer_period))); 1400 1401 gve_turnup(priv); 1402 queue_work(priv->gve_wq, &priv->service_task); 1403 priv->interface_up_cnt++; 1404 return 0; 1405 1406 reset: 1407 if (gve_get_reset_in_progress(priv)) 1408 goto stop_and_free_rings; 1409 gve_reset_and_teardown(priv, true); 1410 /* if this fails there is nothing we can do so just ignore the 
return */ 1411 gve_reset_recovery(priv, false); 1412 /* return the original error */ 1413 return err; 1414 stop_and_free_rings: 1415 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1416 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1417 gve_queues_mem_remove(priv); 1418 return err; 1419 } 1420 1421 static int gve_open(struct net_device *dev) 1422 { 1423 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1424 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1425 struct gve_priv *priv = netdev_priv(dev); 1426 int err; 1427 1428 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1429 1430 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1431 if (err) 1432 return err; 1433 1434 /* No need to free on error: ownership of resources is lost after 1435 * calling gve_queues_start. 1436 */ 1437 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1438 if (err) 1439 return err; 1440 1441 return 0; 1442 } 1443 1444 static int gve_queues_stop(struct gve_priv *priv) 1445 { 1446 int err; 1447 1448 netif_carrier_off(priv->dev); 1449 if (gve_get_device_rings_ok(priv)) { 1450 gve_turndown(priv); 1451 gve_drain_page_cache(priv); 1452 err = gve_destroy_rings(priv); 1453 if (err) 1454 goto err; 1455 err = gve_unregister_qpls(priv); 1456 if (err) 1457 goto err; 1458 gve_clear_device_rings_ok(priv); 1459 } 1460 del_timer_sync(&priv->stats_report_timer); 1461 1462 gve_unreg_xdp_info(priv); 1463 1464 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1465 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1466 1467 priv->interface_down_cnt++; 1468 return 0; 1469 1470 err: 1471 /* This must have been called from a reset due to the rtnl lock 1472 * so just return at this point. 
1473 */ 1474 if (gve_get_reset_in_progress(priv)) 1475 return err; 1476 /* Otherwise reset before returning */ 1477 gve_reset_and_teardown(priv, true); 1478 return gve_reset_recovery(priv, false); 1479 } 1480 1481 static int gve_close(struct net_device *dev) 1482 { 1483 struct gve_priv *priv = netdev_priv(dev); 1484 int err; 1485 1486 err = gve_queues_stop(priv); 1487 if (err) 1488 return err; 1489 1490 gve_queues_mem_remove(priv); 1491 return 0; 1492 } 1493 1494 static int gve_remove_xdp_queues(struct gve_priv *priv) 1495 { 1496 int err; 1497 1498 err = gve_destroy_xdp_rings(priv); 1499 if (err) 1500 return err; 1501 1502 err = gve_unregister_xdp_qpls(priv); 1503 if (err) 1504 return err; 1505 1506 gve_unreg_xdp_info(priv); 1507 gve_free_xdp_rings(priv); 1508 1509 priv->num_xdp_queues = 0; 1510 return 0; 1511 } 1512 1513 static int gve_add_xdp_queues(struct gve_priv *priv) 1514 { 1515 int err; 1516 1517 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1518 1519 err = gve_alloc_xdp_rings(priv); 1520 if (err) 1521 goto err; 1522 1523 err = gve_reg_xdp_info(priv, priv->dev); 1524 if (err) 1525 goto free_xdp_rings; 1526 1527 err = gve_register_xdp_qpls(priv); 1528 if (err) 1529 goto free_xdp_rings; 1530 1531 err = gve_create_xdp_rings(priv); 1532 if (err) 1533 goto free_xdp_rings; 1534 1535 return 0; 1536 1537 free_xdp_rings: 1538 gve_free_xdp_rings(priv); 1539 err: 1540 priv->num_xdp_queues = 0; 1541 return err; 1542 } 1543 1544 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1545 { 1546 if (!gve_get_napi_enabled(priv)) 1547 return; 1548 1549 if (link_status == netif_carrier_ok(priv->dev)) 1550 return; 1551 1552 if (link_status) { 1553 netdev_info(priv->dev, "Device link is up.\n"); 1554 netif_carrier_on(priv->dev); 1555 } else { 1556 netdev_info(priv->dev, "Device link is down.\n"); 1557 netif_carrier_off(priv->dev); 1558 } 1559 } 1560 1561 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1562 struct netlink_ext_ack 
*extack) 1563 { 1564 struct bpf_prog *old_prog; 1565 int err = 0; 1566 u32 status; 1567 1568 old_prog = READ_ONCE(priv->xdp_prog); 1569 if (!netif_carrier_ok(priv->dev)) { 1570 WRITE_ONCE(priv->xdp_prog, prog); 1571 if (old_prog) 1572 bpf_prog_put(old_prog); 1573 return 0; 1574 } 1575 1576 gve_turndown(priv); 1577 if (!old_prog && prog) { 1578 // Allocate XDP TX queues if an XDP program is 1579 // being installed 1580 err = gve_add_xdp_queues(priv); 1581 if (err) 1582 goto out; 1583 } else if (old_prog && !prog) { 1584 // Remove XDP TX queues if an XDP program is 1585 // being uninstalled 1586 err = gve_remove_xdp_queues(priv); 1587 if (err) 1588 goto out; 1589 } 1590 WRITE_ONCE(priv->xdp_prog, prog); 1591 if (old_prog) 1592 bpf_prog_put(old_prog); 1593 1594 out: 1595 gve_turnup(priv); 1596 status = ioread32be(&priv->reg_bar0->device_status); 1597 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1598 return err; 1599 } 1600 1601 static int gve_xsk_pool_enable(struct net_device *dev, 1602 struct xsk_buff_pool *pool, 1603 u16 qid) 1604 { 1605 struct gve_priv *priv = netdev_priv(dev); 1606 struct napi_struct *napi; 1607 struct gve_rx_ring *rx; 1608 int tx_qid; 1609 int err; 1610 1611 if (qid >= priv->rx_cfg.num_queues) { 1612 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1613 return -EINVAL; 1614 } 1615 if (xsk_pool_get_rx_frame_size(pool) < 1616 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1617 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1618 return -EINVAL; 1619 } 1620 1621 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1622 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1623 if (err) 1624 return err; 1625 1626 /* If XDP prog is not installed, return */ 1627 if (!priv->xdp_prog) 1628 return 0; 1629 1630 rx = &priv->rx[qid]; 1631 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1632 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1633 if (err) 1634 goto err; 1635 1636 err = 
xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1637 MEM_TYPE_XSK_BUFF_POOL, NULL); 1638 if (err) 1639 goto err; 1640 1641 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1642 rx->xsk_pool = pool; 1643 1644 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1645 priv->tx[tx_qid].xsk_pool = pool; 1646 1647 return 0; 1648 err: 1649 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1650 xdp_rxq_info_unreg(&rx->xsk_rxq); 1651 1652 xsk_pool_dma_unmap(pool, 1653 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1654 return err; 1655 } 1656 1657 static int gve_xsk_pool_disable(struct net_device *dev, 1658 u16 qid) 1659 { 1660 struct gve_priv *priv = netdev_priv(dev); 1661 struct napi_struct *napi_rx; 1662 struct napi_struct *napi_tx; 1663 struct xsk_buff_pool *pool; 1664 int tx_qid; 1665 1666 pool = xsk_get_pool_from_qid(dev, qid); 1667 if (!pool) 1668 return -EINVAL; 1669 if (qid >= priv->rx_cfg.num_queues) 1670 return -EINVAL; 1671 1672 /* If XDP prog is not installed, unmap DMA and return */ 1673 if (!priv->xdp_prog) 1674 goto done; 1675 1676 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1677 if (!netif_running(dev)) { 1678 priv->rx[qid].xsk_pool = NULL; 1679 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1680 priv->tx[tx_qid].xsk_pool = NULL; 1681 goto done; 1682 } 1683 1684 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1685 napi_disable(napi_rx); /* make sure current rx poll is done */ 1686 1687 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1688 napi_disable(napi_tx); /* make sure current tx poll is done */ 1689 1690 priv->rx[qid].xsk_pool = NULL; 1691 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1692 priv->tx[tx_qid].xsk_pool = NULL; 1693 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1694 1695 napi_enable(napi_rx); 1696 if (gve_rx_work_pending(&priv->rx[qid])) 1697 napi_schedule(napi_rx); 1698 1699 napi_enable(napi_tx); 1700 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1701 napi_schedule(napi_tx); 1702 1703 done: 1704 xsk_pool_dma_unmap(pool, 1705 
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1706 return 0; 1707 } 1708 1709 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1710 { 1711 struct gve_priv *priv = netdev_priv(dev); 1712 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1713 1714 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1715 return -EINVAL; 1716 1717 if (flags & XDP_WAKEUP_TX) { 1718 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1719 struct napi_struct *napi = 1720 &priv->ntfy_blocks[tx->ntfy_id].napi; 1721 1722 if (!napi_if_scheduled_mark_missed(napi)) { 1723 /* Call local_bh_enable to trigger SoftIRQ processing */ 1724 local_bh_disable(); 1725 napi_schedule(napi); 1726 local_bh_enable(); 1727 } 1728 1729 tx->xdp_xsk_wakeup++; 1730 } 1731 1732 return 0; 1733 } 1734 1735 static int verify_xdp_configuration(struct net_device *dev) 1736 { 1737 struct gve_priv *priv = netdev_priv(dev); 1738 1739 if (dev->features & NETIF_F_LRO) { 1740 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1741 return -EOPNOTSUPP; 1742 } 1743 1744 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1745 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1746 priv->queue_format); 1747 return -EOPNOTSUPP; 1748 } 1749 1750 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { 1751 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1752 dev->mtu); 1753 return -EOPNOTSUPP; 1754 } 1755 1756 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1757 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1758 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1759 priv->rx_cfg.num_queues, 1760 priv->tx_cfg.num_queues, 1761 priv->tx_cfg.max_queues); 1762 return -EINVAL; 1763 } 1764 return 0; 1765 } 1766 1767 static int gve_xdp(struct 
net_device *dev, struct netdev_bpf *xdp) 1768 { 1769 struct gve_priv *priv = netdev_priv(dev); 1770 int err; 1771 1772 err = verify_xdp_configuration(dev); 1773 if (err) 1774 return err; 1775 switch (xdp->command) { 1776 case XDP_SETUP_PROG: 1777 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1778 case XDP_SETUP_XSK_POOL: 1779 if (xdp->xsk.pool) 1780 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1781 else 1782 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1783 default: 1784 return -EINVAL; 1785 } 1786 } 1787 1788 int gve_flow_rules_reset(struct gve_priv *priv) 1789 { 1790 if (!priv->max_flow_rules) 1791 return 0; 1792 1793 return gve_adminq_reset_flow_rules(priv); 1794 } 1795 1796 int gve_adjust_config(struct gve_priv *priv, 1797 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1798 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1799 { 1800 int err; 1801 1802 /* Allocate resources for the new confiugration */ 1803 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1804 if (err) { 1805 netif_err(priv, drv, priv->dev, 1806 "Adjust config failed to alloc new queues"); 1807 return err; 1808 } 1809 1810 /* Teardown the device and free existing resources */ 1811 err = gve_close(priv->dev); 1812 if (err) { 1813 netif_err(priv, drv, priv->dev, 1814 "Adjust config failed to close old queues"); 1815 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1816 return err; 1817 } 1818 1819 /* Bring the device back up again with the new resources. */ 1820 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1821 if (err) { 1822 netif_err(priv, drv, priv->dev, 1823 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1824 /* No need to free on error: ownership of resources is lost after 1825 * calling gve_queues_start. 
1826 */ 1827 gve_turndown(priv); 1828 return err; 1829 } 1830 1831 return 0; 1832 } 1833 1834 int gve_adjust_queues(struct gve_priv *priv, 1835 struct gve_queue_config new_rx_config, 1836 struct gve_queue_config new_tx_config) 1837 { 1838 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1839 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1840 int err; 1841 1842 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1843 1844 /* Relay the new config from ethtool */ 1845 tx_alloc_cfg.qcfg = &new_tx_config; 1846 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1847 rx_alloc_cfg.qcfg = &new_rx_config; 1848 tx_alloc_cfg.num_rings = new_tx_config.num_queues; 1849 1850 if (netif_carrier_ok(priv->dev)) { 1851 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1852 return err; 1853 } 1854 /* Set the config for the next up. */ 1855 priv->tx_cfg = new_tx_config; 1856 priv->rx_cfg = new_rx_config; 1857 1858 return 0; 1859 } 1860 1861 static void gve_turndown(struct gve_priv *priv) 1862 { 1863 int idx; 1864 1865 if (netif_carrier_ok(priv->dev)) 1866 netif_carrier_off(priv->dev); 1867 1868 if (!gve_get_napi_enabled(priv)) 1869 return; 1870 1871 /* Disable napi to prevent more work from coming in */ 1872 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1873 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1874 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1875 1876 if (!gve_tx_was_added_to_block(priv, idx)) 1877 continue; 1878 napi_disable(&block->napi); 1879 } 1880 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1881 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1882 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1883 1884 if (!gve_rx_was_added_to_block(priv, idx)) 1885 continue; 1886 napi_disable(&block->napi); 1887 } 1888 1889 /* Stop tx queues */ 1890 netif_tx_disable(priv->dev); 1891 1892 gve_clear_napi_enabled(priv); 1893 gve_clear_report_stats(priv); 1894 } 1895 1896 static void gve_turnup(struct gve_priv *priv) 1897 { 
1898 int idx; 1899 1900 /* Start the tx queues */ 1901 netif_tx_start_all_queues(priv->dev); 1902 1903 /* Enable napi and unmask interrupts for all queues */ 1904 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1905 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1906 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1907 1908 if (!gve_tx_was_added_to_block(priv, idx)) 1909 continue; 1910 1911 napi_enable(&block->napi); 1912 if (gve_is_gqi(priv)) { 1913 iowrite32be(0, gve_irq_doorbell(priv, block)); 1914 } else { 1915 gve_set_itr_coalesce_usecs_dqo(priv, block, 1916 priv->tx_coalesce_usecs); 1917 } 1918 1919 /* Any descs written by the NIC before this barrier will be 1920 * handled by the one-off napi schedule below. Whereas any 1921 * descs after the barrier will generate interrupts. 1922 */ 1923 mb(); 1924 napi_schedule(&block->napi); 1925 } 1926 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1927 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1928 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1929 1930 if (!gve_rx_was_added_to_block(priv, idx)) 1931 continue; 1932 1933 napi_enable(&block->napi); 1934 if (gve_is_gqi(priv)) { 1935 iowrite32be(0, gve_irq_doorbell(priv, block)); 1936 } else { 1937 gve_set_itr_coalesce_usecs_dqo(priv, block, 1938 priv->rx_coalesce_usecs); 1939 } 1940 1941 /* Any descs written by the NIC before this barrier will be 1942 * handled by the one-off napi schedule below. Whereas any 1943 * descs after the barrier will generate interrupts. 
1944 */ 1945 mb(); 1946 napi_schedule(&block->napi); 1947 } 1948 1949 gve_set_napi_enabled(priv); 1950 } 1951 1952 static void gve_turnup_and_check_status(struct gve_priv *priv) 1953 { 1954 u32 status; 1955 1956 gve_turnup(priv); 1957 status = ioread32be(&priv->reg_bar0->device_status); 1958 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1959 } 1960 1961 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1962 { 1963 struct gve_notify_block *block; 1964 struct gve_tx_ring *tx = NULL; 1965 struct gve_priv *priv; 1966 u32 last_nic_done; 1967 u32 current_time; 1968 u32 ntfy_idx; 1969 1970 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1971 priv = netdev_priv(dev); 1972 if (txqueue > priv->tx_cfg.num_queues) 1973 goto reset; 1974 1975 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1976 if (ntfy_idx >= priv->num_ntfy_blks) 1977 goto reset; 1978 1979 block = &priv->ntfy_blocks[ntfy_idx]; 1980 tx = block->tx; 1981 1982 current_time = jiffies_to_msecs(jiffies); 1983 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1984 goto reset; 1985 1986 /* Check to see if there are missed completions, which will allow us to 1987 * kick the queue. 1988 */ 1989 last_nic_done = gve_tx_load_event_counter(priv, tx); 1990 if (last_nic_done - tx->done) { 1991 netdev_info(dev, "Kicking queue %d", txqueue); 1992 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1993 napi_schedule(&block->napi); 1994 tx->last_kick_msec = current_time; 1995 goto out; 1996 } // Else reset. 
1997 1998 reset: 1999 gve_schedule_reset(priv); 2000 2001 out: 2002 if (tx) 2003 tx->queue_timeout++; 2004 priv->tx_timeo_cnt++; 2005 } 2006 2007 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2008 { 2009 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2010 return GVE_MAX_RX_BUFFER_SIZE; 2011 else 2012 return GVE_DEFAULT_RX_BUFFER_SIZE; 2013 } 2014 2015 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 2016 bool gve_header_split_supported(const struct gve_priv *priv) 2017 { 2018 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 2019 } 2020 2021 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2022 { 2023 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2024 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2025 bool enable_hdr_split; 2026 int err = 0; 2027 2028 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2029 return 0; 2030 2031 if (!gve_header_split_supported(priv)) { 2032 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2033 return -EOPNOTSUPP; 2034 } 2035 2036 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2037 enable_hdr_split = true; 2038 else 2039 enable_hdr_split = false; 2040 2041 if (enable_hdr_split == priv->header_split_enabled) 2042 return 0; 2043 2044 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2045 2046 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2047 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2048 2049 if (netif_running(priv->dev)) 2050 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2051 return err; 2052 } 2053 2054 static int gve_set_features(struct net_device *netdev, 2055 netdev_features_t features) 2056 { 2057 const netdev_features_t orig_features = netdev->features; 2058 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2059 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2060 struct gve_priv *priv = 
netdev_priv(netdev); 2061 int err; 2062 2063 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2064 2065 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2066 netdev->features ^= NETIF_F_LRO; 2067 if (netif_carrier_ok(netdev)) { 2068 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2069 if (err) 2070 goto revert_features; 2071 } 2072 } 2073 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2074 err = gve_flow_rules_reset(priv); 2075 if (err) 2076 goto revert_features; 2077 } 2078 2079 return 0; 2080 2081 revert_features: 2082 netdev->features = orig_features; 2083 return err; 2084 } 2085 2086 static const struct net_device_ops gve_netdev_ops = { 2087 .ndo_start_xmit = gve_start_xmit, 2088 .ndo_features_check = gve_features_check, 2089 .ndo_open = gve_open, 2090 .ndo_stop = gve_close, 2091 .ndo_get_stats64 = gve_get_stats, 2092 .ndo_tx_timeout = gve_tx_timeout, 2093 .ndo_set_features = gve_set_features, 2094 .ndo_bpf = gve_xdp, 2095 .ndo_xdp_xmit = gve_xdp_xmit, 2096 .ndo_xsk_wakeup = gve_xsk_wakeup, 2097 }; 2098 2099 static void gve_handle_status(struct gve_priv *priv, u32 status) 2100 { 2101 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2102 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2103 gve_set_do_reset(priv); 2104 } 2105 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2106 priv->stats_report_trigger_cnt++; 2107 gve_set_do_report_stats(priv); 2108 } 2109 } 2110 2111 static void gve_handle_reset(struct gve_priv *priv) 2112 { 2113 /* A service task will be scheduled at the end of probe to catch any 2114 * resets that need to happen, and we don't want to reset until 2115 * probe is done. 
2116 */ 2117 if (gve_get_probe_in_progress(priv)) 2118 return; 2119 2120 if (gve_get_do_reset(priv)) { 2121 rtnl_lock(); 2122 gve_reset(priv, false); 2123 rtnl_unlock(); 2124 } 2125 } 2126 2127 void gve_handle_report_stats(struct gve_priv *priv) 2128 { 2129 struct stats *stats = priv->stats_report->stats; 2130 int idx, stats_idx = 0; 2131 unsigned int start = 0; 2132 u64 tx_bytes; 2133 2134 if (!gve_get_report_stats(priv)) 2135 return; 2136 2137 be64_add_cpu(&priv->stats_report->written_count, 1); 2138 /* tx stats */ 2139 if (priv->tx) { 2140 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2141 u32 last_completion = 0; 2142 u32 tx_frames = 0; 2143 2144 /* DQO doesn't currently support these metrics. */ 2145 if (gve_is_gqi(priv)) { 2146 last_completion = priv->tx[idx].done; 2147 tx_frames = priv->tx[idx].req; 2148 } 2149 2150 do { 2151 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2152 tx_bytes = priv->tx[idx].bytes_done; 2153 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2154 stats[stats_idx++] = (struct stats) { 2155 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2156 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2157 .queue_id = cpu_to_be32(idx), 2158 }; 2159 stats[stats_idx++] = (struct stats) { 2160 .stat_name = cpu_to_be32(TX_STOP_CNT), 2161 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2162 .queue_id = cpu_to_be32(idx), 2163 }; 2164 stats[stats_idx++] = (struct stats) { 2165 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2166 .value = cpu_to_be64(tx_frames), 2167 .queue_id = cpu_to_be32(idx), 2168 }; 2169 stats[stats_idx++] = (struct stats) { 2170 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2171 .value = cpu_to_be64(tx_bytes), 2172 .queue_id = cpu_to_be32(idx), 2173 }; 2174 stats[stats_idx++] = (struct stats) { 2175 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2176 .value = cpu_to_be64(last_completion), 2177 .queue_id = cpu_to_be32(idx), 2178 }; 2179 stats[stats_idx++] = (struct stats) { 2180 .stat_name = 
cpu_to_be32(TX_TIMEOUT_CNT), 2181 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2182 .queue_id = cpu_to_be32(idx), 2183 }; 2184 } 2185 } 2186 /* rx stats */ 2187 if (priv->rx) { 2188 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2189 stats[stats_idx++] = (struct stats) { 2190 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2191 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2192 .queue_id = cpu_to_be32(idx), 2193 }; 2194 stats[stats_idx++] = (struct stats) { 2195 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2196 .value = cpu_to_be64(priv->rx[0].fill_cnt), 2197 .queue_id = cpu_to_be32(idx), 2198 }; 2199 } 2200 } 2201 } 2202 2203 /* Handle NIC status register changes, reset requests and report stats */ 2204 static void gve_service_task(struct work_struct *work) 2205 { 2206 struct gve_priv *priv = container_of(work, struct gve_priv, 2207 service_task); 2208 u32 status = ioread32be(&priv->reg_bar0->device_status); 2209 2210 gve_handle_status(priv, status); 2211 2212 gve_handle_reset(priv); 2213 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2214 } 2215 2216 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2217 { 2218 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2219 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 2220 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2221 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2222 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2223 } else { 2224 priv->dev->xdp_features = 0; 2225 } 2226 } 2227 2228 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2229 { 2230 int num_ntfy; 2231 int err; 2232 2233 /* Set up the adminq */ 2234 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2235 if (err) { 2236 dev_err(&priv->pdev->dev, 2237 "Failed to alloc admin queue: err=%d\n", err); 2238 return err; 2239 } 2240 2241 err = gve_verify_driver_compatibility(priv); 2242 if (err) { 2243 dev_err(&priv->pdev->dev, 2244 "Could not verify driver 
compatibility: err=%d\n", err); 2245 goto err; 2246 } 2247 2248 priv->num_registered_pages = 0; 2249 2250 if (skip_describe_device) 2251 goto setup_device; 2252 2253 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2254 /* Get the initial information we need from the device */ 2255 err = gve_adminq_describe_device(priv); 2256 if (err) { 2257 dev_err(&priv->pdev->dev, 2258 "Could not get device information: err=%d\n", err); 2259 goto err; 2260 } 2261 priv->dev->mtu = priv->dev->max_mtu; 2262 num_ntfy = pci_msix_vec_count(priv->pdev); 2263 if (num_ntfy <= 0) { 2264 dev_err(&priv->pdev->dev, 2265 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2266 err = num_ntfy; 2267 goto err; 2268 } else if (num_ntfy < GVE_MIN_MSIX) { 2269 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2270 GVE_MIN_MSIX, num_ntfy); 2271 err = -EINVAL; 2272 goto err; 2273 } 2274 2275 /* Big TCP is only supported on DQ*/ 2276 if (!gve_is_gqi(priv)) 2277 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2278 2279 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2280 /* gvnic has one Notification Block per MSI-x vector, except for the 2281 * management vector 2282 */ 2283 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2284 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2285 2286 priv->tx_cfg.max_queues = 2287 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2288 priv->rx_cfg.max_queues = 2289 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2290 2291 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2292 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2293 if (priv->default_num_queues > 0) { 2294 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2295 priv->tx_cfg.num_queues); 2296 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2297 priv->rx_cfg.num_queues); 2298 } 2299 2300 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2301 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2302 
dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2303 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2304 2305 if (!gve_is_gqi(priv)) { 2306 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2307 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2308 } 2309 2310 setup_device: 2311 gve_set_netdev_xdp_features(priv); 2312 err = gve_setup_device_resources(priv); 2313 if (!err) 2314 return 0; 2315 err: 2316 gve_adminq_free(&priv->pdev->dev, priv); 2317 return err; 2318 } 2319 2320 static void gve_teardown_priv_resources(struct gve_priv *priv) 2321 { 2322 gve_teardown_device_resources(priv); 2323 gve_adminq_free(&priv->pdev->dev, priv); 2324 } 2325 2326 static void gve_trigger_reset(struct gve_priv *priv) 2327 { 2328 /* Reset the device by releasing the AQ */ 2329 gve_adminq_release(priv); 2330 } 2331 2332 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2333 { 2334 gve_trigger_reset(priv); 2335 /* With the reset having already happened, close cannot fail */ 2336 if (was_up) 2337 gve_close(priv->dev); 2338 gve_teardown_priv_resources(priv); 2339 } 2340 2341 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2342 { 2343 int err; 2344 2345 err = gve_init_priv(priv, true); 2346 if (err) 2347 goto err; 2348 if (was_up) { 2349 err = gve_open(priv->dev); 2350 if (err) 2351 goto err; 2352 } 2353 return 0; 2354 err: 2355 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2356 gve_turndown(priv); 2357 return err; 2358 } 2359 2360 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2361 { 2362 bool was_up = netif_carrier_ok(priv->dev); 2363 int err; 2364 2365 dev_info(&priv->pdev->dev, "Performing reset\n"); 2366 gve_clear_do_reset(priv); 2367 gve_set_reset_in_progress(priv); 2368 /* If we aren't attempting to teardown normally, just go turndown and 2369 * reset right away. 
2370 */ 2371 if (!attempt_teardown) { 2372 gve_turndown(priv); 2373 gve_reset_and_teardown(priv, was_up); 2374 } else { 2375 /* Otherwise attempt to close normally */ 2376 if (was_up) { 2377 err = gve_close(priv->dev); 2378 /* If that fails reset as we did above */ 2379 if (err) 2380 gve_reset_and_teardown(priv, was_up); 2381 } 2382 /* Clean up any remaining resources */ 2383 gve_teardown_priv_resources(priv); 2384 } 2385 2386 /* Set it all back up */ 2387 err = gve_reset_recovery(priv, was_up); 2388 gve_clear_reset_in_progress(priv); 2389 priv->reset_cnt++; 2390 priv->interface_up_cnt = 0; 2391 priv->interface_down_cnt = 0; 2392 priv->stats_report_trigger_cnt = 0; 2393 return err; 2394 } 2395 2396 static void gve_write_version(u8 __iomem *driver_version_register) 2397 { 2398 const char *c = gve_version_prefix; 2399 2400 while (*c) { 2401 writeb(*c, driver_version_register); 2402 c++; 2403 } 2404 2405 c = gve_version_str; 2406 while (*c) { 2407 writeb(*c, driver_version_register); 2408 c++; 2409 } 2410 writeb('\n', driver_version_register); 2411 } 2412 2413 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2414 { 2415 struct gve_priv *priv = netdev_priv(dev); 2416 struct gve_rx_ring *gve_per_q_mem; 2417 int err; 2418 2419 if (!priv->rx) 2420 return -EAGAIN; 2421 2422 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2423 if (!gve_is_gqi(priv) && idx == 0) 2424 return -ERANGE; 2425 2426 /* Single-queue destruction requires quiescence on all queues */ 2427 gve_turndown(priv); 2428 2429 /* This failure will trigger a reset - no need to clean up */ 2430 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2431 if (err) 2432 return err; 2433 2434 if (gve_is_qpl(priv)) { 2435 /* This failure will trigger a reset - no need to clean up */ 2436 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2437 if (err) 2438 return err; 2439 } 2440 2441 gve_rx_stop_ring(priv, idx); 2442 2443 /* Turn the unstopped queues back 
up */ 2444 gve_turnup_and_check_status(priv); 2445 2446 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2447 *gve_per_q_mem = priv->rx[idx]; 2448 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2449 return 0; 2450 } 2451 2452 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2453 { 2454 struct gve_priv *priv = netdev_priv(dev); 2455 struct gve_rx_alloc_rings_cfg cfg = {0}; 2456 struct gve_rx_ring *gve_per_q_mem; 2457 2458 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2459 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2460 2461 if (gve_is_gqi(priv)) 2462 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2463 else 2464 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2465 } 2466 2467 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2468 int idx) 2469 { 2470 struct gve_priv *priv = netdev_priv(dev); 2471 struct gve_rx_alloc_rings_cfg cfg = {0}; 2472 struct gve_rx_ring *gve_per_q_mem; 2473 int err; 2474 2475 if (!priv->rx) 2476 return -EAGAIN; 2477 2478 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2479 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2480 2481 if (gve_is_gqi(priv)) 2482 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2483 else 2484 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2485 2486 return err; 2487 } 2488 2489 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2490 { 2491 struct gve_priv *priv = netdev_priv(dev); 2492 struct gve_rx_ring *gve_per_q_mem; 2493 int err; 2494 2495 if (!priv->rx) 2496 return -EAGAIN; 2497 2498 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2499 priv->rx[idx] = *gve_per_q_mem; 2500 2501 /* Single-queue creation requires quiescence on all queues */ 2502 gve_turndown(priv); 2503 2504 gve_rx_start_ring(priv, idx); 2505 2506 if (gve_is_qpl(priv)) { 2507 /* This failure will trigger a reset - no need to clean up */ 2508 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2509 if (err) 2510 goto abort; 2511 } 2512 2513 /* 
This failure will trigger a reset - no need to clean up */ 2514 err = gve_adminq_create_single_rx_queue(priv, idx); 2515 if (err) 2516 goto abort; 2517 2518 if (gve_is_gqi(priv)) 2519 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2520 else 2521 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2522 2523 /* Turn the unstopped queues back up */ 2524 gve_turnup_and_check_status(priv); 2525 return 0; 2526 2527 abort: 2528 gve_rx_stop_ring(priv, idx); 2529 2530 /* All failures in this func result in a reset, by clearing the struct 2531 * at idx, we prevent a double free when that reset runs. The reset, 2532 * which needs the rtnl lock, will not run till this func returns and 2533 * its caller gives up the lock. 2534 */ 2535 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2536 return err; 2537 } 2538 2539 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2540 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2541 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2542 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2543 .ndo_queue_start = gve_rx_queue_start, 2544 .ndo_queue_stop = gve_rx_queue_stop, 2545 }; 2546 2547 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2548 { 2549 int max_tx_queues, max_rx_queues; 2550 struct net_device *dev; 2551 __be32 __iomem *db_bar; 2552 struct gve_registers __iomem *reg_bar; 2553 struct gve_priv *priv; 2554 int err; 2555 2556 err = pci_enable_device(pdev); 2557 if (err) 2558 return err; 2559 2560 err = pci_request_regions(pdev, gve_driver_name); 2561 if (err) 2562 goto abort_with_enabled; 2563 2564 pci_set_master(pdev); 2565 2566 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2567 if (err) { 2568 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2569 goto abort_with_pci_region; 2570 } 2571 2572 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2573 if (!reg_bar) { 2574 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2575 err = -ENOMEM; 2576 goto abort_with_pci_region; 2577 } 2578 
2579 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2580 if (!db_bar) { 2581 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2582 err = -ENOMEM; 2583 goto abort_with_reg_bar; 2584 } 2585 2586 gve_write_version(®_bar->driver_version); 2587 /* Get max queues to alloc etherdev */ 2588 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2589 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2590 /* Alloc and setup the netdev and priv */ 2591 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2592 if (!dev) { 2593 dev_err(&pdev->dev, "could not allocate netdev\n"); 2594 err = -ENOMEM; 2595 goto abort_with_db_bar; 2596 } 2597 SET_NETDEV_DEV(dev, &pdev->dev); 2598 pci_set_drvdata(pdev, dev); 2599 dev->ethtool_ops = &gve_ethtool_ops; 2600 dev->netdev_ops = &gve_netdev_ops; 2601 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2602 2603 /* Set default and supported features. 2604 * 2605 * Features might be set in other locations as well (such as 2606 * `gve_adminq_describe_device`). 
2607 */ 2608 dev->hw_features = NETIF_F_HIGHDMA; 2609 dev->hw_features |= NETIF_F_SG; 2610 dev->hw_features |= NETIF_F_HW_CSUM; 2611 dev->hw_features |= NETIF_F_TSO; 2612 dev->hw_features |= NETIF_F_TSO6; 2613 dev->hw_features |= NETIF_F_TSO_ECN; 2614 dev->hw_features |= NETIF_F_RXCSUM; 2615 dev->hw_features |= NETIF_F_RXHASH; 2616 dev->features = dev->hw_features; 2617 dev->watchdog_timeo = 5 * HZ; 2618 dev->min_mtu = ETH_MIN_MTU; 2619 netif_carrier_off(dev); 2620 2621 priv = netdev_priv(dev); 2622 priv->dev = dev; 2623 priv->pdev = pdev; 2624 priv->msg_enable = DEFAULT_MSG_LEVEL; 2625 priv->reg_bar0 = reg_bar; 2626 priv->db_bar2 = db_bar; 2627 priv->service_task_flags = 0x0; 2628 priv->state_flags = 0x0; 2629 priv->ethtool_flags = 0x0; 2630 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 2631 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2632 2633 gve_set_probe_in_progress(priv); 2634 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2635 if (!priv->gve_wq) { 2636 dev_err(&pdev->dev, "Could not allocate workqueue"); 2637 err = -ENOMEM; 2638 goto abort_with_netdev; 2639 } 2640 INIT_WORK(&priv->service_task, gve_service_task); 2641 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2642 priv->tx_cfg.max_queues = max_tx_queues; 2643 priv->rx_cfg.max_queues = max_rx_queues; 2644 2645 err = gve_init_priv(priv, false); 2646 if (err) 2647 goto abort_with_wq; 2648 2649 err = register_netdev(dev); 2650 if (err) 2651 goto abort_with_gve_init; 2652 2653 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2654 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2655 gve_clear_probe_in_progress(priv); 2656 queue_work(priv->gve_wq, &priv->service_task); 2657 return 0; 2658 2659 abort_with_gve_init: 2660 gve_teardown_priv_resources(priv); 2661 2662 abort_with_wq: 2663 destroy_workqueue(priv->gve_wq); 2664 2665 abort_with_netdev: 2666 free_netdev(dev); 2667 2668 abort_with_db_bar: 2669 pci_iounmap(pdev, db_bar); 2670 2671 
abort_with_reg_bar: 2672 pci_iounmap(pdev, reg_bar); 2673 2674 abort_with_pci_region: 2675 pci_release_regions(pdev); 2676 2677 abort_with_enabled: 2678 pci_disable_device(pdev); 2679 return err; 2680 } 2681 2682 static void gve_remove(struct pci_dev *pdev) 2683 { 2684 struct net_device *netdev = pci_get_drvdata(pdev); 2685 struct gve_priv *priv = netdev_priv(netdev); 2686 __be32 __iomem *db_bar = priv->db_bar2; 2687 void __iomem *reg_bar = priv->reg_bar0; 2688 2689 unregister_netdev(netdev); 2690 gve_teardown_priv_resources(priv); 2691 destroy_workqueue(priv->gve_wq); 2692 free_netdev(netdev); 2693 pci_iounmap(pdev, db_bar); 2694 pci_iounmap(pdev, reg_bar); 2695 pci_release_regions(pdev); 2696 pci_disable_device(pdev); 2697 } 2698 2699 static void gve_shutdown(struct pci_dev *pdev) 2700 { 2701 struct net_device *netdev = pci_get_drvdata(pdev); 2702 struct gve_priv *priv = netdev_priv(netdev); 2703 bool was_up = netif_carrier_ok(priv->dev); 2704 2705 rtnl_lock(); 2706 if (was_up && gve_close(priv->dev)) { 2707 /* If the dev was up, attempt to close, if close fails, reset */ 2708 gve_reset_and_teardown(priv, was_up); 2709 } else { 2710 /* If the dev wasn't up or close worked, finish tearing down */ 2711 gve_teardown_priv_resources(priv); 2712 } 2713 rtnl_unlock(); 2714 } 2715 2716 #ifdef CONFIG_PM 2717 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2718 { 2719 struct net_device *netdev = pci_get_drvdata(pdev); 2720 struct gve_priv *priv = netdev_priv(netdev); 2721 bool was_up = netif_carrier_ok(priv->dev); 2722 2723 priv->suspend_cnt++; 2724 rtnl_lock(); 2725 if (was_up && gve_close(priv->dev)) { 2726 /* If the dev was up, attempt to close, if close fails, reset */ 2727 gve_reset_and_teardown(priv, was_up); 2728 } else { 2729 /* If the dev wasn't up or close worked, finish tearing down */ 2730 gve_teardown_priv_resources(priv); 2731 } 2732 priv->up_before_suspend = was_up; 2733 rtnl_unlock(); 2734 return 0; 2735 } 2736 2737 static int 
gve_resume(struct pci_dev *pdev) 2738 { 2739 struct net_device *netdev = pci_get_drvdata(pdev); 2740 struct gve_priv *priv = netdev_priv(netdev); 2741 int err; 2742 2743 priv->resume_cnt++; 2744 rtnl_lock(); 2745 err = gve_reset_recovery(priv, priv->up_before_suspend); 2746 rtnl_unlock(); 2747 return err; 2748 } 2749 #endif /* CONFIG_PM */ 2750 2751 static const struct pci_device_id gve_id_table[] = { 2752 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2753 { } 2754 }; 2755 2756 static struct pci_driver gve_driver = { 2757 .name = gve_driver_name, 2758 .id_table = gve_id_table, 2759 .probe = gve_probe, 2760 .remove = gve_remove, 2761 .shutdown = gve_shutdown, 2762 #ifdef CONFIG_PM 2763 .suspend = gve_suspend, 2764 .resume = gve_resume, 2765 #endif 2766 }; 2767 2768 module_pci_driver(gve_driver); 2769 2770 MODULE_DEVICE_TABLE(pci, gve_id_table); 2771 MODULE_AUTHOR("Google, Inc."); 2772 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2773 MODULE_LICENSE("Dual MIT/GPL"); 2774 MODULE_VERSION(GVE_VERSION); 2775