// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

/* Minimum amount of time between queue kicks in msec (10 seconds) */
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes =
priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 188 { 189 struct gve_rss_config *rss_config = &priv->rss_config; 190 191 if (!priv->cache_rss_config) 192 return 0; 193 194 rss_config->hash_key = kcalloc(priv->rss_key_size, 195 sizeof(rss_config->hash_key[0]), 196 GFP_KERNEL); 197 if (!rss_config->hash_key) 198 return -ENOMEM; 199 200 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 201 sizeof(rss_config->hash_lut[0]), 202 GFP_KERNEL); 203 if (!rss_config->hash_lut) 204 goto free_rss_key_cache; 205 206 return 0; 207 208 free_rss_key_cache: 209 kfree(rss_config->hash_key); 210 rss_config->hash_key = NULL; 211 return -ENOMEM; 212 } 213 214 static void gve_free_rss_config_cache(struct gve_priv *priv) 215 { 216 struct gve_rss_config *rss_config = &priv->rss_config; 217 218 kfree(rss_config->hash_key); 219 kfree(rss_config->hash_lut); 220 221 memset(rss_config, 0, sizeof(*rss_config)); 222 } 223 224 static int gve_alloc_counter_array(struct gve_priv *priv) 225 { 226 priv->counter_array = 227 dma_alloc_coherent(&priv->pdev->dev, 228 priv->num_event_counters * 229 sizeof(*priv->counter_array), 230 &priv->counter_array_bus, GFP_KERNEL); 231 if (!priv->counter_array) 232 return -ENOMEM; 233 234 return 0; 235 } 236 237 static void gve_free_counter_array(struct gve_priv *priv) 238 { 239 if (!priv->counter_array) 240 return; 241 242 dma_free_coherent(&priv->pdev->dev, 243 priv->num_event_counters * 244 sizeof(*priv->counter_array), 245 priv->counter_array, priv->counter_array_bus); 246 priv->counter_array = NULL; 247 } 248 249 /* NIC requests to report stats */ 250 static void gve_stats_report_task(struct work_struct *work) 251 { 252 struct gve_priv *priv = container_of(work, struct gve_priv, 
253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = timer_container_of(priv, t, 272 stats_report_timer); 273 274 mod_timer(&priv->stats_report_timer, 275 round_jiffies(jiffies + 276 msecs_to_jiffies(priv->stats_report_timer_period))); 277 gve_stats_report_schedule(priv); 278 } 279 280 static int gve_alloc_stats_report(struct gve_priv *priv) 281 { 282 int tx_stats_num, rx_stats_num; 283 284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 285 gve_num_tx_queues(priv); 286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 287 priv->rx_cfg.num_queues; 288 priv->stats_report_len = struct_size(priv->stats_report, stats, 289 size_add(tx_stats_num, rx_stats_num)); 290 priv->stats_report = 291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 292 &priv->stats_report_bus, GFP_KERNEL); 293 if (!priv->stats_report) 294 return -ENOMEM; 295 /* Set up timer for the report-stats task */ 296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 298 return 0; 299 } 300 301 static void gve_free_stats_report(struct gve_priv *priv) 302 { 303 if (!priv->stats_report) 304 return; 305 306 timer_delete_sync(&priv->stats_report_timer); 307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 308 priv->stats_report, priv->stats_report_bus); 309 priv->stats_report = NULL; 310 } 311 312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 313 { 314 struct gve_priv *priv = arg; 315 316 queue_work(priv->gve_wq, &priv->service_task); 317 return IRQ_HANDLED; 318 } 319 320 static irqreturn_t gve_intr(int irq, void *arg) 321 { 322 struct gve_notify_block *block = arg; 323 struct gve_priv *priv = block->priv; 324 325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 326 napi_schedule_irqoff(&block->napi); 327 return IRQ_HANDLED; 328 } 329 330 static irqreturn_t gve_intr_dqo(int irq, void *arg) 331 { 332 struct gve_notify_block *block = arg; 333 334 /* Interrupts are automatically masked */ 335 napi_schedule_irqoff(&block->napi); 336 return IRQ_HANDLED; 337 } 338 339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 340 { 341 int cpu_curr = smp_processor_id(); 342 const struct cpumask *aff_mask; 343 344 aff_mask = irq_get_effective_affinity_mask(irq); 345 if (unlikely(!aff_mask)) 346 return 1; 347 348 return cpumask_test_cpu(cpu_curr, aff_mask); 349 } 350 351 int gve_napi_poll(struct napi_struct *napi, int budget) 352 { 353 struct gve_notify_block *block; 354 __be32 __iomem *irq_doorbell; 355 bool reschedule = false; 356 struct gve_priv *priv; 357 int work_done = 0; 358 359 block = container_of(napi, struct gve_notify_block, napi); 360 priv = block->priv; 361 362 if (block->tx) { 363 if (block->tx->q_num < priv->tx_cfg.num_queues) 364 reschedule |= gve_tx_poll(block, budget); 365 else if (budget) 366 reschedule |= gve_xdp_poll(block, budget); 367 } 368 369 if (!budget) 370 return 0; 371 372 if (block->rx) { 373 work_done = gve_rx_poll(block, budget); 374 375 /* Poll XSK TX as part of RX NAPI. 
Setup re-poll based on max of 376 * TX and RX work done. 377 */ 378 if (priv->xdp_prog) 379 work_done = max_t(int, work_done, 380 gve_xsk_tx_poll(block, budget)); 381 382 reschedule |= work_done == budget; 383 } 384 385 if (reschedule) 386 return budget; 387 388 /* Complete processing - don't unmask irq if busy polling is enabled */ 389 if (likely(napi_complete_done(napi, work_done))) { 390 irq_doorbell = gve_irq_doorbell(priv, block); 391 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 392 393 /* Ensure IRQ ACK is visible before we check pending work. 394 * If queue had issued updates, it would be truly visible. 395 */ 396 mb(); 397 398 if (block->tx) 399 reschedule |= gve_tx_clean_pending(priv, block->tx); 400 if (block->rx) 401 reschedule |= gve_rx_work_pending(block->rx); 402 403 if (reschedule && napi_schedule(napi)) 404 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 405 } 406 return work_done; 407 } 408 409 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 410 { 411 struct gve_notify_block *block = 412 container_of(napi, struct gve_notify_block, napi); 413 struct gve_priv *priv = block->priv; 414 bool reschedule = false; 415 int work_done = 0; 416 417 if (block->tx) { 418 if (block->tx->q_num < priv->tx_cfg.num_queues) 419 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 420 else 421 reschedule |= gve_xdp_poll_dqo(block); 422 } 423 424 if (!budget) 425 return 0; 426 427 if (block->rx) { 428 work_done = gve_rx_poll_dqo(block, budget); 429 reschedule |= work_done == budget; 430 } 431 432 if (reschedule) { 433 /* Reschedule by returning budget only if already on the correct 434 * cpu. 435 */ 436 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 437 return budget; 438 439 /* If not on the cpu with which this queue's irq has affinity 440 * with, we avoid rescheduling napi and arm the irq instead so 441 * that napi gets rescheduled back eventually onto the right 442 * cpu. 443 */ 444 if (work_done == budget) 445 work_done--; 446 } 447 448 if (likely(napi_complete_done(napi, work_done))) { 449 /* Enable interrupts again. 450 * 451 * We don't need to repoll afterwards because HW supports the 452 * PCI MSI-X PBA feature. 453 * 454 * Another interrupt would be triggered if a new event came in 455 * since the last one. 
456 */ 457 gve_write_irq_doorbell_dqo(priv, block, 458 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 459 } 460 461 return work_done; 462 } 463 464 static int gve_alloc_notify_blocks(struct gve_priv *priv) 465 { 466 int num_vecs_requested = priv->num_ntfy_blks + 1; 467 unsigned int active_cpus; 468 int vecs_enabled; 469 int i, j; 470 int err; 471 472 priv->msix_vectors = kvcalloc(num_vecs_requested, 473 sizeof(*priv->msix_vectors), GFP_KERNEL); 474 if (!priv->msix_vectors) 475 return -ENOMEM; 476 for (i = 0; i < num_vecs_requested; i++) 477 priv->msix_vectors[i].entry = i; 478 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 479 GVE_MIN_MSIX, num_vecs_requested); 480 if (vecs_enabled < 0) { 481 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 482 GVE_MIN_MSIX, vecs_enabled); 483 err = vecs_enabled; 484 goto abort_with_msix_vectors; 485 } 486 if (vecs_enabled != num_vecs_requested) { 487 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 488 int vecs_per_type = new_num_ntfy_blks / 2; 489 int vecs_left = new_num_ntfy_blks % 2; 490 491 priv->num_ntfy_blks = new_num_ntfy_blks; 492 priv->mgmt_msix_idx = priv->num_ntfy_blks; 493 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 494 vecs_per_type); 495 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 496 vecs_per_type + vecs_left); 497 dev_err(&priv->pdev->dev, 498 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 499 vecs_enabled, priv->tx_cfg.max_queues, 500 priv->rx_cfg.max_queues); 501 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 502 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 503 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 504 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 505 } 506 /* Half the notification blocks go to TX and half to RX */ 507 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 508 509 /* Setup Management Vector - the last vector */ 510 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 511 pci_name(priv->pdev)); 512 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 513 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 514 if (err) { 515 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 516 goto abort_with_msix_enabled; 517 } 518 priv->irq_db_indices = 519 dma_alloc_coherent(&priv->pdev->dev, 520 priv->num_ntfy_blks * 521 sizeof(*priv->irq_db_indices), 522 &priv->irq_db_indices_bus, GFP_KERNEL); 523 if (!priv->irq_db_indices) { 524 err = -ENOMEM; 525 goto abort_with_mgmt_vector; 526 } 527 528 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 529 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 530 if (!priv->ntfy_blocks) { 531 err = -ENOMEM; 532 goto abort_with_irq_db_indices; 533 } 534 535 /* Setup the other blocks - the first n-1 vectors */ 536 for (i = 0; i < priv->num_ntfy_blks; i++) { 537 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 538 int msix_idx = i; 539 540 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 541 i, pci_name(priv->pdev)); 542 block->priv = priv; 543 err = request_irq(priv->msix_vectors[msix_idx].vector, 544 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 545 0, block->name, block); 546 if (err) { 547 dev_err(&priv->pdev->dev, 548 "Failed to receive msix vector %d\n", i); 549 goto abort_with_some_ntfy_blocks; 550 } 551 block->irq = priv->msix_vectors[msix_idx].vector; 552 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 553 get_cpu_mask(i % active_cpus)); 554 block->irq_db_index = &priv->irq_db_indices[i].index; 555 } 556 return 0; 557 abort_with_some_ntfy_blocks: 558 for (j = 0; j < i; j++) { 559 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 560 int msix_idx = j; 561 562 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 563 NULL); 564 free_irq(priv->msix_vectors[msix_idx].vector, block); 565 block->irq = 0; 566 } 567 kvfree(priv->ntfy_blocks); 568 priv->ntfy_blocks = NULL; 569 abort_with_irq_db_indices: 570 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 571 sizeof(*priv->irq_db_indices), 572 priv->irq_db_indices, priv->irq_db_indices_bus); 573 priv->irq_db_indices = NULL; 574 abort_with_mgmt_vector: 575 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 576 abort_with_msix_enabled: 577 pci_disable_msix(priv->pdev); 578 abort_with_msix_vectors: 579 kvfree(priv->msix_vectors); 580 priv->msix_vectors = NULL; 581 return err; 582 } 583 584 static void gve_free_notify_blocks(struct gve_priv *priv) 585 { 586 int i; 587 588 if (!priv->msix_vectors) 589 return; 590 591 /* Free the irqs */ 592 for (i = 0; i < priv->num_ntfy_blks; i++) { 593 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 594 int msix_idx = i; 595 596 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 597 NULL); 598 free_irq(priv->msix_vectors[msix_idx].vector, block); 599 block->irq = 0; 600 } 601 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 602 kvfree(priv->ntfy_blocks); 603 priv->ntfy_blocks = NULL; 604 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 605 sizeof(*priv->irq_db_indices), 606 priv->irq_db_indices, priv->irq_db_indices_bus); 607 priv->irq_db_indices = NULL; 608 pci_disable_msix(priv->pdev); 609 kvfree(priv->msix_vectors); 610 priv->msix_vectors = NULL; 611 } 612 613 static int gve_setup_device_resources(struct gve_priv *priv) 614 { 615 int err; 616 617 err = gve_alloc_flow_rule_caches(priv); 618 if (err) 619 return err; 620 err = gve_alloc_rss_config_cache(priv); 621 if (err) 622 goto abort_with_flow_rule_caches; 623 err = gve_alloc_counter_array(priv); 624 if (err) 625 goto abort_with_rss_config_cache; 626 err = gve_init_clock(priv); 627 if (err) 628 goto abort_with_counter; 629 err = gve_alloc_notify_blocks(priv); 630 if (err) 631 goto abort_with_clock; 632 err = gve_alloc_stats_report(priv); 633 if (err) 634 goto abort_with_ntfy_blocks; 635 err = gve_adminq_configure_device_resources(priv, 636 priv->counter_array_bus, 637 priv->num_event_counters, 638 priv->irq_db_indices_bus, 639 priv->num_ntfy_blks); 640 if (unlikely(err)) { 641 dev_err(&priv->pdev->dev, 642 "could not setup device_resources: err=%d\n", err); 643 err = -ENXIO; 644 goto abort_with_stats_report; 645 } 646 647 if (!gve_is_gqi(priv)) { 648 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 649 GFP_KERNEL); 650 if (!priv->ptype_lut_dqo) { 651 err = -ENOMEM; 652 goto abort_with_stats_report; 653 } 654 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 655 if (err) { 656 dev_err(&priv->pdev->dev, 657 "Failed to get ptype map: err=%d\n", err); 658 goto abort_with_ptype_lut; 659 } 660 } 661 662 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 663 if (err) 
{ 664 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 665 goto abort_with_ptype_lut; 666 } 667 668 err = gve_adminq_report_stats(priv, priv->stats_report_len, 669 priv->stats_report_bus, 670 GVE_STATS_REPORT_TIMER_PERIOD); 671 if (err) 672 dev_err(&priv->pdev->dev, 673 "Failed to report stats: err=%d\n", err); 674 gve_set_device_resources_ok(priv); 675 return 0; 676 677 abort_with_ptype_lut: 678 kvfree(priv->ptype_lut_dqo); 679 priv->ptype_lut_dqo = NULL; 680 abort_with_stats_report: 681 gve_free_stats_report(priv); 682 abort_with_ntfy_blocks: 683 gve_free_notify_blocks(priv); 684 abort_with_clock: 685 gve_teardown_clock(priv); 686 abort_with_counter: 687 gve_free_counter_array(priv); 688 abort_with_rss_config_cache: 689 gve_free_rss_config_cache(priv); 690 abort_with_flow_rule_caches: 691 gve_free_flow_rule_caches(priv); 692 693 return err; 694 } 695 696 static void gve_trigger_reset(struct gve_priv *priv); 697 698 static void gve_teardown_device_resources(struct gve_priv *priv) 699 { 700 int err; 701 702 /* Tell device its resources are being freed */ 703 if (gve_get_device_resources_ok(priv)) { 704 err = gve_flow_rules_reset(priv); 705 if (err) { 706 dev_err(&priv->pdev->dev, 707 "Failed to reset flow rules: err=%d\n", err); 708 gve_trigger_reset(priv); 709 } 710 /* detach the stats report */ 711 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 712 if (err) { 713 dev_err(&priv->pdev->dev, 714 "Failed to detach stats report: err=%d\n", err); 715 gve_trigger_reset(priv); 716 } 717 err = gve_adminq_deconfigure_device_resources(priv); 718 if (err) { 719 dev_err(&priv->pdev->dev, 720 "Could not deconfigure device resources: err=%d\n", 721 err); 722 gve_trigger_reset(priv); 723 } 724 } 725 726 kvfree(priv->ptype_lut_dqo); 727 priv->ptype_lut_dqo = NULL; 728 729 gve_free_flow_rule_caches(priv); 730 gve_free_rss_config_cache(priv); 731 gve_free_counter_array(priv); 732 gve_free_notify_blocks(priv); 733 gve_free_stats_report(priv); 734 gve_teardown_clock(priv); 735 gve_clear_device_resources_ok(priv); 736 } 737 738 static int gve_unregister_qpl(struct gve_priv *priv, 739 struct gve_queue_page_list *qpl) 740 { 741 int err; 742 743 if (!qpl) 744 return 0; 745 746 err = gve_adminq_unregister_page_list(priv, qpl->id); 747 if (err) { 748 netif_err(priv, drv, priv->dev, 749 "Failed to unregister queue page list %d\n", 750 qpl->id); 751 return err; 752 } 753 754 priv->num_registered_pages -= qpl->num_entries; 755 return 0; 756 } 757 758 static int gve_register_qpl(struct gve_priv *priv, 759 struct gve_queue_page_list *qpl) 760 { 761 int pages; 762 int err; 763 764 if (!qpl) 765 return 0; 766 767 pages = qpl->num_entries; 768 769 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 770 netif_err(priv, drv, priv->dev, 771 "Reached max number of registered pages %llu > %llu\n", 772 pages + priv->num_registered_pages, 773 priv->max_registered_pages); 774 return -EINVAL; 775 } 776 777 err = gve_adminq_register_page_list(priv, qpl); 778 if (err) { 779 netif_err(priv, drv, priv->dev, 780 "failed to register queue page list %d\n", 781 qpl->id); 782 return err; 783 } 784 785 priv->num_registered_pages += pages; 786 return 0; 787 } 788 789 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 790 { 791 struct gve_tx_ring *tx = &priv->tx[idx]; 792 793 if (gve_is_gqi(priv)) 794 return tx->tx_fifo.qpl; 795 else 796 return tx->dqo.qpl; 797 } 798 799 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 800 { 
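	/* The RX queue page list lives in a format-specific member:
	 * rx->data.qpl for GQI and rx->dqo.qpl for DQO.
	 */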
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (gve_is_gqi(priv))
		return rx->data.qpl;
	else
		return rx->dqo.qpl;
}

static int gve_register_qpls(struct gve_priv *priv)
{
	int num_tx_qpls, num_rx_qpls;
	int err;
	int i;

	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));

	for (i = 0; i < num_tx_qpls; i++) {
		err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
		if (err)
			return err;
	}

	for (i = 0; i < num_rx_qpls; i++) {
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
		if (err)
			return err;
	}

	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_tx_qpls, num_rx_qpls;
	int err;
	int i;

	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));

	for (i = 0; i < num_tx_qpls; i++) {
		err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
		/* This failure will trigger a reset - no need to clean */
		if (err)
			return err;
	}

	for (i = 0; i < num_rx_qpls; i++) {
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
		/* This failure will trigger a reset - no need to clean */
		if (err)
			return err;
	}
	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void init_xdp_sync_stats(struct gve_priv *priv)
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Init stats */
	for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
	}
}

static void gve_init_sync_stats(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		u64_stats_init(&priv->tx[i].statss);

	/* Init stats for XDP TX queues */
	init_xdp_sync_stats(priv);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		u64_stats_init(&priv->rx[i].statss);
}

static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_tx_alloc_rings_cfg *cfg)
{
	cfg->qcfg = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->ring_size = priv->tx_desc_cnt;
	cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
	cfg->tx = priv->tx;
}

static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	if (!priv->tx)
		return;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_stop_ring_gqi(priv, i);
		else
			gve_tx_stop_ring_dqo(priv, i);
	}
}

static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_start_ring_gqi(priv, i);
		else
			gve_tx_start_ring_dqo(priv, i);
	}
}

static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
	else
		err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (err)
		return err;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
	else
		err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (err)
		goto free_tx;

	return 0;

free_tx:
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
	return err;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_queues_mem_free(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_cfg,
				struct gve_rx_alloc_rings_cfg *rx_cfg)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, tx_cfg);
		gve_rx_free_rings_gqi(priv, rx_cfg);
	} else {
		gve_tx_free_rings_dqo(priv, tx_cfg);
		gve_rx_free_rings_dqo(priv, rx_cfg);
	}
}
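
/* gve_alloc_page() - allocate a page and DMA-map it for device use.
 *
 * On success the page and its bus address are returned through @page and
 * @dma; on mapping failure the page is released before returning, so
 * callers have a single error path. A minimal pairing sketch (illustrative
 * only; the DMA direction and error handling depend on the caller):
 *
 *	struct page *page;
 *	dma_addr_t dma;
 *
 *	if (!gve_alloc_page(priv, &priv->pdev->dev, &page, &dma,
 *			    DMA_FROM_DEVICE, GFP_KERNEL))
 *		gve_free_page(&priv->pdev->dev, page, dma, DMA_FROM_DEVICE);
 */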
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
						      u32 id, int pages)
{
	struct gve_queue_page_list *qpl;
	int err;
	int i;

	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
	if (!qpl)
		return NULL;

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	if (!qpl->pages)
		goto abort;

	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	if (!qpl->page_buses)
		goto abort;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		if (err)
			goto abort;
		qpl->num_entries++;
	}

	return qpl;

abort:
	gve_free_queue_page_list(priv, qpl, id);
	return NULL;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

void gve_free_queue_page_list(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl,
			      u32 id)
{
	int i;

	if (!qpl)
		return;
	if (!qpl->pages)
		goto free_qpl;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
free_qpl:
	kvfree(qpl);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->tx_cfg.num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		if (gve_is_qpl(priv))
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
							 NULL);
		else
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_POOL,
							 rx->dqo.page_pool);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		page_frag_cache_drain(&priv->rx[i].page_cache);
}

static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_rx_alloc_rings_cfg *cfg)
{
	cfg->qcfg_rx = &priv->rx_cfg;
	cfg->qcfg_tx = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->enable_header_split = priv->header_split_enabled;
	cfg->ring_size = priv->rx_desc_cnt;
	cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
	cfg->rx = priv->rx;
	cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
}

void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}
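
/* The ring start/stop helpers below simply dispatch to the GQI or DQO
 * implementation that matches the device's queue format.
 */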
static void gve_rx_start_ring(struct gve_priv *priv, int i) 1258 { 1259 if (gve_is_gqi(priv)) 1260 gve_rx_start_ring_gqi(priv, i); 1261 else 1262 gve_rx_start_ring_dqo(priv, i); 1263 } 1264 1265 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1266 { 1267 int i; 1268 1269 for (i = 0; i < num_rings; i++) 1270 gve_rx_start_ring(priv, i); 1271 } 1272 1273 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1274 { 1275 if (gve_is_gqi(priv)) 1276 gve_rx_stop_ring_gqi(priv, i); 1277 else 1278 gve_rx_stop_ring_dqo(priv, i); 1279 } 1280 1281 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1282 { 1283 int i; 1284 1285 if (!priv->rx) 1286 return; 1287 1288 for (i = 0; i < num_rings; i++) 1289 gve_rx_stop_ring(priv, i); 1290 } 1291 1292 static void gve_queues_mem_remove(struct gve_priv *priv) 1293 { 1294 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1295 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1296 1297 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1298 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1299 priv->tx = NULL; 1300 priv->rx = NULL; 1301 } 1302 1303 /* The passed-in queue memory is stored into priv and the queues are made live. 1304 * No memory is allocated. Passed-in memory is freed on errors. 1305 */ 1306 static int gve_queues_start(struct gve_priv *priv, 1307 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1308 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1309 { 1310 struct net_device *dev = priv->dev; 1311 int err; 1312 1313 /* Record new resources into priv */ 1314 priv->tx = tx_alloc_cfg->tx; 1315 priv->rx = rx_alloc_cfg->rx; 1316 1317 /* Record new configs into priv */ 1318 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1319 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1320 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1321 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1322 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1323 1324 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1325 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1326 gve_init_sync_stats(priv); 1327 1328 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1329 if (err) 1330 goto stop_and_free_rings; 1331 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1332 if (err) 1333 goto stop_and_free_rings; 1334 1335 err = gve_reg_xdp_info(priv, dev); 1336 if (err) 1337 goto stop_and_free_rings; 1338 1339 if (rx_alloc_cfg->reset_rss) { 1340 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1341 if (err) 1342 goto reset; 1343 } 1344 1345 err = gve_register_qpls(priv); 1346 if (err) 1347 goto reset; 1348 1349 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1350 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1351 1352 err = gve_create_rings(priv); 1353 if (err) 1354 goto reset; 1355 1356 gve_set_device_rings_ok(priv); 1357 1358 if (gve_get_report_stats(priv)) 1359 mod_timer(&priv->stats_report_timer, 1360 round_jiffies(jiffies + 1361 msecs_to_jiffies(priv->stats_report_timer_period))); 1362 1363 gve_turnup(priv); 1364 queue_work(priv->gve_wq, &priv->service_task); 1365 priv->interface_up_cnt++; 1366 return 0; 1367 1368 reset: 1369 if (gve_get_reset_in_progress(priv)) 1370 goto stop_and_free_rings; 1371 gve_reset_and_teardown(priv, true); 1372 /* if this fails there is nothing we can do so just ignore the return */ 1373 gve_reset_recovery(priv, false); 1374 /* return the original error */ 1375 return err; 1376 stop_and_free_rings: 1377 gve_tx_stop_rings(priv, 
gve_num_tx_queues(priv)); 1378 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1379 gve_queues_mem_remove(priv); 1380 return err; 1381 } 1382 1383 static int gve_open(struct net_device *dev) 1384 { 1385 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1386 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1387 struct gve_priv *priv = netdev_priv(dev); 1388 int err; 1389 1390 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1391 1392 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1393 if (err) 1394 return err; 1395 1396 /* No need to free on error: ownership of resources is lost after 1397 * calling gve_queues_start. 1398 */ 1399 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1400 if (err) 1401 return err; 1402 1403 return 0; 1404 } 1405 1406 static int gve_queues_stop(struct gve_priv *priv) 1407 { 1408 int err; 1409 1410 netif_carrier_off(priv->dev); 1411 if (gve_get_device_rings_ok(priv)) { 1412 gve_turndown(priv); 1413 gve_drain_page_cache(priv); 1414 err = gve_destroy_rings(priv); 1415 if (err) 1416 goto err; 1417 err = gve_unregister_qpls(priv); 1418 if (err) 1419 goto err; 1420 gve_clear_device_rings_ok(priv); 1421 } 1422 timer_delete_sync(&priv->stats_report_timer); 1423 1424 gve_unreg_xdp_info(priv); 1425 1426 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1427 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1428 1429 priv->interface_down_cnt++; 1430 return 0; 1431 1432 err: 1433 /* This must have been called from a reset due to the rtnl lock 1434 * so just return at this point. 1435 */ 1436 if (gve_get_reset_in_progress(priv)) 1437 return err; 1438 /* Otherwise reset before returning */ 1439 gve_reset_and_teardown(priv, true); 1440 return gve_reset_recovery(priv, false); 1441 } 1442 1443 static int gve_close(struct net_device *dev) 1444 { 1445 struct gve_priv *priv = netdev_priv(dev); 1446 int err; 1447 1448 err = gve_queues_stop(priv); 1449 if (err) 1450 return err; 1451 1452 gve_queues_mem_remove(priv); 1453 return 0; 1454 } 1455 1456 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1457 { 1458 if (!gve_get_napi_enabled(priv)) 1459 return; 1460 1461 if (link_status == netif_carrier_ok(priv->dev)) 1462 return; 1463 1464 if (link_status) { 1465 netdev_info(priv->dev, "Device link is up.\n"); 1466 netif_carrier_on(priv->dev); 1467 } else { 1468 netdev_info(priv->dev, "Device link is down.\n"); 1469 netif_carrier_off(priv->dev); 1470 } 1471 } 1472 1473 static int gve_configure_rings_xdp(struct gve_priv *priv, 1474 u16 num_xdp_rings) 1475 { 1476 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1477 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1478 1479 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1480 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1481 1482 rx_alloc_cfg.xdp = !!num_xdp_rings; 1483 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1484 } 1485 1486 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1487 struct netlink_ext_ack *extack) 1488 { 1489 struct bpf_prog *old_prog; 1490 int err = 0; 1491 u32 status; 1492 1493 old_prog = READ_ONCE(priv->xdp_prog); 1494 if (!netif_running(priv->dev)) { 1495 WRITE_ONCE(priv->xdp_prog, prog); 1496 if (old_prog) 1497 bpf_prog_put(old_prog); 1498 1499 /* Update priv XDP queue configuration */ 1500 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
1501 priv->rx_cfg.num_queues : 0; 1502 return 0; 1503 } 1504 1505 if (!old_prog && prog) 1506 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1507 else if (old_prog && !prog) 1508 err = gve_configure_rings_xdp(priv, 0); 1509 1510 if (err) 1511 goto out; 1512 1513 WRITE_ONCE(priv->xdp_prog, prog); 1514 if (old_prog) 1515 bpf_prog_put(old_prog); 1516 1517 out: 1518 status = ioread32be(&priv->reg_bar0->device_status); 1519 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1520 return err; 1521 } 1522 1523 static int gve_xdp_xmit(struct net_device *dev, int n, 1524 struct xdp_frame **frames, u32 flags) 1525 { 1526 struct gve_priv *priv = netdev_priv(dev); 1527 1528 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1529 return gve_xdp_xmit_gqi(dev, n, frames, flags); 1530 else if (priv->queue_format == GVE_DQO_RDA_FORMAT) 1531 return gve_xdp_xmit_dqo(dev, n, frames, flags); 1532 1533 return -EOPNOTSUPP; 1534 } 1535 1536 static int gve_xsk_pool_enable(struct net_device *dev, 1537 struct xsk_buff_pool *pool, 1538 u16 qid) 1539 { 1540 struct gve_priv *priv = netdev_priv(dev); 1541 struct napi_struct *napi; 1542 struct gve_rx_ring *rx; 1543 int tx_qid; 1544 int err; 1545 1546 if (qid >= priv->rx_cfg.num_queues) { 1547 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1548 return -EINVAL; 1549 } 1550 if (xsk_pool_get_rx_frame_size(pool) < 1551 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1552 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1553 return -EINVAL; 1554 } 1555 1556 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1557 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1558 if (err) 1559 return err; 1560 1561 /* If XDP prog is not installed or interface is down, return. */ 1562 if (!priv->xdp_prog || !netif_running(dev)) 1563 return 0; 1564 1565 rx = &priv->rx[qid]; 1566 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1567 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1568 if (err) 1569 goto err; 1570 1571 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1572 MEM_TYPE_XSK_BUFF_POOL, NULL); 1573 if (err) 1574 goto err; 1575 1576 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1577 rx->xsk_pool = pool; 1578 1579 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1580 priv->tx[tx_qid].xsk_pool = pool; 1581 1582 return 0; 1583 err: 1584 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1585 xdp_rxq_info_unreg(&rx->xsk_rxq); 1586 1587 xsk_pool_dma_unmap(pool, 1588 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1589 return err; 1590 } 1591 1592 static int gve_xsk_pool_disable(struct net_device *dev, 1593 u16 qid) 1594 { 1595 struct gve_priv *priv = netdev_priv(dev); 1596 struct napi_struct *napi_rx; 1597 struct napi_struct *napi_tx; 1598 struct xsk_buff_pool *pool; 1599 int tx_qid; 1600 1601 pool = xsk_get_pool_from_qid(dev, qid); 1602 if (!pool) 1603 return -EINVAL; 1604 if (qid >= priv->rx_cfg.num_queues) 1605 return -EINVAL; 1606 1607 /* If XDP prog is not installed or interface is down, unmap DMA and 1608 * return. 
1609 */ 1610 if (!priv->xdp_prog || !netif_running(dev)) 1611 goto done; 1612 1613 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1614 napi_disable(napi_rx); /* make sure current rx poll is done */ 1615 1616 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1617 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1618 napi_disable(napi_tx); /* make sure current tx poll is done */ 1619 1620 priv->rx[qid].xsk_pool = NULL; 1621 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1622 priv->tx[tx_qid].xsk_pool = NULL; 1623 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1624 1625 napi_enable(napi_rx); 1626 if (gve_rx_work_pending(&priv->rx[qid])) 1627 napi_schedule(napi_rx); 1628 1629 napi_enable(napi_tx); 1630 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1631 napi_schedule(napi_tx); 1632 1633 done: 1634 xsk_pool_dma_unmap(pool, 1635 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1636 return 0; 1637 } 1638 1639 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1640 { 1641 struct gve_priv *priv = netdev_priv(dev); 1642 struct napi_struct *napi; 1643 1644 if (!gve_get_napi_enabled(priv)) 1645 return -ENETDOWN; 1646 1647 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1648 return -EINVAL; 1649 1650 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1651 if (!napi_if_scheduled_mark_missed(napi)) { 1652 /* Call local_bh_enable to trigger SoftIRQ processing */ 1653 local_bh_disable(); 1654 napi_schedule(napi); 1655 local_bh_enable(); 1656 } 1657 1658 return 0; 1659 } 1660 1661 static int verify_xdp_configuration(struct net_device *dev) 1662 { 1663 struct gve_priv *priv = netdev_priv(dev); 1664 u16 max_xdp_mtu; 1665 1666 if (dev->features & NETIF_F_LRO) { 1667 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1668 return -EOPNOTSUPP; 1669 } 1670 1671 if (priv->header_split_enabled) { 1672 netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); 1673 return -EOPNOTSUPP; 1674 } 1675 1676 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1677 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1678 max_xdp_mtu -= GVE_RX_PAD; 1679 1680 if (dev->mtu > max_xdp_mtu) { 1681 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1682 dev->mtu); 1683 return -EOPNOTSUPP; 1684 } 1685 1686 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1687 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1688 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1689 priv->rx_cfg.num_queues, 1690 priv->tx_cfg.num_queues, 1691 priv->tx_cfg.max_queues); 1692 return -EINVAL; 1693 } 1694 return 0; 1695 } 1696 1697 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1698 { 1699 struct gve_priv *priv = netdev_priv(dev); 1700 int err; 1701 1702 err = verify_xdp_configuration(dev); 1703 if (err) 1704 return err; 1705 switch (xdp->command) { 1706 case XDP_SETUP_PROG: 1707 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1708 case XDP_SETUP_XSK_POOL: 1709 if (xdp->xsk.pool) 1710 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1711 else 1712 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1713 default: 1714 return -EINVAL; 1715 } 1716 } 1717 1718 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1719 { 1720 struct gve_rss_config *rss_config = 
&priv->rss_config; 1721 struct ethtool_rxfh_param rxfh = {0}; 1722 u16 i; 1723 1724 if (!priv->cache_rss_config) 1725 return 0; 1726 1727 for (i = 0; i < priv->rss_lut_size; i++) 1728 rss_config->hash_lut[i] = 1729 ethtool_rxfh_indir_default(i, num_queues); 1730 1731 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1732 1733 rxfh.hfunc = ETH_RSS_HASH_TOP; 1734 1735 return gve_adminq_configure_rss(priv, &rxfh); 1736 } 1737 1738 int gve_flow_rules_reset(struct gve_priv *priv) 1739 { 1740 if (!priv->max_flow_rules) 1741 return 0; 1742 1743 return gve_adminq_reset_flow_rules(priv); 1744 } 1745 1746 int gve_adjust_config(struct gve_priv *priv, 1747 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1748 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1749 { 1750 int err; 1751 1752 /* Allocate resources for the new configuration */ 1753 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1754 if (err) { 1755 netif_err(priv, drv, priv->dev, 1756 "Adjust config failed to alloc new queues"); 1757 return err; 1758 } 1759 1760 /* Teardown the device and free existing resources */ 1761 err = gve_close(priv->dev); 1762 if (err) { 1763 netif_err(priv, drv, priv->dev, 1764 "Adjust config failed to close old queues"); 1765 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1766 return err; 1767 } 1768 1769 /* Bring the device back up again with the new resources. */ 1770 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1771 if (err) { 1772 netif_err(priv, drv, priv->dev, 1773 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1774 /* No need to free on error: ownership of resources is lost after 1775 * calling gve_queues_start. 1776 */ 1777 gve_turndown(priv); 1778 return err; 1779 } 1780 1781 return 0; 1782 } 1783 1784 int gve_adjust_queues(struct gve_priv *priv, 1785 struct gve_rx_queue_config new_rx_config, 1786 struct gve_tx_queue_config new_tx_config, 1787 bool reset_rss) 1788 { 1789 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1790 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1791 int err; 1792 1793 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1794 1795 /* Relay the new config from ethtool */ 1796 tx_alloc_cfg.qcfg = &new_tx_config; 1797 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1798 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1799 rx_alloc_cfg.reset_rss = reset_rss; 1800 1801 if (netif_running(priv->dev)) { 1802 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1803 return err; 1804 } 1805 /* Set the config for the next up. 
*/ 1806 if (reset_rss) { 1807 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1808 if (err) 1809 return err; 1810 } 1811 priv->tx_cfg = new_tx_config; 1812 priv->rx_cfg = new_rx_config; 1813 1814 return 0; 1815 } 1816 1817 static void gve_turndown(struct gve_priv *priv) 1818 { 1819 int idx; 1820 1821 if (netif_carrier_ok(priv->dev)) 1822 netif_carrier_off(priv->dev); 1823 1824 if (!gve_get_napi_enabled(priv)) 1825 return; 1826 1827 /* Disable napi to prevent more work from coming in */ 1828 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1829 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1830 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1831 1832 if (!gve_tx_was_added_to_block(priv, idx)) 1833 continue; 1834 1835 if (idx < priv->tx_cfg.num_queues) 1836 netif_queue_set_napi(priv->dev, idx, 1837 NETDEV_QUEUE_TYPE_TX, NULL); 1838 1839 napi_disable_locked(&block->napi); 1840 } 1841 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1842 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1843 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1844 1845 if (!gve_rx_was_added_to_block(priv, idx)) 1846 continue; 1847 1848 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1849 NULL); 1850 napi_disable_locked(&block->napi); 1851 } 1852 1853 /* Stop tx queues */ 1854 netif_tx_disable(priv->dev); 1855 1856 xdp_features_clear_redirect_target_locked(priv->dev); 1857 1858 gve_clear_napi_enabled(priv); 1859 gve_clear_report_stats(priv); 1860 1861 /* Make sure that all traffic is finished processing. */ 1862 synchronize_net(); 1863 } 1864 1865 static void gve_turnup(struct gve_priv *priv) 1866 { 1867 int idx; 1868 1869 /* Start the tx queues */ 1870 netif_tx_start_all_queues(priv->dev); 1871 1872 /* Enable napi and unmask interrupts for all queues */ 1873 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1874 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1875 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1876 1877 if (!gve_tx_was_added_to_block(priv, idx)) 1878 continue; 1879 1880 napi_enable_locked(&block->napi); 1881 1882 if (idx < priv->tx_cfg.num_queues) 1883 netif_queue_set_napi(priv->dev, idx, 1884 NETDEV_QUEUE_TYPE_TX, 1885 &block->napi); 1886 1887 if (gve_is_gqi(priv)) { 1888 iowrite32be(0, gve_irq_doorbell(priv, block)); 1889 } else { 1890 gve_set_itr_coalesce_usecs_dqo(priv, block, 1891 priv->tx_coalesce_usecs); 1892 } 1893 1894 /* Any descs written by the NIC before this barrier will be 1895 * handled by the one-off napi schedule below. Whereas any 1896 * descs after the barrier will generate interrupts. 1897 */ 1898 mb(); 1899 napi_schedule(&block->napi); 1900 } 1901 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1902 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1903 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1904 1905 if (!gve_rx_was_added_to_block(priv, idx)) 1906 continue; 1907 1908 napi_enable_locked(&block->napi); 1909 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1910 &block->napi); 1911 1912 if (gve_is_gqi(priv)) { 1913 iowrite32be(0, gve_irq_doorbell(priv, block)); 1914 } else { 1915 gve_set_itr_coalesce_usecs_dqo(priv, block, 1916 priv->rx_coalesce_usecs); 1917 } 1918 1919 /* Any descs written by the NIC before this barrier will be 1920 * handled by the one-off napi schedule below. Whereas any 1921 * descs after the barrier will generate interrupts. 
1922 */ 1923 mb(); 1924 napi_schedule(&block->napi); 1925 } 1926 1927 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1928 xdp_features_set_redirect_target_locked(priv->dev, false); 1929 1930 gve_set_napi_enabled(priv); 1931 } 1932 1933 static void gve_turnup_and_check_status(struct gve_priv *priv) 1934 { 1935 u32 status; 1936 1937 gve_turnup(priv); 1938 status = ioread32be(&priv->reg_bar0->device_status); 1939 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1940 } 1941 1942 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1943 { 1944 struct gve_notify_block *block; 1945 struct gve_tx_ring *tx = NULL; 1946 struct gve_priv *priv; 1947 u32 last_nic_done; 1948 u32 current_time; 1949 u32 ntfy_idx; 1950 1951 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1952 priv = netdev_priv(dev); 1953 if (txqueue > priv->tx_cfg.num_queues) 1954 goto reset; 1955 1956 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1957 if (ntfy_idx >= priv->num_ntfy_blks) 1958 goto reset; 1959 1960 block = &priv->ntfy_blocks[ntfy_idx]; 1961 tx = block->tx; 1962 1963 current_time = jiffies_to_msecs(jiffies); 1964 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1965 goto reset; 1966 1967 /* Check to see if there are missed completions, which will allow us to 1968 * kick the queue. 1969 */ 1970 last_nic_done = gve_tx_load_event_counter(priv, tx); 1971 if (last_nic_done - tx->done) { 1972 netdev_info(dev, "Kicking queue %d", txqueue); 1973 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1974 napi_schedule(&block->napi); 1975 tx->last_kick_msec = current_time; 1976 goto out; 1977 } // Else reset. 1978 1979 reset: 1980 gve_schedule_reset(priv); 1981 1982 out: 1983 if (tx) 1984 tx->queue_timeout++; 1985 priv->tx_timeo_cnt++; 1986 } 1987 1988 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 1989 { 1990 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 1991 return GVE_MAX_RX_BUFFER_SIZE; 1992 else 1993 return GVE_DEFAULT_RX_BUFFER_SIZE; 1994 } 1995 1996 /* Header split is only supported on DQ RDA queue format. If XDP is enabled, 1997 * header split is not allowed. 
1998 */ 1999 bool gve_header_split_supported(const struct gve_priv *priv) 2000 { 2001 return priv->header_buf_size && 2002 priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; 2003 } 2004 2005 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2006 { 2007 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2008 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2009 bool enable_hdr_split; 2010 int err = 0; 2011 2012 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2013 return 0; 2014 2015 if (!gve_header_split_supported(priv)) { 2016 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2017 return -EOPNOTSUPP; 2018 } 2019 2020 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2021 enable_hdr_split = true; 2022 else 2023 enable_hdr_split = false; 2024 2025 if (enable_hdr_split == priv->header_split_enabled) 2026 return 0; 2027 2028 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2029 2030 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2031 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2032 2033 if (netif_running(priv->dev)) 2034 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2035 return err; 2036 } 2037 2038 static int gve_set_features(struct net_device *netdev, 2039 netdev_features_t features) 2040 { 2041 const netdev_features_t orig_features = netdev->features; 2042 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2043 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2044 struct gve_priv *priv = netdev_priv(netdev); 2045 int err; 2046 2047 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2048 2049 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2050 netdev->features ^= NETIF_F_LRO; 2051 if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { 2052 netdev_warn(netdev, 2053 "XDP is not supported when LRO is on.\n"); 2054 err = -EOPNOTSUPP; 2055 goto revert_features; 2056 } 2057 if (netif_running(netdev)) { 2058 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2059 if (err) 2060 goto revert_features; 2061 } 2062 } 2063 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2064 err = gve_flow_rules_reset(priv); 2065 if (err) 2066 goto revert_features; 2067 } 2068 2069 return 0; 2070 2071 revert_features: 2072 netdev->features = orig_features; 2073 return err; 2074 } 2075 2076 static int gve_get_ts_config(struct net_device *dev, 2077 struct kernel_hwtstamp_config *kernel_config) 2078 { 2079 struct gve_priv *priv = netdev_priv(dev); 2080 2081 *kernel_config = priv->ts_config; 2082 return 0; 2083 } 2084 2085 static int gve_set_ts_config(struct net_device *dev, 2086 struct kernel_hwtstamp_config *kernel_config, 2087 struct netlink_ext_ack *extack) 2088 { 2089 struct gve_priv *priv = netdev_priv(dev); 2090 2091 if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { 2092 NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); 2093 return -ERANGE; 2094 } 2095 2096 if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { 2097 if (!priv->nic_ts_report) { 2098 NL_SET_ERR_MSG_MOD(extack, 2099 "RX timestamping is not supported"); 2100 kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; 2101 return -EOPNOTSUPP; 2102 } 2103 2104 kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; 2105 gve_clock_nic_ts_read(priv); 2106 ptp_schedule_worker(priv->ptp->clock, 0); 2107 } else { 2108 ptp_cancel_worker_sync(priv->ptp->clock); 2109 } 2110 2111 priv->ts_config.rx_filter = kernel_config->rx_filter; 2112 2113 return 0; 2114 } 2115 2116 static const 
struct net_device_ops gve_netdev_ops = { 2117 .ndo_start_xmit = gve_start_xmit, 2118 .ndo_features_check = gve_features_check, 2119 .ndo_open = gve_open, 2120 .ndo_stop = gve_close, 2121 .ndo_get_stats64 = gve_get_stats, 2122 .ndo_tx_timeout = gve_tx_timeout, 2123 .ndo_set_features = gve_set_features, 2124 .ndo_bpf = gve_xdp, 2125 .ndo_xdp_xmit = gve_xdp_xmit, 2126 .ndo_xsk_wakeup = gve_xsk_wakeup, 2127 .ndo_hwtstamp_get = gve_get_ts_config, 2128 .ndo_hwtstamp_set = gve_set_ts_config, 2129 }; 2130 2131 static void gve_handle_status(struct gve_priv *priv, u32 status) 2132 { 2133 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2134 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2135 gve_set_do_reset(priv); 2136 } 2137 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2138 priv->stats_report_trigger_cnt++; 2139 gve_set_do_report_stats(priv); 2140 } 2141 } 2142 2143 static void gve_handle_reset(struct gve_priv *priv) 2144 { 2145 /* A service task will be scheduled at the end of probe to catch any 2146 * resets that need to happen, and we don't want to reset until 2147 * probe is done. 2148 */ 2149 if (gve_get_probe_in_progress(priv)) 2150 return; 2151 2152 if (gve_get_do_reset(priv)) { 2153 rtnl_lock(); 2154 netdev_lock(priv->dev); 2155 gve_reset(priv, false); 2156 netdev_unlock(priv->dev); 2157 rtnl_unlock(); 2158 } 2159 } 2160 2161 void gve_handle_report_stats(struct gve_priv *priv) 2162 { 2163 struct stats *stats = priv->stats_report->stats; 2164 int idx, stats_idx = 0; 2165 unsigned int start = 0; 2166 u64 tx_bytes; 2167 2168 if (!gve_get_report_stats(priv)) 2169 return; 2170 2171 be64_add_cpu(&priv->stats_report->written_count, 1); 2172 /* tx stats */ 2173 if (priv->tx) { 2174 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2175 u32 last_completion = 0; 2176 u32 tx_frames = 0; 2177 2178 /* DQO doesn't currently support these metrics. 
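			 * They are left at zero when the ring format is not GQI.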
*/ 2179 if (gve_is_gqi(priv)) { 2180 last_completion = priv->tx[idx].done; 2181 tx_frames = priv->tx[idx].req; 2182 } 2183 2184 do { 2185 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2186 tx_bytes = priv->tx[idx].bytes_done; 2187 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2188 stats[stats_idx++] = (struct stats) { 2189 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2190 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2191 .queue_id = cpu_to_be32(idx), 2192 }; 2193 stats[stats_idx++] = (struct stats) { 2194 .stat_name = cpu_to_be32(TX_STOP_CNT), 2195 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2196 .queue_id = cpu_to_be32(idx), 2197 }; 2198 stats[stats_idx++] = (struct stats) { 2199 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2200 .value = cpu_to_be64(tx_frames), 2201 .queue_id = cpu_to_be32(idx), 2202 }; 2203 stats[stats_idx++] = (struct stats) { 2204 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2205 .value = cpu_to_be64(tx_bytes), 2206 .queue_id = cpu_to_be32(idx), 2207 }; 2208 stats[stats_idx++] = (struct stats) { 2209 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2210 .value = cpu_to_be64(last_completion), 2211 .queue_id = cpu_to_be32(idx), 2212 }; 2213 stats[stats_idx++] = (struct stats) { 2214 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2215 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2216 .queue_id = cpu_to_be32(idx), 2217 }; 2218 } 2219 } 2220 /* rx stats */ 2221 if (priv->rx) { 2222 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2223 stats[stats_idx++] = (struct stats) { 2224 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2225 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2226 .queue_id = cpu_to_be32(idx), 2227 }; 2228 stats[stats_idx++] = (struct stats) { 2229 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2230 .value = cpu_to_be64(priv->rx[idx].fill_cnt), 2231 .queue_id = cpu_to_be32(idx), 2232 }; 2233 } 2234 } 2235 } 2236 2237 /* Handle NIC status register changes, reset requests and report stats */ 2238 static void gve_service_task(struct work_struct *work) 2239 { 2240 struct gve_priv *priv = container_of(work, struct gve_priv, 2241 service_task); 2242 u32 status = ioread32be(&priv->reg_bar0->device_status); 2243 2244 gve_handle_status(priv, status); 2245 2246 gve_handle_reset(priv); 2247 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2248 } 2249 2250 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2251 { 2252 xdp_features_t xdp_features; 2253 2254 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2255 xdp_features = NETDEV_XDP_ACT_BASIC; 2256 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2257 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2258 } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 2259 xdp_features = NETDEV_XDP_ACT_BASIC; 2260 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2261 } else { 2262 xdp_features = 0; 2263 } 2264 2265 xdp_set_features_flag_locked(priv->dev, xdp_features); 2266 } 2267 2268 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2269 { 2270 int num_ntfy; 2271 int err; 2272 2273 /* Set up the adminq */ 2274 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2275 if (err) { 2276 dev_err(&priv->pdev->dev, 2277 "Failed to alloc admin queue: err=%d\n", err); 2278 return err; 2279 } 2280 2281 err = gve_verify_driver_compatibility(priv); 2282 if (err) { 2283 dev_err(&priv->pdev->dev, 2284 "Could not verify driver compatibility: err=%d\n", err); 2285 goto err; 2286 } 2287 2288 priv->num_registered_pages = 0; 2289 2290 if (skip_describe_device) 2291 goto 
setup_device; 2292 2293 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2294 /* Get the initial information we need from the device */ 2295 err = gve_adminq_describe_device(priv); 2296 if (err) { 2297 dev_err(&priv->pdev->dev, 2298 "Could not get device information: err=%d\n", err); 2299 goto err; 2300 } 2301 priv->dev->mtu = priv->dev->max_mtu; 2302 num_ntfy = pci_msix_vec_count(priv->pdev); 2303 if (num_ntfy <= 0) { 2304 dev_err(&priv->pdev->dev, 2305 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2306 err = num_ntfy; 2307 goto err; 2308 } else if (num_ntfy < GVE_MIN_MSIX) { 2309 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2310 GVE_MIN_MSIX, num_ntfy); 2311 err = -EINVAL; 2312 goto err; 2313 } 2314 2315 /* Big TCP is only supported on DQO */ 2316 if (!gve_is_gqi(priv)) 2317 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2318 2319 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2320 /* gvnic has one Notification Block per MSI-x vector, except for the 2321 * management vector 2322 */ 2323 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2324 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2325 2326 priv->tx_cfg.max_queues = 2327 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2328 priv->rx_cfg.max_queues = 2329 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2330 2331 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2332 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2333 if (priv->default_num_queues > 0) { 2334 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2335 priv->tx_cfg.num_queues); 2336 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2337 priv->rx_cfg.num_queues); 2338 } 2339 priv->tx_cfg.num_xdp_queues = 0; 2340 2341 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2342 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2343 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2344 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2345 2346 if (!gve_is_gqi(priv)) { 2347 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2348 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2349 } 2350 2351 priv->ts_config.tx_type = HWTSTAMP_TX_OFF; 2352 priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; 2353 2354 setup_device: 2355 gve_set_netdev_xdp_features(priv); 2356 err = gve_setup_device_resources(priv); 2357 if (!err) 2358 return 0; 2359 err: 2360 gve_adminq_free(&priv->pdev->dev, priv); 2361 return err; 2362 } 2363 2364 static void gve_teardown_priv_resources(struct gve_priv *priv) 2365 { 2366 gve_teardown_device_resources(priv); 2367 gve_adminq_free(&priv->pdev->dev, priv); 2368 } 2369 2370 static void gve_trigger_reset(struct gve_priv *priv) 2371 { 2372 /* Reset the device by releasing the AQ */ 2373 gve_adminq_release(priv); 2374 } 2375 2376 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2377 { 2378 gve_trigger_reset(priv); 2379 /* With the reset having already happened, close cannot fail */ 2380 if (was_up) 2381 gve_close(priv->dev); 2382 gve_teardown_priv_resources(priv); 2383 } 2384 2385 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2386 { 2387 int err; 2388 2389 err = gve_init_priv(priv, true); 2390 if (err) 2391 goto err; 2392 if (was_up) { 2393 err = gve_open(priv->dev); 2394 if (err) 2395 goto err; 2396 } 2397 return 0; 2398 err: 2399 dev_err(&priv->pdev->dev, "Reset failed! !!! 
DISABLING ALL QUEUES !!!\n"); 2400 gve_turndown(priv); 2401 return err; 2402 } 2403 2404 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2405 { 2406 bool was_up = netif_running(priv->dev); 2407 int err; 2408 2409 dev_info(&priv->pdev->dev, "Performing reset\n"); 2410 gve_clear_do_reset(priv); 2411 gve_set_reset_in_progress(priv); 2412 /* If we aren't attempting to teardown normally, just go turndown and 2413 * reset right away. 2414 */ 2415 if (!attempt_teardown) { 2416 gve_turndown(priv); 2417 gve_reset_and_teardown(priv, was_up); 2418 } else { 2419 /* Otherwise attempt to close normally */ 2420 if (was_up) { 2421 err = gve_close(priv->dev); 2422 /* If that fails reset as we did above */ 2423 if (err) 2424 gve_reset_and_teardown(priv, was_up); 2425 } 2426 /* Clean up any remaining resources */ 2427 gve_teardown_priv_resources(priv); 2428 } 2429 2430 /* Set it all back up */ 2431 err = gve_reset_recovery(priv, was_up); 2432 gve_clear_reset_in_progress(priv); 2433 priv->reset_cnt++; 2434 priv->interface_up_cnt = 0; 2435 priv->interface_down_cnt = 0; 2436 priv->stats_report_trigger_cnt = 0; 2437 return err; 2438 } 2439 2440 static void gve_write_version(u8 __iomem *driver_version_register) 2441 { 2442 const char *c = gve_version_prefix; 2443 2444 while (*c) { 2445 writeb(*c, driver_version_register); 2446 c++; 2447 } 2448 2449 c = gve_version_str; 2450 while (*c) { 2451 writeb(*c, driver_version_register); 2452 c++; 2453 } 2454 writeb('\n', driver_version_register); 2455 } 2456 2457 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2458 { 2459 struct gve_priv *priv = netdev_priv(dev); 2460 struct gve_rx_ring *gve_per_q_mem; 2461 int err; 2462 2463 if (!priv->rx) 2464 return -EAGAIN; 2465 2466 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2467 if (!gve_is_gqi(priv) && idx == 0) 2468 return -ERANGE; 2469 2470 /* Single-queue destruction requires quiescence on all queues */ 2471 gve_turndown(priv); 2472 2473 /* This failure will trigger a reset - no need to clean up */ 2474 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2475 if (err) 2476 return err; 2477 2478 if (gve_is_qpl(priv)) { 2479 /* This failure will trigger a reset - no need to clean up */ 2480 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2481 if (err) 2482 return err; 2483 } 2484 2485 gve_rx_stop_ring(priv, idx); 2486 2487 /* Turn the unstopped queues back up */ 2488 gve_turnup_and_check_status(priv); 2489 2490 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2491 *gve_per_q_mem = priv->rx[idx]; 2492 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2493 return 0; 2494 } 2495 2496 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2497 { 2498 struct gve_priv *priv = netdev_priv(dev); 2499 struct gve_rx_alloc_rings_cfg cfg = {0}; 2500 struct gve_rx_ring *gve_per_q_mem; 2501 2502 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2503 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2504 2505 if (gve_is_gqi(priv)) 2506 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2507 else 2508 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2509 } 2510 2511 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2512 int idx) 2513 { 2514 struct gve_priv *priv = netdev_priv(dev); 2515 struct gve_rx_alloc_rings_cfg cfg = {0}; 2516 struct gve_rx_ring *gve_per_q_mem; 2517 int err; 2518 2519 if (!priv->rx) 2520 return -EAGAIN; 2521 2522 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2523 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2524 
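	/* Only ring memory is allocated here; the queue is not created on the
	 * device until gve_rx_queue_start().
	 */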
2525 if (gve_is_gqi(priv)) 2526 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2527 else 2528 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2529 2530 return err; 2531 } 2532 2533 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2534 { 2535 struct gve_priv *priv = netdev_priv(dev); 2536 struct gve_rx_ring *gve_per_q_mem; 2537 int err; 2538 2539 if (!priv->rx) 2540 return -EAGAIN; 2541 2542 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2543 priv->rx[idx] = *gve_per_q_mem; 2544 2545 /* Single-queue creation requires quiescence on all queues */ 2546 gve_turndown(priv); 2547 2548 gve_rx_start_ring(priv, idx); 2549 2550 if (gve_is_qpl(priv)) { 2551 /* This failure will trigger a reset - no need to clean up */ 2552 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2553 if (err) 2554 goto abort; 2555 } 2556 2557 /* This failure will trigger a reset - no need to clean up */ 2558 err = gve_adminq_create_single_rx_queue(priv, idx); 2559 if (err) 2560 goto abort; 2561 2562 if (gve_is_gqi(priv)) 2563 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2564 else 2565 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2566 2567 /* Turn the unstopped queues back up */ 2568 gve_turnup_and_check_status(priv); 2569 return 0; 2570 2571 abort: 2572 gve_rx_stop_ring(priv, idx); 2573 2574 /* All failures in this func result in a reset, by clearing the struct 2575 * at idx, we prevent a double free when that reset runs. The reset, 2576 * which needs the rtnl lock, will not run till this func returns and 2577 * its caller gives up the lock. 2578 */ 2579 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2580 return err; 2581 } 2582 2583 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2584 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2585 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2586 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2587 .ndo_queue_start = gve_rx_queue_start, 2588 .ndo_queue_stop = gve_rx_queue_stop, 2589 }; 2590 2591 static void gve_get_rx_queue_stats(struct net_device *dev, int idx, 2592 struct netdev_queue_stats_rx *rx_stats) 2593 { 2594 struct gve_priv *priv = netdev_priv(dev); 2595 struct gve_rx_ring *rx = &priv->rx[idx]; 2596 unsigned int start; 2597 2598 do { 2599 start = u64_stats_fetch_begin(&rx->statss); 2600 rx_stats->packets = rx->rpackets; 2601 rx_stats->bytes = rx->rbytes; 2602 rx_stats->alloc_fail = rx->rx_skb_alloc_fail + 2603 rx->rx_buf_alloc_fail; 2604 } while (u64_stats_fetch_retry(&rx->statss, start)); 2605 } 2606 2607 static void gve_get_tx_queue_stats(struct net_device *dev, int idx, 2608 struct netdev_queue_stats_tx *tx_stats) 2609 { 2610 struct gve_priv *priv = netdev_priv(dev); 2611 struct gve_tx_ring *tx = &priv->tx[idx]; 2612 unsigned int start; 2613 2614 do { 2615 start = u64_stats_fetch_begin(&tx->statss); 2616 tx_stats->packets = tx->pkt_done; 2617 tx_stats->bytes = tx->bytes_done; 2618 } while (u64_stats_fetch_retry(&tx->statss, start)); 2619 } 2620 2621 static void gve_get_base_stats(struct net_device *dev, 2622 struct netdev_queue_stats_rx *rx, 2623 struct netdev_queue_stats_tx *tx) 2624 { 2625 rx->packets = 0; 2626 rx->bytes = 0; 2627 rx->alloc_fail = 0; 2628 2629 tx->packets = 0; 2630 tx->bytes = 0; 2631 } 2632 2633 static const struct netdev_stat_ops gve_stat_ops = { 2634 .get_queue_stats_rx = gve_get_rx_queue_stats, 2635 .get_queue_stats_tx = gve_get_tx_queue_stats, 2636 .get_base_stats = gve_get_base_stats, 2637 }; 2638 2639 static int gve_probe(struct pci_dev *pdev, const struct 
		     pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
2700 */ 2701 dev->hw_features = NETIF_F_HIGHDMA; 2702 dev->hw_features |= NETIF_F_SG; 2703 dev->hw_features |= NETIF_F_HW_CSUM; 2704 dev->hw_features |= NETIF_F_TSO; 2705 dev->hw_features |= NETIF_F_TSO6; 2706 dev->hw_features |= NETIF_F_TSO_ECN; 2707 dev->hw_features |= NETIF_F_RXCSUM; 2708 dev->hw_features |= NETIF_F_RXHASH; 2709 dev->features = dev->hw_features; 2710 dev->watchdog_timeo = 5 * HZ; 2711 dev->min_mtu = ETH_MIN_MTU; 2712 netif_carrier_off(dev); 2713 2714 priv = netdev_priv(dev); 2715 priv->dev = dev; 2716 priv->pdev = pdev; 2717 priv->msg_enable = DEFAULT_MSG_LEVEL; 2718 priv->reg_bar0 = reg_bar; 2719 priv->db_bar2 = db_bar; 2720 priv->service_task_flags = 0x0; 2721 priv->state_flags = 0x0; 2722 priv->ethtool_flags = 0x0; 2723 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2724 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2725 2726 gve_set_probe_in_progress(priv); 2727 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2728 if (!priv->gve_wq) { 2729 dev_err(&pdev->dev, "Could not allocate workqueue"); 2730 err = -ENOMEM; 2731 goto abort_with_netdev; 2732 } 2733 INIT_WORK(&priv->service_task, gve_service_task); 2734 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2735 priv->tx_cfg.max_queues = max_tx_queues; 2736 priv->rx_cfg.max_queues = max_rx_queues; 2737 2738 err = gve_init_priv(priv, false); 2739 if (err) 2740 goto abort_with_wq; 2741 2742 if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) 2743 dev->netmem_tx = true; 2744 2745 err = register_netdev(dev); 2746 if (err) 2747 goto abort_with_gve_init; 2748 2749 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2750 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2751 gve_clear_probe_in_progress(priv); 2752 queue_work(priv->gve_wq, &priv->service_task); 2753 return 0; 2754 2755 abort_with_gve_init: 2756 gve_teardown_priv_resources(priv); 2757 2758 abort_with_wq: 2759 destroy_workqueue(priv->gve_wq); 2760 2761 abort_with_netdev: 2762 free_netdev(dev); 2763 2764 abort_with_db_bar: 2765 pci_iounmap(pdev, db_bar); 2766 2767 abort_with_reg_bar: 2768 pci_iounmap(pdev, reg_bar); 2769 2770 abort_with_pci_region: 2771 pci_release_regions(pdev); 2772 2773 abort_with_enabled: 2774 pci_disable_device(pdev); 2775 return err; 2776 } 2777 2778 static void gve_remove(struct pci_dev *pdev) 2779 { 2780 struct net_device *netdev = pci_get_drvdata(pdev); 2781 struct gve_priv *priv = netdev_priv(netdev); 2782 __be32 __iomem *db_bar = priv->db_bar2; 2783 void __iomem *reg_bar = priv->reg_bar0; 2784 2785 unregister_netdev(netdev); 2786 gve_teardown_priv_resources(priv); 2787 destroy_workqueue(priv->gve_wq); 2788 free_netdev(netdev); 2789 pci_iounmap(pdev, db_bar); 2790 pci_iounmap(pdev, reg_bar); 2791 pci_release_regions(pdev); 2792 pci_disable_device(pdev); 2793 } 2794 2795 static void gve_shutdown(struct pci_dev *pdev) 2796 { 2797 struct net_device *netdev = pci_get_drvdata(pdev); 2798 struct gve_priv *priv = netdev_priv(netdev); 2799 bool was_up = netif_running(priv->dev); 2800 2801 rtnl_lock(); 2802 netdev_lock(netdev); 2803 if (was_up && gve_close(priv->dev)) { 2804 /* If the dev was up, attempt to close, if close fails, reset */ 2805 gve_reset_and_teardown(priv, was_up); 2806 } else { 2807 /* If the dev wasn't up or close worked, finish tearing down */ 2808 gve_teardown_priv_resources(priv); 2809 } 2810 netdev_unlock(netdev); 2811 rtnl_unlock(); 2812 } 2813 2814 #ifdef CONFIG_PM 2815 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2816 { 2817 struct net_device 
*netdev = pci_get_drvdata(pdev); 2818 struct gve_priv *priv = netdev_priv(netdev); 2819 bool was_up = netif_running(priv->dev); 2820 2821 priv->suspend_cnt++; 2822 rtnl_lock(); 2823 netdev_lock(netdev); 2824 if (was_up && gve_close(priv->dev)) { 2825 /* If the dev was up, attempt to close, if close fails, reset */ 2826 gve_reset_and_teardown(priv, was_up); 2827 } else { 2828 /* If the dev wasn't up or close worked, finish tearing down */ 2829 gve_teardown_priv_resources(priv); 2830 } 2831 priv->up_before_suspend = was_up; 2832 netdev_unlock(netdev); 2833 rtnl_unlock(); 2834 return 0; 2835 } 2836 2837 static int gve_resume(struct pci_dev *pdev) 2838 { 2839 struct net_device *netdev = pci_get_drvdata(pdev); 2840 struct gve_priv *priv = netdev_priv(netdev); 2841 int err; 2842 2843 priv->resume_cnt++; 2844 rtnl_lock(); 2845 netdev_lock(netdev); 2846 err = gve_reset_recovery(priv, priv->up_before_suspend); 2847 netdev_unlock(netdev); 2848 rtnl_unlock(); 2849 return err; 2850 } 2851 #endif /* CONFIG_PM */ 2852 2853 static const struct pci_device_id gve_id_table[] = { 2854 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2855 { } 2856 }; 2857 2858 static struct pci_driver gve_driver = { 2859 .name = gve_driver_name, 2860 .id_table = gve_id_table, 2861 .probe = gve_probe, 2862 .remove = gve_remove, 2863 .shutdown = gve_shutdown, 2864 #ifdef CONFIG_PM 2865 .suspend = gve_suspend, 2866 .resume = gve_resume, 2867 #endif 2868 }; 2869 2870 module_pci_driver(gve_driver); 2871 2872 MODULE_DEVICE_TABLE(pci, gve_id_table); 2873 MODULE_AUTHOR("Google, Inc."); 2874 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2875 MODULE_LICENSE("Dual MIT/GPL"); 2876 MODULE_VERSION(GVE_VERSION); 2877