// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bitmap.h>
#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

static int gve_alloc_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	if (!priv->cache_rss_config)
		return 0;

	rss_config->hash_key = kcalloc(priv->rss_key_size,
				       sizeof(rss_config->hash_key[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_key)
		return -ENOMEM;

	rss_config->hash_lut = kcalloc(priv->rss_lut_size,
				       sizeof(rss_config->hash_lut[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_lut)
		goto free_rss_key_cache;

	return 0;

free_rss_key_cache:
	kfree(rss_config->hash_key);
	rss_config->hash_key = NULL;
	return -ENOMEM;
}

static void gve_free_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	kfree(rss_config->hash_key);
	kfree(rss_config->hash_lut);

	memset(rss_config, 0, sizeof(*rss_config));
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv =
container_of(work, struct gve_priv, 254 stats_report_task); 255 if (gve_get_do_report_stats(priv)) { 256 gve_handle_report_stats(priv); 257 gve_clear_do_report_stats(priv); 258 } 259 } 260 261 static void gve_stats_report_schedule(struct gve_priv *priv) 262 { 263 if (!gve_get_probe_in_progress(priv) && 264 !gve_get_reset_in_progress(priv)) { 265 gve_set_do_report_stats(priv); 266 queue_work(priv->gve_wq, &priv->stats_report_task); 267 } 268 } 269 270 static void gve_stats_report_timer(struct timer_list *t) 271 { 272 struct gve_priv *priv = timer_container_of(priv, t, 273 stats_report_timer); 274 275 mod_timer(&priv->stats_report_timer, 276 round_jiffies(jiffies + 277 msecs_to_jiffies(priv->stats_report_timer_period))); 278 gve_stats_report_schedule(priv); 279 } 280 281 static int gve_alloc_stats_report(struct gve_priv *priv) 282 { 283 int tx_stats_num, rx_stats_num; 284 285 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 286 gve_num_tx_queues(priv); 287 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 288 priv->rx_cfg.num_queues; 289 priv->stats_report_len = struct_size(priv->stats_report, stats, 290 size_add(tx_stats_num, rx_stats_num)); 291 priv->stats_report = 292 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 293 &priv->stats_report_bus, GFP_KERNEL); 294 if (!priv->stats_report) 295 return -ENOMEM; 296 /* Set up timer for the report-stats task */ 297 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 298 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 299 return 0; 300 } 301 302 static void gve_free_stats_report(struct gve_priv *priv) 303 { 304 if (!priv->stats_report) 305 return; 306 307 timer_delete_sync(&priv->stats_report_timer); 308 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 309 priv->stats_report, priv->stats_report_bus); 310 priv->stats_report = NULL; 311 } 312 313 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 314 { 315 struct gve_priv *priv = arg; 316 317 queue_work(priv->gve_wq, &priv->service_task); 318 return IRQ_HANDLED; 319 } 320 321 static irqreturn_t gve_intr(int irq, void *arg) 322 { 323 struct gve_notify_block *block = arg; 324 struct gve_priv *priv = block->priv; 325 326 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 327 napi_schedule_irqoff(&block->napi); 328 return IRQ_HANDLED; 329 } 330 331 static irqreturn_t gve_intr_dqo(int irq, void *arg) 332 { 333 struct gve_notify_block *block = arg; 334 335 /* Interrupts are automatically masked */ 336 napi_schedule_irqoff(&block->napi); 337 return IRQ_HANDLED; 338 } 339 340 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 341 { 342 int cpu_curr = smp_processor_id(); 343 const struct cpumask *aff_mask; 344 345 aff_mask = irq_get_effective_affinity_mask(irq); 346 if (unlikely(!aff_mask)) 347 return 1; 348 349 return cpumask_test_cpu(cpu_curr, aff_mask); 350 } 351 352 int gve_napi_poll(struct napi_struct *napi, int budget) 353 { 354 struct gve_notify_block *block; 355 __be32 __iomem *irq_doorbell; 356 bool reschedule = false; 357 struct gve_priv *priv; 358 int work_done = 0; 359 360 block = container_of(napi, struct gve_notify_block, napi); 361 priv = block->priv; 362 363 if (block->tx) { 364 if (block->tx->q_num < priv->tx_cfg.num_queues) 365 reschedule |= gve_tx_poll(block, budget); 366 else if (budget) 367 reschedule |= gve_xdp_poll(block, budget); 368 } 369 370 if (!budget) 371 return 0; 372 373 if (block->rx) { 374 work_done = gve_rx_poll(block, budget); 375 376 /* Poll XSK TX as 
part of RX NAPI. Setup re-poll based on max of 377 * TX and RX work done. 378 */ 379 if (priv->xdp_prog) 380 work_done = max_t(int, work_done, 381 gve_xsk_tx_poll(block, budget)); 382 383 reschedule |= work_done == budget; 384 } 385 386 if (reschedule) 387 return budget; 388 389 /* Complete processing - don't unmask irq if busy polling is enabled */ 390 if (likely(napi_complete_done(napi, work_done))) { 391 irq_doorbell = gve_irq_doorbell(priv, block); 392 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 393 394 /* Ensure IRQ ACK is visible before we check pending work. 395 * If queue had issued updates, it would be truly visible. 396 */ 397 mb(); 398 399 if (block->tx) 400 reschedule |= gve_tx_clean_pending(priv, block->tx); 401 if (block->rx) 402 reschedule |= gve_rx_work_pending(block->rx); 403 404 if (reschedule && napi_schedule(napi)) 405 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 406 } 407 return work_done; 408 } 409 410 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 411 { 412 struct gve_notify_block *block = 413 container_of(napi, struct gve_notify_block, napi); 414 struct gve_priv *priv = block->priv; 415 bool reschedule = false; 416 int work_done = 0; 417 418 if (block->tx) { 419 if (block->tx->q_num < priv->tx_cfg.num_queues) 420 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 421 else 422 reschedule |= gve_xdp_poll_dqo(block); 423 } 424 425 if (!budget) 426 return 0; 427 428 if (block->rx) { 429 work_done = gve_rx_poll_dqo(block, budget); 430 431 /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if 432 * either datapath has more work to do. 433 */ 434 if (priv->xdp_prog) 435 reschedule |= gve_xsk_tx_poll_dqo(block, budget); 436 reschedule |= work_done == budget; 437 } 438 439 if (reschedule) { 440 /* Reschedule by returning budget only if already on the correct 441 * cpu. 442 */ 443 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 444 return budget; 445 446 /* If not on the cpu with which this queue's irq has affinity 447 * with, we avoid rescheduling napi and arm the irq instead so 448 * that napi gets rescheduled back eventually onto the right 449 * cpu. 450 */ 451 if (work_done == budget) 452 work_done--; 453 } 454 455 if (likely(napi_complete_done(napi, work_done))) { 456 /* Enable interrupts again. 457 * 458 * We don't need to repoll afterwards because HW supports the 459 * PCI MSI-X PBA feature. 460 * 461 * Another interrupt would be triggered if a new event came in 462 * since the last one. 
463 */ 464 gve_write_irq_doorbell_dqo(priv, block, 465 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 466 } 467 468 return work_done; 469 } 470 471 static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) 472 { 473 if (priv->numa_node == NUMA_NO_NODE) 474 return cpu_all_mask; 475 else 476 return cpumask_of_node(priv->numa_node); 477 } 478 479 static int gve_alloc_notify_blocks(struct gve_priv *priv) 480 { 481 int num_vecs_requested = priv->num_ntfy_blks + 1; 482 const struct cpumask *node_mask; 483 unsigned int cur_cpu; 484 int vecs_enabled; 485 int i, j; 486 int err; 487 488 priv->msix_vectors = kvcalloc(num_vecs_requested, 489 sizeof(*priv->msix_vectors), GFP_KERNEL); 490 if (!priv->msix_vectors) 491 return -ENOMEM; 492 for (i = 0; i < num_vecs_requested; i++) 493 priv->msix_vectors[i].entry = i; 494 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 495 GVE_MIN_MSIX, num_vecs_requested); 496 if (vecs_enabled < 0) { 497 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 498 GVE_MIN_MSIX, vecs_enabled); 499 err = vecs_enabled; 500 goto abort_with_msix_vectors; 501 } 502 if (vecs_enabled != num_vecs_requested) { 503 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 504 int vecs_per_type = new_num_ntfy_blks / 2; 505 int vecs_left = new_num_ntfy_blks % 2; 506 507 priv->num_ntfy_blks = new_num_ntfy_blks; 508 priv->mgmt_msix_idx = priv->num_ntfy_blks; 509 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 510 vecs_per_type); 511 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 512 vecs_per_type + vecs_left); 513 dev_err(&priv->pdev->dev, 514 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 515 vecs_enabled, priv->tx_cfg.max_queues, 516 priv->rx_cfg.max_queues); 517 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 518 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 519 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 520 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 521 } 522 523 /* Setup Management Vector - the last vector */ 524 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 525 pci_name(priv->pdev)); 526 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 527 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 528 if (err) { 529 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 530 goto abort_with_msix_enabled; 531 } 532 priv->irq_db_indices = 533 dma_alloc_coherent(&priv->pdev->dev, 534 priv->num_ntfy_blks * 535 sizeof(*priv->irq_db_indices), 536 &priv->irq_db_indices_bus, GFP_KERNEL); 537 if (!priv->irq_db_indices) { 538 err = -ENOMEM; 539 goto abort_with_mgmt_vector; 540 } 541 542 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 543 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 544 if (!priv->ntfy_blocks) { 545 err = -ENOMEM; 546 goto abort_with_irq_db_indices; 547 } 548 549 /* Setup the other blocks - the first n-1 vectors */ 550 node_mask = gve_get_node_mask(priv); 551 cur_cpu = cpumask_first(node_mask); 552 for (i = 0; i < priv->num_ntfy_blks; i++) { 553 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 554 int msix_idx = i; 555 556 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 557 i, pci_name(priv->pdev)); 558 block->priv = priv; 559 err = request_irq(priv->msix_vectors[msix_idx].vector, 560 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 561 IRQF_NO_AUTOEN, block->name, block); 562 if (err) { 563 dev_err(&priv->pdev->dev, 564 "Failed to receive msix vector %d\n", i); 565 goto abort_with_some_ntfy_blocks; 566 } 567 block->irq = priv->msix_vectors[msix_idx].vector; 568 irq_set_affinity_and_hint(block->irq, 569 cpumask_of(cur_cpu)); 570 block->irq_db_index = &priv->irq_db_indices[i].index; 571 572 cur_cpu = cpumask_next(cur_cpu, node_mask); 573 /* Wrap once CPUs in the node have been exhausted, or when 574 * starting RX queue affinities. TX and RX queues of the same 575 * index share affinity. 576 */ 577 if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) 578 cur_cpu = cpumask_first(node_mask); 579 } 580 return 0; 581 abort_with_some_ntfy_blocks: 582 for (j = 0; j < i; j++) { 583 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 584 int msix_idx = j; 585 586 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 587 NULL); 588 free_irq(priv->msix_vectors[msix_idx].vector, block); 589 block->irq = 0; 590 } 591 kvfree(priv->ntfy_blocks); 592 priv->ntfy_blocks = NULL; 593 abort_with_irq_db_indices: 594 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 595 sizeof(*priv->irq_db_indices), 596 priv->irq_db_indices, priv->irq_db_indices_bus); 597 priv->irq_db_indices = NULL; 598 abort_with_mgmt_vector: 599 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 600 abort_with_msix_enabled: 601 pci_disable_msix(priv->pdev); 602 abort_with_msix_vectors: 603 kvfree(priv->msix_vectors); 604 priv->msix_vectors = NULL; 605 return err; 606 } 607 608 static void gve_free_notify_blocks(struct gve_priv *priv) 609 { 610 int i; 611 612 if (!priv->msix_vectors) 613 return; 614 615 /* Free the irqs */ 616 for (i = 0; i < priv->num_ntfy_blks; i++) { 617 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 618 int msix_idx = i; 619 620 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 621 NULL); 622 free_irq(priv->msix_vectors[msix_idx].vector, block); 623 block->irq = 0; 624 } 625 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 626 kvfree(priv->ntfy_blocks); 627 priv->ntfy_blocks = NULL; 628 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 629 sizeof(*priv->irq_db_indices), 630 priv->irq_db_indices, priv->irq_db_indices_bus); 631 priv->irq_db_indices = NULL; 632 pci_disable_msix(priv->pdev); 633 kvfree(priv->msix_vectors); 634 priv->msix_vectors = NULL; 635 } 636 637 static int gve_setup_device_resources(struct gve_priv *priv) 638 { 639 int err; 640 641 err = gve_alloc_flow_rule_caches(priv); 642 if (err) 643 return err; 644 err = gve_alloc_rss_config_cache(priv); 645 if (err) 646 goto abort_with_flow_rule_caches; 647 err = gve_alloc_counter_array(priv); 648 if (err) 649 goto abort_with_rss_config_cache; 650 err = gve_alloc_notify_blocks(priv); 651 if (err) 652 goto abort_with_counter; 653 err = gve_alloc_stats_report(priv); 654 if (err) 655 goto abort_with_ntfy_blocks; 656 err = gve_adminq_configure_device_resources(priv, 657 priv->counter_array_bus, 658 priv->num_event_counters, 659 priv->irq_db_indices_bus, 660 priv->num_ntfy_blks); 661 if (unlikely(err)) { 662 dev_err(&priv->pdev->dev, 663 "could not setup device_resources: err=%d\n", err); 664 err = -ENXIO; 665 goto abort_with_stats_report; 666 } 667 668 if (!gve_is_gqi(priv)) { 669 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 670 GFP_KERNEL); 671 if (!priv->ptype_lut_dqo) { 672 err = -ENOMEM; 673 goto abort_with_stats_report; 674 } 675 err = gve_adminq_get_ptype_map_dqo(priv, 
priv->ptype_lut_dqo); 676 if (err) { 677 dev_err(&priv->pdev->dev, 678 "Failed to get ptype map: err=%d\n", err); 679 goto abort_with_ptype_lut; 680 } 681 } 682 683 if (priv->nic_timestamp_supported) { 684 err = gve_init_clock(priv); 685 if (err) { 686 dev_warn(&priv->pdev->dev, "Failed to init clock, continuing without PTP support"); 687 err = 0; 688 } 689 } 690 691 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 692 if (err) { 693 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 694 goto abort_with_clock; 695 } 696 697 err = gve_adminq_report_stats(priv, priv->stats_report_len, 698 priv->stats_report_bus, 699 GVE_STATS_REPORT_TIMER_PERIOD); 700 if (err) 701 dev_err(&priv->pdev->dev, 702 "Failed to report stats: err=%d\n", err); 703 gve_set_device_resources_ok(priv); 704 return 0; 705 706 abort_with_clock: 707 gve_teardown_clock(priv); 708 abort_with_ptype_lut: 709 kvfree(priv->ptype_lut_dqo); 710 priv->ptype_lut_dqo = NULL; 711 abort_with_stats_report: 712 gve_free_stats_report(priv); 713 abort_with_ntfy_blocks: 714 gve_free_notify_blocks(priv); 715 abort_with_counter: 716 gve_free_counter_array(priv); 717 abort_with_rss_config_cache: 718 gve_free_rss_config_cache(priv); 719 abort_with_flow_rule_caches: 720 gve_free_flow_rule_caches(priv); 721 722 return err; 723 } 724 725 static void gve_trigger_reset(struct gve_priv *priv); 726 727 static void gve_teardown_device_resources(struct gve_priv *priv) 728 { 729 int err; 730 731 /* Tell device its resources are being freed */ 732 if (gve_get_device_resources_ok(priv)) { 733 err = gve_flow_rules_reset(priv); 734 if (err) { 735 dev_err(&priv->pdev->dev, 736 "Failed to reset flow rules: err=%d\n", err); 737 gve_trigger_reset(priv); 738 } 739 /* detach the stats report */ 740 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 741 if (err) { 742 dev_err(&priv->pdev->dev, 743 "Failed to detach stats report: err=%d\n", err); 744 gve_trigger_reset(priv); 745 } 746 err = gve_adminq_deconfigure_device_resources(priv); 747 if (err) { 748 dev_err(&priv->pdev->dev, 749 "Could not deconfigure device resources: err=%d\n", 750 err); 751 gve_trigger_reset(priv); 752 } 753 } 754 755 kvfree(priv->ptype_lut_dqo); 756 priv->ptype_lut_dqo = NULL; 757 758 gve_free_flow_rule_caches(priv); 759 gve_free_rss_config_cache(priv); 760 gve_free_counter_array(priv); 761 gve_free_notify_blocks(priv); 762 gve_free_stats_report(priv); 763 gve_teardown_clock(priv); 764 gve_clear_device_resources_ok(priv); 765 } 766 767 static int gve_unregister_qpl(struct gve_priv *priv, 768 struct gve_queue_page_list *qpl) 769 { 770 int err; 771 772 if (!qpl) 773 return 0; 774 775 err = gve_adminq_unregister_page_list(priv, qpl->id); 776 if (err) { 777 netif_err(priv, drv, priv->dev, 778 "Failed to unregister queue page list %d\n", 779 qpl->id); 780 return err; 781 } 782 783 priv->num_registered_pages -= qpl->num_entries; 784 return 0; 785 } 786 787 static int gve_register_qpl(struct gve_priv *priv, 788 struct gve_queue_page_list *qpl) 789 { 790 int pages; 791 int err; 792 793 if (!qpl) 794 return 0; 795 796 pages = qpl->num_entries; 797 798 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 799 netif_err(priv, drv, priv->dev, 800 "Reached max number of registered pages %llu > %llu\n", 801 pages + priv->num_registered_pages, 802 priv->max_registered_pages); 803 return -EINVAL; 804 } 805 806 err = gve_adminq_register_page_list(priv, qpl); 807 if (err) { 808 netif_err(priv, drv, priv->dev, 809 "failed to register queue page list 
%d\n", 810 qpl->id); 811 return err; 812 } 813 814 priv->num_registered_pages += pages; 815 return 0; 816 } 817 818 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 819 { 820 struct gve_tx_ring *tx = &priv->tx[idx]; 821 822 if (gve_is_gqi(priv)) 823 return tx->tx_fifo.qpl; 824 else 825 return tx->dqo.qpl; 826 } 827 828 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 829 { 830 struct gve_rx_ring *rx = &priv->rx[idx]; 831 832 if (gve_is_gqi(priv)) 833 return rx->data.qpl; 834 else 835 return rx->dqo.qpl; 836 } 837 838 static int gve_register_qpls(struct gve_priv *priv) 839 { 840 int num_tx_qpls, num_rx_qpls; 841 int err; 842 int i; 843 844 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 845 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 846 847 for (i = 0; i < num_tx_qpls; i++) { 848 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 849 if (err) 850 return err; 851 } 852 853 for (i = 0; i < num_rx_qpls; i++) { 854 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 855 if (err) 856 return err; 857 } 858 859 return 0; 860 } 861 862 static int gve_unregister_qpls(struct gve_priv *priv) 863 { 864 int num_tx_qpls, num_rx_qpls; 865 int err; 866 int i; 867 868 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 869 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 870 871 for (i = 0; i < num_tx_qpls; i++) { 872 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 873 /* This failure will trigger a reset - no need to clean */ 874 if (err) 875 return err; 876 } 877 878 for (i = 0; i < num_rx_qpls; i++) { 879 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 880 /* This failure will trigger a reset - no need to clean */ 881 if (err) 882 return err; 883 } 884 return 0; 885 } 886 887 static int gve_create_rings(struct gve_priv *priv) 888 { 889 int num_tx_queues = gve_num_tx_queues(priv); 890 int err; 891 int i; 892 893 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 894 if (err) { 895 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 896 num_tx_queues); 897 /* This failure will trigger a reset - no need to clean 898 * up 899 */ 900 return err; 901 } 902 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 903 num_tx_queues); 904 905 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 906 if (err) { 907 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 908 priv->rx_cfg.num_queues); 909 /* This failure will trigger a reset - no need to clean 910 * up 911 */ 912 return err; 913 } 914 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 915 priv->rx_cfg.num_queues); 916 917 if (gve_is_gqi(priv)) { 918 /* Rx data ring has been prefilled with packet buffers at queue 919 * allocation time. 920 * 921 * Write the doorbell to provide descriptor slots and packet 922 * buffers to the NIC. 923 */ 924 for (i = 0; i < priv->rx_cfg.num_queues; i++) 925 gve_rx_write_doorbell(priv, &priv->rx[i]); 926 } else { 927 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 928 /* Post buffers and ring doorbell. 
*/ 929 gve_rx_post_buffers_dqo(&priv->rx[i]); 930 } 931 } 932 933 return 0; 934 } 935 936 static void init_xdp_sync_stats(struct gve_priv *priv) 937 { 938 int start_id = gve_xdp_tx_start_queue_id(priv); 939 int i; 940 941 /* Init stats */ 942 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 943 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 944 945 u64_stats_init(&priv->tx[i].statss); 946 priv->tx[i].ntfy_id = ntfy_idx; 947 } 948 } 949 950 static void gve_init_sync_stats(struct gve_priv *priv) 951 { 952 int i; 953 954 for (i = 0; i < priv->tx_cfg.num_queues; i++) 955 u64_stats_init(&priv->tx[i].statss); 956 957 /* Init stats for XDP TX queues */ 958 init_xdp_sync_stats(priv); 959 960 for (i = 0; i < priv->rx_cfg.num_queues; i++) 961 u64_stats_init(&priv->rx[i].statss); 962 } 963 964 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 965 struct gve_tx_alloc_rings_cfg *cfg) 966 { 967 cfg->qcfg = &priv->tx_cfg; 968 cfg->raw_addressing = !gve_is_qpl(priv); 969 cfg->ring_size = priv->tx_desc_cnt; 970 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 971 cfg->tx = priv->tx; 972 } 973 974 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 975 { 976 int i; 977 978 if (!priv->tx) 979 return; 980 981 for (i = 0; i < num_rings; i++) { 982 if (gve_is_gqi(priv)) 983 gve_tx_stop_ring_gqi(priv, i); 984 else 985 gve_tx_stop_ring_dqo(priv, i); 986 } 987 } 988 989 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 990 { 991 int i; 992 993 for (i = 0; i < num_rings; i++) { 994 if (gve_is_gqi(priv)) 995 gve_tx_start_ring_gqi(priv, i); 996 else 997 gve_tx_start_ring_dqo(priv, i); 998 } 999 } 1000 1001 static int gve_queues_mem_alloc(struct gve_priv *priv, 1002 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1003 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1004 { 1005 int err; 1006 1007 if (gve_is_gqi(priv)) 1008 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 1009 else 1010 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 1011 if (err) 1012 return err; 1013 1014 if (gve_is_gqi(priv)) 1015 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1016 else 1017 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1018 if (err) 1019 goto free_tx; 1020 1021 return 0; 1022 1023 free_tx: 1024 if (gve_is_gqi(priv)) 1025 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1026 else 1027 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1028 return err; 1029 } 1030 1031 static int gve_destroy_rings(struct gve_priv *priv) 1032 { 1033 int num_tx_queues = gve_num_tx_queues(priv); 1034 int err; 1035 1036 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1037 if (err) { 1038 netif_err(priv, drv, priv->dev, 1039 "failed to destroy tx queues\n"); 1040 /* This failure will trigger a reset - no need to clean up */ 1041 return err; 1042 } 1043 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1044 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1045 if (err) { 1046 netif_err(priv, drv, priv->dev, 1047 "failed to destroy rx queues\n"); 1048 /* This failure will trigger a reset - no need to clean up */ 1049 return err; 1050 } 1051 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1052 return 0; 1053 } 1054 1055 static void gve_queues_mem_free(struct gve_priv *priv, 1056 struct gve_tx_alloc_rings_cfg *tx_cfg, 1057 struct gve_rx_alloc_rings_cfg *rx_cfg) 1058 { 1059 if (gve_is_gqi(priv)) { 1060 gve_tx_free_rings_gqi(priv, tx_cfg); 1061 gve_rx_free_rings_gqi(priv, rx_cfg); 1062 } else { 1063 gve_tx_free_rings_dqo(priv, tx_cfg); 1064 
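		/* Note: for DQO the free helpers are expected to release the
		 * completion queues and buffer state along with the
		 * descriptor rings themselves.
		 */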
gve_rx_free_rings_dqo(priv, rx_cfg); 1065 } 1066 } 1067 1068 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1069 struct page **page, dma_addr_t *dma, 1070 enum dma_data_direction dir, gfp_t gfp_flags) 1071 { 1072 *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); 1073 if (!*page) { 1074 priv->page_alloc_fail++; 1075 return -ENOMEM; 1076 } 1077 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1078 if (dma_mapping_error(dev, *dma)) { 1079 priv->dma_mapping_error++; 1080 put_page(*page); 1081 return -ENOMEM; 1082 } 1083 return 0; 1084 } 1085 1086 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1087 u32 id, int pages) 1088 { 1089 struct gve_queue_page_list *qpl; 1090 int err; 1091 int i; 1092 1093 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1094 if (!qpl) 1095 return NULL; 1096 1097 qpl->id = id; 1098 qpl->num_entries = 0; 1099 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1100 if (!qpl->pages) 1101 goto abort; 1102 1103 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1104 if (!qpl->page_buses) 1105 goto abort; 1106 1107 for (i = 0; i < pages; i++) { 1108 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1109 &qpl->page_buses[i], 1110 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1111 if (err) 1112 goto abort; 1113 qpl->num_entries++; 1114 } 1115 1116 return qpl; 1117 1118 abort: 1119 gve_free_queue_page_list(priv, qpl, id); 1120 return NULL; 1121 } 1122 1123 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1124 enum dma_data_direction dir) 1125 { 1126 if (!dma_mapping_error(dev, dma)) 1127 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1128 if (page) 1129 put_page(page); 1130 } 1131 1132 void gve_free_queue_page_list(struct gve_priv *priv, 1133 struct gve_queue_page_list *qpl, 1134 u32 id) 1135 { 1136 int i; 1137 1138 if (!qpl) 1139 return; 1140 if (!qpl->pages) 1141 goto free_qpl; 1142 if (!qpl->page_buses) 1143 goto free_pages; 1144 1145 for (i = 0; i < qpl->num_entries; i++) 1146 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1147 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1148 1149 kvfree(qpl->page_buses); 1150 qpl->page_buses = NULL; 1151 free_pages: 1152 kvfree(qpl->pages); 1153 qpl->pages = NULL; 1154 free_qpl: 1155 kvfree(qpl); 1156 } 1157 1158 /* Use this to schedule a reset when the device is capable of continuing 1159 * to handle other requests in its current state. If it is not, do a reset 1160 * in thread instead. 
1161 */ 1162 void gve_schedule_reset(struct gve_priv *priv) 1163 { 1164 gve_set_do_reset(priv); 1165 queue_work(priv->gve_wq, &priv->service_task); 1166 } 1167 1168 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1169 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1170 static void gve_turndown(struct gve_priv *priv); 1171 static void gve_turnup(struct gve_priv *priv); 1172 1173 static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) 1174 { 1175 struct gve_rx_ring *rx; 1176 1177 if (!priv->rx) 1178 return; 1179 1180 rx = &priv->rx[qid]; 1181 rx->xsk_pool = NULL; 1182 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1183 xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); 1184 1185 if (!priv->tx) 1186 return; 1187 priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; 1188 } 1189 1190 static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, 1191 struct xsk_buff_pool *pool, u16 qid) 1192 { 1193 struct gve_rx_ring *rx; 1194 u16 tx_qid; 1195 int err; 1196 1197 rx = &priv->rx[qid]; 1198 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1199 MEM_TYPE_XSK_BUFF_POOL, pool); 1200 if (err) { 1201 gve_unreg_xsk_pool(priv, qid); 1202 return err; 1203 } 1204 1205 rx->xsk_pool = pool; 1206 1207 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1208 priv->tx[tx_qid].xsk_pool = pool; 1209 1210 return 0; 1211 } 1212 1213 static void gve_unreg_xdp_info(struct gve_priv *priv) 1214 { 1215 int i; 1216 1217 if (!priv->tx_cfg.num_xdp_queues || !priv->rx) 1218 return; 1219 1220 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1221 struct gve_rx_ring *rx = &priv->rx[i]; 1222 1223 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1224 xdp_rxq_info_unreg(&rx->xdp_rxq); 1225 1226 gve_unreg_xsk_pool(priv, i); 1227 } 1228 } 1229 1230 static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) 1231 { 1232 if (!test_bit(qid, priv->xsk_pools)) 1233 return NULL; 1234 1235 return xsk_get_pool_from_qid(priv->dev, qid); 1236 } 1237 1238 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1239 { 1240 struct napi_struct *napi; 1241 struct gve_rx_ring *rx; 1242 int err = 0; 1243 int i; 1244 1245 if (!priv->tx_cfg.num_xdp_queues) 1246 return 0; 1247 1248 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1249 struct xsk_buff_pool *xsk_pool; 1250 1251 rx = &priv->rx[i]; 1252 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1253 1254 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1255 napi->napi_id); 1256 if (err) 1257 goto err; 1258 1259 xsk_pool = gve_get_xsk_pool(priv, i); 1260 if (xsk_pool) 1261 err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); 1262 else if (gve_is_qpl(priv)) 1263 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1264 MEM_TYPE_PAGE_SHARED, 1265 NULL); 1266 else 1267 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1268 MEM_TYPE_PAGE_POOL, 1269 rx->dqo.page_pool); 1270 if (err) 1271 goto err; 1272 } 1273 return 0; 1274 1275 err: 1276 gve_unreg_xdp_info(priv); 1277 return err; 1278 } 1279 1280 1281 static void gve_drain_page_cache(struct gve_priv *priv) 1282 { 1283 int i; 1284 1285 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1286 page_frag_cache_drain(&priv->rx[i].page_cache); 1287 } 1288 1289 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1290 struct gve_rx_alloc_rings_cfg *cfg) 1291 { 1292 cfg->qcfg_rx = &priv->rx_cfg; 1293 cfg->qcfg_tx = &priv->tx_cfg; 1294 cfg->raw_addressing = !gve_is_qpl(priv); 1295 cfg->enable_header_split = priv->header_split_enabled; 1296 cfg->ring_size = priv->rx_desc_cnt; 1297 cfg->packet_buffer_size = 
priv->rx_cfg.packet_buffer_size; 1298 cfg->rx = priv->rx; 1299 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1300 } 1301 1302 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1303 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1304 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1305 { 1306 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1307 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1308 } 1309 1310 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1311 { 1312 if (gve_is_gqi(priv)) 1313 gve_rx_start_ring_gqi(priv, i); 1314 else 1315 gve_rx_start_ring_dqo(priv, i); 1316 } 1317 1318 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1319 { 1320 int i; 1321 1322 for (i = 0; i < num_rings; i++) 1323 gve_rx_start_ring(priv, i); 1324 } 1325 1326 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1327 { 1328 if (gve_is_gqi(priv)) 1329 gve_rx_stop_ring_gqi(priv, i); 1330 else 1331 gve_rx_stop_ring_dqo(priv, i); 1332 } 1333 1334 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1335 { 1336 int i; 1337 1338 if (!priv->rx) 1339 return; 1340 1341 for (i = 0; i < num_rings; i++) 1342 gve_rx_stop_ring(priv, i); 1343 } 1344 1345 static void gve_queues_mem_remove(struct gve_priv *priv) 1346 { 1347 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1348 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1349 1350 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1351 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1352 priv->tx = NULL; 1353 priv->rx = NULL; 1354 } 1355 1356 /* The passed-in queue memory is stored into priv and the queues are made live. 1357 * No memory is allocated. Passed-in memory is freed on errors. 1358 */ 1359 static int gve_queues_start(struct gve_priv *priv, 1360 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1361 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1362 { 1363 struct net_device *dev = priv->dev; 1364 int err; 1365 1366 /* Record new resources into priv */ 1367 priv->tx = tx_alloc_cfg->tx; 1368 priv->rx = rx_alloc_cfg->rx; 1369 1370 /* Record new configs into priv */ 1371 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1372 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1373 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1374 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1375 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1376 1377 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1378 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1379 gve_init_sync_stats(priv); 1380 1381 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1382 if (err) 1383 goto stop_and_free_rings; 1384 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1385 if (err) 1386 goto stop_and_free_rings; 1387 1388 err = gve_reg_xdp_info(priv, dev); 1389 if (err) 1390 goto stop_and_free_rings; 1391 1392 if (rx_alloc_cfg->reset_rss) { 1393 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1394 if (err) 1395 goto reset; 1396 } 1397 1398 err = gve_register_qpls(priv); 1399 if (err) 1400 goto reset; 1401 1402 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1403 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1404 1405 err = gve_create_rings(priv); 1406 if (err) 1407 goto reset; 1408 1409 gve_set_device_rings_ok(priv); 1410 1411 if (gve_get_report_stats(priv)) 1412 mod_timer(&priv->stats_report_timer, 1413 round_jiffies(jiffies + 1414 msecs_to_jiffies(priv->stats_report_timer_period))); 1415 1416 gve_turnup(priv); 1417 queue_work(priv->gve_wq, &priv->service_task); 1418 
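	/* The service task queued above should pick up any device status
	 * change (e.g. link state) that arrived while the queues were being
	 * brought up.
	 */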
priv->interface_up_cnt++; 1419 return 0; 1420 1421 reset: 1422 if (gve_get_reset_in_progress(priv)) 1423 goto stop_and_free_rings; 1424 gve_reset_and_teardown(priv, true); 1425 /* if this fails there is nothing we can do so just ignore the return */ 1426 gve_reset_recovery(priv, false); 1427 /* return the original error */ 1428 return err; 1429 stop_and_free_rings: 1430 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1431 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1432 gve_queues_mem_remove(priv); 1433 return err; 1434 } 1435 1436 static int gve_open(struct net_device *dev) 1437 { 1438 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1439 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1440 struct gve_priv *priv = netdev_priv(dev); 1441 int err; 1442 1443 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1444 1445 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1446 if (err) 1447 return err; 1448 1449 /* No need to free on error: ownership of resources is lost after 1450 * calling gve_queues_start. 1451 */ 1452 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1453 if (err) 1454 return err; 1455 1456 return 0; 1457 } 1458 1459 static int gve_queues_stop(struct gve_priv *priv) 1460 { 1461 int err; 1462 1463 netif_carrier_off(priv->dev); 1464 if (gve_get_device_rings_ok(priv)) { 1465 gve_turndown(priv); 1466 gve_drain_page_cache(priv); 1467 err = gve_destroy_rings(priv); 1468 if (err) 1469 goto err; 1470 err = gve_unregister_qpls(priv); 1471 if (err) 1472 goto err; 1473 gve_clear_device_rings_ok(priv); 1474 } 1475 timer_delete_sync(&priv->stats_report_timer); 1476 1477 gve_unreg_xdp_info(priv); 1478 1479 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1480 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1481 1482 priv->interface_down_cnt++; 1483 return 0; 1484 1485 err: 1486 /* This must have been called from a reset due to the rtnl lock 1487 * so just return at this point. 
1488 */ 1489 if (gve_get_reset_in_progress(priv)) 1490 return err; 1491 /* Otherwise reset before returning */ 1492 gve_reset_and_teardown(priv, true); 1493 return gve_reset_recovery(priv, false); 1494 } 1495 1496 static int gve_close(struct net_device *dev) 1497 { 1498 struct gve_priv *priv = netdev_priv(dev); 1499 int err; 1500 1501 err = gve_queues_stop(priv); 1502 if (err) 1503 return err; 1504 1505 gve_queues_mem_remove(priv); 1506 return 0; 1507 } 1508 1509 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1510 { 1511 if (!gve_get_napi_enabled(priv)) 1512 return; 1513 1514 if (link_status == netif_carrier_ok(priv->dev)) 1515 return; 1516 1517 if (link_status) { 1518 netdev_info(priv->dev, "Device link is up.\n"); 1519 netif_carrier_on(priv->dev); 1520 } else { 1521 netdev_info(priv->dev, "Device link is down.\n"); 1522 netif_carrier_off(priv->dev); 1523 } 1524 } 1525 1526 static int gve_configure_rings_xdp(struct gve_priv *priv, 1527 u16 num_xdp_rings) 1528 { 1529 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1530 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1531 1532 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1533 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1534 1535 rx_alloc_cfg.xdp = !!num_xdp_rings; 1536 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1537 } 1538 1539 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1540 struct netlink_ext_ack *extack) 1541 { 1542 struct bpf_prog *old_prog; 1543 int err = 0; 1544 u32 status; 1545 1546 old_prog = READ_ONCE(priv->xdp_prog); 1547 if (!netif_running(priv->dev)) { 1548 WRITE_ONCE(priv->xdp_prog, prog); 1549 if (old_prog) 1550 bpf_prog_put(old_prog); 1551 1552 /* Update priv XDP queue configuration */ 1553 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 1554 priv->rx_cfg.num_queues : 0; 1555 return 0; 1556 } 1557 1558 if (!old_prog && prog) 1559 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1560 else if (old_prog && !prog) 1561 err = gve_configure_rings_xdp(priv, 0); 1562 1563 if (err) 1564 goto out; 1565 1566 WRITE_ONCE(priv->xdp_prog, prog); 1567 if (old_prog) 1568 bpf_prog_put(old_prog); 1569 1570 out: 1571 status = ioread32be(&priv->reg_bar0->device_status); 1572 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1573 return err; 1574 } 1575 1576 static int gve_xdp_xmit(struct net_device *dev, int n, 1577 struct xdp_frame **frames, u32 flags) 1578 { 1579 struct gve_priv *priv = netdev_priv(dev); 1580 1581 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1582 return gve_xdp_xmit_gqi(dev, n, frames, flags); 1583 else if (priv->queue_format == GVE_DQO_RDA_FORMAT) 1584 return gve_xdp_xmit_dqo(dev, n, frames, flags); 1585 1586 return -EOPNOTSUPP; 1587 } 1588 1589 static int gve_xsk_pool_enable(struct net_device *dev, 1590 struct xsk_buff_pool *pool, 1591 u16 qid) 1592 { 1593 struct gve_priv *priv = netdev_priv(dev); 1594 int err; 1595 1596 if (qid >= priv->rx_cfg.num_queues) { 1597 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1598 return -EINVAL; 1599 } 1600 if (xsk_pool_get_rx_frame_size(pool) < 1601 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1602 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1603 return -EINVAL; 1604 } 1605 1606 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1607 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1608 if (err) 1609 return err; 1610 1611 set_bit(qid, priv->xsk_pools); 1612 1613 /* If XDP prog is not installed or interface is down, return. 
*/ 1614 if (!priv->xdp_prog || !netif_running(dev)) 1615 return 0; 1616 1617 err = gve_reg_xsk_pool(priv, dev, pool, qid); 1618 if (err) 1619 goto err_xsk_pool_dma_mapped; 1620 1621 /* Stop and start RDA queues to repost buffers. */ 1622 if (!gve_is_qpl(priv)) { 1623 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1624 if (err) 1625 goto err_xsk_pool_registered; 1626 } 1627 return 0; 1628 1629 err_xsk_pool_registered: 1630 gve_unreg_xsk_pool(priv, qid); 1631 err_xsk_pool_dma_mapped: 1632 clear_bit(qid, priv->xsk_pools); 1633 xsk_pool_dma_unmap(pool, 1634 DMA_ATTR_SKIP_CPU_SYNC | 1635 DMA_ATTR_WEAK_ORDERING); 1636 return err; 1637 } 1638 1639 static int gve_xsk_pool_disable(struct net_device *dev, 1640 u16 qid) 1641 { 1642 struct gve_priv *priv = netdev_priv(dev); 1643 struct napi_struct *napi_rx; 1644 struct napi_struct *napi_tx; 1645 struct xsk_buff_pool *pool; 1646 int tx_qid; 1647 int err; 1648 1649 if (qid >= priv->rx_cfg.num_queues) 1650 return -EINVAL; 1651 1652 clear_bit(qid, priv->xsk_pools); 1653 1654 pool = xsk_get_pool_from_qid(dev, qid); 1655 if (pool) 1656 xsk_pool_dma_unmap(pool, 1657 DMA_ATTR_SKIP_CPU_SYNC | 1658 DMA_ATTR_WEAK_ORDERING); 1659 1660 if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) 1661 return 0; 1662 1663 /* Stop and start RDA queues to repost buffers. */ 1664 if (!gve_is_qpl(priv) && priv->xdp_prog) { 1665 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1666 if (err) 1667 return err; 1668 } 1669 1670 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1671 napi_disable(napi_rx); /* make sure current rx poll is done */ 1672 1673 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1674 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1675 napi_disable(napi_tx); /* make sure current tx poll is done */ 1676 1677 gve_unreg_xsk_pool(priv, qid); 1678 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1679 1680 napi_enable(napi_rx); 1681 napi_enable(napi_tx); 1682 if (gve_is_gqi(priv)) { 1683 if (gve_rx_work_pending(&priv->rx[qid])) 1684 napi_schedule(napi_rx); 1685 1686 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1687 napi_schedule(napi_tx); 1688 } 1689 1690 return 0; 1691 } 1692 1693 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1694 { 1695 struct gve_priv *priv = netdev_priv(dev); 1696 struct napi_struct *napi; 1697 1698 if (!gve_get_napi_enabled(priv)) 1699 return -ENETDOWN; 1700 1701 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1702 return -EINVAL; 1703 1704 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1705 if (!napi_if_scheduled_mark_missed(napi)) { 1706 /* Call local_bh_enable to trigger SoftIRQ processing */ 1707 local_bh_disable(); 1708 napi_schedule(napi); 1709 local_bh_enable(); 1710 } 1711 1712 return 0; 1713 } 1714 1715 static int gve_verify_xdp_configuration(struct net_device *dev, 1716 struct netlink_ext_ack *extack) 1717 { 1718 struct gve_priv *priv = netdev_priv(dev); 1719 u16 max_xdp_mtu; 1720 1721 if (dev->features & NETIF_F_LRO) { 1722 NL_SET_ERR_MSG_MOD(extack, 1723 "XDP is not supported when LRO is on."); 1724 return -EOPNOTSUPP; 1725 } 1726 1727 if (priv->header_split_enabled) { 1728 NL_SET_ERR_MSG_MOD(extack, 1729 "XDP is not supported when header-data split is enabled."); 1730 return -EOPNOTSUPP; 1731 } 1732 1733 if (priv->rx_cfg.packet_buffer_size != SZ_2K) { 1734 NL_SET_ERR_MSG_FMT_MOD(extack, 1735 "XDP is not supported for Rx buf len %d, only %d supported.", 1736 priv->rx_cfg.packet_buffer_size, SZ_2K); 1737 
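		/* This mirrors gve_set_rx_buf_len_config() below, which also
		 * restricts the Rx buffer length to 2048 while an XDP program
		 * is attached.
		 */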
return -EOPNOTSUPP; 1738 } 1739 1740 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1741 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1742 max_xdp_mtu -= GVE_RX_PAD; 1743 1744 if (dev->mtu > max_xdp_mtu) { 1745 NL_SET_ERR_MSG_FMT_MOD(extack, 1746 "XDP is not supported for mtu %d.", 1747 dev->mtu); 1748 return -EOPNOTSUPP; 1749 } 1750 1751 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1752 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1753 netdev_warn(dev, 1754 "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d.", 1755 priv->rx_cfg.num_queues, priv->tx_cfg.num_queues, 1756 priv->tx_cfg.max_queues); 1757 NL_SET_ERR_MSG_MOD(extack, 1758 "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); 1759 return -EINVAL; 1760 } 1761 return 0; 1762 } 1763 1764 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1765 { 1766 struct gve_priv *priv = netdev_priv(dev); 1767 int err; 1768 1769 err = gve_verify_xdp_configuration(dev, xdp->extack); 1770 if (err) 1771 return err; 1772 switch (xdp->command) { 1773 case XDP_SETUP_PROG: 1774 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1775 case XDP_SETUP_XSK_POOL: 1776 if (xdp->xsk.pool) 1777 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1778 else 1779 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1780 default: 1781 return -EINVAL; 1782 } 1783 } 1784 1785 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1786 { 1787 struct gve_rss_config *rss_config = &priv->rss_config; 1788 struct ethtool_rxfh_param rxfh = {0}; 1789 u16 i; 1790 1791 if (!priv->cache_rss_config) 1792 return 0; 1793 1794 for (i = 0; i < priv->rss_lut_size; i++) 1795 rss_config->hash_lut[i] = 1796 ethtool_rxfh_indir_default(i, num_queues); 1797 1798 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1799 1800 rxfh.hfunc = ETH_RSS_HASH_TOP; 1801 1802 return gve_adminq_configure_rss(priv, &rxfh); 1803 } 1804 1805 int gve_flow_rules_reset(struct gve_priv *priv) 1806 { 1807 if (!priv->max_flow_rules) 1808 return 0; 1809 1810 return gve_adminq_reset_flow_rules(priv); 1811 } 1812 1813 int gve_adjust_config(struct gve_priv *priv, 1814 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1815 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1816 { 1817 int err; 1818 1819 /* Allocate resources for the new configuration */ 1820 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1821 if (err) { 1822 netif_err(priv, drv, priv->dev, 1823 "Adjust config failed to alloc new queues"); 1824 return err; 1825 } 1826 1827 /* Teardown the device and free existing resources */ 1828 err = gve_close(priv->dev); 1829 if (err) { 1830 netif_err(priv, drv, priv->dev, 1831 "Adjust config failed to close old queues"); 1832 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1833 return err; 1834 } 1835 1836 /* Bring the device back up again with the new resources. */ 1837 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1838 if (err) { 1839 netif_err(priv, drv, priv->dev, 1840 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1841 /* No need to free on error: ownership of resources is lost after 1842 * calling gve_queues_start. 
1843 */ 1844 gve_turndown(priv); 1845 return err; 1846 } 1847 1848 return 0; 1849 } 1850 1851 int gve_adjust_queues(struct gve_priv *priv, 1852 struct gve_rx_queue_config new_rx_config, 1853 struct gve_tx_queue_config new_tx_config, 1854 bool reset_rss) 1855 { 1856 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1857 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1858 int err; 1859 1860 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1861 1862 /* Relay the new config from ethtool */ 1863 tx_alloc_cfg.qcfg = &new_tx_config; 1864 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1865 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1866 rx_alloc_cfg.reset_rss = reset_rss; 1867 1868 if (netif_running(priv->dev)) { 1869 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1870 return err; 1871 } 1872 /* Set the config for the next up. */ 1873 if (reset_rss) { 1874 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1875 if (err) 1876 return err; 1877 } 1878 priv->tx_cfg = new_tx_config; 1879 priv->rx_cfg = new_rx_config; 1880 1881 return 0; 1882 } 1883 1884 static void gve_turndown(struct gve_priv *priv) 1885 { 1886 int idx; 1887 1888 if (netif_carrier_ok(priv->dev)) 1889 netif_carrier_off(priv->dev); 1890 1891 if (!gve_get_napi_enabled(priv)) 1892 return; 1893 1894 /* Disable napi to prevent more work from coming in */ 1895 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1896 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1897 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1898 1899 if (!gve_tx_was_added_to_block(priv, idx)) 1900 continue; 1901 1902 if (idx < priv->tx_cfg.num_queues) 1903 netif_queue_set_napi(priv->dev, idx, 1904 NETDEV_QUEUE_TYPE_TX, NULL); 1905 1906 napi_disable_locked(&block->napi); 1907 } 1908 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1909 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1910 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1911 1912 if (!gve_rx_was_added_to_block(priv, idx)) 1913 continue; 1914 1915 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1916 NULL); 1917 napi_disable_locked(&block->napi); 1918 } 1919 1920 /* Stop tx queues */ 1921 netif_tx_disable(priv->dev); 1922 1923 xdp_features_clear_redirect_target_locked(priv->dev); 1924 1925 gve_clear_napi_enabled(priv); 1926 gve_clear_report_stats(priv); 1927 1928 /* Make sure that all traffic is finished processing. */ 1929 synchronize_net(); 1930 } 1931 1932 static void gve_turnup(struct gve_priv *priv) 1933 { 1934 int idx; 1935 1936 /* Start the tx queues */ 1937 netif_tx_start_all_queues(priv->dev); 1938 1939 /* Enable napi and unmask interrupts for all queues */ 1940 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1941 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1942 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1943 1944 if (!gve_tx_was_added_to_block(priv, idx)) 1945 continue; 1946 1947 napi_enable_locked(&block->napi); 1948 1949 if (idx < priv->tx_cfg.num_queues) 1950 netif_queue_set_napi(priv->dev, idx, 1951 NETDEV_QUEUE_TYPE_TX, 1952 &block->napi); 1953 1954 if (gve_is_gqi(priv)) { 1955 iowrite32be(0, gve_irq_doorbell(priv, block)); 1956 } else { 1957 gve_set_itr_coalesce_usecs_dqo(priv, block, 1958 priv->tx_coalesce_usecs); 1959 } 1960 1961 /* Any descs written by the NIC before this barrier will be 1962 * handled by the one-off napi schedule below. Whereas any 1963 * descs after the barrier will generate interrupts. 
1964 */ 1965 mb(); 1966 napi_schedule(&block->napi); 1967 } 1968 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1969 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1970 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1971 1972 if (!gve_rx_was_added_to_block(priv, idx)) 1973 continue; 1974 1975 napi_enable_locked(&block->napi); 1976 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1977 &block->napi); 1978 1979 if (gve_is_gqi(priv)) { 1980 iowrite32be(0, gve_irq_doorbell(priv, block)); 1981 } else { 1982 gve_set_itr_coalesce_usecs_dqo(priv, block, 1983 priv->rx_coalesce_usecs); 1984 } 1985 1986 /* Any descs written by the NIC before this barrier will be 1987 * handled by the one-off napi schedule below. Whereas any 1988 * descs after the barrier will generate interrupts. 1989 */ 1990 mb(); 1991 napi_schedule(&block->napi); 1992 } 1993 1994 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1995 xdp_features_set_redirect_target_locked(priv->dev, false); 1996 1997 gve_set_napi_enabled(priv); 1998 } 1999 2000 static void gve_turnup_and_check_status(struct gve_priv *priv) 2001 { 2002 u32 status; 2003 2004 gve_turnup(priv); 2005 status = ioread32be(&priv->reg_bar0->device_status); 2006 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2007 } 2008 2009 static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, 2010 unsigned int txqueue) 2011 { 2012 u32 ntfy_idx; 2013 2014 if (txqueue > priv->tx_cfg.num_queues) 2015 return NULL; 2016 2017 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 2018 if (ntfy_idx >= priv->num_ntfy_blks) 2019 return NULL; 2020 2021 return &priv->ntfy_blocks[ntfy_idx]; 2022 } 2023 2024 static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, 2025 unsigned int txqueue) 2026 { 2027 struct gve_notify_block *block; 2028 u32 current_time; 2029 2030 block = gve_get_tx_notify_block(priv, txqueue); 2031 2032 if (!block) 2033 return false; 2034 2035 current_time = jiffies_to_msecs(jiffies); 2036 if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2037 return false; 2038 2039 netdev_info(priv->dev, "Kicking queue %d", txqueue); 2040 napi_schedule(&block->napi); 2041 block->tx->last_kick_msec = current_time; 2042 return true; 2043 } 2044 2045 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 2046 { 2047 struct gve_notify_block *block; 2048 struct gve_priv *priv; 2049 2050 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 2051 priv = netdev_priv(dev); 2052 2053 if (!gve_tx_timeout_try_q_kick(priv, txqueue)) 2054 gve_schedule_reset(priv); 2055 2056 block = gve_get_tx_notify_block(priv, txqueue); 2057 if (block) 2058 block->tx->queue_timeout++; 2059 priv->tx_timeo_cnt++; 2060 } 2061 2062 /* Header split is only supported on DQ RDA queue format. If XDP is enabled, 2063 * header split is not allowed. 
 */
bool gve_header_split_supported(const struct gve_priv *priv)
{
	return priv->header_buf_size &&
		priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog;
}

int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len,
			      struct netlink_ext_ack *extack,
			      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	u32 old_rx_buf_len = rx_alloc_cfg->packet_buffer_size;

	if (rx_buf_len == old_rx_buf_len)
		return 0;

	/* device options may not always contain support for 4K buffers */
	if (!gve_is_dqo(priv) || priv->max_rx_buffer_size < SZ_4K) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Modifying Rx buf len is not supported");
		return -EOPNOTSUPP;
	}

	if (priv->xdp_prog && rx_buf_len != SZ_2K) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rx buf len can only be 2048 when XDP is on");
		return -EINVAL;
	}

	if (rx_buf_len != SZ_2K && rx_buf_len != SZ_4K) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rx buf len can only be 2048 or 4096");
		return -EINVAL;
	}
	rx_alloc_cfg->packet_buffer_size = rx_buf_len;

	return 0;
}

int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split,
			  struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	bool enable_hdr_split;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	rx_alloc_cfg->enable_header_split = enable_hdr_split;

	return 0;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) {
			netdev_warn(netdev,
				    "XDP is not supported when LRO is on.\n");
			err = -EOPNOTSUPP;
			goto revert_features;
		}
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

static int gve_get_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config)
{
	struct gve_priv *priv = netdev_priv(dev);

	*kernel_config = priv->ts_config;
	return 0;
}

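/* ndo_hwtstamp_set handler. TX timestamping is always rejected; RX
 * timestamping is honoured only when the device clock is enabled, in which
 * case any requested filter is widened to HWTSTAMP_FILTER_ALL.
 */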
static int gve_set_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config,
			     struct netlink_ext_ack *extack)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (kernel_config->tx_type != HWTSTAMP_TX_OFF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TX timestamping is not supported");
		return -ERANGE;
	}

	if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) {
		if (!gve_is_clock_enabled(priv)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "RX timestamping is not supported");
			kernel_config->rx_filter = HWTSTAMP_FILTER_NONE;
			return -EOPNOTSUPP;
		}

		kernel_config->rx_filter = HWTSTAMP_FILTER_ALL;
	}

	priv->ts_config.rx_filter = kernel_config->rx_filter;

	return 0;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
	.ndo_hwtstamp_get = gve_get_ts_config,
	.ndo_hwtstamp_set = gve_set_ts_config,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		netdev_lock(priv->dev);
		gve_reset(priv, false);
		netdev_unlock(priv->dev);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics.
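			 * They keep their zero initialization above, so the
			 * report carries zeros for DQO queues.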
			 */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag_locked(priv->dev, xdp_features);
}

static const struct xdp_metadata_ops gve_xdp_metadata_ops = {
	.xmo_rx_timestamp = gve_xdp_rx_timestamp,
};

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;
	priv->numa_node = dev_to_node(&priv->pdev->dev);

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}
	priv->tx_cfg.num_xdp_queues = 0;

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

	priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
	priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;

setup_device:
	priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
	if (!priv->xsk_pools) {
		err = -ENOMEM;
		goto err;
	}

	gve_set_netdev_xdp_features(priv);
	if (!gve_is_gqi(priv))
		priv->dev->xdp_metadata_ops = &gve_xdp_metadata_ops;

	err = gve_setup_device_resources(priv);
	if (err)
		goto err_free_xsk_bitmap;

	return 0;

err_free_xsk_bitmap:
	bitmap_free(priv->xsk_pools);
	priv->xsk_pools = NULL;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
	bitmap_free(priv->xsk_pools);
	priv->xsk_pools = NULL;
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

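/* Force a device reset by releasing the admin queue, then close the interface
 * (if it was up) and free the remaining private resources.
 */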
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
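	/* Per-queue restart (queue management API) and per-queue statistics */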
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
		dev->netmem_tx = true;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	netif_device_detach(netdev);

	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	netdev_unlock(netdev);
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	netdev_unlock(netdev);
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	netdev_unlock(netdev);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);
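
/* Usage sketch (not part of the driver; the interface name is only an
 * example):
 *
 *	modprobe gve		# binds to the gVNIC PCI function
 *	ethtool -i eth0		# reports driver: gve
 */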