1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2024 Google LLC 5 */ 6 7 #include <linux/bitmap.h> 8 #include <linux/bpf.h> 9 #include <linux/cpumask.h> 10 #include <linux/etherdevice.h> 11 #include <linux/filter.h> 12 #include <linux/interrupt.h> 13 #include <linux/irq.h> 14 #include <linux/module.h> 15 #include <linux/pci.h> 16 #include <linux/sched.h> 17 #include <linux/timer.h> 18 #include <linux/workqueue.h> 19 #include <linux/utsname.h> 20 #include <linux/version.h> 21 #include <net/netdev_queues.h> 22 #include <net/sch_generic.h> 23 #include <net/xdp_sock_drv.h> 24 #include "gve.h" 25 #include "gve_dqo.h" 26 #include "gve_adminq.h" 27 #include "gve_register.h" 28 #include "gve_utils.h" 29 30 #define GVE_DEFAULT_RX_COPYBREAK (256) 31 32 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 33 #define GVE_VERSION "1.0.0" 34 #define GVE_VERSION_PREFIX "GVE-" 35 36 // Minimum amount of time between queue kicks in msec (10 seconds) 37 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 38 39 char gve_driver_name[] = "gve"; 40 const char gve_version_str[] = GVE_VERSION; 41 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 42 43 static int gve_verify_driver_compatibility(struct gve_priv *priv) 44 { 45 int err; 46 struct gve_driver_info *driver_info; 47 dma_addr_t driver_info_bus; 48 49 driver_info = dma_alloc_coherent(&priv->pdev->dev, 50 sizeof(struct gve_driver_info), 51 &driver_info_bus, GFP_KERNEL); 52 if (!driver_info) 53 return -ENOMEM; 54 55 *driver_info = (struct gve_driver_info) { 56 .os_type = 1, /* Linux */ 57 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 58 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 59 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 60 .driver_capability_flags = { 61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 64 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 65 }, 66 }; 67 strscpy(driver_info->os_version_str1, utsname()->release, 68 sizeof(driver_info->os_version_str1)); 69 strscpy(driver_info->os_version_str2, utsname()->version, 70 sizeof(driver_info->os_version_str2)); 71 72 err = gve_adminq_verify_driver_compatibility(priv, 73 sizeof(struct gve_driver_info), 74 driver_info_bus); 75 76 /* It's ok if the device doesn't support this */ 77 if (err == -EOPNOTSUPP) 78 err = 0; 79 80 dma_free_coherent(&priv->pdev->dev, 81 sizeof(struct gve_driver_info), 82 driver_info, driver_info_bus); 83 return err; 84 } 85 86 static netdev_features_t gve_features_check(struct sk_buff *skb, 87 struct net_device *dev, 88 netdev_features_t features) 89 { 90 struct gve_priv *priv = netdev_priv(dev); 91 92 if (!gve_is_gqi(priv)) 93 return gve_features_check_dqo(skb, dev, features); 94 95 return features; 96 } 97 98 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 99 { 100 struct gve_priv *priv = netdev_priv(dev); 101 102 if (gve_is_gqi(priv)) 103 return gve_tx(skb, dev); 104 else 105 return gve_tx_dqo(skb, dev); 106 } 107 108 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 109 { 110 struct gve_priv *priv = netdev_priv(dev); 111 unsigned int start; 112 u64 packets, bytes; 113 int num_tx_queues; 114 int ring; 115 116 num_tx_queues = gve_num_tx_queues(priv); 117 if (priv->rx) { 118 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 119 do { 120 start = 121 u64_stats_fetch_begin(&priv->rx[ring].statss); 122 packets = 
priv->rx[ring].rpackets; 123 bytes = priv->rx[ring].rbytes; 124 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 125 start)); 126 s->rx_packets += packets; 127 s->rx_bytes += bytes; 128 } 129 } 130 if (priv->tx) { 131 for (ring = 0; ring < num_tx_queues; ring++) { 132 do { 133 start = 134 u64_stats_fetch_begin(&priv->tx[ring].statss); 135 packets = priv->tx[ring].pkt_done; 136 bytes = priv->tx[ring].bytes_done; 137 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 138 start)); 139 s->tx_packets += packets; 140 s->tx_bytes += bytes; 141 } 142 } 143 } 144 145 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 146 { 147 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 148 int err = 0; 149 150 if (!priv->max_flow_rules) 151 return 0; 152 153 flow_rules_cache->rules_cache = 154 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 155 GFP_KERNEL); 156 if (!flow_rules_cache->rules_cache) { 157 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 158 return -ENOMEM; 159 } 160 161 flow_rules_cache->rule_ids_cache = 162 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 163 GFP_KERNEL); 164 if (!flow_rules_cache->rule_ids_cache) { 165 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 166 err = -ENOMEM; 167 goto free_rules_cache; 168 } 169 170 return 0; 171 172 free_rules_cache: 173 kvfree(flow_rules_cache->rules_cache); 174 flow_rules_cache->rules_cache = NULL; 175 return err; 176 } 177 178 static void gve_free_flow_rule_caches(struct gve_priv *priv) 179 { 180 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 181 182 kvfree(flow_rules_cache->rule_ids_cache); 183 flow_rules_cache->rule_ids_cache = NULL; 184 kvfree(flow_rules_cache->rules_cache); 185 flow_rules_cache->rules_cache = NULL; 186 } 187 188 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 189 { 190 struct gve_rss_config *rss_config = &priv->rss_config; 191 192 if (!priv->cache_rss_config) 193 return 0; 194 195 rss_config->hash_key = kcalloc(priv->rss_key_size, 196 sizeof(rss_config->hash_key[0]), 197 GFP_KERNEL); 198 if (!rss_config->hash_key) 199 return -ENOMEM; 200 201 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 202 sizeof(rss_config->hash_lut[0]), 203 GFP_KERNEL); 204 if (!rss_config->hash_lut) 205 goto free_rss_key_cache; 206 207 return 0; 208 209 free_rss_key_cache: 210 kfree(rss_config->hash_key); 211 rss_config->hash_key = NULL; 212 return -ENOMEM; 213 } 214 215 static void gve_free_rss_config_cache(struct gve_priv *priv) 216 { 217 struct gve_rss_config *rss_config = &priv->rss_config; 218 219 kfree(rss_config->hash_key); 220 kfree(rss_config->hash_lut); 221 222 memset(rss_config, 0, sizeof(*rss_config)); 223 } 224 225 static int gve_alloc_counter_array(struct gve_priv *priv) 226 { 227 priv->counter_array = 228 dma_alloc_coherent(&priv->pdev->dev, 229 priv->num_event_counters * 230 sizeof(*priv->counter_array), 231 &priv->counter_array_bus, GFP_KERNEL); 232 if (!priv->counter_array) 233 return -ENOMEM; 234 235 return 0; 236 } 237 238 static void gve_free_counter_array(struct gve_priv *priv) 239 { 240 if (!priv->counter_array) 241 return; 242 243 dma_free_coherent(&priv->pdev->dev, 244 priv->num_event_counters * 245 sizeof(*priv->counter_array), 246 priv->counter_array, priv->counter_array_bus); 247 priv->counter_array = NULL; 248 } 249 250 /* NIC requests to report stats */ 251 static void gve_stats_report_task(struct work_struct *work) 252 { 253 struct gve_priv *priv = 
container_of(work, struct gve_priv, 254 stats_report_task); 255 if (gve_get_do_report_stats(priv)) { 256 gve_handle_report_stats(priv); 257 gve_clear_do_report_stats(priv); 258 } 259 } 260 261 static void gve_stats_report_schedule(struct gve_priv *priv) 262 { 263 if (!gve_get_probe_in_progress(priv) && 264 !gve_get_reset_in_progress(priv)) { 265 gve_set_do_report_stats(priv); 266 queue_work(priv->gve_wq, &priv->stats_report_task); 267 } 268 } 269 270 static void gve_stats_report_timer(struct timer_list *t) 271 { 272 struct gve_priv *priv = timer_container_of(priv, t, 273 stats_report_timer); 274 275 mod_timer(&priv->stats_report_timer, 276 round_jiffies(jiffies + 277 msecs_to_jiffies(priv->stats_report_timer_period))); 278 gve_stats_report_schedule(priv); 279 } 280 281 static int gve_alloc_stats_report(struct gve_priv *priv) 282 { 283 int tx_stats_num, rx_stats_num; 284 285 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 286 gve_num_tx_queues(priv); 287 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 288 priv->rx_cfg.num_queues; 289 priv->stats_report_len = struct_size(priv->stats_report, stats, 290 size_add(tx_stats_num, rx_stats_num)); 291 priv->stats_report = 292 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 293 &priv->stats_report_bus, GFP_KERNEL); 294 if (!priv->stats_report) 295 return -ENOMEM; 296 /* Set up timer for the report-stats task */ 297 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 298 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 299 return 0; 300 } 301 302 static void gve_free_stats_report(struct gve_priv *priv) 303 { 304 if (!priv->stats_report) 305 return; 306 307 timer_delete_sync(&priv->stats_report_timer); 308 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 309 priv->stats_report, priv->stats_report_bus); 310 priv->stats_report = NULL; 311 } 312 313 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 314 { 315 struct gve_priv *priv = arg; 316 317 queue_work(priv->gve_wq, &priv->service_task); 318 return IRQ_HANDLED; 319 } 320 321 static irqreturn_t gve_intr(int irq, void *arg) 322 { 323 struct gve_notify_block *block = arg; 324 struct gve_priv *priv = block->priv; 325 326 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 327 napi_schedule_irqoff(&block->napi); 328 return IRQ_HANDLED; 329 } 330 331 static irqreturn_t gve_intr_dqo(int irq, void *arg) 332 { 333 struct gve_notify_block *block = arg; 334 335 /* Interrupts are automatically masked */ 336 napi_schedule_irqoff(&block->napi); 337 return IRQ_HANDLED; 338 } 339 340 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 341 { 342 int cpu_curr = smp_processor_id(); 343 const struct cpumask *aff_mask; 344 345 aff_mask = irq_get_effective_affinity_mask(irq); 346 if (unlikely(!aff_mask)) 347 return 1; 348 349 return cpumask_test_cpu(cpu_curr, aff_mask); 350 } 351 352 int gve_napi_poll(struct napi_struct *napi, int budget) 353 { 354 struct gve_notify_block *block; 355 __be32 __iomem *irq_doorbell; 356 bool reschedule = false; 357 struct gve_priv *priv; 358 int work_done = 0; 359 360 block = container_of(napi, struct gve_notify_block, napi); 361 priv = block->priv; 362 363 if (block->tx) { 364 if (block->tx->q_num < priv->tx_cfg.num_queues) 365 reschedule |= gve_tx_poll(block, budget); 366 else if (budget) 367 reschedule |= gve_xdp_poll(block, budget); 368 } 369 370 if (!budget) 371 return 0; 372 373 if (block->rx) { 374 work_done = gve_rx_poll(block, budget); 375 376 /* Poll XSK TX as 
part of RX NAPI. Setup re-poll based on max of 377 * TX and RX work done. 378 */ 379 if (priv->xdp_prog) 380 work_done = max_t(int, work_done, 381 gve_xsk_tx_poll(block, budget)); 382 383 reschedule |= work_done == budget; 384 } 385 386 if (reschedule) 387 return budget; 388 389 /* Complete processing - don't unmask irq if busy polling is enabled */ 390 if (likely(napi_complete_done(napi, work_done))) { 391 irq_doorbell = gve_irq_doorbell(priv, block); 392 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 393 394 /* Ensure IRQ ACK is visible before we check pending work. 395 * If queue had issued updates, it would be truly visible. 396 */ 397 mb(); 398 399 if (block->tx) 400 reschedule |= gve_tx_clean_pending(priv, block->tx); 401 if (block->rx) 402 reschedule |= gve_rx_work_pending(block->rx); 403 404 if (reschedule && napi_schedule(napi)) 405 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 406 } 407 return work_done; 408 } 409 410 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 411 { 412 struct gve_notify_block *block = 413 container_of(napi, struct gve_notify_block, napi); 414 struct gve_priv *priv = block->priv; 415 bool reschedule = false; 416 int work_done = 0; 417 418 if (block->tx) { 419 if (block->tx->q_num < priv->tx_cfg.num_queues) 420 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 421 else 422 reschedule |= gve_xdp_poll_dqo(block); 423 } 424 425 if (!budget) 426 return 0; 427 428 if (block->rx) { 429 work_done = gve_rx_poll_dqo(block, budget); 430 431 /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if 432 * either datapath has more work to do. 433 */ 434 if (priv->xdp_prog) 435 reschedule |= gve_xsk_tx_poll_dqo(block, budget); 436 reschedule |= work_done == budget; 437 } 438 439 if (reschedule) { 440 /* Reschedule by returning budget only if already on the correct 441 * cpu. 442 */ 443 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 444 return budget; 445 446 /* If not on the cpu with which this queue's irq has affinity 447 * with, we avoid rescheduling napi and arm the irq instead so 448 * that napi gets rescheduled back eventually onto the right 449 * cpu. 450 */ 451 if (work_done == budget) 452 work_done--; 453 } 454 455 if (likely(napi_complete_done(napi, work_done))) { 456 /* Enable interrupts again. 457 * 458 * We don't need to repoll afterwards because HW supports the 459 * PCI MSI-X PBA feature. 460 * 461 * Another interrupt would be triggered if a new event came in 462 * since the last one. 
463 */ 464 gve_write_irq_doorbell_dqo(priv, block, 465 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 466 } 467 468 return work_done; 469 } 470 471 static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) 472 { 473 if (priv->numa_node == NUMA_NO_NODE) 474 return cpu_all_mask; 475 else 476 return cpumask_of_node(priv->numa_node); 477 } 478 479 static int gve_alloc_notify_blocks(struct gve_priv *priv) 480 { 481 int num_vecs_requested = priv->num_ntfy_blks + 1; 482 const struct cpumask *node_mask; 483 unsigned int cur_cpu; 484 int vecs_enabled; 485 int i, j; 486 int err; 487 488 priv->msix_vectors = kvcalloc(num_vecs_requested, 489 sizeof(*priv->msix_vectors), GFP_KERNEL); 490 if (!priv->msix_vectors) 491 return -ENOMEM; 492 for (i = 0; i < num_vecs_requested; i++) 493 priv->msix_vectors[i].entry = i; 494 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 495 GVE_MIN_MSIX, num_vecs_requested); 496 if (vecs_enabled < 0) { 497 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 498 GVE_MIN_MSIX, vecs_enabled); 499 err = vecs_enabled; 500 goto abort_with_msix_vectors; 501 } 502 if (vecs_enabled != num_vecs_requested) { 503 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 504 int vecs_per_type = new_num_ntfy_blks / 2; 505 int vecs_left = new_num_ntfy_blks % 2; 506 507 priv->num_ntfy_blks = new_num_ntfy_blks; 508 priv->mgmt_msix_idx = priv->num_ntfy_blks; 509 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 510 vecs_per_type); 511 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 512 vecs_per_type + vecs_left); 513 dev_err(&priv->pdev->dev, 514 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 515 vecs_enabled, priv->tx_cfg.max_queues, 516 priv->rx_cfg.max_queues); 517 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 518 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 519 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 520 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 521 } 522 523 /* Setup Management Vector - the last vector */ 524 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 525 pci_name(priv->pdev)); 526 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 527 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 528 if (err) { 529 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 530 goto abort_with_msix_enabled; 531 } 532 priv->irq_db_indices = 533 dma_alloc_coherent(&priv->pdev->dev, 534 priv->num_ntfy_blks * 535 sizeof(*priv->irq_db_indices), 536 &priv->irq_db_indices_bus, GFP_KERNEL); 537 if (!priv->irq_db_indices) { 538 err = -ENOMEM; 539 goto abort_with_mgmt_vector; 540 } 541 542 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 543 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 544 if (!priv->ntfy_blocks) { 545 err = -ENOMEM; 546 goto abort_with_irq_db_indices; 547 } 548 549 /* Setup the other blocks - the first n-1 vectors */ 550 node_mask = gve_get_node_mask(priv); 551 cur_cpu = cpumask_first(node_mask); 552 for (i = 0; i < priv->num_ntfy_blks; i++) { 553 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 554 int msix_idx = i; 555 556 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 557 i, pci_name(priv->pdev)); 558 block->priv = priv; 559 err = request_irq(priv->msix_vectors[msix_idx].vector, 560 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 561 0, block->name, block); 562 if (err) { 563 dev_err(&priv->pdev->dev, 564 "Failed to receive msix vector %d\n", i); 565 goto abort_with_some_ntfy_blocks; 566 } 567 block->irq = priv->msix_vectors[msix_idx].vector; 568 irq_set_affinity_and_hint(block->irq, 569 cpumask_of(cur_cpu)); 570 block->irq_db_index = &priv->irq_db_indices[i].index; 571 572 cur_cpu = cpumask_next(cur_cpu, node_mask); 573 /* Wrap once CPUs in the node have been exhausted, or when 574 * starting RX queue affinities. TX and RX queues of the same 575 * index share affinity. 576 */ 577 if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) 578 cur_cpu = cpumask_first(node_mask); 579 } 580 return 0; 581 abort_with_some_ntfy_blocks: 582 for (j = 0; j < i; j++) { 583 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 584 int msix_idx = j; 585 586 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 587 NULL); 588 free_irq(priv->msix_vectors[msix_idx].vector, block); 589 block->irq = 0; 590 } 591 kvfree(priv->ntfy_blocks); 592 priv->ntfy_blocks = NULL; 593 abort_with_irq_db_indices: 594 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 595 sizeof(*priv->irq_db_indices), 596 priv->irq_db_indices, priv->irq_db_indices_bus); 597 priv->irq_db_indices = NULL; 598 abort_with_mgmt_vector: 599 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 600 abort_with_msix_enabled: 601 pci_disable_msix(priv->pdev); 602 abort_with_msix_vectors: 603 kvfree(priv->msix_vectors); 604 priv->msix_vectors = NULL; 605 return err; 606 } 607 608 static void gve_free_notify_blocks(struct gve_priv *priv) 609 { 610 int i; 611 612 if (!priv->msix_vectors) 613 return; 614 615 /* Free the irqs */ 616 for (i = 0; i < priv->num_ntfy_blks; i++) { 617 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 618 int msix_idx = i; 619 620 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 621 NULL); 622 free_irq(priv->msix_vectors[msix_idx].vector, block); 623 block->irq = 0; 624 } 625 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 626 kvfree(priv->ntfy_blocks); 627 priv->ntfy_blocks = NULL; 628 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 629 sizeof(*priv->irq_db_indices), 630 priv->irq_db_indices, priv->irq_db_indices_bus); 631 priv->irq_db_indices = NULL; 632 pci_disable_msix(priv->pdev); 633 kvfree(priv->msix_vectors); 634 priv->msix_vectors = NULL; 635 } 636 637 static int gve_setup_device_resources(struct gve_priv *priv) 638 { 639 int err; 640 641 err = gve_alloc_flow_rule_caches(priv); 642 if (err) 643 return err; 644 err = gve_alloc_rss_config_cache(priv); 645 if (err) 646 goto abort_with_flow_rule_caches; 647 err = gve_alloc_counter_array(priv); 648 if (err) 649 goto abort_with_rss_config_cache; 650 err = gve_init_clock(priv); 651 if (err) 652 goto abort_with_counter; 653 err = gve_alloc_notify_blocks(priv); 654 if (err) 655 goto abort_with_clock; 656 err = gve_alloc_stats_report(priv); 657 if (err) 658 goto abort_with_ntfy_blocks; 659 err = gve_adminq_configure_device_resources(priv, 660 priv->counter_array_bus, 661 priv->num_event_counters, 662 priv->irq_db_indices_bus, 663 priv->num_ntfy_blks); 664 if (unlikely(err)) { 665 dev_err(&priv->pdev->dev, 666 "could not setup device_resources: err=%d\n", err); 667 err = -ENXIO; 668 goto abort_with_stats_report; 669 } 670 671 if (!gve_is_gqi(priv)) { 672 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 673 GFP_KERNEL); 674 if (!priv->ptype_lut_dqo) { 675 err = -ENOMEM; 676 goto 
abort_with_stats_report; 677 } 678 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 679 if (err) { 680 dev_err(&priv->pdev->dev, 681 "Failed to get ptype map: err=%d\n", err); 682 goto abort_with_ptype_lut; 683 } 684 } 685 686 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 687 if (err) { 688 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 689 goto abort_with_ptype_lut; 690 } 691 692 err = gve_adminq_report_stats(priv, priv->stats_report_len, 693 priv->stats_report_bus, 694 GVE_STATS_REPORT_TIMER_PERIOD); 695 if (err) 696 dev_err(&priv->pdev->dev, 697 "Failed to report stats: err=%d\n", err); 698 gve_set_device_resources_ok(priv); 699 return 0; 700 701 abort_with_ptype_lut: 702 kvfree(priv->ptype_lut_dqo); 703 priv->ptype_lut_dqo = NULL; 704 abort_with_stats_report: 705 gve_free_stats_report(priv); 706 abort_with_ntfy_blocks: 707 gve_free_notify_blocks(priv); 708 abort_with_clock: 709 gve_teardown_clock(priv); 710 abort_with_counter: 711 gve_free_counter_array(priv); 712 abort_with_rss_config_cache: 713 gve_free_rss_config_cache(priv); 714 abort_with_flow_rule_caches: 715 gve_free_flow_rule_caches(priv); 716 717 return err; 718 } 719 720 static void gve_trigger_reset(struct gve_priv *priv); 721 722 static void gve_teardown_device_resources(struct gve_priv *priv) 723 { 724 int err; 725 726 /* Tell device its resources are being freed */ 727 if (gve_get_device_resources_ok(priv)) { 728 err = gve_flow_rules_reset(priv); 729 if (err) { 730 dev_err(&priv->pdev->dev, 731 "Failed to reset flow rules: err=%d\n", err); 732 gve_trigger_reset(priv); 733 } 734 /* detach the stats report */ 735 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 736 if (err) { 737 dev_err(&priv->pdev->dev, 738 "Failed to detach stats report: err=%d\n", err); 739 gve_trigger_reset(priv); 740 } 741 err = gve_adminq_deconfigure_device_resources(priv); 742 if (err) { 743 dev_err(&priv->pdev->dev, 744 "Could not deconfigure device resources: err=%d\n", 745 err); 746 gve_trigger_reset(priv); 747 } 748 } 749 750 kvfree(priv->ptype_lut_dqo); 751 priv->ptype_lut_dqo = NULL; 752 753 gve_free_flow_rule_caches(priv); 754 gve_free_rss_config_cache(priv); 755 gve_free_counter_array(priv); 756 gve_free_notify_blocks(priv); 757 gve_free_stats_report(priv); 758 gve_teardown_clock(priv); 759 gve_clear_device_resources_ok(priv); 760 } 761 762 static int gve_unregister_qpl(struct gve_priv *priv, 763 struct gve_queue_page_list *qpl) 764 { 765 int err; 766 767 if (!qpl) 768 return 0; 769 770 err = gve_adminq_unregister_page_list(priv, qpl->id); 771 if (err) { 772 netif_err(priv, drv, priv->dev, 773 "Failed to unregister queue page list %d\n", 774 qpl->id); 775 return err; 776 } 777 778 priv->num_registered_pages -= qpl->num_entries; 779 return 0; 780 } 781 782 static int gve_register_qpl(struct gve_priv *priv, 783 struct gve_queue_page_list *qpl) 784 { 785 int pages; 786 int err; 787 788 if (!qpl) 789 return 0; 790 791 pages = qpl->num_entries; 792 793 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 794 netif_err(priv, drv, priv->dev, 795 "Reached max number of registered pages %llu > %llu\n", 796 pages + priv->num_registered_pages, 797 priv->max_registered_pages); 798 return -EINVAL; 799 } 800 801 err = gve_adminq_register_page_list(priv, qpl); 802 if (err) { 803 netif_err(priv, drv, priv->dev, 804 "failed to register queue page list %d\n", 805 qpl->id); 806 return err; 807 } 808 809 priv->num_registered_pages += pages; 810 return 0; 811 } 812 813 static struct 
gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 814 { 815 struct gve_tx_ring *tx = &priv->tx[idx]; 816 817 if (gve_is_gqi(priv)) 818 return tx->tx_fifo.qpl; 819 else 820 return tx->dqo.qpl; 821 } 822 823 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 824 { 825 struct gve_rx_ring *rx = &priv->rx[idx]; 826 827 if (gve_is_gqi(priv)) 828 return rx->data.qpl; 829 else 830 return rx->dqo.qpl; 831 } 832 833 static int gve_register_qpls(struct gve_priv *priv) 834 { 835 int num_tx_qpls, num_rx_qpls; 836 int err; 837 int i; 838 839 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 840 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 841 842 for (i = 0; i < num_tx_qpls; i++) { 843 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 844 if (err) 845 return err; 846 } 847 848 for (i = 0; i < num_rx_qpls; i++) { 849 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 850 if (err) 851 return err; 852 } 853 854 return 0; 855 } 856 857 static int gve_unregister_qpls(struct gve_priv *priv) 858 { 859 int num_tx_qpls, num_rx_qpls; 860 int err; 861 int i; 862 863 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 864 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 865 866 for (i = 0; i < num_tx_qpls; i++) { 867 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 868 /* This failure will trigger a reset - no need to clean */ 869 if (err) 870 return err; 871 } 872 873 for (i = 0; i < num_rx_qpls; i++) { 874 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 875 /* This failure will trigger a reset - no need to clean */ 876 if (err) 877 return err; 878 } 879 return 0; 880 } 881 882 static int gve_create_rings(struct gve_priv *priv) 883 { 884 int num_tx_queues = gve_num_tx_queues(priv); 885 int err; 886 int i; 887 888 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 889 if (err) { 890 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 891 num_tx_queues); 892 /* This failure will trigger a reset - no need to clean 893 * up 894 */ 895 return err; 896 } 897 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 898 num_tx_queues); 899 900 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 901 if (err) { 902 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 903 priv->rx_cfg.num_queues); 904 /* This failure will trigger a reset - no need to clean 905 * up 906 */ 907 return err; 908 } 909 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 910 priv->rx_cfg.num_queues); 911 912 if (gve_is_gqi(priv)) { 913 /* Rx data ring has been prefilled with packet buffers at queue 914 * allocation time. 915 * 916 * Write the doorbell to provide descriptor slots and packet 917 * buffers to the NIC. 918 */ 919 for (i = 0; i < priv->rx_cfg.num_queues; i++) 920 gve_rx_write_doorbell(priv, &priv->rx[i]); 921 } else { 922 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 923 /* Post buffers and ring doorbell. 
*/ 924 gve_rx_post_buffers_dqo(&priv->rx[i]); 925 } 926 } 927 928 return 0; 929 } 930 931 static void init_xdp_sync_stats(struct gve_priv *priv) 932 { 933 int start_id = gve_xdp_tx_start_queue_id(priv); 934 int i; 935 936 /* Init stats */ 937 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 938 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 939 940 u64_stats_init(&priv->tx[i].statss); 941 priv->tx[i].ntfy_id = ntfy_idx; 942 } 943 } 944 945 static void gve_init_sync_stats(struct gve_priv *priv) 946 { 947 int i; 948 949 for (i = 0; i < priv->tx_cfg.num_queues; i++) 950 u64_stats_init(&priv->tx[i].statss); 951 952 /* Init stats for XDP TX queues */ 953 init_xdp_sync_stats(priv); 954 955 for (i = 0; i < priv->rx_cfg.num_queues; i++) 956 u64_stats_init(&priv->rx[i].statss); 957 } 958 959 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 960 struct gve_tx_alloc_rings_cfg *cfg) 961 { 962 cfg->qcfg = &priv->tx_cfg; 963 cfg->raw_addressing = !gve_is_qpl(priv); 964 cfg->ring_size = priv->tx_desc_cnt; 965 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 966 cfg->tx = priv->tx; 967 } 968 969 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 970 { 971 int i; 972 973 if (!priv->tx) 974 return; 975 976 for (i = 0; i < num_rings; i++) { 977 if (gve_is_gqi(priv)) 978 gve_tx_stop_ring_gqi(priv, i); 979 else 980 gve_tx_stop_ring_dqo(priv, i); 981 } 982 } 983 984 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 985 { 986 int i; 987 988 for (i = 0; i < num_rings; i++) { 989 if (gve_is_gqi(priv)) 990 gve_tx_start_ring_gqi(priv, i); 991 else 992 gve_tx_start_ring_dqo(priv, i); 993 } 994 } 995 996 static int gve_queues_mem_alloc(struct gve_priv *priv, 997 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 998 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 999 { 1000 int err; 1001 1002 if (gve_is_gqi(priv)) 1003 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 1004 else 1005 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 1006 if (err) 1007 return err; 1008 1009 if (gve_is_gqi(priv)) 1010 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1011 else 1012 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1013 if (err) 1014 goto free_tx; 1015 1016 return 0; 1017 1018 free_tx: 1019 if (gve_is_gqi(priv)) 1020 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1021 else 1022 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1023 return err; 1024 } 1025 1026 static int gve_destroy_rings(struct gve_priv *priv) 1027 { 1028 int num_tx_queues = gve_num_tx_queues(priv); 1029 int err; 1030 1031 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1032 if (err) { 1033 netif_err(priv, drv, priv->dev, 1034 "failed to destroy tx queues\n"); 1035 /* This failure will trigger a reset - no need to clean up */ 1036 return err; 1037 } 1038 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1039 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1040 if (err) { 1041 netif_err(priv, drv, priv->dev, 1042 "failed to destroy rx queues\n"); 1043 /* This failure will trigger a reset - no need to clean up */ 1044 return err; 1045 } 1046 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1047 return 0; 1048 } 1049 1050 static void gve_queues_mem_free(struct gve_priv *priv, 1051 struct gve_tx_alloc_rings_cfg *tx_cfg, 1052 struct gve_rx_alloc_rings_cfg *rx_cfg) 1053 { 1054 if (gve_is_gqi(priv)) { 1055 gve_tx_free_rings_gqi(priv, tx_cfg); 1056 gve_rx_free_rings_gqi(priv, rx_cfg); 1057 } else { 1058 gve_tx_free_rings_dqo(priv, tx_cfg); 1059 gve_rx_free_rings_dqo(priv, 
rx_cfg); 1060 } 1061 } 1062 1063 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1064 struct page **page, dma_addr_t *dma, 1065 enum dma_data_direction dir, gfp_t gfp_flags) 1066 { 1067 *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); 1068 if (!*page) { 1069 priv->page_alloc_fail++; 1070 return -ENOMEM; 1071 } 1072 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1073 if (dma_mapping_error(dev, *dma)) { 1074 priv->dma_mapping_error++; 1075 put_page(*page); 1076 return -ENOMEM; 1077 } 1078 return 0; 1079 } 1080 1081 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1082 u32 id, int pages) 1083 { 1084 struct gve_queue_page_list *qpl; 1085 int err; 1086 int i; 1087 1088 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1089 if (!qpl) 1090 return NULL; 1091 1092 qpl->id = id; 1093 qpl->num_entries = 0; 1094 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1095 if (!qpl->pages) 1096 goto abort; 1097 1098 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1099 if (!qpl->page_buses) 1100 goto abort; 1101 1102 for (i = 0; i < pages; i++) { 1103 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1104 &qpl->page_buses[i], 1105 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1106 if (err) 1107 goto abort; 1108 qpl->num_entries++; 1109 } 1110 1111 return qpl; 1112 1113 abort: 1114 gve_free_queue_page_list(priv, qpl, id); 1115 return NULL; 1116 } 1117 1118 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1119 enum dma_data_direction dir) 1120 { 1121 if (!dma_mapping_error(dev, dma)) 1122 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1123 if (page) 1124 put_page(page); 1125 } 1126 1127 void gve_free_queue_page_list(struct gve_priv *priv, 1128 struct gve_queue_page_list *qpl, 1129 u32 id) 1130 { 1131 int i; 1132 1133 if (!qpl) 1134 return; 1135 if (!qpl->pages) 1136 goto free_qpl; 1137 if (!qpl->page_buses) 1138 goto free_pages; 1139 1140 for (i = 0; i < qpl->num_entries; i++) 1141 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1142 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1143 1144 kvfree(qpl->page_buses); 1145 qpl->page_buses = NULL; 1146 free_pages: 1147 kvfree(qpl->pages); 1148 qpl->pages = NULL; 1149 free_qpl: 1150 kvfree(qpl); 1151 } 1152 1153 /* Use this to schedule a reset when the device is capable of continuing 1154 * to handle other requests in its current state. If it is not, do a reset 1155 * in thread instead. 
1156 */ 1157 void gve_schedule_reset(struct gve_priv *priv) 1158 { 1159 gve_set_do_reset(priv); 1160 queue_work(priv->gve_wq, &priv->service_task); 1161 } 1162 1163 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1164 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1165 static void gve_turndown(struct gve_priv *priv); 1166 static void gve_turnup(struct gve_priv *priv); 1167 1168 static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) 1169 { 1170 struct gve_rx_ring *rx; 1171 1172 if (!priv->rx) 1173 return; 1174 1175 rx = &priv->rx[qid]; 1176 rx->xsk_pool = NULL; 1177 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1178 xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); 1179 1180 if (!priv->tx) 1181 return; 1182 priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; 1183 } 1184 1185 static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, 1186 struct xsk_buff_pool *pool, u16 qid) 1187 { 1188 struct gve_rx_ring *rx; 1189 u16 tx_qid; 1190 int err; 1191 1192 rx = &priv->rx[qid]; 1193 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1194 MEM_TYPE_XSK_BUFF_POOL, pool); 1195 if (err) { 1196 gve_unreg_xsk_pool(priv, qid); 1197 return err; 1198 } 1199 1200 rx->xsk_pool = pool; 1201 1202 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1203 priv->tx[tx_qid].xsk_pool = pool; 1204 1205 return 0; 1206 } 1207 1208 static void gve_unreg_xdp_info(struct gve_priv *priv) 1209 { 1210 int i; 1211 1212 if (!priv->tx_cfg.num_xdp_queues || !priv->rx) 1213 return; 1214 1215 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1216 struct gve_rx_ring *rx = &priv->rx[i]; 1217 1218 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1219 xdp_rxq_info_unreg(&rx->xdp_rxq); 1220 1221 gve_unreg_xsk_pool(priv, i); 1222 } 1223 } 1224 1225 static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) 1226 { 1227 if (!test_bit(qid, priv->xsk_pools)) 1228 return NULL; 1229 1230 return xsk_get_pool_from_qid(priv->dev, qid); 1231 } 1232 1233 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1234 { 1235 struct napi_struct *napi; 1236 struct gve_rx_ring *rx; 1237 int err = 0; 1238 int i; 1239 1240 if (!priv->tx_cfg.num_xdp_queues) 1241 return 0; 1242 1243 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1244 struct xsk_buff_pool *xsk_pool; 1245 1246 rx = &priv->rx[i]; 1247 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1248 1249 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1250 napi->napi_id); 1251 if (err) 1252 goto err; 1253 1254 xsk_pool = gve_get_xsk_pool(priv, i); 1255 if (xsk_pool) 1256 err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); 1257 else if (gve_is_qpl(priv)) 1258 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1259 MEM_TYPE_PAGE_SHARED, 1260 NULL); 1261 else 1262 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1263 MEM_TYPE_PAGE_POOL, 1264 rx->dqo.page_pool); 1265 if (err) 1266 goto err; 1267 } 1268 return 0; 1269 1270 err: 1271 gve_unreg_xdp_info(priv); 1272 return err; 1273 } 1274 1275 1276 static void gve_drain_page_cache(struct gve_priv *priv) 1277 { 1278 int i; 1279 1280 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1281 page_frag_cache_drain(&priv->rx[i].page_cache); 1282 } 1283 1284 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1285 struct gve_rx_alloc_rings_cfg *cfg) 1286 { 1287 cfg->qcfg_rx = &priv->rx_cfg; 1288 cfg->qcfg_tx = &priv->tx_cfg; 1289 cfg->raw_addressing = !gve_is_qpl(priv); 1290 cfg->enable_header_split = priv->header_split_enabled; 1291 cfg->ring_size = priv->rx_desc_cnt; 1292 cfg->packet_buffer_size = 
priv->rx_cfg.packet_buffer_size; 1293 cfg->rx = priv->rx; 1294 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1295 } 1296 1297 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1298 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1299 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1300 { 1301 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1302 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1303 } 1304 1305 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1306 { 1307 if (gve_is_gqi(priv)) 1308 gve_rx_start_ring_gqi(priv, i); 1309 else 1310 gve_rx_start_ring_dqo(priv, i); 1311 } 1312 1313 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1314 { 1315 int i; 1316 1317 for (i = 0; i < num_rings; i++) 1318 gve_rx_start_ring(priv, i); 1319 } 1320 1321 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1322 { 1323 if (gve_is_gqi(priv)) 1324 gve_rx_stop_ring_gqi(priv, i); 1325 else 1326 gve_rx_stop_ring_dqo(priv, i); 1327 } 1328 1329 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1330 { 1331 int i; 1332 1333 if (!priv->rx) 1334 return; 1335 1336 for (i = 0; i < num_rings; i++) 1337 gve_rx_stop_ring(priv, i); 1338 } 1339 1340 static void gve_queues_mem_remove(struct gve_priv *priv) 1341 { 1342 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1343 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1344 1345 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1346 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1347 priv->tx = NULL; 1348 priv->rx = NULL; 1349 } 1350 1351 /* The passed-in queue memory is stored into priv and the queues are made live. 1352 * No memory is allocated. Passed-in memory is freed on errors. 1353 */ 1354 static int gve_queues_start(struct gve_priv *priv, 1355 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1356 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1357 { 1358 struct net_device *dev = priv->dev; 1359 int err; 1360 1361 /* Record new resources into priv */ 1362 priv->tx = tx_alloc_cfg->tx; 1363 priv->rx = rx_alloc_cfg->rx; 1364 1365 /* Record new configs into priv */ 1366 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1367 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1368 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1369 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1370 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1371 1372 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1373 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1374 gve_init_sync_stats(priv); 1375 1376 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1377 if (err) 1378 goto stop_and_free_rings; 1379 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1380 if (err) 1381 goto stop_and_free_rings; 1382 1383 err = gve_reg_xdp_info(priv, dev); 1384 if (err) 1385 goto stop_and_free_rings; 1386 1387 if (rx_alloc_cfg->reset_rss) { 1388 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1389 if (err) 1390 goto reset; 1391 } 1392 1393 err = gve_register_qpls(priv); 1394 if (err) 1395 goto reset; 1396 1397 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1398 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1399 1400 err = gve_create_rings(priv); 1401 if (err) 1402 goto reset; 1403 1404 gve_set_device_rings_ok(priv); 1405 1406 if (gve_get_report_stats(priv)) 1407 mod_timer(&priv->stats_report_timer, 1408 round_jiffies(jiffies + 1409 msecs_to_jiffies(priv->stats_report_timer_period))); 1410 1411 gve_turnup(priv); 1412 queue_work(priv->gve_wq, &priv->service_task); 1413 
priv->interface_up_cnt++; 1414 return 0; 1415 1416 reset: 1417 if (gve_get_reset_in_progress(priv)) 1418 goto stop_and_free_rings; 1419 gve_reset_and_teardown(priv, true); 1420 /* if this fails there is nothing we can do so just ignore the return */ 1421 gve_reset_recovery(priv, false); 1422 /* return the original error */ 1423 return err; 1424 stop_and_free_rings: 1425 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1426 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1427 gve_queues_mem_remove(priv); 1428 return err; 1429 } 1430 1431 static int gve_open(struct net_device *dev) 1432 { 1433 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1434 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1435 struct gve_priv *priv = netdev_priv(dev); 1436 int err; 1437 1438 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1439 1440 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1441 if (err) 1442 return err; 1443 1444 /* No need to free on error: ownership of resources is lost after 1445 * calling gve_queues_start. 1446 */ 1447 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1448 if (err) 1449 return err; 1450 1451 return 0; 1452 } 1453 1454 static int gve_queues_stop(struct gve_priv *priv) 1455 { 1456 int err; 1457 1458 netif_carrier_off(priv->dev); 1459 if (gve_get_device_rings_ok(priv)) { 1460 gve_turndown(priv); 1461 gve_drain_page_cache(priv); 1462 err = gve_destroy_rings(priv); 1463 if (err) 1464 goto err; 1465 err = gve_unregister_qpls(priv); 1466 if (err) 1467 goto err; 1468 gve_clear_device_rings_ok(priv); 1469 } 1470 timer_delete_sync(&priv->stats_report_timer); 1471 1472 gve_unreg_xdp_info(priv); 1473 1474 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1475 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1476 1477 priv->interface_down_cnt++; 1478 return 0; 1479 1480 err: 1481 /* This must have been called from a reset due to the rtnl lock 1482 * so just return at this point. 
1483 */ 1484 if (gve_get_reset_in_progress(priv)) 1485 return err; 1486 /* Otherwise reset before returning */ 1487 gve_reset_and_teardown(priv, true); 1488 return gve_reset_recovery(priv, false); 1489 } 1490 1491 static int gve_close(struct net_device *dev) 1492 { 1493 struct gve_priv *priv = netdev_priv(dev); 1494 int err; 1495 1496 err = gve_queues_stop(priv); 1497 if (err) 1498 return err; 1499 1500 gve_queues_mem_remove(priv); 1501 return 0; 1502 } 1503 1504 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1505 { 1506 if (!gve_get_napi_enabled(priv)) 1507 return; 1508 1509 if (link_status == netif_carrier_ok(priv->dev)) 1510 return; 1511 1512 if (link_status) { 1513 netdev_info(priv->dev, "Device link is up.\n"); 1514 netif_carrier_on(priv->dev); 1515 } else { 1516 netdev_info(priv->dev, "Device link is down.\n"); 1517 netif_carrier_off(priv->dev); 1518 } 1519 } 1520 1521 static int gve_configure_rings_xdp(struct gve_priv *priv, 1522 u16 num_xdp_rings) 1523 { 1524 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1525 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1526 1527 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1528 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1529 1530 rx_alloc_cfg.xdp = !!num_xdp_rings; 1531 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1532 } 1533 1534 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1535 struct netlink_ext_ack *extack) 1536 { 1537 struct bpf_prog *old_prog; 1538 int err = 0; 1539 u32 status; 1540 1541 old_prog = READ_ONCE(priv->xdp_prog); 1542 if (!netif_running(priv->dev)) { 1543 WRITE_ONCE(priv->xdp_prog, prog); 1544 if (old_prog) 1545 bpf_prog_put(old_prog); 1546 1547 /* Update priv XDP queue configuration */ 1548 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 1549 priv->rx_cfg.num_queues : 0; 1550 return 0; 1551 } 1552 1553 if (!old_prog && prog) 1554 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1555 else if (old_prog && !prog) 1556 err = gve_configure_rings_xdp(priv, 0); 1557 1558 if (err) 1559 goto out; 1560 1561 WRITE_ONCE(priv->xdp_prog, prog); 1562 if (old_prog) 1563 bpf_prog_put(old_prog); 1564 1565 out: 1566 status = ioread32be(&priv->reg_bar0->device_status); 1567 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1568 return err; 1569 } 1570 1571 static int gve_xdp_xmit(struct net_device *dev, int n, 1572 struct xdp_frame **frames, u32 flags) 1573 { 1574 struct gve_priv *priv = netdev_priv(dev); 1575 1576 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1577 return gve_xdp_xmit_gqi(dev, n, frames, flags); 1578 else if (priv->queue_format == GVE_DQO_RDA_FORMAT) 1579 return gve_xdp_xmit_dqo(dev, n, frames, flags); 1580 1581 return -EOPNOTSUPP; 1582 } 1583 1584 static int gve_xsk_pool_enable(struct net_device *dev, 1585 struct xsk_buff_pool *pool, 1586 u16 qid) 1587 { 1588 struct gve_priv *priv = netdev_priv(dev); 1589 int err; 1590 1591 if (qid >= priv->rx_cfg.num_queues) { 1592 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1593 return -EINVAL; 1594 } 1595 if (xsk_pool_get_rx_frame_size(pool) < 1596 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1597 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1598 return -EINVAL; 1599 } 1600 1601 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1602 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1603 if (err) 1604 return err; 1605 1606 set_bit(qid, priv->xsk_pools); 1607 1608 /* If XDP prog is not installed or interface is down, return. 
*/ 1609 if (!priv->xdp_prog || !netif_running(dev)) 1610 return 0; 1611 1612 err = gve_reg_xsk_pool(priv, dev, pool, qid); 1613 if (err) 1614 goto err_xsk_pool_dma_mapped; 1615 1616 /* Stop and start RDA queues to repost buffers. */ 1617 if (!gve_is_qpl(priv)) { 1618 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1619 if (err) 1620 goto err_xsk_pool_registered; 1621 } 1622 return 0; 1623 1624 err_xsk_pool_registered: 1625 gve_unreg_xsk_pool(priv, qid); 1626 err_xsk_pool_dma_mapped: 1627 clear_bit(qid, priv->xsk_pools); 1628 xsk_pool_dma_unmap(pool, 1629 DMA_ATTR_SKIP_CPU_SYNC | 1630 DMA_ATTR_WEAK_ORDERING); 1631 return err; 1632 } 1633 1634 static int gve_xsk_pool_disable(struct net_device *dev, 1635 u16 qid) 1636 { 1637 struct gve_priv *priv = netdev_priv(dev); 1638 struct napi_struct *napi_rx; 1639 struct napi_struct *napi_tx; 1640 struct xsk_buff_pool *pool; 1641 int tx_qid; 1642 int err; 1643 1644 if (qid >= priv->rx_cfg.num_queues) 1645 return -EINVAL; 1646 1647 clear_bit(qid, priv->xsk_pools); 1648 1649 pool = xsk_get_pool_from_qid(dev, qid); 1650 if (pool) 1651 xsk_pool_dma_unmap(pool, 1652 DMA_ATTR_SKIP_CPU_SYNC | 1653 DMA_ATTR_WEAK_ORDERING); 1654 1655 if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) 1656 return 0; 1657 1658 /* Stop and start RDA queues to repost buffers. */ 1659 if (!gve_is_qpl(priv) && priv->xdp_prog) { 1660 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1661 if (err) 1662 return err; 1663 } 1664 1665 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1666 napi_disable(napi_rx); /* make sure current rx poll is done */ 1667 1668 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1669 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1670 napi_disable(napi_tx); /* make sure current tx poll is done */ 1671 1672 gve_unreg_xsk_pool(priv, qid); 1673 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1674 1675 napi_enable(napi_rx); 1676 napi_enable(napi_tx); 1677 if (gve_is_gqi(priv)) { 1678 if (gve_rx_work_pending(&priv->rx[qid])) 1679 napi_schedule(napi_rx); 1680 1681 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1682 napi_schedule(napi_tx); 1683 } 1684 1685 return 0; 1686 } 1687 1688 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1689 { 1690 struct gve_priv *priv = netdev_priv(dev); 1691 struct napi_struct *napi; 1692 1693 if (!gve_get_napi_enabled(priv)) 1694 return -ENETDOWN; 1695 1696 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1697 return -EINVAL; 1698 1699 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1700 if (!napi_if_scheduled_mark_missed(napi)) { 1701 /* Call local_bh_enable to trigger SoftIRQ processing */ 1702 local_bh_disable(); 1703 napi_schedule(napi); 1704 local_bh_enable(); 1705 } 1706 1707 return 0; 1708 } 1709 1710 static int verify_xdp_configuration(struct net_device *dev) 1711 { 1712 struct gve_priv *priv = netdev_priv(dev); 1713 u16 max_xdp_mtu; 1714 1715 if (dev->features & NETIF_F_LRO) { 1716 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1717 return -EOPNOTSUPP; 1718 } 1719 1720 if (priv->header_split_enabled) { 1721 netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); 1722 return -EOPNOTSUPP; 1723 } 1724 1725 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1726 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1727 max_xdp_mtu -= GVE_RX_PAD; 1728 1729 if (dev->mtu > max_xdp_mtu) { 1730 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1731 dev->mtu); 
1732 return -EOPNOTSUPP; 1733 } 1734 1735 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1736 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1737 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1738 priv->rx_cfg.num_queues, 1739 priv->tx_cfg.num_queues, 1740 priv->tx_cfg.max_queues); 1741 return -EINVAL; 1742 } 1743 return 0; 1744 } 1745 1746 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1747 { 1748 struct gve_priv *priv = netdev_priv(dev); 1749 int err; 1750 1751 err = verify_xdp_configuration(dev); 1752 if (err) 1753 return err; 1754 switch (xdp->command) { 1755 case XDP_SETUP_PROG: 1756 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1757 case XDP_SETUP_XSK_POOL: 1758 if (xdp->xsk.pool) 1759 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1760 else 1761 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1762 default: 1763 return -EINVAL; 1764 } 1765 } 1766 1767 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1768 { 1769 struct gve_rss_config *rss_config = &priv->rss_config; 1770 struct ethtool_rxfh_param rxfh = {0}; 1771 u16 i; 1772 1773 if (!priv->cache_rss_config) 1774 return 0; 1775 1776 for (i = 0; i < priv->rss_lut_size; i++) 1777 rss_config->hash_lut[i] = 1778 ethtool_rxfh_indir_default(i, num_queues); 1779 1780 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1781 1782 rxfh.hfunc = ETH_RSS_HASH_TOP; 1783 1784 return gve_adminq_configure_rss(priv, &rxfh); 1785 } 1786 1787 int gve_flow_rules_reset(struct gve_priv *priv) 1788 { 1789 if (!priv->max_flow_rules) 1790 return 0; 1791 1792 return gve_adminq_reset_flow_rules(priv); 1793 } 1794 1795 int gve_adjust_config(struct gve_priv *priv, 1796 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1797 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1798 { 1799 int err; 1800 1801 /* Allocate resources for the new configuration */ 1802 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1803 if (err) { 1804 netif_err(priv, drv, priv->dev, 1805 "Adjust config failed to alloc new queues"); 1806 return err; 1807 } 1808 1809 /* Teardown the device and free existing resources */ 1810 err = gve_close(priv->dev); 1811 if (err) { 1812 netif_err(priv, drv, priv->dev, 1813 "Adjust config failed to close old queues"); 1814 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1815 return err; 1816 } 1817 1818 /* Bring the device back up again with the new resources. */ 1819 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1820 if (err) { 1821 netif_err(priv, drv, priv->dev, 1822 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1823 /* No need to free on error: ownership of resources is lost after 1824 * calling gve_queues_start. 
1825 */ 1826 gve_turndown(priv); 1827 return err; 1828 } 1829 1830 return 0; 1831 } 1832 1833 int gve_adjust_queues(struct gve_priv *priv, 1834 struct gve_rx_queue_config new_rx_config, 1835 struct gve_tx_queue_config new_tx_config, 1836 bool reset_rss) 1837 { 1838 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1839 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1840 int err; 1841 1842 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1843 1844 /* Relay the new config from ethtool */ 1845 tx_alloc_cfg.qcfg = &new_tx_config; 1846 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1847 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1848 rx_alloc_cfg.reset_rss = reset_rss; 1849 1850 if (netif_running(priv->dev)) { 1851 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1852 return err; 1853 } 1854 /* Set the config for the next up. */ 1855 if (reset_rss) { 1856 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1857 if (err) 1858 return err; 1859 } 1860 priv->tx_cfg = new_tx_config; 1861 priv->rx_cfg = new_rx_config; 1862 1863 return 0; 1864 } 1865 1866 static void gve_turndown(struct gve_priv *priv) 1867 { 1868 int idx; 1869 1870 if (netif_carrier_ok(priv->dev)) 1871 netif_carrier_off(priv->dev); 1872 1873 if (!gve_get_napi_enabled(priv)) 1874 return; 1875 1876 /* Disable napi to prevent more work from coming in */ 1877 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1878 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1879 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1880 1881 if (!gve_tx_was_added_to_block(priv, idx)) 1882 continue; 1883 1884 if (idx < priv->tx_cfg.num_queues) 1885 netif_queue_set_napi(priv->dev, idx, 1886 NETDEV_QUEUE_TYPE_TX, NULL); 1887 1888 napi_disable_locked(&block->napi); 1889 } 1890 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1891 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1892 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1893 1894 if (!gve_rx_was_added_to_block(priv, idx)) 1895 continue; 1896 1897 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1898 NULL); 1899 napi_disable_locked(&block->napi); 1900 } 1901 1902 /* Stop tx queues */ 1903 netif_tx_disable(priv->dev); 1904 1905 xdp_features_clear_redirect_target_locked(priv->dev); 1906 1907 gve_clear_napi_enabled(priv); 1908 gve_clear_report_stats(priv); 1909 1910 /* Make sure that all traffic is finished processing. */ 1911 synchronize_net(); 1912 } 1913 1914 static void gve_turnup(struct gve_priv *priv) 1915 { 1916 int idx; 1917 1918 /* Start the tx queues */ 1919 netif_tx_start_all_queues(priv->dev); 1920 1921 /* Enable napi and unmask interrupts for all queues */ 1922 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1923 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1924 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1925 1926 if (!gve_tx_was_added_to_block(priv, idx)) 1927 continue; 1928 1929 napi_enable_locked(&block->napi); 1930 1931 if (idx < priv->tx_cfg.num_queues) 1932 netif_queue_set_napi(priv->dev, idx, 1933 NETDEV_QUEUE_TYPE_TX, 1934 &block->napi); 1935 1936 if (gve_is_gqi(priv)) { 1937 iowrite32be(0, gve_irq_doorbell(priv, block)); 1938 } else { 1939 gve_set_itr_coalesce_usecs_dqo(priv, block, 1940 priv->tx_coalesce_usecs); 1941 } 1942 1943 /* Any descs written by the NIC before this barrier will be 1944 * handled by the one-off napi schedule below. Whereas any 1945 * descs after the barrier will generate interrupts. 
1946 */ 1947 mb(); 1948 napi_schedule(&block->napi); 1949 } 1950 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1951 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1952 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1953 1954 if (!gve_rx_was_added_to_block(priv, idx)) 1955 continue; 1956 1957 napi_enable_locked(&block->napi); 1958 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1959 &block->napi); 1960 1961 if (gve_is_gqi(priv)) { 1962 iowrite32be(0, gve_irq_doorbell(priv, block)); 1963 } else { 1964 gve_set_itr_coalesce_usecs_dqo(priv, block, 1965 priv->rx_coalesce_usecs); 1966 } 1967 1968 /* Any descs written by the NIC before this barrier will be 1969 * handled by the one-off napi schedule below. Whereas any 1970 * descs after the barrier will generate interrupts. 1971 */ 1972 mb(); 1973 napi_schedule(&block->napi); 1974 } 1975 1976 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1977 xdp_features_set_redirect_target_locked(priv->dev, false); 1978 1979 gve_set_napi_enabled(priv); 1980 } 1981 1982 static void gve_turnup_and_check_status(struct gve_priv *priv) 1983 { 1984 u32 status; 1985 1986 gve_turnup(priv); 1987 status = ioread32be(&priv->reg_bar0->device_status); 1988 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1989 } 1990 1991 static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, 1992 unsigned int txqueue) 1993 { 1994 u32 ntfy_idx; 1995 1996 if (txqueue > priv->tx_cfg.num_queues) 1997 return NULL; 1998 1999 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 2000 if (ntfy_idx >= priv->num_ntfy_blks) 2001 return NULL; 2002 2003 return &priv->ntfy_blocks[ntfy_idx]; 2004 } 2005 2006 static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, 2007 unsigned int txqueue) 2008 { 2009 struct gve_notify_block *block; 2010 u32 current_time; 2011 2012 block = gve_get_tx_notify_block(priv, txqueue); 2013 2014 if (!block) 2015 return false; 2016 2017 current_time = jiffies_to_msecs(jiffies); 2018 if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2019 return false; 2020 2021 netdev_info(priv->dev, "Kicking queue %d", txqueue); 2022 napi_schedule(&block->napi); 2023 block->tx->last_kick_msec = current_time; 2024 return true; 2025 } 2026 2027 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 2028 { 2029 struct gve_notify_block *block; 2030 struct gve_priv *priv; 2031 2032 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 2033 priv = netdev_priv(dev); 2034 2035 if (!gve_tx_timeout_try_q_kick(priv, txqueue)) 2036 gve_schedule_reset(priv); 2037 2038 block = gve_get_tx_notify_block(priv, txqueue); 2039 if (block) 2040 block->tx->queue_timeout++; 2041 priv->tx_timeo_cnt++; 2042 } 2043 2044 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2045 { 2046 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2047 return GVE_MAX_RX_BUFFER_SIZE; 2048 else 2049 return GVE_DEFAULT_RX_BUFFER_SIZE; 2050 } 2051 2052 /* Header split is only supported on DQ RDA queue format. If XDP is enabled, 2053 * header split is not allowed. 
2054 */ 2055 bool gve_header_split_supported(const struct gve_priv *priv) 2056 { 2057 return priv->header_buf_size && 2058 priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; 2059 } 2060 2061 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2062 { 2063 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2064 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2065 bool enable_hdr_split; 2066 int err = 0; 2067 2068 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2069 return 0; 2070 2071 if (!gve_header_split_supported(priv)) { 2072 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2073 return -EOPNOTSUPP; 2074 } 2075 2076 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2077 enable_hdr_split = true; 2078 else 2079 enable_hdr_split = false; 2080 2081 if (enable_hdr_split == priv->header_split_enabled) 2082 return 0; 2083 2084 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2085 2086 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2087 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2088 2089 if (netif_running(priv->dev)) 2090 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2091 return err; 2092 } 2093 2094 static int gve_set_features(struct net_device *netdev, 2095 netdev_features_t features) 2096 { 2097 const netdev_features_t orig_features = netdev->features; 2098 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2099 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2100 struct gve_priv *priv = netdev_priv(netdev); 2101 int err; 2102 2103 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2104 2105 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2106 netdev->features ^= NETIF_F_LRO; 2107 if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { 2108 netdev_warn(netdev, 2109 "XDP is not supported when LRO is on.\n"); 2110 err = -EOPNOTSUPP; 2111 goto revert_features; 2112 } 2113 if (netif_running(netdev)) { 2114 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2115 if (err) 2116 goto revert_features; 2117 } 2118 } 2119 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2120 err = gve_flow_rules_reset(priv); 2121 if (err) 2122 goto revert_features; 2123 } 2124 2125 return 0; 2126 2127 revert_features: 2128 netdev->features = orig_features; 2129 return err; 2130 } 2131 2132 static int gve_get_ts_config(struct net_device *dev, 2133 struct kernel_hwtstamp_config *kernel_config) 2134 { 2135 struct gve_priv *priv = netdev_priv(dev); 2136 2137 *kernel_config = priv->ts_config; 2138 return 0; 2139 } 2140 2141 static int gve_set_ts_config(struct net_device *dev, 2142 struct kernel_hwtstamp_config *kernel_config, 2143 struct netlink_ext_ack *extack) 2144 { 2145 struct gve_priv *priv = netdev_priv(dev); 2146 2147 if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { 2148 NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); 2149 return -ERANGE; 2150 } 2151 2152 if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { 2153 if (!priv->nic_ts_report) { 2154 NL_SET_ERR_MSG_MOD(extack, 2155 "RX timestamping is not supported"); 2156 kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; 2157 return -EOPNOTSUPP; 2158 } 2159 2160 kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; 2161 gve_clock_nic_ts_read(priv); 2162 ptp_schedule_worker(priv->ptp->clock, 0); 2163 } else { 2164 ptp_cancel_worker_sync(priv->ptp->clock); 2165 } 2166 2167 priv->ts_config.rx_filter = kernel_config->rx_filter; 2168 2169 return 0; 2170 } 2171 2172 static const 
struct net_device_ops gve_netdev_ops = { 2173 .ndo_start_xmit = gve_start_xmit, 2174 .ndo_features_check = gve_features_check, 2175 .ndo_open = gve_open, 2176 .ndo_stop = gve_close, 2177 .ndo_get_stats64 = gve_get_stats, 2178 .ndo_tx_timeout = gve_tx_timeout, 2179 .ndo_set_features = gve_set_features, 2180 .ndo_bpf = gve_xdp, 2181 .ndo_xdp_xmit = gve_xdp_xmit, 2182 .ndo_xsk_wakeup = gve_xsk_wakeup, 2183 .ndo_hwtstamp_get = gve_get_ts_config, 2184 .ndo_hwtstamp_set = gve_set_ts_config, 2185 }; 2186 2187 static void gve_handle_status(struct gve_priv *priv, u32 status) 2188 { 2189 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2190 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2191 gve_set_do_reset(priv); 2192 } 2193 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2194 priv->stats_report_trigger_cnt++; 2195 gve_set_do_report_stats(priv); 2196 } 2197 } 2198 2199 static void gve_handle_reset(struct gve_priv *priv) 2200 { 2201 /* A service task will be scheduled at the end of probe to catch any 2202 * resets that need to happen, and we don't want to reset until 2203 * probe is done. 2204 */ 2205 if (gve_get_probe_in_progress(priv)) 2206 return; 2207 2208 if (gve_get_do_reset(priv)) { 2209 rtnl_lock(); 2210 netdev_lock(priv->dev); 2211 gve_reset(priv, false); 2212 netdev_unlock(priv->dev); 2213 rtnl_unlock(); 2214 } 2215 } 2216 2217 void gve_handle_report_stats(struct gve_priv *priv) 2218 { 2219 struct stats *stats = priv->stats_report->stats; 2220 int idx, stats_idx = 0; 2221 unsigned int start = 0; 2222 u64 tx_bytes; 2223 2224 if (!gve_get_report_stats(priv)) 2225 return; 2226 2227 be64_add_cpu(&priv->stats_report->written_count, 1); 2228 /* tx stats */ 2229 if (priv->tx) { 2230 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2231 u32 last_completion = 0; 2232 u32 tx_frames = 0; 2233 2234 /* DQO doesn't currently support these metrics. 
*/ 2235 if (gve_is_gqi(priv)) { 2236 last_completion = priv->tx[idx].done; 2237 tx_frames = priv->tx[idx].req; 2238 } 2239 2240 do { 2241 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2242 tx_bytes = priv->tx[idx].bytes_done; 2243 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2244 stats[stats_idx++] = (struct stats) { 2245 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2246 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2247 .queue_id = cpu_to_be32(idx), 2248 }; 2249 stats[stats_idx++] = (struct stats) { 2250 .stat_name = cpu_to_be32(TX_STOP_CNT), 2251 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2252 .queue_id = cpu_to_be32(idx), 2253 }; 2254 stats[stats_idx++] = (struct stats) { 2255 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2256 .value = cpu_to_be64(tx_frames), 2257 .queue_id = cpu_to_be32(idx), 2258 }; 2259 stats[stats_idx++] = (struct stats) { 2260 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2261 .value = cpu_to_be64(tx_bytes), 2262 .queue_id = cpu_to_be32(idx), 2263 }; 2264 stats[stats_idx++] = (struct stats) { 2265 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2266 .value = cpu_to_be64(last_completion), 2267 .queue_id = cpu_to_be32(idx), 2268 }; 2269 stats[stats_idx++] = (struct stats) { 2270 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2271 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2272 .queue_id = cpu_to_be32(idx), 2273 }; 2274 } 2275 } 2276 /* rx stats */ 2277 if (priv->rx) { 2278 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2279 stats[stats_idx++] = (struct stats) { 2280 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2281 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2282 .queue_id = cpu_to_be32(idx), 2283 }; 2284 stats[stats_idx++] = (struct stats) { 2285 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2286 .value = cpu_to_be64(priv->rx[idx].fill_cnt), 2287 .queue_id = cpu_to_be32(idx), 2288 }; 2289 } 2290 } 2291 } 2292 2293 /* Handle NIC status register changes, reset requests and report stats */ 2294 static void gve_service_task(struct work_struct *work) 2295 { 2296 struct gve_priv *priv = container_of(work, struct gve_priv, 2297 service_task); 2298 u32 status = ioread32be(&priv->reg_bar0->device_status); 2299 2300 gve_handle_status(priv, status); 2301 2302 gve_handle_reset(priv); 2303 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2304 } 2305 2306 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2307 { 2308 xdp_features_t xdp_features; 2309 2310 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2311 xdp_features = NETDEV_XDP_ACT_BASIC; 2312 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2313 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2314 } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 2315 xdp_features = NETDEV_XDP_ACT_BASIC; 2316 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2317 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2318 } else { 2319 xdp_features = 0; 2320 } 2321 2322 xdp_set_features_flag_locked(priv->dev, xdp_features); 2323 } 2324 2325 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2326 { 2327 int num_ntfy; 2328 int err; 2329 2330 /* Set up the adminq */ 2331 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2332 if (err) { 2333 dev_err(&priv->pdev->dev, 2334 "Failed to alloc admin queue: err=%d\n", err); 2335 return err; 2336 } 2337 2338 err = gve_verify_driver_compatibility(priv); 2339 if (err) { 2340 dev_err(&priv->pdev->dev, 2341 "Could not verify driver compatibility: err=%d\n", err); 2342 goto err; 2343 } 2344 2345 priv->num_registered_pages = 
0; 2346 2347 if (skip_describe_device) 2348 goto setup_device; 2349 2350 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2351 /* Get the initial information we need from the device */ 2352 err = gve_adminq_describe_device(priv); 2353 if (err) { 2354 dev_err(&priv->pdev->dev, 2355 "Could not get device information: err=%d\n", err); 2356 goto err; 2357 } 2358 priv->dev->mtu = priv->dev->max_mtu; 2359 num_ntfy = pci_msix_vec_count(priv->pdev); 2360 if (num_ntfy <= 0) { 2361 dev_err(&priv->pdev->dev, 2362 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2363 err = num_ntfy; 2364 goto err; 2365 } else if (num_ntfy < GVE_MIN_MSIX) { 2366 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2367 GVE_MIN_MSIX, num_ntfy); 2368 err = -EINVAL; 2369 goto err; 2370 } 2371 2372 /* Big TCP is only supported on DQO */ 2373 if (!gve_is_gqi(priv)) 2374 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2375 2376 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2377 /* gvnic has one Notification Block per MSI-x vector, except for the 2378 * management vector 2379 */ 2380 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2381 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2382 priv->numa_node = dev_to_node(&priv->pdev->dev); 2383 2384 priv->tx_cfg.max_queues = 2385 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2386 priv->rx_cfg.max_queues = 2387 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2388 2389 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2390 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2391 if (priv->default_num_queues > 0) { 2392 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2393 priv->tx_cfg.num_queues); 2394 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2395 priv->rx_cfg.num_queues); 2396 } 2397 priv->tx_cfg.num_xdp_queues = 0; 2398 2399 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2400 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2401 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2402 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2403 2404 if (!gve_is_gqi(priv)) { 2405 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2406 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2407 } 2408 2409 priv->ts_config.tx_type = HWTSTAMP_TX_OFF; 2410 priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; 2411 2412 setup_device: 2413 priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); 2414 if (!priv->xsk_pools) { 2415 err = -ENOMEM; 2416 goto err; 2417 } 2418 2419 gve_set_netdev_xdp_features(priv); 2420 err = gve_setup_device_resources(priv); 2421 if (err) 2422 goto err_free_xsk_bitmap; 2423 2424 return 0; 2425 2426 err_free_xsk_bitmap: 2427 bitmap_free(priv->xsk_pools); 2428 priv->xsk_pools = NULL; 2429 err: 2430 gve_adminq_free(&priv->pdev->dev, priv); 2431 return err; 2432 } 2433 2434 static void gve_teardown_priv_resources(struct gve_priv *priv) 2435 { 2436 gve_teardown_device_resources(priv); 2437 gve_adminq_free(&priv->pdev->dev, priv); 2438 bitmap_free(priv->xsk_pools); 2439 priv->xsk_pools = NULL; 2440 } 2441 2442 static void gve_trigger_reset(struct gve_priv *priv) 2443 { 2444 /* Reset the device by releasing the AQ */ 2445 gve_adminq_release(priv); 2446 } 2447 2448 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2449 { 2450 gve_trigger_reset(priv); 2451 /* With the reset having already happened, close cannot fail */ 2452 if (was_up) 2453 gve_close(priv->dev); 2454 gve_teardown_priv_resources(priv); 2455 } 
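/* Rebuild driver state after the device has been reset: re-allocate the
 * admin queue and device resources (the device description obtained at
 * probe time is reused), then bring the interface back up if it was
 * running before the reset. If recovery fails, all queues are turned
 * down and the error is returned so the caller can leave the device
 * disabled.
 */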
2456 2457 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2458 { 2459 int err; 2460 2461 err = gve_init_priv(priv, true); 2462 if (err) 2463 goto err; 2464 if (was_up) { 2465 err = gve_open(priv->dev); 2466 if (err) 2467 goto err; 2468 } 2469 return 0; 2470 err: 2471 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2472 gve_turndown(priv); 2473 return err; 2474 } 2475 2476 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2477 { 2478 bool was_up = netif_running(priv->dev); 2479 int err; 2480 2481 dev_info(&priv->pdev->dev, "Performing reset\n"); 2482 gve_clear_do_reset(priv); 2483 gve_set_reset_in_progress(priv); 2484 /* If we aren't attempting to teardown normally, just go turndown and 2485 * reset right away. 2486 */ 2487 if (!attempt_teardown) { 2488 gve_turndown(priv); 2489 gve_reset_and_teardown(priv, was_up); 2490 } else { 2491 /* Otherwise attempt to close normally */ 2492 if (was_up) { 2493 err = gve_close(priv->dev); 2494 /* If that fails reset as we did above */ 2495 if (err) 2496 gve_reset_and_teardown(priv, was_up); 2497 } 2498 /* Clean up any remaining resources */ 2499 gve_teardown_priv_resources(priv); 2500 } 2501 2502 /* Set it all back up */ 2503 err = gve_reset_recovery(priv, was_up); 2504 gve_clear_reset_in_progress(priv); 2505 priv->reset_cnt++; 2506 priv->interface_up_cnt = 0; 2507 priv->interface_down_cnt = 0; 2508 priv->stats_report_trigger_cnt = 0; 2509 return err; 2510 } 2511 2512 static void gve_write_version(u8 __iomem *driver_version_register) 2513 { 2514 const char *c = gve_version_prefix; 2515 2516 while (*c) { 2517 writeb(*c, driver_version_register); 2518 c++; 2519 } 2520 2521 c = gve_version_str; 2522 while (*c) { 2523 writeb(*c, driver_version_register); 2524 c++; 2525 } 2526 writeb('\n', driver_version_register); 2527 } 2528 2529 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2530 { 2531 struct gve_priv *priv = netdev_priv(dev); 2532 struct gve_rx_ring *gve_per_q_mem; 2533 int err; 2534 2535 if (!priv->rx) 2536 return -EAGAIN; 2537 2538 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2539 if (!gve_is_gqi(priv) && idx == 0) 2540 return -ERANGE; 2541 2542 /* Single-queue destruction requires quiescence on all queues */ 2543 gve_turndown(priv); 2544 2545 /* This failure will trigger a reset - no need to clean up */ 2546 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2547 if (err) 2548 return err; 2549 2550 if (gve_is_qpl(priv)) { 2551 /* This failure will trigger a reset - no need to clean up */ 2552 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2553 if (err) 2554 return err; 2555 } 2556 2557 gve_rx_stop_ring(priv, idx); 2558 2559 /* Turn the unstopped queues back up */ 2560 gve_turnup_and_check_status(priv); 2561 2562 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2563 *gve_per_q_mem = priv->rx[idx]; 2564 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2565 return 0; 2566 } 2567 2568 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2569 { 2570 struct gve_priv *priv = netdev_priv(dev); 2571 struct gve_rx_alloc_rings_cfg cfg = {0}; 2572 struct gve_rx_ring *gve_per_q_mem; 2573 2574 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2575 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2576 2577 if (gve_is_gqi(priv)) 2578 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2579 else 2580 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2581 } 2582 2583 static int gve_rx_queue_mem_alloc(struct net_device *dev, void 
*per_q_mem, 2584 int idx) 2585 { 2586 struct gve_priv *priv = netdev_priv(dev); 2587 struct gve_rx_alloc_rings_cfg cfg = {0}; 2588 struct gve_rx_ring *gve_per_q_mem; 2589 int err; 2590 2591 if (!priv->rx) 2592 return -EAGAIN; 2593 2594 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2595 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2596 2597 if (gve_is_gqi(priv)) 2598 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2599 else 2600 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2601 2602 return err; 2603 } 2604 2605 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2606 { 2607 struct gve_priv *priv = netdev_priv(dev); 2608 struct gve_rx_ring *gve_per_q_mem; 2609 int err; 2610 2611 if (!priv->rx) 2612 return -EAGAIN; 2613 2614 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2615 priv->rx[idx] = *gve_per_q_mem; 2616 2617 /* Single-queue creation requires quiescence on all queues */ 2618 gve_turndown(priv); 2619 2620 gve_rx_start_ring(priv, idx); 2621 2622 if (gve_is_qpl(priv)) { 2623 /* This failure will trigger a reset - no need to clean up */ 2624 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2625 if (err) 2626 goto abort; 2627 } 2628 2629 /* This failure will trigger a reset - no need to clean up */ 2630 err = gve_adminq_create_single_rx_queue(priv, idx); 2631 if (err) 2632 goto abort; 2633 2634 if (gve_is_gqi(priv)) 2635 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2636 else 2637 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2638 2639 /* Turn the unstopped queues back up */ 2640 gve_turnup_and_check_status(priv); 2641 return 0; 2642 2643 abort: 2644 gve_rx_stop_ring(priv, idx); 2645 2646 /* All failures in this func result in a reset, by clearing the struct 2647 * at idx, we prevent a double free when that reset runs. The reset, 2648 * which needs the rtnl lock, will not run till this func returns and 2649 * its caller gives up the lock. 
 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops =
&gve_netdev_ops; 2765 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2766 dev->stat_ops = &gve_stat_ops; 2767 2768 /* Set default and supported features. 2769 * 2770 * Features might be set in other locations as well (such as 2771 * `gve_adminq_describe_device`). 2772 */ 2773 dev->hw_features = NETIF_F_HIGHDMA; 2774 dev->hw_features |= NETIF_F_SG; 2775 dev->hw_features |= NETIF_F_HW_CSUM; 2776 dev->hw_features |= NETIF_F_TSO; 2777 dev->hw_features |= NETIF_F_TSO6; 2778 dev->hw_features |= NETIF_F_TSO_ECN; 2779 dev->hw_features |= NETIF_F_RXCSUM; 2780 dev->hw_features |= NETIF_F_RXHASH; 2781 dev->features = dev->hw_features; 2782 dev->watchdog_timeo = 5 * HZ; 2783 dev->min_mtu = ETH_MIN_MTU; 2784 netif_carrier_off(dev); 2785 2786 priv = netdev_priv(dev); 2787 priv->dev = dev; 2788 priv->pdev = pdev; 2789 priv->msg_enable = DEFAULT_MSG_LEVEL; 2790 priv->reg_bar0 = reg_bar; 2791 priv->db_bar2 = db_bar; 2792 priv->service_task_flags = 0x0; 2793 priv->state_flags = 0x0; 2794 priv->ethtool_flags = 0x0; 2795 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2796 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2797 2798 gve_set_probe_in_progress(priv); 2799 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2800 if (!priv->gve_wq) { 2801 dev_err(&pdev->dev, "Could not allocate workqueue"); 2802 err = -ENOMEM; 2803 goto abort_with_netdev; 2804 } 2805 INIT_WORK(&priv->service_task, gve_service_task); 2806 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2807 priv->tx_cfg.max_queues = max_tx_queues; 2808 priv->rx_cfg.max_queues = max_rx_queues; 2809 2810 err = gve_init_priv(priv, false); 2811 if (err) 2812 goto abort_with_wq; 2813 2814 if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) 2815 dev->netmem_tx = true; 2816 2817 err = register_netdev(dev); 2818 if (err) 2819 goto abort_with_gve_init; 2820 2821 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2822 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2823 gve_clear_probe_in_progress(priv); 2824 queue_work(priv->gve_wq, &priv->service_task); 2825 return 0; 2826 2827 abort_with_gve_init: 2828 gve_teardown_priv_resources(priv); 2829 2830 abort_with_wq: 2831 destroy_workqueue(priv->gve_wq); 2832 2833 abort_with_netdev: 2834 free_netdev(dev); 2835 2836 abort_with_db_bar: 2837 pci_iounmap(pdev, db_bar); 2838 2839 abort_with_reg_bar: 2840 pci_iounmap(pdev, reg_bar); 2841 2842 abort_with_pci_region: 2843 pci_release_regions(pdev); 2844 2845 abort_with_enabled: 2846 pci_disable_device(pdev); 2847 return err; 2848 } 2849 2850 static void gve_remove(struct pci_dev *pdev) 2851 { 2852 struct net_device *netdev = pci_get_drvdata(pdev); 2853 struct gve_priv *priv = netdev_priv(netdev); 2854 __be32 __iomem *db_bar = priv->db_bar2; 2855 void __iomem *reg_bar = priv->reg_bar0; 2856 2857 unregister_netdev(netdev); 2858 gve_teardown_priv_resources(priv); 2859 destroy_workqueue(priv->gve_wq); 2860 free_netdev(netdev); 2861 pci_iounmap(pdev, db_bar); 2862 pci_iounmap(pdev, reg_bar); 2863 pci_release_regions(pdev); 2864 pci_disable_device(pdev); 2865 } 2866 2867 static void gve_shutdown(struct pci_dev *pdev) 2868 { 2869 struct net_device *netdev = pci_get_drvdata(pdev); 2870 struct gve_priv *priv = netdev_priv(netdev); 2871 bool was_up = netif_running(priv->dev); 2872 2873 netif_device_detach(netdev); 2874 2875 rtnl_lock(); 2876 netdev_lock(netdev); 2877 if (was_up && gve_close(priv->dev)) { 2878 /* If the dev was up, attempt to close, if close fails, reset */ 2879 gve_reset_and_teardown(priv, was_up); 2880 } else { 
2881 /* If the dev wasn't up or close worked, finish tearing down */ 2882 gve_teardown_priv_resources(priv); 2883 } 2884 netdev_unlock(netdev); 2885 rtnl_unlock(); 2886 } 2887 2888 #ifdef CONFIG_PM 2889 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2890 { 2891 struct net_device *netdev = pci_get_drvdata(pdev); 2892 struct gve_priv *priv = netdev_priv(netdev); 2893 bool was_up = netif_running(priv->dev); 2894 2895 priv->suspend_cnt++; 2896 rtnl_lock(); 2897 netdev_lock(netdev); 2898 if (was_up && gve_close(priv->dev)) { 2899 /* If the dev was up, attempt to close, if close fails, reset */ 2900 gve_reset_and_teardown(priv, was_up); 2901 } else { 2902 /* If the dev wasn't up or close worked, finish tearing down */ 2903 gve_teardown_priv_resources(priv); 2904 } 2905 priv->up_before_suspend = was_up; 2906 netdev_unlock(netdev); 2907 rtnl_unlock(); 2908 return 0; 2909 } 2910 2911 static int gve_resume(struct pci_dev *pdev) 2912 { 2913 struct net_device *netdev = pci_get_drvdata(pdev); 2914 struct gve_priv *priv = netdev_priv(netdev); 2915 int err; 2916 2917 priv->resume_cnt++; 2918 rtnl_lock(); 2919 netdev_lock(netdev); 2920 err = gve_reset_recovery(priv, priv->up_before_suspend); 2921 netdev_unlock(netdev); 2922 rtnl_unlock(); 2923 return err; 2924 } 2925 #endif /* CONFIG_PM */ 2926 2927 static const struct pci_device_id gve_id_table[] = { 2928 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2929 { } 2930 }; 2931 2932 static struct pci_driver gve_driver = { 2933 .name = gve_driver_name, 2934 .id_table = gve_id_table, 2935 .probe = gve_probe, 2936 .remove = gve_remove, 2937 .shutdown = gve_shutdown, 2938 #ifdef CONFIG_PM 2939 .suspend = gve_suspend, 2940 .resume = gve_resume, 2941 #endif 2942 }; 2943 2944 module_pci_driver(gve_driver); 2945 2946 MODULE_DEVICE_TABLE(pci, gve_id_table); 2947 MODULE_AUTHOR("Google, Inc."); 2948 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2949 MODULE_LICENSE("Dual MIT/GPL"); 2950 MODULE_VERSION(GVE_VERSION); 2951