// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

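/* ndo_get_stats64 handler: totals are accumulated from the per-ring counters
 * under each ring's u64_stats seqcount, so the 64-bit counters read
 * consistently even on 32-bit machines while the datapath keeps updating them.
 */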
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE,
			 sizeof(*flow_rules_cache->rules_cache), GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE,
			 sizeof(*flow_rules_cache->rule_ids_cache), GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

static int gve_alloc_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	if (!priv->cache_rss_config)
		return 0;

	rss_config->hash_key = kcalloc(priv->rss_key_size,
				       sizeof(rss_config->hash_key[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_key)
		return -ENOMEM;

	rss_config->hash_lut = kcalloc(priv->rss_lut_size,
				       sizeof(rss_config->hash_lut[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_lut)
		goto free_rss_key_cache;

	return 0;

free_rss_key_cache:
	kfree(rss_config->hash_key);
	rss_config->hash_key = NULL;
	return -ENOMEM;
}

static void gve_free_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	kfree(rss_config->hash_key);
	kfree(rss_config->hash_lut);

	memset(rss_config, 0, sizeof(*rss_config));
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

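/* Stats reporting flow: a self-rearming timer periodically schedules the
 * stats_report_task below, which copies per-queue counters into the DMA
 * region that gve_adminq_report_stats() registered with the device.
 */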
/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
{
	int cpu_curr = smp_processor_id();
	const struct cpumask *aff_mask;

	aff_mask = irq_get_effective_affinity_mask(irq);
	if (unlikely(!aff_mask))
		return 1;

	return cpumask_test_cpu(cpu_curr, aff_mask);
}

int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

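	/* A zero budget signals a netpoll-style invocation: only Tx
	 * completions may be processed, so skip all Rx work.
	 */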
	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);

		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
		 * TX and RX work done.
		 */
		if (priv->xdp_prog)
			work_done = max_t(int, work_done,
					  gve_xsk_tx_poll(block, budget));

		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure the IRQ ACK is visible before we check for pending
		 * work: any updates the queue has issued by then will be
		 * observed below.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule) {
		/* Reschedule by returning budget only if already on the correct
		 * cpu.
		 */
		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
			return budget;

		/* If we are not on the cpu this queue's irq has affinity with,
		 * avoid rescheduling napi and arm the irq instead, so that
		 * napi eventually gets rescheduled onto the right cpu.
		 */
		if (work_done == budget)
			work_done--;
	}

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
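		 * (The MSI-X Pending Bit Array latches events that arrive
		 * while the vector is being serviced, so none are lost in
		 * this window.)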
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ?
gve_intr : gve_intr_dqo, 540 0, block->name, block); 541 if (err) { 542 dev_err(&priv->pdev->dev, 543 "Failed to receive msix vector %d\n", i); 544 goto abort_with_some_ntfy_blocks; 545 } 546 block->irq = priv->msix_vectors[msix_idx].vector; 547 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 548 get_cpu_mask(i % active_cpus)); 549 block->irq_db_index = &priv->irq_db_indices[i].index; 550 } 551 return 0; 552 abort_with_some_ntfy_blocks: 553 for (j = 0; j < i; j++) { 554 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 555 int msix_idx = j; 556 557 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 558 NULL); 559 free_irq(priv->msix_vectors[msix_idx].vector, block); 560 block->irq = 0; 561 } 562 kvfree(priv->ntfy_blocks); 563 priv->ntfy_blocks = NULL; 564 abort_with_irq_db_indices: 565 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 566 sizeof(*priv->irq_db_indices), 567 priv->irq_db_indices, priv->irq_db_indices_bus); 568 priv->irq_db_indices = NULL; 569 abort_with_mgmt_vector: 570 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 571 abort_with_msix_enabled: 572 pci_disable_msix(priv->pdev); 573 abort_with_msix_vectors: 574 kvfree(priv->msix_vectors); 575 priv->msix_vectors = NULL; 576 return err; 577 } 578 579 static void gve_free_notify_blocks(struct gve_priv *priv) 580 { 581 int i; 582 583 if (!priv->msix_vectors) 584 return; 585 586 /* Free the irqs */ 587 for (i = 0; i < priv->num_ntfy_blks; i++) { 588 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 589 int msix_idx = i; 590 591 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 592 NULL); 593 free_irq(priv->msix_vectors[msix_idx].vector, block); 594 block->irq = 0; 595 } 596 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 597 kvfree(priv->ntfy_blocks); 598 priv->ntfy_blocks = NULL; 599 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 600 sizeof(*priv->irq_db_indices), 601 priv->irq_db_indices, priv->irq_db_indices_bus); 602 priv->irq_db_indices = NULL; 603 pci_disable_msix(priv->pdev); 604 kvfree(priv->msix_vectors); 605 priv->msix_vectors = NULL; 606 } 607 608 static int gve_setup_device_resources(struct gve_priv *priv) 609 { 610 int err; 611 612 err = gve_alloc_flow_rule_caches(priv); 613 if (err) 614 return err; 615 err = gve_alloc_rss_config_cache(priv); 616 if (err) 617 goto abort_with_flow_rule_caches; 618 err = gve_alloc_counter_array(priv); 619 if (err) 620 goto abort_with_rss_config_cache; 621 err = gve_alloc_notify_blocks(priv); 622 if (err) 623 goto abort_with_counter; 624 err = gve_alloc_stats_report(priv); 625 if (err) 626 goto abort_with_ntfy_blocks; 627 err = gve_adminq_configure_device_resources(priv, 628 priv->counter_array_bus, 629 priv->num_event_counters, 630 priv->irq_db_indices_bus, 631 priv->num_ntfy_blks); 632 if (unlikely(err)) { 633 dev_err(&priv->pdev->dev, 634 "could not setup device_resources: err=%d\n", err); 635 err = -ENXIO; 636 goto abort_with_stats_report; 637 } 638 639 if (!gve_is_gqi(priv)) { 640 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 641 GFP_KERNEL); 642 if (!priv->ptype_lut_dqo) { 643 err = -ENOMEM; 644 goto abort_with_stats_report; 645 } 646 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 647 if (err) { 648 dev_err(&priv->pdev->dev, 649 "Failed to get ptype map: err=%d\n", err); 650 goto abort_with_ptype_lut; 651 } 652 } 653 654 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 655 if (err) { 656 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 657 goto 
abort_with_ptype_lut; 658 } 659 660 err = gve_adminq_report_stats(priv, priv->stats_report_len, 661 priv->stats_report_bus, 662 GVE_STATS_REPORT_TIMER_PERIOD); 663 if (err) 664 dev_err(&priv->pdev->dev, 665 "Failed to report stats: err=%d\n", err); 666 gve_set_device_resources_ok(priv); 667 return 0; 668 669 abort_with_ptype_lut: 670 kvfree(priv->ptype_lut_dqo); 671 priv->ptype_lut_dqo = NULL; 672 abort_with_stats_report: 673 gve_free_stats_report(priv); 674 abort_with_ntfy_blocks: 675 gve_free_notify_blocks(priv); 676 abort_with_counter: 677 gve_free_counter_array(priv); 678 abort_with_rss_config_cache: 679 gve_free_rss_config_cache(priv); 680 abort_with_flow_rule_caches: 681 gve_free_flow_rule_caches(priv); 682 683 return err; 684 } 685 686 static void gve_trigger_reset(struct gve_priv *priv); 687 688 static void gve_teardown_device_resources(struct gve_priv *priv) 689 { 690 int err; 691 692 /* Tell device its resources are being freed */ 693 if (gve_get_device_resources_ok(priv)) { 694 err = gve_flow_rules_reset(priv); 695 if (err) { 696 dev_err(&priv->pdev->dev, 697 "Failed to reset flow rules: err=%d\n", err); 698 gve_trigger_reset(priv); 699 } 700 /* detach the stats report */ 701 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 702 if (err) { 703 dev_err(&priv->pdev->dev, 704 "Failed to detach stats report: err=%d\n", err); 705 gve_trigger_reset(priv); 706 } 707 err = gve_adminq_deconfigure_device_resources(priv); 708 if (err) { 709 dev_err(&priv->pdev->dev, 710 "Could not deconfigure device resources: err=%d\n", 711 err); 712 gve_trigger_reset(priv); 713 } 714 } 715 716 kvfree(priv->ptype_lut_dqo); 717 priv->ptype_lut_dqo = NULL; 718 719 gve_free_flow_rule_caches(priv); 720 gve_free_rss_config_cache(priv); 721 gve_free_counter_array(priv); 722 gve_free_notify_blocks(priv); 723 gve_free_stats_report(priv); 724 gve_clear_device_resources_ok(priv); 725 } 726 727 static int gve_unregister_qpl(struct gve_priv *priv, 728 struct gve_queue_page_list *qpl) 729 { 730 int err; 731 732 if (!qpl) 733 return 0; 734 735 err = gve_adminq_unregister_page_list(priv, qpl->id); 736 if (err) { 737 netif_err(priv, drv, priv->dev, 738 "Failed to unregister queue page list %d\n", 739 qpl->id); 740 return err; 741 } 742 743 priv->num_registered_pages -= qpl->num_entries; 744 return 0; 745 } 746 747 static int gve_register_qpl(struct gve_priv *priv, 748 struct gve_queue_page_list *qpl) 749 { 750 int pages; 751 int err; 752 753 if (!qpl) 754 return 0; 755 756 pages = qpl->num_entries; 757 758 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 759 netif_err(priv, drv, priv->dev, 760 "Reached max number of registered pages %llu > %llu\n", 761 pages + priv->num_registered_pages, 762 priv->max_registered_pages); 763 return -EINVAL; 764 } 765 766 err = gve_adminq_register_page_list(priv, qpl); 767 if (err) { 768 netif_err(priv, drv, priv->dev, 769 "failed to register queue page list %d\n", 770 qpl->id); 771 return err; 772 } 773 774 priv->num_registered_pages += pages; 775 return 0; 776 } 777 778 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 779 { 780 struct gve_tx_ring *tx = &priv->tx[idx]; 781 782 if (gve_is_gqi(priv)) 783 return tx->tx_fifo.qpl; 784 else 785 return tx->dqo.qpl; 786 } 787 788 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 789 { 790 struct gve_rx_ring *rx = &priv->rx[idx]; 791 792 if (gve_is_gqi(priv)) 793 return rx->data.qpl; 794 else 795 return rx->dqo.qpl; 796 } 797 798 static 
int gve_register_xdp_qpls(struct gve_priv *priv) 799 { 800 int start_id; 801 int err; 802 int i; 803 804 start_id = gve_xdp_tx_start_queue_id(priv); 805 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 806 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 807 /* This failure will trigger a reset - no need to clean up */ 808 if (err) 809 return err; 810 } 811 return 0; 812 } 813 814 static int gve_register_qpls(struct gve_priv *priv) 815 { 816 int num_tx_qpls, num_rx_qpls; 817 int err; 818 int i; 819 820 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 821 gve_is_qpl(priv)); 822 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 823 824 for (i = 0; i < num_tx_qpls; i++) { 825 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 826 if (err) 827 return err; 828 } 829 830 for (i = 0; i < num_rx_qpls; i++) { 831 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 832 if (err) 833 return err; 834 } 835 836 return 0; 837 } 838 839 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 840 { 841 int start_id; 842 int err; 843 int i; 844 845 start_id = gve_xdp_tx_start_queue_id(priv); 846 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 847 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 848 /* This failure will trigger a reset - no need to clean */ 849 if (err) 850 return err; 851 } 852 return 0; 853 } 854 855 static int gve_unregister_qpls(struct gve_priv *priv) 856 { 857 int num_tx_qpls, num_rx_qpls; 858 int err; 859 int i; 860 861 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 862 gve_is_qpl(priv)); 863 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 864 865 for (i = 0; i < num_tx_qpls; i++) { 866 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 867 /* This failure will trigger a reset - no need to clean */ 868 if (err) 869 return err; 870 } 871 872 for (i = 0; i < num_rx_qpls; i++) { 873 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 874 /* This failure will trigger a reset - no need to clean */ 875 if (err) 876 return err; 877 } 878 return 0; 879 } 880 881 static int gve_create_xdp_rings(struct gve_priv *priv) 882 { 883 int err; 884 885 err = gve_adminq_create_tx_queues(priv, 886 gve_xdp_tx_start_queue_id(priv), 887 priv->num_xdp_queues); 888 if (err) { 889 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 890 priv->num_xdp_queues); 891 /* This failure will trigger a reset - no need to clean 892 * up 893 */ 894 return err; 895 } 896 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 897 priv->num_xdp_queues); 898 899 return 0; 900 } 901 902 static int gve_create_rings(struct gve_priv *priv) 903 { 904 int num_tx_queues = gve_num_tx_queues(priv); 905 int err; 906 int i; 907 908 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 909 if (err) { 910 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 911 num_tx_queues); 912 /* This failure will trigger a reset - no need to clean 913 * up 914 */ 915 return err; 916 } 917 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 918 num_tx_queues); 919 920 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 921 if (err) { 922 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 923 priv->rx_cfg.num_queues); 924 /* This failure will trigger a reset - no need to clean 925 * up 926 */ 927 return err; 928 } 929 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 930 priv->rx_cfg.num_queues); 931 932 if (gve_is_gqi(priv)) { 
933 /* Rx data ring has been prefilled with packet buffers at queue 934 * allocation time. 935 * 936 * Write the doorbell to provide descriptor slots and packet 937 * buffers to the NIC. 938 */ 939 for (i = 0; i < priv->rx_cfg.num_queues; i++) 940 gve_rx_write_doorbell(priv, &priv->rx[i]); 941 } else { 942 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 943 /* Post buffers and ring doorbell. */ 944 gve_rx_post_buffers_dqo(&priv->rx[i]); 945 } 946 } 947 948 return 0; 949 } 950 951 static void init_xdp_sync_stats(struct gve_priv *priv) 952 { 953 int start_id = gve_xdp_tx_start_queue_id(priv); 954 int i; 955 956 /* Init stats */ 957 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 958 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 959 960 u64_stats_init(&priv->tx[i].statss); 961 priv->tx[i].ntfy_id = ntfy_idx; 962 } 963 } 964 965 static void gve_init_sync_stats(struct gve_priv *priv) 966 { 967 int i; 968 969 for (i = 0; i < priv->tx_cfg.num_queues; i++) 970 u64_stats_init(&priv->tx[i].statss); 971 972 /* Init stats for XDP TX queues */ 973 init_xdp_sync_stats(priv); 974 975 for (i = 0; i < priv->rx_cfg.num_queues; i++) 976 u64_stats_init(&priv->rx[i].statss); 977 } 978 979 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 980 struct gve_tx_alloc_rings_cfg *cfg) 981 { 982 int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0; 983 984 cfg->qcfg = &priv->tx_cfg; 985 cfg->raw_addressing = !gve_is_qpl(priv); 986 cfg->ring_size = priv->tx_desc_cnt; 987 cfg->start_idx = 0; 988 cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues; 989 cfg->tx = priv->tx; 990 } 991 992 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) 993 { 994 int i; 995 996 if (!priv->tx) 997 return; 998 999 for (i = start_id; i < start_id + num_rings; i++) { 1000 if (gve_is_gqi(priv)) 1001 gve_tx_stop_ring_gqi(priv, i); 1002 else 1003 gve_tx_stop_ring_dqo(priv, i); 1004 } 1005 } 1006 1007 static void gve_tx_start_rings(struct gve_priv *priv, int start_id, 1008 int num_rings) 1009 { 1010 int i; 1011 1012 for (i = start_id; i < start_id + num_rings; i++) { 1013 if (gve_is_gqi(priv)) 1014 gve_tx_start_ring_gqi(priv, i); 1015 else 1016 gve_tx_start_ring_dqo(priv, i); 1017 } 1018 } 1019 1020 static int gve_alloc_xdp_rings(struct gve_priv *priv) 1021 { 1022 struct gve_tx_alloc_rings_cfg cfg = {0}; 1023 int err = 0; 1024 1025 if (!priv->num_xdp_queues) 1026 return 0; 1027 1028 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1029 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1030 cfg.num_rings = priv->num_xdp_queues; 1031 1032 err = gve_tx_alloc_rings_gqi(priv, &cfg); 1033 if (err) 1034 return err; 1035 1036 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); 1037 init_xdp_sync_stats(priv); 1038 1039 return 0; 1040 } 1041 1042 static int gve_queues_mem_alloc(struct gve_priv *priv, 1043 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1044 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1045 { 1046 int err; 1047 1048 if (gve_is_gqi(priv)) 1049 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 1050 else 1051 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 1052 if (err) 1053 return err; 1054 1055 if (gve_is_gqi(priv)) 1056 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1057 else 1058 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1059 if (err) 1060 goto free_tx; 1061 1062 return 0; 1063 1064 free_tx: 1065 if (gve_is_gqi(priv)) 1066 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1067 else 1068 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1069 return err; 1070 } 1071 1072 static 
int gve_destroy_xdp_rings(struct gve_priv *priv) 1073 { 1074 int start_id; 1075 int err; 1076 1077 start_id = gve_xdp_tx_start_queue_id(priv); 1078 err = gve_adminq_destroy_tx_queues(priv, 1079 start_id, 1080 priv->num_xdp_queues); 1081 if (err) { 1082 netif_err(priv, drv, priv->dev, 1083 "failed to destroy XDP queues\n"); 1084 /* This failure will trigger a reset - no need to clean up */ 1085 return err; 1086 } 1087 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 1088 1089 return 0; 1090 } 1091 1092 static int gve_destroy_rings(struct gve_priv *priv) 1093 { 1094 int num_tx_queues = gve_num_tx_queues(priv); 1095 int err; 1096 1097 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1098 if (err) { 1099 netif_err(priv, drv, priv->dev, 1100 "failed to destroy tx queues\n"); 1101 /* This failure will trigger a reset - no need to clean up */ 1102 return err; 1103 } 1104 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1105 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1106 if (err) { 1107 netif_err(priv, drv, priv->dev, 1108 "failed to destroy rx queues\n"); 1109 /* This failure will trigger a reset - no need to clean up */ 1110 return err; 1111 } 1112 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1113 return 0; 1114 } 1115 1116 static void gve_free_xdp_rings(struct gve_priv *priv) 1117 { 1118 struct gve_tx_alloc_rings_cfg cfg = {0}; 1119 1120 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1121 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1122 cfg.num_rings = priv->num_xdp_queues; 1123 1124 if (priv->tx) { 1125 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); 1126 gve_tx_free_rings_gqi(priv, &cfg); 1127 } 1128 } 1129 1130 static void gve_queues_mem_free(struct gve_priv *priv, 1131 struct gve_tx_alloc_rings_cfg *tx_cfg, 1132 struct gve_rx_alloc_rings_cfg *rx_cfg) 1133 { 1134 if (gve_is_gqi(priv)) { 1135 gve_tx_free_rings_gqi(priv, tx_cfg); 1136 gve_rx_free_rings_gqi(priv, rx_cfg); 1137 } else { 1138 gve_tx_free_rings_dqo(priv, tx_cfg); 1139 gve_rx_free_rings_dqo(priv, rx_cfg); 1140 } 1141 } 1142 1143 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1144 struct page **page, dma_addr_t *dma, 1145 enum dma_data_direction dir, gfp_t gfp_flags) 1146 { 1147 *page = alloc_page(gfp_flags); 1148 if (!*page) { 1149 priv->page_alloc_fail++; 1150 return -ENOMEM; 1151 } 1152 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1153 if (dma_mapping_error(dev, *dma)) { 1154 priv->dma_mapping_error++; 1155 put_page(*page); 1156 return -ENOMEM; 1157 } 1158 return 0; 1159 } 1160 1161 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1162 u32 id, int pages) 1163 { 1164 struct gve_queue_page_list *qpl; 1165 int err; 1166 int i; 1167 1168 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1169 if (!qpl) 1170 return NULL; 1171 1172 qpl->id = id; 1173 qpl->num_entries = 0; 1174 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1175 if (!qpl->pages) 1176 goto abort; 1177 1178 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1179 if (!qpl->page_buses) 1180 goto abort; 1181 1182 for (i = 0; i < pages; i++) { 1183 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1184 &qpl->page_buses[i], 1185 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1186 if (err) 1187 goto abort; 1188 qpl->num_entries++; 1189 } 1190 1191 return qpl; 1192 1193 abort: 1194 gve_free_queue_page_list(priv, qpl, id); 1195 return NULL; 1196 } 1197 1198 void gve_free_page(struct device *dev, struct page *page, dma_addr_t 
dma, 1199 enum dma_data_direction dir) 1200 { 1201 if (!dma_mapping_error(dev, dma)) 1202 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1203 if (page) 1204 put_page(page); 1205 } 1206 1207 void gve_free_queue_page_list(struct gve_priv *priv, 1208 struct gve_queue_page_list *qpl, 1209 u32 id) 1210 { 1211 int i; 1212 1213 if (!qpl) 1214 return; 1215 if (!qpl->pages) 1216 goto free_qpl; 1217 if (!qpl->page_buses) 1218 goto free_pages; 1219 1220 for (i = 0; i < qpl->num_entries; i++) 1221 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1222 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1223 1224 kvfree(qpl->page_buses); 1225 qpl->page_buses = NULL; 1226 free_pages: 1227 kvfree(qpl->pages); 1228 qpl->pages = NULL; 1229 free_qpl: 1230 kvfree(qpl); 1231 } 1232 1233 /* Use this to schedule a reset when the device is capable of continuing 1234 * to handle other requests in its current state. If it is not, do a reset 1235 * in thread instead. 1236 */ 1237 void gve_schedule_reset(struct gve_priv *priv) 1238 { 1239 gve_set_do_reset(priv); 1240 queue_work(priv->gve_wq, &priv->service_task); 1241 } 1242 1243 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1244 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1245 static void gve_turndown(struct gve_priv *priv); 1246 static void gve_turnup(struct gve_priv *priv); 1247 1248 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1249 { 1250 struct napi_struct *napi; 1251 struct gve_rx_ring *rx; 1252 int err = 0; 1253 int i, j; 1254 u32 tx_qid; 1255 1256 if (!priv->num_xdp_queues) 1257 return 0; 1258 1259 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1260 rx = &priv->rx[i]; 1261 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1262 1263 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1264 napi->napi_id); 1265 if (err) 1266 goto err; 1267 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1268 MEM_TYPE_PAGE_SHARED, NULL); 1269 if (err) 1270 goto err; 1271 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1272 if (rx->xsk_pool) { 1273 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1274 napi->napi_id); 1275 if (err) 1276 goto err; 1277 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1278 MEM_TYPE_XSK_BUFF_POOL, NULL); 1279 if (err) 1280 goto err; 1281 xsk_pool_set_rxq_info(rx->xsk_pool, 1282 &rx->xsk_rxq); 1283 } 1284 } 1285 1286 for (i = 0; i < priv->num_xdp_queues; i++) { 1287 tx_qid = gve_xdp_tx_queue_id(priv, i); 1288 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1289 } 1290 return 0; 1291 1292 err: 1293 for (j = i; j >= 0; j--) { 1294 rx = &priv->rx[j]; 1295 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1296 xdp_rxq_info_unreg(&rx->xdp_rxq); 1297 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1298 xdp_rxq_info_unreg(&rx->xsk_rxq); 1299 } 1300 return err; 1301 } 1302 1303 static void gve_unreg_xdp_info(struct gve_priv *priv) 1304 { 1305 int i, tx_qid; 1306 1307 if (!priv->num_xdp_queues) 1308 return; 1309 1310 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1311 struct gve_rx_ring *rx = &priv->rx[i]; 1312 1313 xdp_rxq_info_unreg(&rx->xdp_rxq); 1314 if (rx->xsk_pool) { 1315 xdp_rxq_info_unreg(&rx->xsk_rxq); 1316 rx->xsk_pool = NULL; 1317 } 1318 } 1319 1320 for (i = 0; i < priv->num_xdp_queues; i++) { 1321 tx_qid = gve_xdp_tx_queue_id(priv, i); 1322 priv->tx[tx_qid].xsk_pool = NULL; 1323 } 1324 } 1325 1326 static void gve_drain_page_cache(struct gve_priv *priv) 1327 { 1328 int i; 1329 1330 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1331 page_frag_cache_drain(&priv->rx[i].page_cache); 1332 } 1333 1334 static void 
gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1335 struct gve_rx_alloc_rings_cfg *cfg) 1336 { 1337 cfg->qcfg = &priv->rx_cfg; 1338 cfg->qcfg_tx = &priv->tx_cfg; 1339 cfg->raw_addressing = !gve_is_qpl(priv); 1340 cfg->enable_header_split = priv->header_split_enabled; 1341 cfg->ring_size = priv->rx_desc_cnt; 1342 cfg->packet_buffer_size = gve_is_gqi(priv) ? 1343 GVE_DEFAULT_RX_BUFFER_SIZE : 1344 priv->data_buffer_size_dqo; 1345 cfg->rx = priv->rx; 1346 } 1347 1348 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1349 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1350 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1351 { 1352 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1353 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1354 } 1355 1356 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1357 { 1358 if (gve_is_gqi(priv)) 1359 gve_rx_start_ring_gqi(priv, i); 1360 else 1361 gve_rx_start_ring_dqo(priv, i); 1362 } 1363 1364 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1365 { 1366 int i; 1367 1368 for (i = 0; i < num_rings; i++) 1369 gve_rx_start_ring(priv, i); 1370 } 1371 1372 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1373 { 1374 if (gve_is_gqi(priv)) 1375 gve_rx_stop_ring_gqi(priv, i); 1376 else 1377 gve_rx_stop_ring_dqo(priv, i); 1378 } 1379 1380 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1381 { 1382 int i; 1383 1384 if (!priv->rx) 1385 return; 1386 1387 for (i = 0; i < num_rings; i++) 1388 gve_rx_stop_ring(priv, i); 1389 } 1390 1391 static void gve_queues_mem_remove(struct gve_priv *priv) 1392 { 1393 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1394 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1395 1396 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1397 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1398 priv->tx = NULL; 1399 priv->rx = NULL; 1400 } 1401 1402 /* The passed-in queue memory is stored into priv and the queues are made live. 1403 * No memory is allocated. Passed-in memory is freed on errors. 
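 * Callers must therefore not free the passed-in cfg memory themselves, even
 * when this function returns an error (see the note in gve_open()).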
1404 */ 1405 static int gve_queues_start(struct gve_priv *priv, 1406 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1407 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1408 { 1409 struct net_device *dev = priv->dev; 1410 int err; 1411 1412 /* Record new resources into priv */ 1413 priv->tx = tx_alloc_cfg->tx; 1414 priv->rx = rx_alloc_cfg->rx; 1415 1416 /* Record new configs into priv */ 1417 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1418 priv->rx_cfg = *rx_alloc_cfg->qcfg; 1419 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1420 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1421 1422 if (priv->xdp_prog) 1423 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1424 else 1425 priv->num_xdp_queues = 0; 1426 1427 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); 1428 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); 1429 gve_init_sync_stats(priv); 1430 1431 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1432 if (err) 1433 goto stop_and_free_rings; 1434 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1435 if (err) 1436 goto stop_and_free_rings; 1437 1438 err = gve_reg_xdp_info(priv, dev); 1439 if (err) 1440 goto stop_and_free_rings; 1441 1442 if (rx_alloc_cfg->reset_rss) { 1443 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1444 if (err) 1445 goto reset; 1446 } 1447 1448 err = gve_register_qpls(priv); 1449 if (err) 1450 goto reset; 1451 1452 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1453 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; 1454 1455 err = gve_create_rings(priv); 1456 if (err) 1457 goto reset; 1458 1459 gve_set_device_rings_ok(priv); 1460 1461 if (gve_get_report_stats(priv)) 1462 mod_timer(&priv->stats_report_timer, 1463 round_jiffies(jiffies + 1464 msecs_to_jiffies(priv->stats_report_timer_period))); 1465 1466 gve_turnup(priv); 1467 queue_work(priv->gve_wq, &priv->service_task); 1468 priv->interface_up_cnt++; 1469 return 0; 1470 1471 reset: 1472 if (gve_get_reset_in_progress(priv)) 1473 goto stop_and_free_rings; 1474 gve_reset_and_teardown(priv, true); 1475 /* if this fails there is nothing we can do so just ignore the return */ 1476 gve_reset_recovery(priv, false); 1477 /* return the original error */ 1478 return err; 1479 stop_and_free_rings: 1480 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1481 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1482 gve_queues_mem_remove(priv); 1483 return err; 1484 } 1485 1486 static int gve_open(struct net_device *dev) 1487 { 1488 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1489 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1490 struct gve_priv *priv = netdev_priv(dev); 1491 int err; 1492 1493 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1494 1495 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1496 if (err) 1497 return err; 1498 1499 /* No need to free on error: ownership of resources is lost after 1500 * calling gve_queues_start. 
1501 */ 1502 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1503 if (err) 1504 return err; 1505 1506 return 0; 1507 } 1508 1509 static int gve_queues_stop(struct gve_priv *priv) 1510 { 1511 int err; 1512 1513 netif_carrier_off(priv->dev); 1514 if (gve_get_device_rings_ok(priv)) { 1515 gve_turndown(priv); 1516 gve_drain_page_cache(priv); 1517 err = gve_destroy_rings(priv); 1518 if (err) 1519 goto err; 1520 err = gve_unregister_qpls(priv); 1521 if (err) 1522 goto err; 1523 gve_clear_device_rings_ok(priv); 1524 } 1525 del_timer_sync(&priv->stats_report_timer); 1526 1527 gve_unreg_xdp_info(priv); 1528 1529 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1530 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1531 1532 priv->interface_down_cnt++; 1533 return 0; 1534 1535 err: 1536 /* This must have been called from a reset due to the rtnl lock 1537 * so just return at this point. 1538 */ 1539 if (gve_get_reset_in_progress(priv)) 1540 return err; 1541 /* Otherwise reset before returning */ 1542 gve_reset_and_teardown(priv, true); 1543 return gve_reset_recovery(priv, false); 1544 } 1545 1546 static int gve_close(struct net_device *dev) 1547 { 1548 struct gve_priv *priv = netdev_priv(dev); 1549 int err; 1550 1551 err = gve_queues_stop(priv); 1552 if (err) 1553 return err; 1554 1555 gve_queues_mem_remove(priv); 1556 return 0; 1557 } 1558 1559 static int gve_remove_xdp_queues(struct gve_priv *priv) 1560 { 1561 int err; 1562 1563 err = gve_destroy_xdp_rings(priv); 1564 if (err) 1565 return err; 1566 1567 err = gve_unregister_xdp_qpls(priv); 1568 if (err) 1569 return err; 1570 1571 gve_unreg_xdp_info(priv); 1572 gve_free_xdp_rings(priv); 1573 1574 priv->num_xdp_queues = 0; 1575 return 0; 1576 } 1577 1578 static int gve_add_xdp_queues(struct gve_priv *priv) 1579 { 1580 int err; 1581 1582 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1583 1584 err = gve_alloc_xdp_rings(priv); 1585 if (err) 1586 goto err; 1587 1588 err = gve_reg_xdp_info(priv, priv->dev); 1589 if (err) 1590 goto free_xdp_rings; 1591 1592 err = gve_register_xdp_qpls(priv); 1593 if (err) 1594 goto free_xdp_rings; 1595 1596 err = gve_create_xdp_rings(priv); 1597 if (err) 1598 goto free_xdp_rings; 1599 1600 return 0; 1601 1602 free_xdp_rings: 1603 gve_free_xdp_rings(priv); 1604 err: 1605 priv->num_xdp_queues = 0; 1606 return err; 1607 } 1608 1609 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1610 { 1611 if (!gve_get_napi_enabled(priv)) 1612 return; 1613 1614 if (link_status == netif_carrier_ok(priv->dev)) 1615 return; 1616 1617 if (link_status) { 1618 netdev_info(priv->dev, "Device link is up.\n"); 1619 netif_carrier_on(priv->dev); 1620 } else { 1621 netdev_info(priv->dev, "Device link is down.\n"); 1622 netif_carrier_off(priv->dev); 1623 } 1624 } 1625 1626 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1627 struct netlink_ext_ack *extack) 1628 { 1629 struct bpf_prog *old_prog; 1630 int err = 0; 1631 u32 status; 1632 1633 old_prog = READ_ONCE(priv->xdp_prog); 1634 if (!netif_running(priv->dev)) { 1635 WRITE_ONCE(priv->xdp_prog, prog); 1636 if (old_prog) 1637 bpf_prog_put(old_prog); 1638 return 0; 1639 } 1640 1641 gve_turndown(priv); 1642 if (!old_prog && prog) { 1643 // Allocate XDP TX queues if an XDP program is 1644 // being installed 1645 err = gve_add_xdp_queues(priv); 1646 if (err) 1647 goto out; 1648 } else if (old_prog && !prog) { 1649 // Remove XDP TX queues if an XDP program is 1650 // being uninstalled 1651 err = gve_remove_xdp_queues(priv); 1652 if (err) 1653 
goto out; 1654 } 1655 WRITE_ONCE(priv->xdp_prog, prog); 1656 if (old_prog) 1657 bpf_prog_put(old_prog); 1658 1659 out: 1660 gve_turnup(priv); 1661 status = ioread32be(&priv->reg_bar0->device_status); 1662 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1663 return err; 1664 } 1665 1666 static int gve_xsk_pool_enable(struct net_device *dev, 1667 struct xsk_buff_pool *pool, 1668 u16 qid) 1669 { 1670 struct gve_priv *priv = netdev_priv(dev); 1671 struct napi_struct *napi; 1672 struct gve_rx_ring *rx; 1673 int tx_qid; 1674 int err; 1675 1676 if (qid >= priv->rx_cfg.num_queues) { 1677 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1678 return -EINVAL; 1679 } 1680 if (xsk_pool_get_rx_frame_size(pool) < 1681 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1682 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1683 return -EINVAL; 1684 } 1685 1686 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1687 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1688 if (err) 1689 return err; 1690 1691 /* If XDP prog is not installed or interface is down, return. */ 1692 if (!priv->xdp_prog || !netif_running(dev)) 1693 return 0; 1694 1695 rx = &priv->rx[qid]; 1696 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1697 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1698 if (err) 1699 goto err; 1700 1701 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1702 MEM_TYPE_XSK_BUFF_POOL, NULL); 1703 if (err) 1704 goto err; 1705 1706 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1707 rx->xsk_pool = pool; 1708 1709 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1710 priv->tx[tx_qid].xsk_pool = pool; 1711 1712 return 0; 1713 err: 1714 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1715 xdp_rxq_info_unreg(&rx->xsk_rxq); 1716 1717 xsk_pool_dma_unmap(pool, 1718 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1719 return err; 1720 } 1721 1722 static int gve_xsk_pool_disable(struct net_device *dev, 1723 u16 qid) 1724 { 1725 struct gve_priv *priv = netdev_priv(dev); 1726 struct napi_struct *napi_rx; 1727 struct napi_struct *napi_tx; 1728 struct xsk_buff_pool *pool; 1729 int tx_qid; 1730 1731 pool = xsk_get_pool_from_qid(dev, qid); 1732 if (!pool) 1733 return -EINVAL; 1734 if (qid >= priv->rx_cfg.num_queues) 1735 return -EINVAL; 1736 1737 /* If XDP prog is not installed or interface is down, unmap DMA and 1738 * return. 
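	 * Otherwise the rx and tx NAPI contexts that might still be using the
	 * pool are quiesced before the pool pointers are cleared and the DMA
	 * mappings are released.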
1739 */ 1740 if (!priv->xdp_prog || !netif_running(dev)) 1741 goto done; 1742 1743 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1744 napi_disable(napi_rx); /* make sure current rx poll is done */ 1745 1746 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1747 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1748 napi_disable(napi_tx); /* make sure current tx poll is done */ 1749 1750 priv->rx[qid].xsk_pool = NULL; 1751 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1752 priv->tx[tx_qid].xsk_pool = NULL; 1753 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1754 1755 napi_enable(napi_rx); 1756 if (gve_rx_work_pending(&priv->rx[qid])) 1757 napi_schedule(napi_rx); 1758 1759 napi_enable(napi_tx); 1760 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1761 napi_schedule(napi_tx); 1762 1763 done: 1764 xsk_pool_dma_unmap(pool, 1765 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1766 return 0; 1767 } 1768 1769 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1770 { 1771 struct gve_priv *priv = netdev_priv(dev); 1772 struct napi_struct *napi; 1773 1774 if (!gve_get_napi_enabled(priv)) 1775 return -ENETDOWN; 1776 1777 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1778 return -EINVAL; 1779 1780 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1781 if (!napi_if_scheduled_mark_missed(napi)) { 1782 /* Call local_bh_enable to trigger SoftIRQ processing */ 1783 local_bh_disable(); 1784 napi_schedule(napi); 1785 local_bh_enable(); 1786 } 1787 1788 return 0; 1789 } 1790 1791 static int verify_xdp_configuration(struct net_device *dev) 1792 { 1793 struct gve_priv *priv = netdev_priv(dev); 1794 1795 if (dev->features & NETIF_F_LRO) { 1796 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1797 return -EOPNOTSUPP; 1798 } 1799 1800 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1801 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1802 priv->queue_format); 1803 return -EOPNOTSUPP; 1804 } 1805 1806 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { 1807 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1808 dev->mtu); 1809 return -EOPNOTSUPP; 1810 } 1811 1812 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1813 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1814 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1815 priv->rx_cfg.num_queues, 1816 priv->tx_cfg.num_queues, 1817 priv->tx_cfg.max_queues); 1818 return -EINVAL; 1819 } 1820 return 0; 1821 } 1822 1823 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1824 { 1825 struct gve_priv *priv = netdev_priv(dev); 1826 int err; 1827 1828 err = verify_xdp_configuration(dev); 1829 if (err) 1830 return err; 1831 switch (xdp->command) { 1832 case XDP_SETUP_PROG: 1833 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1834 case XDP_SETUP_XSK_POOL: 1835 if (xdp->xsk.pool) 1836 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1837 else 1838 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1839 default: 1840 return -EINVAL; 1841 } 1842 } 1843 1844 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1845 { 1846 struct gve_rss_config *rss_config = &priv->rss_config; 1847 struct ethtool_rxfh_param rxfh = {0}; 1848 u16 i; 1849 1850 if (!priv->cache_rss_config) 1851 
		return 0;

	for (i = 0; i < priv->rss_lut_size; i++)
		rss_config->hash_lut[i] =
			ethtool_rxfh_indir_default(i, num_queues);

	netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);

	rxfh.hfunc = ETH_RSS_HASH_TOP;

	return gve_adminq_configure_rss(priv, &rxfh);
}

int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}

int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
		 */
		gve_turndown(priv);
		return err;
	}

	return 0;
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config,
		      bool reset_rss)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int num_xdp_queues;
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg = &new_rx_config;
	rx_alloc_cfg.reset_rss = reset_rss;
	tx_alloc_cfg.num_rings = new_tx_config.num_queues;

	/* Add dedicated XDP TX queues if enabled. */
	num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0;
	tx_alloc_cfg.num_rings += num_xdp_queues;

	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Interface is down: set the config for the next up.
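	 * The new queue counts are only recorded in priv here; gve_open() picks
	 * them up the next time the interface is brought up.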
*/ 1938 if (reset_rss) { 1939 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1940 if (err) 1941 return err; 1942 } 1943 priv->tx_cfg = new_tx_config; 1944 priv->rx_cfg = new_rx_config; 1945 1946 return 0; 1947 } 1948 1949 static void gve_turndown(struct gve_priv *priv) 1950 { 1951 int idx; 1952 1953 if (netif_carrier_ok(priv->dev)) 1954 netif_carrier_off(priv->dev); 1955 1956 if (!gve_get_napi_enabled(priv)) 1957 return; 1958 1959 /* Disable napi to prevent more work from coming in */ 1960 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1961 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1962 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1963 1964 if (!gve_tx_was_added_to_block(priv, idx)) 1965 continue; 1966 1967 if (idx < priv->tx_cfg.num_queues) 1968 netif_queue_set_napi(priv->dev, idx, 1969 NETDEV_QUEUE_TYPE_TX, NULL); 1970 1971 napi_disable_locked(&block->napi); 1972 } 1973 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1974 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1975 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1976 1977 if (!gve_rx_was_added_to_block(priv, idx)) 1978 continue; 1979 1980 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1981 NULL); 1982 napi_disable_locked(&block->napi); 1983 } 1984 1985 /* Stop tx queues */ 1986 netif_tx_disable(priv->dev); 1987 1988 xdp_features_clear_redirect_target(priv->dev); 1989 1990 gve_clear_napi_enabled(priv); 1991 gve_clear_report_stats(priv); 1992 1993 /* Make sure that all traffic is finished processing. */ 1994 synchronize_net(); 1995 } 1996 1997 static void gve_turnup(struct gve_priv *priv) 1998 { 1999 int idx; 2000 2001 /* Start the tx queues */ 2002 netif_tx_start_all_queues(priv->dev); 2003 2004 /* Enable napi and unmask interrupts for all queues */ 2005 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2006 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 2007 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 2008 2009 if (!gve_tx_was_added_to_block(priv, idx)) 2010 continue; 2011 2012 napi_enable_locked(&block->napi); 2013 2014 if (idx < priv->tx_cfg.num_queues) 2015 netif_queue_set_napi(priv->dev, idx, 2016 NETDEV_QUEUE_TYPE_TX, 2017 &block->napi); 2018 2019 if (gve_is_gqi(priv)) { 2020 iowrite32be(0, gve_irq_doorbell(priv, block)); 2021 } else { 2022 gve_set_itr_coalesce_usecs_dqo(priv, block, 2023 priv->tx_coalesce_usecs); 2024 } 2025 2026 /* Any descs written by the NIC before this barrier will be 2027 * handled by the one-off napi schedule below. Whereas any 2028 * descs after the barrier will generate interrupts. 2029 */ 2030 mb(); 2031 napi_schedule(&block->napi); 2032 } 2033 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2034 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 2035 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 2036 2037 if (!gve_rx_was_added_to_block(priv, idx)) 2038 continue; 2039 2040 napi_enable_locked(&block->napi); 2041 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 2042 &block->napi); 2043 2044 if (gve_is_gqi(priv)) { 2045 iowrite32be(0, gve_irq_doorbell(priv, block)); 2046 } else { 2047 gve_set_itr_coalesce_usecs_dqo(priv, block, 2048 priv->rx_coalesce_usecs); 2049 } 2050 2051 /* Any descs written by the NIC before this barrier will be 2052 * handled by the one-off napi schedule below. Whereas any 2053 * descs after the barrier will generate interrupts. 
2054 */ 2055 mb(); 2056 napi_schedule(&block->napi); 2057 } 2058 2059 if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv)) 2060 xdp_features_set_redirect_target(priv->dev, false); 2061 2062 gve_set_napi_enabled(priv); 2063 } 2064 2065 static void gve_turnup_and_check_status(struct gve_priv *priv) 2066 { 2067 u32 status; 2068 2069 gve_turnup(priv); 2070 status = ioread32be(&priv->reg_bar0->device_status); 2071 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2072 } 2073 2074 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 2075 { 2076 struct gve_notify_block *block; 2077 struct gve_tx_ring *tx = NULL; 2078 struct gve_priv *priv; 2079 u32 last_nic_done; 2080 u32 current_time; 2081 u32 ntfy_idx; 2082 2083 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 2084 priv = netdev_priv(dev); 2085 if (txqueue > priv->tx_cfg.num_queues) 2086 goto reset; 2087 2088 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 2089 if (ntfy_idx >= priv->num_ntfy_blks) 2090 goto reset; 2091 2092 block = &priv->ntfy_blocks[ntfy_idx]; 2093 tx = block->tx; 2094 2095 current_time = jiffies_to_msecs(jiffies); 2096 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2097 goto reset; 2098 2099 /* Check to see if there are missed completions, which will allow us to 2100 * kick the queue. 2101 */ 2102 last_nic_done = gve_tx_load_event_counter(priv, tx); 2103 if (last_nic_done - tx->done) { 2104 netdev_info(dev, "Kicking queue %d", txqueue); 2105 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 2106 napi_schedule(&block->napi); 2107 tx->last_kick_msec = current_time; 2108 goto out; 2109 } // Else reset. 2110 2111 reset: 2112 gve_schedule_reset(priv); 2113 2114 out: 2115 if (tx) 2116 tx->queue_timeout++; 2117 priv->tx_timeo_cnt++; 2118 } 2119 2120 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2121 { 2122 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2123 return GVE_MAX_RX_BUFFER_SIZE; 2124 else 2125 return GVE_DEFAULT_RX_BUFFER_SIZE; 2126 } 2127 2128 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 2129 bool gve_header_split_supported(const struct gve_priv *priv) 2130 { 2131 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 2132 } 2133 2134 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2135 { 2136 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2137 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2138 bool enable_hdr_split; 2139 int err = 0; 2140 2141 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2142 return 0; 2143 2144 if (!gve_header_split_supported(priv)) { 2145 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2146 return -EOPNOTSUPP; 2147 } 2148 2149 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2150 enable_hdr_split = true; 2151 else 2152 enable_hdr_split = false; 2153 2154 if (enable_hdr_split == priv->header_split_enabled) 2155 return 0; 2156 2157 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2158 2159 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2160 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2161 2162 if (netif_running(priv->dev)) 2163 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2164 return err; 2165 } 2166 2167 static int gve_set_features(struct net_device *netdev, 2168 netdev_features_t features) 2169 { 2170 const netdev_features_t orig_features = netdev->features; 2171 struct gve_tx_alloc_rings_cfg 
tx_alloc_cfg = {0}; 2172 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2173 struct gve_priv *priv = netdev_priv(netdev); 2174 int err; 2175 2176 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2177 2178 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2179 netdev->features ^= NETIF_F_LRO; 2180 if (netif_running(netdev)) { 2181 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2182 if (err) 2183 goto revert_features; 2184 } 2185 } 2186 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2187 err = gve_flow_rules_reset(priv); 2188 if (err) 2189 goto revert_features; 2190 } 2191 2192 return 0; 2193 2194 revert_features: 2195 netdev->features = orig_features; 2196 return err; 2197 } 2198 2199 static const struct net_device_ops gve_netdev_ops = { 2200 .ndo_start_xmit = gve_start_xmit, 2201 .ndo_features_check = gve_features_check, 2202 .ndo_open = gve_open, 2203 .ndo_stop = gve_close, 2204 .ndo_get_stats64 = gve_get_stats, 2205 .ndo_tx_timeout = gve_tx_timeout, 2206 .ndo_set_features = gve_set_features, 2207 .ndo_bpf = gve_xdp, 2208 .ndo_xdp_xmit = gve_xdp_xmit, 2209 .ndo_xsk_wakeup = gve_xsk_wakeup, 2210 }; 2211 2212 static void gve_handle_status(struct gve_priv *priv, u32 status) 2213 { 2214 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2215 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2216 gve_set_do_reset(priv); 2217 } 2218 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2219 priv->stats_report_trigger_cnt++; 2220 gve_set_do_report_stats(priv); 2221 } 2222 } 2223 2224 static void gve_handle_reset(struct gve_priv *priv) 2225 { 2226 /* A service task will be scheduled at the end of probe to catch any 2227 * resets that need to happen, and we don't want to reset until 2228 * probe is done. 2229 */ 2230 if (gve_get_probe_in_progress(priv)) 2231 return; 2232 2233 if (gve_get_do_reset(priv)) { 2234 rtnl_lock(); 2235 gve_reset(priv, false); 2236 rtnl_unlock(); 2237 } 2238 } 2239 2240 void gve_handle_report_stats(struct gve_priv *priv) 2241 { 2242 struct stats *stats = priv->stats_report->stats; 2243 int idx, stats_idx = 0; 2244 unsigned int start = 0; 2245 u64 tx_bytes; 2246 2247 if (!gve_get_report_stats(priv)) 2248 return; 2249 2250 be64_add_cpu(&priv->stats_report->written_count, 1); 2251 /* tx stats */ 2252 if (priv->tx) { 2253 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2254 u32 last_completion = 0; 2255 u32 tx_frames = 0; 2256 2257 /* DQO doesn't currently support these metrics. 
*/ 2258 if (gve_is_gqi(priv)) { 2259 last_completion = priv->tx[idx].done; 2260 tx_frames = priv->tx[idx].req; 2261 } 2262 2263 do { 2264 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2265 tx_bytes = priv->tx[idx].bytes_done; 2266 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2267 stats[stats_idx++] = (struct stats) { 2268 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2269 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2270 .queue_id = cpu_to_be32(idx), 2271 }; 2272 stats[stats_idx++] = (struct stats) { 2273 .stat_name = cpu_to_be32(TX_STOP_CNT), 2274 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2275 .queue_id = cpu_to_be32(idx), 2276 }; 2277 stats[stats_idx++] = (struct stats) { 2278 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2279 .value = cpu_to_be64(tx_frames), 2280 .queue_id = cpu_to_be32(idx), 2281 }; 2282 stats[stats_idx++] = (struct stats) { 2283 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2284 .value = cpu_to_be64(tx_bytes), 2285 .queue_id = cpu_to_be32(idx), 2286 }; 2287 stats[stats_idx++] = (struct stats) { 2288 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2289 .value = cpu_to_be64(last_completion), 2290 .queue_id = cpu_to_be32(idx), 2291 }; 2292 stats[stats_idx++] = (struct stats) { 2293 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2294 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2295 .queue_id = cpu_to_be32(idx), 2296 }; 2297 } 2298 } 2299 /* rx stats */ 2300 if (priv->rx) { 2301 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2302 stats[stats_idx++] = (struct stats) { 2303 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2304 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2305 .queue_id = cpu_to_be32(idx), 2306 }; 2307 stats[stats_idx++] = (struct stats) { 2308 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2309 .value = cpu_to_be64(priv->rx[idx].fill_cnt), 2310 .queue_id = cpu_to_be32(idx), 2311 }; 2312 } 2313 } 2314 } 2315 2316 /* Handle NIC status register changes, reset requests and report stats */ 2317 static void gve_service_task(struct work_struct *work) 2318 { 2319 struct gve_priv *priv = container_of(work, struct gve_priv, 2320 service_task); 2321 u32 status = ioread32be(&priv->reg_bar0->device_status); 2322 2323 gve_handle_status(priv, status); 2324 2325 gve_handle_reset(priv); 2326 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2327 } 2328 2329 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2330 { 2331 xdp_features_t xdp_features; 2332 2333 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2334 xdp_features = NETDEV_XDP_ACT_BASIC; 2335 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2336 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2337 } else { 2338 xdp_features = 0; 2339 } 2340 2341 xdp_set_features_flag(priv->dev, xdp_features); 2342 } 2343 2344 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2345 { 2346 int num_ntfy; 2347 int err; 2348 2349 /* Set up the adminq */ 2350 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2351 if (err) { 2352 dev_err(&priv->pdev->dev, 2353 "Failed to alloc admin queue: err=%d\n", err); 2354 return err; 2355 } 2356 2357 err = gve_verify_driver_compatibility(priv); 2358 if (err) { 2359 dev_err(&priv->pdev->dev, 2360 "Could not verify driver compatibility: err=%d\n", err); 2361 goto err; 2362 } 2363 2364 priv->num_registered_pages = 0; 2365 2366 if (skip_describe_device) 2367 goto setup_device; 2368 2369 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2370 /* Get the initial information we need from the device */ 2371 err =
gve_adminq_describe_device(priv); 2372 if (err) { 2373 dev_err(&priv->pdev->dev, 2374 "Could not get device information: err=%d\n", err); 2375 goto err; 2376 } 2377 priv->dev->mtu = priv->dev->max_mtu; 2378 num_ntfy = pci_msix_vec_count(priv->pdev); 2379 if (num_ntfy <= 0) { 2380 dev_err(&priv->pdev->dev, 2381 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2382 err = num_ntfy; 2383 goto err; 2384 } else if (num_ntfy < GVE_MIN_MSIX) { 2385 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2386 GVE_MIN_MSIX, num_ntfy); 2387 err = -EINVAL; 2388 goto err; 2389 } 2390 2391 /* Big TCP is only supported on DQ*/ 2392 if (!gve_is_gqi(priv)) 2393 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2394 2395 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2396 /* gvnic has one Notification Block per MSI-x vector, except for the 2397 * management vector 2398 */ 2399 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2400 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2401 2402 priv->tx_cfg.max_queues = 2403 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2404 priv->rx_cfg.max_queues = 2405 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2406 2407 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2408 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2409 if (priv->default_num_queues > 0) { 2410 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2411 priv->tx_cfg.num_queues); 2412 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2413 priv->rx_cfg.num_queues); 2414 } 2415 2416 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2417 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2418 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2419 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2420 2421 if (!gve_is_gqi(priv)) { 2422 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2423 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2424 } 2425 2426 setup_device: 2427 gve_set_netdev_xdp_features(priv); 2428 err = gve_setup_device_resources(priv); 2429 if (!err) 2430 return 0; 2431 err: 2432 gve_adminq_free(&priv->pdev->dev, priv); 2433 return err; 2434 } 2435 2436 static void gve_teardown_priv_resources(struct gve_priv *priv) 2437 { 2438 gve_teardown_device_resources(priv); 2439 gve_adminq_free(&priv->pdev->dev, priv); 2440 } 2441 2442 static void gve_trigger_reset(struct gve_priv *priv) 2443 { 2444 /* Reset the device by releasing the AQ */ 2445 gve_adminq_release(priv); 2446 } 2447 2448 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2449 { 2450 gve_trigger_reset(priv); 2451 /* With the reset having already happened, close cannot fail */ 2452 if (was_up) 2453 gve_close(priv->dev); 2454 gve_teardown_priv_resources(priv); 2455 } 2456 2457 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2458 { 2459 int err; 2460 2461 err = gve_init_priv(priv, true); 2462 if (err) 2463 goto err; 2464 if (was_up) { 2465 err = gve_open(priv->dev); 2466 if (err) 2467 goto err; 2468 } 2469 return 0; 2470 err: 2471 dev_err(&priv->pdev->dev, "Reset failed! !!! 
DISABLING ALL QUEUES !!!\n"); 2472 gve_turndown(priv); 2473 return err; 2474 } 2475 2476 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2477 { 2478 bool was_up = netif_running(priv->dev); 2479 int err; 2480 2481 dev_info(&priv->pdev->dev, "Performing reset\n"); 2482 gve_clear_do_reset(priv); 2483 gve_set_reset_in_progress(priv); 2484 /* If we aren't attempting to teardown normally, just go turndown and 2485 * reset right away. 2486 */ 2487 if (!attempt_teardown) { 2488 gve_turndown(priv); 2489 gve_reset_and_teardown(priv, was_up); 2490 } else { 2491 /* Otherwise attempt to close normally */ 2492 if (was_up) { 2493 err = gve_close(priv->dev); 2494 /* If that fails reset as we did above */ 2495 if (err) 2496 gve_reset_and_teardown(priv, was_up); 2497 } 2498 /* Clean up any remaining resources */ 2499 gve_teardown_priv_resources(priv); 2500 } 2501 2502 /* Set it all back up */ 2503 err = gve_reset_recovery(priv, was_up); 2504 gve_clear_reset_in_progress(priv); 2505 priv->reset_cnt++; 2506 priv->interface_up_cnt = 0; 2507 priv->interface_down_cnt = 0; 2508 priv->stats_report_trigger_cnt = 0; 2509 return err; 2510 } 2511 2512 static void gve_write_version(u8 __iomem *driver_version_register) 2513 { 2514 const char *c = gve_version_prefix; 2515 2516 while (*c) { 2517 writeb(*c, driver_version_register); 2518 c++; 2519 } 2520 2521 c = gve_version_str; 2522 while (*c) { 2523 writeb(*c, driver_version_register); 2524 c++; 2525 } 2526 writeb('\n', driver_version_register); 2527 } 2528 2529 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2530 { 2531 struct gve_priv *priv = netdev_priv(dev); 2532 struct gve_rx_ring *gve_per_q_mem; 2533 int err; 2534 2535 if (!priv->rx) 2536 return -EAGAIN; 2537 2538 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2539 if (!gve_is_gqi(priv) && idx == 0) 2540 return -ERANGE; 2541 2542 /* Single-queue destruction requires quiescence on all queues */ 2543 gve_turndown(priv); 2544 2545 /* This failure will trigger a reset - no need to clean up */ 2546 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2547 if (err) 2548 return err; 2549 2550 if (gve_is_qpl(priv)) { 2551 /* This failure will trigger a reset - no need to clean up */ 2552 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2553 if (err) 2554 return err; 2555 } 2556 2557 gve_rx_stop_ring(priv, idx); 2558 2559 /* Turn the unstopped queues back up */ 2560 gve_turnup_and_check_status(priv); 2561 2562 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2563 *gve_per_q_mem = priv->rx[idx]; 2564 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2565 return 0; 2566 } 2567 2568 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2569 { 2570 struct gve_priv *priv = netdev_priv(dev); 2571 struct gve_rx_alloc_rings_cfg cfg = {0}; 2572 struct gve_rx_ring *gve_per_q_mem; 2573 2574 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2575 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2576 2577 if (gve_is_gqi(priv)) 2578 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2579 else 2580 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2581 } 2582 2583 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2584 int idx) 2585 { 2586 struct gve_priv *priv = netdev_priv(dev); 2587 struct gve_rx_alloc_rings_cfg cfg = {0}; 2588 struct gve_rx_ring *gve_per_q_mem; 2589 int err; 2590 2591 if (!priv->rx) 2592 return -EAGAIN; 2593 2594 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2595 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2596 
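/* Allocate the replacement ring in whichever descriptor format the device negotiated: GQI and DQO rings have different layouts, so this mirrors the branch taken on free in gve_rx_queue_mem_free(). */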
2597 if (gve_is_gqi(priv)) 2598 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2599 else 2600 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2601 2602 return err; 2603 } 2604 2605 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2606 { 2607 struct gve_priv *priv = netdev_priv(dev); 2608 struct gve_rx_ring *gve_per_q_mem; 2609 int err; 2610 2611 if (!priv->rx) 2612 return -EAGAIN; 2613 2614 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2615 priv->rx[idx] = *gve_per_q_mem; 2616 2617 /* Single-queue creation requires quiescence on all queues */ 2618 gve_turndown(priv); 2619 2620 gve_rx_start_ring(priv, idx); 2621 2622 if (gve_is_qpl(priv)) { 2623 /* This failure will trigger a reset - no need to clean up */ 2624 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2625 if (err) 2626 goto abort; 2627 } 2628 2629 /* This failure will trigger a reset - no need to clean up */ 2630 err = gve_adminq_create_single_rx_queue(priv, idx); 2631 if (err) 2632 goto abort; 2633 2634 if (gve_is_gqi(priv)) 2635 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2636 else 2637 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2638 2639 /* Turn the unstopped queues back up */ 2640 gve_turnup_and_check_status(priv); 2641 return 0; 2642 2643 abort: 2644 gve_rx_stop_ring(priv, idx); 2645 2646 /* All failures in this func result in a reset, by clearing the struct 2647 * at idx, we prevent a double free when that reset runs. The reset, 2648 * which needs the rtnl lock, will not run till this func returns and 2649 * its caller gives up the lock. 2650 */ 2651 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2652 return err; 2653 } 2654 2655 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2656 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2657 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2658 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2659 .ndo_queue_start = gve_rx_queue_start, 2660 .ndo_queue_stop = gve_rx_queue_stop, 2661 }; 2662 2663 static void gve_get_rx_queue_stats(struct net_device *dev, int idx, 2664 struct netdev_queue_stats_rx *rx_stats) 2665 { 2666 struct gve_priv *priv = netdev_priv(dev); 2667 struct gve_rx_ring *rx = &priv->rx[idx]; 2668 unsigned int start; 2669 2670 do { 2671 start = u64_stats_fetch_begin(&rx->statss); 2672 rx_stats->packets = rx->rpackets; 2673 rx_stats->bytes = rx->rbytes; 2674 rx_stats->alloc_fail = rx->rx_skb_alloc_fail + 2675 rx->rx_buf_alloc_fail; 2676 } while (u64_stats_fetch_retry(&rx->statss, start)); 2677 } 2678 2679 static void gve_get_tx_queue_stats(struct net_device *dev, int idx, 2680 struct netdev_queue_stats_tx *tx_stats) 2681 { 2682 struct gve_priv *priv = netdev_priv(dev); 2683 struct gve_tx_ring *tx = &priv->tx[idx]; 2684 unsigned int start; 2685 2686 do { 2687 start = u64_stats_fetch_begin(&tx->statss); 2688 tx_stats->packets = tx->pkt_done; 2689 tx_stats->bytes = tx->bytes_done; 2690 } while (u64_stats_fetch_retry(&tx->statss, start)); 2691 } 2692 2693 static void gve_get_base_stats(struct net_device *dev, 2694 struct netdev_queue_stats_rx *rx, 2695 struct netdev_queue_stats_tx *tx) 2696 { 2697 rx->packets = 0; 2698 rx->bytes = 0; 2699 rx->alloc_fail = 0; 2700 2701 tx->packets = 0; 2702 tx->bytes = 0; 2703 } 2704 2705 static const struct netdev_stat_ops gve_stat_ops = { 2706 .get_queue_stats_rx = gve_get_rx_queue_stats, 2707 .get_queue_stats_tx = gve_get_tx_queue_stats, 2708 .get_base_stats = gve_get_base_stats, 2709 }; 2710 2711 static int gve_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent) 2712 { 2713 int max_tx_queues, max_rx_queues; 2714 struct net_device *dev; 2715 __be32 __iomem *db_bar; 2716 struct gve_registers __iomem *reg_bar; 2717 struct gve_priv *priv; 2718 int err; 2719 2720 err = pci_enable_device(pdev); 2721 if (err) 2722 return err; 2723 2724 err = pci_request_regions(pdev, gve_driver_name); 2725 if (err) 2726 goto abort_with_enabled; 2727 2728 pci_set_master(pdev); 2729 2730 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2731 if (err) { 2732 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2733 goto abort_with_pci_region; 2734 } 2735 2736 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2737 if (!reg_bar) { 2738 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2739 err = -ENOMEM; 2740 goto abort_with_pci_region; 2741 } 2742 2743 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2744 if (!db_bar) { 2745 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2746 err = -ENOMEM; 2747 goto abort_with_reg_bar; 2748 } 2749 2750 gve_write_version(&reg_bar->driver_version); 2751 /* Get max queues to alloc etherdev */ 2752 max_tx_queues = ioread32be(&reg_bar->max_tx_queues); 2753 max_rx_queues = ioread32be(&reg_bar->max_rx_queues); 2754 /* Alloc and setup the netdev and priv */ 2755 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2756 if (!dev) { 2757 dev_err(&pdev->dev, "could not allocate netdev\n"); 2758 err = -ENOMEM; 2759 goto abort_with_db_bar; 2760 } 2761 SET_NETDEV_DEV(dev, &pdev->dev); 2762 pci_set_drvdata(pdev, dev); 2763 dev->ethtool_ops = &gve_ethtool_ops; 2764 dev->netdev_ops = &gve_netdev_ops; 2765 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2766 dev->stat_ops = &gve_stat_ops; 2767 2768 /* Set default and supported features. 2769 * 2770 * Features might be set in other locations as well (such as 2771 * `gve_adminq_describe_device`).
2772 */ 2773 dev->hw_features = NETIF_F_HIGHDMA; 2774 dev->hw_features |= NETIF_F_SG; 2775 dev->hw_features |= NETIF_F_HW_CSUM; 2776 dev->hw_features |= NETIF_F_TSO; 2777 dev->hw_features |= NETIF_F_TSO6; 2778 dev->hw_features |= NETIF_F_TSO_ECN; 2779 dev->hw_features |= NETIF_F_RXCSUM; 2780 dev->hw_features |= NETIF_F_RXHASH; 2781 dev->features = dev->hw_features; 2782 dev->watchdog_timeo = 5 * HZ; 2783 dev->min_mtu = ETH_MIN_MTU; 2784 netif_carrier_off(dev); 2785 2786 priv = netdev_priv(dev); 2787 priv->dev = dev; 2788 priv->pdev = pdev; 2789 priv->msg_enable = DEFAULT_MSG_LEVEL; 2790 priv->reg_bar0 = reg_bar; 2791 priv->db_bar2 = db_bar; 2792 priv->service_task_flags = 0x0; 2793 priv->state_flags = 0x0; 2794 priv->ethtool_flags = 0x0; 2795 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 2796 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2797 2798 gve_set_probe_in_progress(priv); 2799 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2800 if (!priv->gve_wq) { 2801 dev_err(&pdev->dev, "Could not allocate workqueue"); 2802 err = -ENOMEM; 2803 goto abort_with_netdev; 2804 } 2805 INIT_WORK(&priv->service_task, gve_service_task); 2806 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2807 priv->tx_cfg.max_queues = max_tx_queues; 2808 priv->rx_cfg.max_queues = max_rx_queues; 2809 2810 err = gve_init_priv(priv, false); 2811 if (err) 2812 goto abort_with_wq; 2813 2814 err = register_netdev(dev); 2815 if (err) 2816 goto abort_with_gve_init; 2817 2818 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2819 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2820 gve_clear_probe_in_progress(priv); 2821 queue_work(priv->gve_wq, &priv->service_task); 2822 return 0; 2823 2824 abort_with_gve_init: 2825 gve_teardown_priv_resources(priv); 2826 2827 abort_with_wq: 2828 destroy_workqueue(priv->gve_wq); 2829 2830 abort_with_netdev: 2831 free_netdev(dev); 2832 2833 abort_with_db_bar: 2834 pci_iounmap(pdev, db_bar); 2835 2836 abort_with_reg_bar: 2837 pci_iounmap(pdev, reg_bar); 2838 2839 abort_with_pci_region: 2840 pci_release_regions(pdev); 2841 2842 abort_with_enabled: 2843 pci_disable_device(pdev); 2844 return err; 2845 } 2846 2847 static void gve_remove(struct pci_dev *pdev) 2848 { 2849 struct net_device *netdev = pci_get_drvdata(pdev); 2850 struct gve_priv *priv = netdev_priv(netdev); 2851 __be32 __iomem *db_bar = priv->db_bar2; 2852 void __iomem *reg_bar = priv->reg_bar0; 2853 2854 unregister_netdev(netdev); 2855 gve_teardown_priv_resources(priv); 2856 destroy_workqueue(priv->gve_wq); 2857 free_netdev(netdev); 2858 pci_iounmap(pdev, db_bar); 2859 pci_iounmap(pdev, reg_bar); 2860 pci_release_regions(pdev); 2861 pci_disable_device(pdev); 2862 } 2863 2864 static void gve_shutdown(struct pci_dev *pdev) 2865 { 2866 struct net_device *netdev = pci_get_drvdata(pdev); 2867 struct gve_priv *priv = netdev_priv(netdev); 2868 bool was_up = netif_running(priv->dev); 2869 2870 rtnl_lock(); 2871 if (was_up && gve_close(priv->dev)) { 2872 /* If the dev was up, attempt to close, if close fails, reset */ 2873 gve_reset_and_teardown(priv, was_up); 2874 } else { 2875 /* If the dev wasn't up or close worked, finish tearing down */ 2876 gve_teardown_priv_resources(priv); 2877 } 2878 rtnl_unlock(); 2879 } 2880 2881 #ifdef CONFIG_PM 2882 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2883 { 2884 struct net_device *netdev = pci_get_drvdata(pdev); 2885 struct gve_priv *priv = netdev_priv(netdev); 2886 bool was_up = netif_running(priv->dev); 2887 2888 
priv->suspend_cnt++; 2889 rtnl_lock(); 2890 netdev_lock(netdev); 2891 if (was_up && gve_close(priv->dev)) { 2892 /* If the dev was up, attempt to close, if close fails, reset */ 2893 gve_reset_and_teardown(priv, was_up); 2894 } else { 2895 /* If the dev wasn't up or close worked, finish tearing down */ 2896 gve_teardown_priv_resources(priv); 2897 } 2898 priv->up_before_suspend = was_up; 2899 netdev_unlock(netdev); 2900 rtnl_unlock(); 2901 return 0; 2902 } 2903 2904 static int gve_resume(struct pci_dev *pdev) 2905 { 2906 struct net_device *netdev = pci_get_drvdata(pdev); 2907 struct gve_priv *priv = netdev_priv(netdev); 2908 int err; 2909 2910 priv->resume_cnt++; 2911 rtnl_lock(); 2912 netdev_lock(netdev); 2913 err = gve_reset_recovery(priv, priv->up_before_suspend); 2914 netdev_unlock(netdev); 2915 rtnl_unlock(); 2916 return err; 2917 } 2918 #endif /* CONFIG_PM */ 2919 2920 static const struct pci_device_id gve_id_table[] = { 2921 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2922 { } 2923 }; 2924 2925 static struct pci_driver gve_driver = { 2926 .name = gve_driver_name, 2927 .id_table = gve_id_table, 2928 .probe = gve_probe, 2929 .remove = gve_remove, 2930 .shutdown = gve_shutdown, 2931 #ifdef CONFIG_PM 2932 .suspend = gve_suspend, 2933 .resume = gve_resume, 2934 #endif 2935 }; 2936 2937 module_pci_driver(gve_driver); 2938 2939 MODULE_DEVICE_TABLE(pci, gve_id_table); 2940 MODULE_AUTHOR("Google, Inc."); 2941 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2942 MODULE_LICENSE("Dual MIT/GPL"); 2943 MODULE_VERSION(GVE_VERSION); 2944