// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes =
priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 188 { 189 struct gve_rss_config *rss_config = &priv->rss_config; 190 191 if (!priv->cache_rss_config) 192 return 0; 193 194 rss_config->hash_key = kcalloc(priv->rss_key_size, 195 sizeof(rss_config->hash_key[0]), 196 GFP_KERNEL); 197 if (!rss_config->hash_key) 198 return -ENOMEM; 199 200 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 201 sizeof(rss_config->hash_lut[0]), 202 GFP_KERNEL); 203 if (!rss_config->hash_lut) 204 goto free_rss_key_cache; 205 206 return 0; 207 208 free_rss_key_cache: 209 kfree(rss_config->hash_key); 210 rss_config->hash_key = NULL; 211 return -ENOMEM; 212 } 213 214 static void gve_free_rss_config_cache(struct gve_priv *priv) 215 { 216 struct gve_rss_config *rss_config = &priv->rss_config; 217 218 kfree(rss_config->hash_key); 219 kfree(rss_config->hash_lut); 220 221 memset(rss_config, 0, sizeof(*rss_config)); 222 } 223 224 static int gve_alloc_counter_array(struct gve_priv *priv) 225 { 226 priv->counter_array = 227 dma_alloc_coherent(&priv->pdev->dev, 228 priv->num_event_counters * 229 sizeof(*priv->counter_array), 230 &priv->counter_array_bus, GFP_KERNEL); 231 if (!priv->counter_array) 232 return -ENOMEM; 233 234 return 0; 235 } 236 237 static void gve_free_counter_array(struct gve_priv *priv) 238 { 239 if (!priv->counter_array) 240 return; 241 242 dma_free_coherent(&priv->pdev->dev, 243 priv->num_event_counters * 244 sizeof(*priv->counter_array), 245 priv->counter_array, priv->counter_array_bus); 246 priv->counter_array = NULL; 247 } 248 249 /* NIC requests to report stats */ 250 static void gve_stats_report_task(struct work_struct *work) 251 { 252 struct gve_priv *priv = container_of(work, struct gve_priv, 
253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = timer_container_of(priv, t, 272 stats_report_timer); 273 274 mod_timer(&priv->stats_report_timer, 275 round_jiffies(jiffies + 276 msecs_to_jiffies(priv->stats_report_timer_period))); 277 gve_stats_report_schedule(priv); 278 } 279 280 static int gve_alloc_stats_report(struct gve_priv *priv) 281 { 282 int tx_stats_num, rx_stats_num; 283 284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 285 gve_num_tx_queues(priv); 286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 287 priv->rx_cfg.num_queues; 288 priv->stats_report_len = struct_size(priv->stats_report, stats, 289 size_add(tx_stats_num, rx_stats_num)); 290 priv->stats_report = 291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 292 &priv->stats_report_bus, GFP_KERNEL); 293 if (!priv->stats_report) 294 return -ENOMEM; 295 /* Set up timer for the report-stats task */ 296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 298 return 0; 299 } 300 301 static void gve_free_stats_report(struct gve_priv *priv) 302 { 303 if (!priv->stats_report) 304 return; 305 306 timer_delete_sync(&priv->stats_report_timer); 307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 308 priv->stats_report, priv->stats_report_bus); 309 priv->stats_report = NULL; 310 } 311 312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 313 { 314 struct gve_priv *priv = arg; 315 316 queue_work(priv->gve_wq, &priv->service_task); 317 return IRQ_HANDLED; 318 } 319 320 static irqreturn_t gve_intr(int irq, void *arg) 321 { 322 struct gve_notify_block *block = arg; 323 struct gve_priv *priv = block->priv; 324 325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 326 napi_schedule_irqoff(&block->napi); 327 return IRQ_HANDLED; 328 } 329 330 static irqreturn_t gve_intr_dqo(int irq, void *arg) 331 { 332 struct gve_notify_block *block = arg; 333 334 /* Interrupts are automatically masked */ 335 napi_schedule_irqoff(&block->napi); 336 return IRQ_HANDLED; 337 } 338 339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 340 { 341 int cpu_curr = smp_processor_id(); 342 const struct cpumask *aff_mask; 343 344 aff_mask = irq_get_effective_affinity_mask(irq); 345 if (unlikely(!aff_mask)) 346 return 1; 347 348 return cpumask_test_cpu(cpu_curr, aff_mask); 349 } 350 351 int gve_napi_poll(struct napi_struct *napi, int budget) 352 { 353 struct gve_notify_block *block; 354 __be32 __iomem *irq_doorbell; 355 bool reschedule = false; 356 struct gve_priv *priv; 357 int work_done = 0; 358 359 block = container_of(napi, struct gve_notify_block, napi); 360 priv = block->priv; 361 362 if (block->tx) { 363 if (block->tx->q_num < priv->tx_cfg.num_queues) 364 reschedule |= gve_tx_poll(block, budget); 365 else if (budget) 366 reschedule |= gve_xdp_poll(block, budget); 367 } 368 369 if (!budget) 370 return 0; 371 372 if (block->rx) { 373 work_done = gve_rx_poll(block, budget); 374 375 /* Poll XSK TX as part of RX NAPI. 
Setup re-poll based on max of
		 * TX and RX work done.
		 */
		if (priv->xdp_prog)
			work_done = max_t(int, work_done,
					  gve_xsk_tx_poll(block, budget));

		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure the IRQ ACK is ordered before we check for pending
		 * work, so that any updates the queue posted are actually
		 * observed below.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule) {
		/* Reschedule by returning budget only if already on the correct
		 * cpu.
		 */
		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
			return budget;

		/* If we are not on the cpu this queue's irq has affinity with,
		 * avoid rescheduling napi and arm the irq instead so that napi
		 * eventually gets rescheduled back onto the right cpu.
		 */
		if (work_done == budget)
			work_done--;
	}

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
452 */ 453 gve_write_irq_doorbell_dqo(priv, block, 454 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 455 } 456 457 return work_done; 458 } 459 460 static int gve_alloc_notify_blocks(struct gve_priv *priv) 461 { 462 int num_vecs_requested = priv->num_ntfy_blks + 1; 463 unsigned int active_cpus; 464 int vecs_enabled; 465 int i, j; 466 int err; 467 468 priv->msix_vectors = kvcalloc(num_vecs_requested, 469 sizeof(*priv->msix_vectors), GFP_KERNEL); 470 if (!priv->msix_vectors) 471 return -ENOMEM; 472 for (i = 0; i < num_vecs_requested; i++) 473 priv->msix_vectors[i].entry = i; 474 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 475 GVE_MIN_MSIX, num_vecs_requested); 476 if (vecs_enabled < 0) { 477 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 478 GVE_MIN_MSIX, vecs_enabled); 479 err = vecs_enabled; 480 goto abort_with_msix_vectors; 481 } 482 if (vecs_enabled != num_vecs_requested) { 483 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 484 int vecs_per_type = new_num_ntfy_blks / 2; 485 int vecs_left = new_num_ntfy_blks % 2; 486 487 priv->num_ntfy_blks = new_num_ntfy_blks; 488 priv->mgmt_msix_idx = priv->num_ntfy_blks; 489 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 490 vecs_per_type); 491 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 492 vecs_per_type + vecs_left); 493 dev_err(&priv->pdev->dev, 494 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 495 vecs_enabled, priv->tx_cfg.max_queues, 496 priv->rx_cfg.max_queues); 497 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 498 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 499 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 500 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 501 } 502 /* Half the notification blocks go to TX and half to RX */ 503 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 504 505 /* Setup Management Vector - the last vector */ 506 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 507 pci_name(priv->pdev)); 508 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 509 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 510 if (err) { 511 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 512 goto abort_with_msix_enabled; 513 } 514 priv->irq_db_indices = 515 dma_alloc_coherent(&priv->pdev->dev, 516 priv->num_ntfy_blks * 517 sizeof(*priv->irq_db_indices), 518 &priv->irq_db_indices_bus, GFP_KERNEL); 519 if (!priv->irq_db_indices) { 520 err = -ENOMEM; 521 goto abort_with_mgmt_vector; 522 } 523 524 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 525 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 526 if (!priv->ntfy_blocks) { 527 err = -ENOMEM; 528 goto abort_with_irq_db_indices; 529 } 530 531 /* Setup the other blocks - the first n-1 vectors */ 532 for (i = 0; i < priv->num_ntfy_blks; i++) { 533 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 534 int msix_idx = i; 535 536 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 537 i, pci_name(priv->pdev)); 538 block->priv = priv; 539 err = request_irq(priv->msix_vectors[msix_idx].vector, 540 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 541 0, block->name, block); 542 if (err) { 543 dev_err(&priv->pdev->dev, 544 "Failed to receive msix vector %d\n", i); 545 goto abort_with_some_ntfy_blocks; 546 } 547 block->irq = priv->msix_vectors[msix_idx].vector; 548 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 549 get_cpu_mask(i % active_cpus)); 550 block->irq_db_index = &priv->irq_db_indices[i].index; 551 } 552 return 0; 553 abort_with_some_ntfy_blocks: 554 for (j = 0; j < i; j++) { 555 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 556 int msix_idx = j; 557 558 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 559 NULL); 560 free_irq(priv->msix_vectors[msix_idx].vector, block); 561 block->irq = 0; 562 } 563 kvfree(priv->ntfy_blocks); 564 priv->ntfy_blocks = NULL; 565 abort_with_irq_db_indices: 566 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 567 sizeof(*priv->irq_db_indices), 568 priv->irq_db_indices, priv->irq_db_indices_bus); 569 priv->irq_db_indices = NULL; 570 abort_with_mgmt_vector: 571 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 572 abort_with_msix_enabled: 573 pci_disable_msix(priv->pdev); 574 abort_with_msix_vectors: 575 kvfree(priv->msix_vectors); 576 priv->msix_vectors = NULL; 577 return err; 578 } 579 580 static void gve_free_notify_blocks(struct gve_priv *priv) 581 { 582 int i; 583 584 if (!priv->msix_vectors) 585 return; 586 587 /* Free the irqs */ 588 for (i = 0; i < priv->num_ntfy_blks; i++) { 589 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 590 int msix_idx = i; 591 592 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 593 NULL); 594 free_irq(priv->msix_vectors[msix_idx].vector, block); 595 block->irq = 0; 596 } 597 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 598 kvfree(priv->ntfy_blocks); 599 priv->ntfy_blocks = NULL; 600 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 601 sizeof(*priv->irq_db_indices), 602 priv->irq_db_indices, priv->irq_db_indices_bus); 603 priv->irq_db_indices = NULL; 604 pci_disable_msix(priv->pdev); 605 kvfree(priv->msix_vectors); 606 priv->msix_vectors = NULL; 607 } 608 609 static int gve_setup_device_resources(struct gve_priv *priv) 610 { 611 int err; 612 613 err = gve_alloc_flow_rule_caches(priv); 614 if (err) 615 return err; 616 err = gve_alloc_rss_config_cache(priv); 617 if (err) 618 goto abort_with_flow_rule_caches; 619 err = gve_alloc_counter_array(priv); 620 if (err) 621 goto abort_with_rss_config_cache; 622 err = gve_init_clock(priv); 623 if (err) 624 goto abort_with_counter; 625 err = gve_alloc_notify_blocks(priv); 626 if (err) 627 goto abort_with_clock; 628 err = gve_alloc_stats_report(priv); 629 if (err) 630 goto abort_with_ntfy_blocks; 631 err = gve_adminq_configure_device_resources(priv, 632 priv->counter_array_bus, 633 priv->num_event_counters, 634 priv->irq_db_indices_bus, 635 priv->num_ntfy_blks); 636 if (unlikely(err)) { 637 dev_err(&priv->pdev->dev, 638 "could not setup device_resources: err=%d\n", err); 639 err = -ENXIO; 640 goto abort_with_stats_report; 641 } 642 643 if (!gve_is_gqi(priv)) { 644 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 645 GFP_KERNEL); 646 if (!priv->ptype_lut_dqo) { 647 err = -ENOMEM; 648 goto abort_with_stats_report; 649 } 650 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 651 if (err) { 652 dev_err(&priv->pdev->dev, 653 "Failed to get ptype map: err=%d\n", err); 654 goto abort_with_ptype_lut; 655 } 656 } 657 658 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 659 if (err) 
{ 660 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 661 goto abort_with_ptype_lut; 662 } 663 664 err = gve_adminq_report_stats(priv, priv->stats_report_len, 665 priv->stats_report_bus, 666 GVE_STATS_REPORT_TIMER_PERIOD); 667 if (err) 668 dev_err(&priv->pdev->dev, 669 "Failed to report stats: err=%d\n", err); 670 gve_set_device_resources_ok(priv); 671 return 0; 672 673 abort_with_ptype_lut: 674 kvfree(priv->ptype_lut_dqo); 675 priv->ptype_lut_dqo = NULL; 676 abort_with_stats_report: 677 gve_free_stats_report(priv); 678 abort_with_ntfy_blocks: 679 gve_free_notify_blocks(priv); 680 abort_with_clock: 681 gve_teardown_clock(priv); 682 abort_with_counter: 683 gve_free_counter_array(priv); 684 abort_with_rss_config_cache: 685 gve_free_rss_config_cache(priv); 686 abort_with_flow_rule_caches: 687 gve_free_flow_rule_caches(priv); 688 689 return err; 690 } 691 692 static void gve_trigger_reset(struct gve_priv *priv); 693 694 static void gve_teardown_device_resources(struct gve_priv *priv) 695 { 696 int err; 697 698 /* Tell device its resources are being freed */ 699 if (gve_get_device_resources_ok(priv)) { 700 err = gve_flow_rules_reset(priv); 701 if (err) { 702 dev_err(&priv->pdev->dev, 703 "Failed to reset flow rules: err=%d\n", err); 704 gve_trigger_reset(priv); 705 } 706 /* detach the stats report */ 707 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 708 if (err) { 709 dev_err(&priv->pdev->dev, 710 "Failed to detach stats report: err=%d\n", err); 711 gve_trigger_reset(priv); 712 } 713 err = gve_adminq_deconfigure_device_resources(priv); 714 if (err) { 715 dev_err(&priv->pdev->dev, 716 "Could not deconfigure device resources: err=%d\n", 717 err); 718 gve_trigger_reset(priv); 719 } 720 } 721 722 kvfree(priv->ptype_lut_dqo); 723 priv->ptype_lut_dqo = NULL; 724 725 gve_free_flow_rule_caches(priv); 726 gve_free_rss_config_cache(priv); 727 gve_free_counter_array(priv); 728 gve_free_notify_blocks(priv); 729 gve_free_stats_report(priv); 730 gve_teardown_clock(priv); 731 gve_clear_device_resources_ok(priv); 732 } 733 734 static int gve_unregister_qpl(struct gve_priv *priv, 735 struct gve_queue_page_list *qpl) 736 { 737 int err; 738 739 if (!qpl) 740 return 0; 741 742 err = gve_adminq_unregister_page_list(priv, qpl->id); 743 if (err) { 744 netif_err(priv, drv, priv->dev, 745 "Failed to unregister queue page list %d\n", 746 qpl->id); 747 return err; 748 } 749 750 priv->num_registered_pages -= qpl->num_entries; 751 return 0; 752 } 753 754 static int gve_register_qpl(struct gve_priv *priv, 755 struct gve_queue_page_list *qpl) 756 { 757 int pages; 758 int err; 759 760 if (!qpl) 761 return 0; 762 763 pages = qpl->num_entries; 764 765 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 766 netif_err(priv, drv, priv->dev, 767 "Reached max number of registered pages %llu > %llu\n", 768 pages + priv->num_registered_pages, 769 priv->max_registered_pages); 770 return -EINVAL; 771 } 772 773 err = gve_adminq_register_page_list(priv, qpl); 774 if (err) { 775 netif_err(priv, drv, priv->dev, 776 "failed to register queue page list %d\n", 777 qpl->id); 778 return err; 779 } 780 781 priv->num_registered_pages += pages; 782 return 0; 783 } 784 785 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 786 { 787 struct gve_tx_ring *tx = &priv->tx[idx]; 788 789 if (gve_is_gqi(priv)) 790 return tx->tx_fifo.qpl; 791 else 792 return tx->dqo.qpl; 793 } 794 795 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 796 { 
797 struct gve_rx_ring *rx = &priv->rx[idx]; 798 799 if (gve_is_gqi(priv)) 800 return rx->data.qpl; 801 else 802 return rx->dqo.qpl; 803 } 804 805 static int gve_register_qpls(struct gve_priv *priv) 806 { 807 int num_tx_qpls, num_rx_qpls; 808 int err; 809 int i; 810 811 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 812 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 813 814 for (i = 0; i < num_tx_qpls; i++) { 815 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 816 if (err) 817 return err; 818 } 819 820 for (i = 0; i < num_rx_qpls; i++) { 821 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 822 if (err) 823 return err; 824 } 825 826 return 0; 827 } 828 829 static int gve_unregister_qpls(struct gve_priv *priv) 830 { 831 int num_tx_qpls, num_rx_qpls; 832 int err; 833 int i; 834 835 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 836 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 837 838 for (i = 0; i < num_tx_qpls; i++) { 839 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 840 /* This failure will trigger a reset - no need to clean */ 841 if (err) 842 return err; 843 } 844 845 for (i = 0; i < num_rx_qpls; i++) { 846 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 847 /* This failure will trigger a reset - no need to clean */ 848 if (err) 849 return err; 850 } 851 return 0; 852 } 853 854 static int gve_create_rings(struct gve_priv *priv) 855 { 856 int num_tx_queues = gve_num_tx_queues(priv); 857 int err; 858 int i; 859 860 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 861 if (err) { 862 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 863 num_tx_queues); 864 /* This failure will trigger a reset - no need to clean 865 * up 866 */ 867 return err; 868 } 869 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 870 num_tx_queues); 871 872 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 873 if (err) { 874 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 875 priv->rx_cfg.num_queues); 876 /* This failure will trigger a reset - no need to clean 877 * up 878 */ 879 return err; 880 } 881 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 882 priv->rx_cfg.num_queues); 883 884 if (gve_is_gqi(priv)) { 885 /* Rx data ring has been prefilled with packet buffers at queue 886 * allocation time. 887 * 888 * Write the doorbell to provide descriptor slots and packet 889 * buffers to the NIC. 890 */ 891 for (i = 0; i < priv->rx_cfg.num_queues; i++) 892 gve_rx_write_doorbell(priv, &priv->rx[i]); 893 } else { 894 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 895 /* Post buffers and ring doorbell. 
*/ 896 gve_rx_post_buffers_dqo(&priv->rx[i]); 897 } 898 } 899 900 return 0; 901 } 902 903 static void init_xdp_sync_stats(struct gve_priv *priv) 904 { 905 int start_id = gve_xdp_tx_start_queue_id(priv); 906 int i; 907 908 /* Init stats */ 909 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 910 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 911 912 u64_stats_init(&priv->tx[i].statss); 913 priv->tx[i].ntfy_id = ntfy_idx; 914 } 915 } 916 917 static void gve_init_sync_stats(struct gve_priv *priv) 918 { 919 int i; 920 921 for (i = 0; i < priv->tx_cfg.num_queues; i++) 922 u64_stats_init(&priv->tx[i].statss); 923 924 /* Init stats for XDP TX queues */ 925 init_xdp_sync_stats(priv); 926 927 for (i = 0; i < priv->rx_cfg.num_queues; i++) 928 u64_stats_init(&priv->rx[i].statss); 929 } 930 931 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 932 struct gve_tx_alloc_rings_cfg *cfg) 933 { 934 cfg->qcfg = &priv->tx_cfg; 935 cfg->raw_addressing = !gve_is_qpl(priv); 936 cfg->ring_size = priv->tx_desc_cnt; 937 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 938 cfg->tx = priv->tx; 939 } 940 941 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 942 { 943 int i; 944 945 if (!priv->tx) 946 return; 947 948 for (i = 0; i < num_rings; i++) { 949 if (gve_is_gqi(priv)) 950 gve_tx_stop_ring_gqi(priv, i); 951 else 952 gve_tx_stop_ring_dqo(priv, i); 953 } 954 } 955 956 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 957 { 958 int i; 959 960 for (i = 0; i < num_rings; i++) { 961 if (gve_is_gqi(priv)) 962 gve_tx_start_ring_gqi(priv, i); 963 else 964 gve_tx_start_ring_dqo(priv, i); 965 } 966 } 967 968 static int gve_queues_mem_alloc(struct gve_priv *priv, 969 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 970 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 971 { 972 int err; 973 974 if (gve_is_gqi(priv)) 975 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 976 else 977 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 978 if (err) 979 return err; 980 981 if (gve_is_gqi(priv)) 982 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 983 else 984 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 985 if (err) 986 goto free_tx; 987 988 return 0; 989 990 free_tx: 991 if (gve_is_gqi(priv)) 992 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 993 else 994 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 995 return err; 996 } 997 998 static int gve_destroy_rings(struct gve_priv *priv) 999 { 1000 int num_tx_queues = gve_num_tx_queues(priv); 1001 int err; 1002 1003 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1004 if (err) { 1005 netif_err(priv, drv, priv->dev, 1006 "failed to destroy tx queues\n"); 1007 /* This failure will trigger a reset - no need to clean up */ 1008 return err; 1009 } 1010 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1011 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1012 if (err) { 1013 netif_err(priv, drv, priv->dev, 1014 "failed to destroy rx queues\n"); 1015 /* This failure will trigger a reset - no need to clean up */ 1016 return err; 1017 } 1018 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1019 return 0; 1020 } 1021 1022 static void gve_queues_mem_free(struct gve_priv *priv, 1023 struct gve_tx_alloc_rings_cfg *tx_cfg, 1024 struct gve_rx_alloc_rings_cfg *rx_cfg) 1025 { 1026 if (gve_is_gqi(priv)) { 1027 gve_tx_free_rings_gqi(priv, tx_cfg); 1028 gve_rx_free_rings_gqi(priv, rx_cfg); 1029 } else { 1030 gve_tx_free_rings_dqo(priv, tx_cfg); 1031 gve_rx_free_rings_dqo(priv, rx_cfg); 1032 } 1033 } 1034 
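/* Allocate a page and DMA-map it for device use. On failure the relevant
 * error counter (page_alloc_fail or dma_mapping_error) is incremented and
 * -ENOMEM is returned; on success the caller owns the page and its mapping.
 */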
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
						      u32 id, int pages)
{
	struct gve_queue_page_list *qpl;
	int err;
	int i;

	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
	if (!qpl)
		return NULL;

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	if (!qpl->pages)
		goto abort;

	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	if (!qpl->page_buses)
		goto abort;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		if (err)
			goto abort;
		qpl->num_entries++;
	}

	return qpl;

abort:
	gve_free_queue_page_list(priv, qpl, id);
	return NULL;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

void gve_free_queue_page_list(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl,
			      u32 id)
{
	int i;

	if (!qpl)
		return;
	if (!qpl->pages)
		goto free_qpl;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
free_qpl:
	kvfree(qpl);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
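 * Here "in thread" means performing the reset synchronously in the calling
 * context rather than deferring it to the service task.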
1128 */ 1129 void gve_schedule_reset(struct gve_priv *priv) 1130 { 1131 gve_set_do_reset(priv); 1132 queue_work(priv->gve_wq, &priv->service_task); 1133 } 1134 1135 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1136 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1137 static void gve_turndown(struct gve_priv *priv); 1138 static void gve_turnup(struct gve_priv *priv); 1139 1140 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1141 { 1142 struct napi_struct *napi; 1143 struct gve_rx_ring *rx; 1144 int err = 0; 1145 int i, j; 1146 u32 tx_qid; 1147 1148 if (!priv->tx_cfg.num_xdp_queues) 1149 return 0; 1150 1151 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1152 rx = &priv->rx[i]; 1153 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1154 1155 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1156 napi->napi_id); 1157 if (err) 1158 goto err; 1159 if (gve_is_qpl(priv)) 1160 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1161 MEM_TYPE_PAGE_SHARED, 1162 NULL); 1163 else 1164 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1165 MEM_TYPE_PAGE_POOL, 1166 rx->dqo.page_pool); 1167 if (err) 1168 goto err; 1169 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1170 if (rx->xsk_pool) { 1171 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1172 napi->napi_id); 1173 if (err) 1174 goto err; 1175 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1176 MEM_TYPE_XSK_BUFF_POOL, NULL); 1177 if (err) 1178 goto err; 1179 xsk_pool_set_rxq_info(rx->xsk_pool, 1180 &rx->xsk_rxq); 1181 } 1182 } 1183 1184 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1185 tx_qid = gve_xdp_tx_queue_id(priv, i); 1186 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1187 } 1188 return 0; 1189 1190 err: 1191 for (j = i; j >= 0; j--) { 1192 rx = &priv->rx[j]; 1193 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1194 xdp_rxq_info_unreg(&rx->xdp_rxq); 1195 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1196 xdp_rxq_info_unreg(&rx->xsk_rxq); 1197 } 1198 return err; 1199 } 1200 1201 static void gve_unreg_xdp_info(struct gve_priv *priv) 1202 { 1203 int i, tx_qid; 1204 1205 if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) 1206 return; 1207 1208 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1209 struct gve_rx_ring *rx = &priv->rx[i]; 1210 1211 xdp_rxq_info_unreg(&rx->xdp_rxq); 1212 if (rx->xsk_pool) { 1213 xdp_rxq_info_unreg(&rx->xsk_rxq); 1214 rx->xsk_pool = NULL; 1215 } 1216 } 1217 1218 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1219 tx_qid = gve_xdp_tx_queue_id(priv, i); 1220 priv->tx[tx_qid].xsk_pool = NULL; 1221 } 1222 } 1223 1224 static void gve_drain_page_cache(struct gve_priv *priv) 1225 { 1226 int i; 1227 1228 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1229 page_frag_cache_drain(&priv->rx[i].page_cache); 1230 } 1231 1232 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1233 struct gve_rx_alloc_rings_cfg *cfg) 1234 { 1235 cfg->qcfg_rx = &priv->rx_cfg; 1236 cfg->qcfg_tx = &priv->tx_cfg; 1237 cfg->raw_addressing = !gve_is_qpl(priv); 1238 cfg->enable_header_split = priv->header_split_enabled; 1239 cfg->ring_size = priv->rx_desc_cnt; 1240 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; 1241 cfg->rx = priv->rx; 1242 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1243 } 1244 1245 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1246 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1247 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1248 { 1249 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1250 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1251 } 1252 1253 
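/* The RX ring start/stop helpers below mirror the TX helpers above: they
 * simply dispatch to the GQI or DQO implementation for the active queue
 * format.
 */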
static void gve_rx_start_ring(struct gve_priv *priv, int i) 1254 { 1255 if (gve_is_gqi(priv)) 1256 gve_rx_start_ring_gqi(priv, i); 1257 else 1258 gve_rx_start_ring_dqo(priv, i); 1259 } 1260 1261 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1262 { 1263 int i; 1264 1265 for (i = 0; i < num_rings; i++) 1266 gve_rx_start_ring(priv, i); 1267 } 1268 1269 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1270 { 1271 if (gve_is_gqi(priv)) 1272 gve_rx_stop_ring_gqi(priv, i); 1273 else 1274 gve_rx_stop_ring_dqo(priv, i); 1275 } 1276 1277 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1278 { 1279 int i; 1280 1281 if (!priv->rx) 1282 return; 1283 1284 for (i = 0; i < num_rings; i++) 1285 gve_rx_stop_ring(priv, i); 1286 } 1287 1288 static void gve_queues_mem_remove(struct gve_priv *priv) 1289 { 1290 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1291 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1292 1293 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1294 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1295 priv->tx = NULL; 1296 priv->rx = NULL; 1297 } 1298 1299 /* The passed-in queue memory is stored into priv and the queues are made live. 1300 * No memory is allocated. Passed-in memory is freed on errors. 1301 */ 1302 static int gve_queues_start(struct gve_priv *priv, 1303 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1304 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1305 { 1306 struct net_device *dev = priv->dev; 1307 int err; 1308 1309 /* Record new resources into priv */ 1310 priv->tx = tx_alloc_cfg->tx; 1311 priv->rx = rx_alloc_cfg->rx; 1312 1313 /* Record new configs into priv */ 1314 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1315 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1316 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1317 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1318 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1319 1320 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1321 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1322 gve_init_sync_stats(priv); 1323 1324 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1325 if (err) 1326 goto stop_and_free_rings; 1327 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1328 if (err) 1329 goto stop_and_free_rings; 1330 1331 err = gve_reg_xdp_info(priv, dev); 1332 if (err) 1333 goto stop_and_free_rings; 1334 1335 if (rx_alloc_cfg->reset_rss) { 1336 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1337 if (err) 1338 goto reset; 1339 } 1340 1341 err = gve_register_qpls(priv); 1342 if (err) 1343 goto reset; 1344 1345 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1346 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1347 1348 err = gve_create_rings(priv); 1349 if (err) 1350 goto reset; 1351 1352 gve_set_device_rings_ok(priv); 1353 1354 if (gve_get_report_stats(priv)) 1355 mod_timer(&priv->stats_report_timer, 1356 round_jiffies(jiffies + 1357 msecs_to_jiffies(priv->stats_report_timer_period))); 1358 1359 gve_turnup(priv); 1360 queue_work(priv->gve_wq, &priv->service_task); 1361 priv->interface_up_cnt++; 1362 return 0; 1363 1364 reset: 1365 if (gve_get_reset_in_progress(priv)) 1366 goto stop_and_free_rings; 1367 gve_reset_and_teardown(priv, true); 1368 /* if this fails there is nothing we can do so just ignore the return */ 1369 gve_reset_recovery(priv, false); 1370 /* return the original error */ 1371 return err; 1372 stop_and_free_rings: 1373 gve_tx_stop_rings(priv, 
gve_num_tx_queues(priv)); 1374 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1375 gve_queues_mem_remove(priv); 1376 return err; 1377 } 1378 1379 static int gve_open(struct net_device *dev) 1380 { 1381 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1382 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1383 struct gve_priv *priv = netdev_priv(dev); 1384 int err; 1385 1386 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1387 1388 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1389 if (err) 1390 return err; 1391 1392 /* No need to free on error: ownership of resources is lost after 1393 * calling gve_queues_start. 1394 */ 1395 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1396 if (err) 1397 return err; 1398 1399 return 0; 1400 } 1401 1402 static int gve_queues_stop(struct gve_priv *priv) 1403 { 1404 int err; 1405 1406 netif_carrier_off(priv->dev); 1407 if (gve_get_device_rings_ok(priv)) { 1408 gve_turndown(priv); 1409 gve_drain_page_cache(priv); 1410 err = gve_destroy_rings(priv); 1411 if (err) 1412 goto err; 1413 err = gve_unregister_qpls(priv); 1414 if (err) 1415 goto err; 1416 gve_clear_device_rings_ok(priv); 1417 } 1418 timer_delete_sync(&priv->stats_report_timer); 1419 1420 gve_unreg_xdp_info(priv); 1421 1422 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1423 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1424 1425 priv->interface_down_cnt++; 1426 return 0; 1427 1428 err: 1429 /* This must have been called from a reset due to the rtnl lock 1430 * so just return at this point. 1431 */ 1432 if (gve_get_reset_in_progress(priv)) 1433 return err; 1434 /* Otherwise reset before returning */ 1435 gve_reset_and_teardown(priv, true); 1436 return gve_reset_recovery(priv, false); 1437 } 1438 1439 static int gve_close(struct net_device *dev) 1440 { 1441 struct gve_priv *priv = netdev_priv(dev); 1442 int err; 1443 1444 err = gve_queues_stop(priv); 1445 if (err) 1446 return err; 1447 1448 gve_queues_mem_remove(priv); 1449 return 0; 1450 } 1451 1452 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1453 { 1454 if (!gve_get_napi_enabled(priv)) 1455 return; 1456 1457 if (link_status == netif_carrier_ok(priv->dev)) 1458 return; 1459 1460 if (link_status) { 1461 netdev_info(priv->dev, "Device link is up.\n"); 1462 netif_carrier_on(priv->dev); 1463 } else { 1464 netdev_info(priv->dev, "Device link is down.\n"); 1465 netif_carrier_off(priv->dev); 1466 } 1467 } 1468 1469 static int gve_configure_rings_xdp(struct gve_priv *priv, 1470 u16 num_xdp_rings) 1471 { 1472 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1473 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1474 1475 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1476 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1477 1478 rx_alloc_cfg.xdp = !!num_xdp_rings; 1479 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1480 } 1481 1482 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1483 struct netlink_ext_ack *extack) 1484 { 1485 struct bpf_prog *old_prog; 1486 int err = 0; 1487 u32 status; 1488 1489 old_prog = READ_ONCE(priv->xdp_prog); 1490 if (!netif_running(priv->dev)) { 1491 WRITE_ONCE(priv->xdp_prog, prog); 1492 if (old_prog) 1493 bpf_prog_put(old_prog); 1494 1495 /* Update priv XDP queue configuration */ 1496 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
1497 priv->rx_cfg.num_queues : 0; 1498 return 0; 1499 } 1500 1501 if (!old_prog && prog) 1502 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1503 else if (old_prog && !prog) 1504 err = gve_configure_rings_xdp(priv, 0); 1505 1506 if (err) 1507 goto out; 1508 1509 WRITE_ONCE(priv->xdp_prog, prog); 1510 if (old_prog) 1511 bpf_prog_put(old_prog); 1512 1513 out: 1514 status = ioread32be(&priv->reg_bar0->device_status); 1515 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1516 return err; 1517 } 1518 1519 static int gve_xsk_pool_enable(struct net_device *dev, 1520 struct xsk_buff_pool *pool, 1521 u16 qid) 1522 { 1523 struct gve_priv *priv = netdev_priv(dev); 1524 struct napi_struct *napi; 1525 struct gve_rx_ring *rx; 1526 int tx_qid; 1527 int err; 1528 1529 if (qid >= priv->rx_cfg.num_queues) { 1530 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1531 return -EINVAL; 1532 } 1533 if (xsk_pool_get_rx_frame_size(pool) < 1534 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1535 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1536 return -EINVAL; 1537 } 1538 1539 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1540 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1541 if (err) 1542 return err; 1543 1544 /* If XDP prog is not installed or interface is down, return. */ 1545 if (!priv->xdp_prog || !netif_running(dev)) 1546 return 0; 1547 1548 rx = &priv->rx[qid]; 1549 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1550 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1551 if (err) 1552 goto err; 1553 1554 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1555 MEM_TYPE_XSK_BUFF_POOL, NULL); 1556 if (err) 1557 goto err; 1558 1559 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1560 rx->xsk_pool = pool; 1561 1562 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1563 priv->tx[tx_qid].xsk_pool = pool; 1564 1565 return 0; 1566 err: 1567 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1568 xdp_rxq_info_unreg(&rx->xsk_rxq); 1569 1570 xsk_pool_dma_unmap(pool, 1571 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1572 return err; 1573 } 1574 1575 static int gve_xsk_pool_disable(struct net_device *dev, 1576 u16 qid) 1577 { 1578 struct gve_priv *priv = netdev_priv(dev); 1579 struct napi_struct *napi_rx; 1580 struct napi_struct *napi_tx; 1581 struct xsk_buff_pool *pool; 1582 int tx_qid; 1583 1584 pool = xsk_get_pool_from_qid(dev, qid); 1585 if (!pool) 1586 return -EINVAL; 1587 if (qid >= priv->rx_cfg.num_queues) 1588 return -EINVAL; 1589 1590 /* If XDP prog is not installed or interface is down, unmap DMA and 1591 * return. 
1592 */ 1593 if (!priv->xdp_prog || !netif_running(dev)) 1594 goto done; 1595 1596 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1597 napi_disable(napi_rx); /* make sure current rx poll is done */ 1598 1599 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1600 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1601 napi_disable(napi_tx); /* make sure current tx poll is done */ 1602 1603 priv->rx[qid].xsk_pool = NULL; 1604 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1605 priv->tx[tx_qid].xsk_pool = NULL; 1606 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1607 1608 napi_enable(napi_rx); 1609 if (gve_rx_work_pending(&priv->rx[qid])) 1610 napi_schedule(napi_rx); 1611 1612 napi_enable(napi_tx); 1613 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1614 napi_schedule(napi_tx); 1615 1616 done: 1617 xsk_pool_dma_unmap(pool, 1618 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1619 return 0; 1620 } 1621 1622 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1623 { 1624 struct gve_priv *priv = netdev_priv(dev); 1625 struct napi_struct *napi; 1626 1627 if (!gve_get_napi_enabled(priv)) 1628 return -ENETDOWN; 1629 1630 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1631 return -EINVAL; 1632 1633 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1634 if (!napi_if_scheduled_mark_missed(napi)) { 1635 /* Call local_bh_enable to trigger SoftIRQ processing */ 1636 local_bh_disable(); 1637 napi_schedule(napi); 1638 local_bh_enable(); 1639 } 1640 1641 return 0; 1642 } 1643 1644 static int verify_xdp_configuration(struct net_device *dev) 1645 { 1646 struct gve_priv *priv = netdev_priv(dev); 1647 u16 max_xdp_mtu; 1648 1649 if (dev->features & NETIF_F_LRO) { 1650 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1651 return -EOPNOTSUPP; 1652 } 1653 1654 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1655 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1656 priv->queue_format); 1657 return -EOPNOTSUPP; 1658 } 1659 1660 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1661 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1662 max_xdp_mtu -= GVE_RX_PAD; 1663 1664 if (dev->mtu > max_xdp_mtu) { 1665 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1666 dev->mtu); 1667 return -EOPNOTSUPP; 1668 } 1669 1670 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1671 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1672 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1673 priv->rx_cfg.num_queues, 1674 priv->tx_cfg.num_queues, 1675 priv->tx_cfg.max_queues); 1676 return -EINVAL; 1677 } 1678 return 0; 1679 } 1680 1681 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1682 { 1683 struct gve_priv *priv = netdev_priv(dev); 1684 int err; 1685 1686 err = verify_xdp_configuration(dev); 1687 if (err) 1688 return err; 1689 switch (xdp->command) { 1690 case XDP_SETUP_PROG: 1691 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1692 case XDP_SETUP_XSK_POOL: 1693 if (xdp->xsk.pool) 1694 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1695 else 1696 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1697 default: 1698 return -EINVAL; 1699 } 1700 } 1701 1702 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1703 { 1704 struct gve_rss_config 
*rss_config = &priv->rss_config; 1705 struct ethtool_rxfh_param rxfh = {0}; 1706 u16 i; 1707 1708 if (!priv->cache_rss_config) 1709 return 0; 1710 1711 for (i = 0; i < priv->rss_lut_size; i++) 1712 rss_config->hash_lut[i] = 1713 ethtool_rxfh_indir_default(i, num_queues); 1714 1715 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1716 1717 rxfh.hfunc = ETH_RSS_HASH_TOP; 1718 1719 return gve_adminq_configure_rss(priv, &rxfh); 1720 } 1721 1722 int gve_flow_rules_reset(struct gve_priv *priv) 1723 { 1724 if (!priv->max_flow_rules) 1725 return 0; 1726 1727 return gve_adminq_reset_flow_rules(priv); 1728 } 1729 1730 int gve_adjust_config(struct gve_priv *priv, 1731 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1732 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1733 { 1734 int err; 1735 1736 /* Allocate resources for the new configuration */ 1737 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1738 if (err) { 1739 netif_err(priv, drv, priv->dev, 1740 "Adjust config failed to alloc new queues"); 1741 return err; 1742 } 1743 1744 /* Teardown the device and free existing resources */ 1745 err = gve_close(priv->dev); 1746 if (err) { 1747 netif_err(priv, drv, priv->dev, 1748 "Adjust config failed to close old queues"); 1749 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1750 return err; 1751 } 1752 1753 /* Bring the device back up again with the new resources. */ 1754 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1755 if (err) { 1756 netif_err(priv, drv, priv->dev, 1757 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1758 /* No need to free on error: ownership of resources is lost after 1759 * calling gve_queues_start. 1760 */ 1761 gve_turndown(priv); 1762 return err; 1763 } 1764 1765 return 0; 1766 } 1767 1768 int gve_adjust_queues(struct gve_priv *priv, 1769 struct gve_rx_queue_config new_rx_config, 1770 struct gve_tx_queue_config new_tx_config, 1771 bool reset_rss) 1772 { 1773 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1774 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1775 int err; 1776 1777 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1778 1779 /* Relay the new config from ethtool */ 1780 tx_alloc_cfg.qcfg = &new_tx_config; 1781 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1782 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1783 rx_alloc_cfg.reset_rss = reset_rss; 1784 1785 if (netif_running(priv->dev)) { 1786 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1787 return err; 1788 } 1789 /* Set the config for the next up. 
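 * The new queue configuration is only recorded here and takes effect the
 * next time the interface is brought up.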
*/ 1790 if (reset_rss) { 1791 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1792 if (err) 1793 return err; 1794 } 1795 priv->tx_cfg = new_tx_config; 1796 priv->rx_cfg = new_rx_config; 1797 1798 return 0; 1799 } 1800 1801 static void gve_turndown(struct gve_priv *priv) 1802 { 1803 int idx; 1804 1805 if (netif_carrier_ok(priv->dev)) 1806 netif_carrier_off(priv->dev); 1807 1808 if (!gve_get_napi_enabled(priv)) 1809 return; 1810 1811 /* Disable napi to prevent more work from coming in */ 1812 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1813 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1814 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1815 1816 if (!gve_tx_was_added_to_block(priv, idx)) 1817 continue; 1818 1819 if (idx < priv->tx_cfg.num_queues) 1820 netif_queue_set_napi(priv->dev, idx, 1821 NETDEV_QUEUE_TYPE_TX, NULL); 1822 1823 napi_disable_locked(&block->napi); 1824 } 1825 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1826 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1827 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1828 1829 if (!gve_rx_was_added_to_block(priv, idx)) 1830 continue; 1831 1832 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1833 NULL); 1834 napi_disable_locked(&block->napi); 1835 } 1836 1837 /* Stop tx queues */ 1838 netif_tx_disable(priv->dev); 1839 1840 xdp_features_clear_redirect_target_locked(priv->dev); 1841 1842 gve_clear_napi_enabled(priv); 1843 gve_clear_report_stats(priv); 1844 1845 /* Make sure that all traffic is finished processing. */ 1846 synchronize_net(); 1847 } 1848 1849 static void gve_turnup(struct gve_priv *priv) 1850 { 1851 int idx; 1852 1853 /* Start the tx queues */ 1854 netif_tx_start_all_queues(priv->dev); 1855 1856 /* Enable napi and unmask interrupts for all queues */ 1857 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1858 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1859 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1860 1861 if (!gve_tx_was_added_to_block(priv, idx)) 1862 continue; 1863 1864 napi_enable_locked(&block->napi); 1865 1866 if (idx < priv->tx_cfg.num_queues) 1867 netif_queue_set_napi(priv->dev, idx, 1868 NETDEV_QUEUE_TYPE_TX, 1869 &block->napi); 1870 1871 if (gve_is_gqi(priv)) { 1872 iowrite32be(0, gve_irq_doorbell(priv, block)); 1873 } else { 1874 gve_set_itr_coalesce_usecs_dqo(priv, block, 1875 priv->tx_coalesce_usecs); 1876 } 1877 1878 /* Any descs written by the NIC before this barrier will be 1879 * handled by the one-off napi schedule below. Whereas any 1880 * descs after the barrier will generate interrupts. 1881 */ 1882 mb(); 1883 napi_schedule(&block->napi); 1884 } 1885 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1886 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1887 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1888 1889 if (!gve_rx_was_added_to_block(priv, idx)) 1890 continue; 1891 1892 napi_enable_locked(&block->napi); 1893 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1894 &block->napi); 1895 1896 if (gve_is_gqi(priv)) { 1897 iowrite32be(0, gve_irq_doorbell(priv, block)); 1898 } else { 1899 gve_set_itr_coalesce_usecs_dqo(priv, block, 1900 priv->rx_coalesce_usecs); 1901 } 1902 1903 /* Any descs written by the NIC before this barrier will be 1904 * handled by the one-off napi schedule below. Whereas any 1905 * descs after the barrier will generate interrupts. 
1906 */ 1907 mb(); 1908 napi_schedule(&block->napi); 1909 } 1910 1911 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1912 xdp_features_set_redirect_target_locked(priv->dev, false); 1913 1914 gve_set_napi_enabled(priv); 1915 } 1916 1917 static void gve_turnup_and_check_status(struct gve_priv *priv) 1918 { 1919 u32 status; 1920 1921 gve_turnup(priv); 1922 status = ioread32be(&priv->reg_bar0->device_status); 1923 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1924 } 1925 1926 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1927 { 1928 struct gve_notify_block *block; 1929 struct gve_tx_ring *tx = NULL; 1930 struct gve_priv *priv; 1931 u32 last_nic_done; 1932 u32 current_time; 1933 u32 ntfy_idx; 1934 1935 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1936 priv = netdev_priv(dev); 1937 if (txqueue > priv->tx_cfg.num_queues) 1938 goto reset; 1939 1940 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1941 if (ntfy_idx >= priv->num_ntfy_blks) 1942 goto reset; 1943 1944 block = &priv->ntfy_blocks[ntfy_idx]; 1945 tx = block->tx; 1946 1947 current_time = jiffies_to_msecs(jiffies); 1948 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1949 goto reset; 1950 1951 /* Check to see if there are missed completions, which will allow us to 1952 * kick the queue. 1953 */ 1954 last_nic_done = gve_tx_load_event_counter(priv, tx); 1955 if (last_nic_done - tx->done) { 1956 netdev_info(dev, "Kicking queue %d", txqueue); 1957 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1958 napi_schedule(&block->napi); 1959 tx->last_kick_msec = current_time; 1960 goto out; 1961 } // Else reset. 1962 1963 reset: 1964 gve_schedule_reset(priv); 1965 1966 out: 1967 if (tx) 1968 tx->queue_timeout++; 1969 priv->tx_timeo_cnt++; 1970 } 1971 1972 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 1973 { 1974 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 1975 return GVE_MAX_RX_BUFFER_SIZE; 1976 else 1977 return GVE_DEFAULT_RX_BUFFER_SIZE; 1978 } 1979 1980 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 1981 bool gve_header_split_supported(const struct gve_priv *priv) 1982 { 1983 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 1984 } 1985 1986 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 1987 { 1988 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1989 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1990 bool enable_hdr_split; 1991 int err = 0; 1992 1993 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 1994 return 0; 1995 1996 if (!gve_header_split_supported(priv)) { 1997 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 1998 return -EOPNOTSUPP; 1999 } 2000 2001 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2002 enable_hdr_split = true; 2003 else 2004 enable_hdr_split = false; 2005 2006 if (enable_hdr_split == priv->header_split_enabled) 2007 return 0; 2008 2009 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2010 2011 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2012 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2013 2014 if (netif_running(priv->dev)) 2015 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2016 return err; 2017 } 2018 2019 static int gve_set_features(struct net_device *netdev, 2020 netdev_features_t features) 2021 { 2022 const netdev_features_t orig_features = netdev->features; 2023 struct 
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

static int gve_get_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config)
{
	struct gve_priv *priv = netdev_priv(dev);

	*kernel_config = priv->ts_config;
	return 0;
}

static int gve_set_ts_config(struct net_device *dev,
			     struct kernel_hwtstamp_config *kernel_config,
			     struct netlink_ext_ack *extack)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (kernel_config->tx_type != HWTSTAMP_TX_OFF) {
		NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported");
		return -ERANGE;
	}

	if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) {
		if (!priv->nic_ts_report) {
			NL_SET_ERR_MSG_MOD(extack,
					   "RX timestamping is not supported");
			kernel_config->rx_filter = HWTSTAMP_FILTER_NONE;
			return -EOPNOTSUPP;
		}

		kernel_config->rx_filter = HWTSTAMP_FILTER_ALL;
		gve_clock_nic_ts_read(priv);
		ptp_schedule_worker(priv->ptp->clock, 0);
	} else {
		ptp_cancel_worker_sync(priv->ptp->clock);
	}

	priv->ts_config.rx_filter = kernel_config->rx_filter;

	return 0;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
	.ndo_hwtstamp_get = gve_get_ts_config,
	.ndo_hwtstamp_set = gve_set_ts_config,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		netdev_lock(priv->dev);
		gve_reset(priv, false);
		netdev_unlock(priv->dev);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag_locked(priv->dev, xdp_features);
}

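/* Set up the admin queue, verify driver/device compatibility and, unless this
 * is a re-init after reset, query the device description to size the TX/RX
 * queue configuration before device resources are created.
 */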
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}
	priv->tx_cfg.num_xdp_queues = 0;

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

	priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
	priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

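/* Force a device reset by releasing the admin queue, close the interface if
 * it was up, and free all private resources.
 */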
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

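/* Queue management hooks used by the core to stop, free, re-allocate and
 * restart a single RX queue without a full device reconfiguration.
 */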
static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
		dev->netmem_tx = true;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

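/* PCI shutdown callback: close the interface if it was up (falling back to a
 * reset and teardown if close fails) and release all private resources.
 */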
static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	netdev_unlock(netdev);
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	netdev_unlock(netdev);
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	netdev_unlock(netdev);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);