// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes =
priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 188 { 189 struct gve_rss_config *rss_config = &priv->rss_config; 190 191 if (!priv->cache_rss_config) 192 return 0; 193 194 rss_config->hash_key = kcalloc(priv->rss_key_size, 195 sizeof(rss_config->hash_key[0]), 196 GFP_KERNEL); 197 if (!rss_config->hash_key) 198 return -ENOMEM; 199 200 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 201 sizeof(rss_config->hash_lut[0]), 202 GFP_KERNEL); 203 if (!rss_config->hash_lut) 204 goto free_rss_key_cache; 205 206 return 0; 207 208 free_rss_key_cache: 209 kfree(rss_config->hash_key); 210 rss_config->hash_key = NULL; 211 return -ENOMEM; 212 } 213 214 static void gve_free_rss_config_cache(struct gve_priv *priv) 215 { 216 struct gve_rss_config *rss_config = &priv->rss_config; 217 218 kfree(rss_config->hash_key); 219 kfree(rss_config->hash_lut); 220 221 memset(rss_config, 0, sizeof(*rss_config)); 222 } 223 224 static int gve_alloc_counter_array(struct gve_priv *priv) 225 { 226 priv->counter_array = 227 dma_alloc_coherent(&priv->pdev->dev, 228 priv->num_event_counters * 229 sizeof(*priv->counter_array), 230 &priv->counter_array_bus, GFP_KERNEL); 231 if (!priv->counter_array) 232 return -ENOMEM; 233 234 return 0; 235 } 236 237 static void gve_free_counter_array(struct gve_priv *priv) 238 { 239 if (!priv->counter_array) 240 return; 241 242 dma_free_coherent(&priv->pdev->dev, 243 priv->num_event_counters * 244 sizeof(*priv->counter_array), 245 priv->counter_array, priv->counter_array_bus); 246 priv->counter_array = NULL; 247 } 248 249 /* NIC requests to report stats */ 250 static void gve_stats_report_task(struct work_struct *work) 251 { 252 struct gve_priv *priv = container_of(work, struct gve_priv, 
253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = timer_container_of(priv, t, 272 stats_report_timer); 273 274 mod_timer(&priv->stats_report_timer, 275 round_jiffies(jiffies + 276 msecs_to_jiffies(priv->stats_report_timer_period))); 277 gve_stats_report_schedule(priv); 278 } 279 280 static int gve_alloc_stats_report(struct gve_priv *priv) 281 { 282 int tx_stats_num, rx_stats_num; 283 284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 285 gve_num_tx_queues(priv); 286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 287 priv->rx_cfg.num_queues; 288 priv->stats_report_len = struct_size(priv->stats_report, stats, 289 size_add(tx_stats_num, rx_stats_num)); 290 priv->stats_report = 291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 292 &priv->stats_report_bus, GFP_KERNEL); 293 if (!priv->stats_report) 294 return -ENOMEM; 295 /* Set up timer for the report-stats task */ 296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 298 return 0; 299 } 300 301 static void gve_free_stats_report(struct gve_priv *priv) 302 { 303 if (!priv->stats_report) 304 return; 305 306 timer_delete_sync(&priv->stats_report_timer); 307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 308 priv->stats_report, priv->stats_report_bus); 309 priv->stats_report = NULL; 310 } 311 312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 313 { 314 struct gve_priv *priv = arg; 315 316 queue_work(priv->gve_wq, &priv->service_task); 317 return IRQ_HANDLED; 318 } 319 320 static irqreturn_t gve_intr(int irq, void *arg) 321 { 322 struct gve_notify_block *block = arg; 323 struct gve_priv *priv = block->priv; 324 325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 326 napi_schedule_irqoff(&block->napi); 327 return IRQ_HANDLED; 328 } 329 330 static irqreturn_t gve_intr_dqo(int irq, void *arg) 331 { 332 struct gve_notify_block *block = arg; 333 334 /* Interrupts are automatically masked */ 335 napi_schedule_irqoff(&block->napi); 336 return IRQ_HANDLED; 337 } 338 339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 340 { 341 int cpu_curr = smp_processor_id(); 342 const struct cpumask *aff_mask; 343 344 aff_mask = irq_get_effective_affinity_mask(irq); 345 if (unlikely(!aff_mask)) 346 return 1; 347 348 return cpumask_test_cpu(cpu_curr, aff_mask); 349 } 350 351 int gve_napi_poll(struct napi_struct *napi, int budget) 352 { 353 struct gve_notify_block *block; 354 __be32 __iomem *irq_doorbell; 355 bool reschedule = false; 356 struct gve_priv *priv; 357 int work_done = 0; 358 359 block = container_of(napi, struct gve_notify_block, napi); 360 priv = block->priv; 361 362 if (block->tx) { 363 if (block->tx->q_num < priv->tx_cfg.num_queues) 364 reschedule |= gve_tx_poll(block, budget); 365 else if (budget) 366 reschedule |= gve_xdp_poll(block, budget); 367 } 368 369 if (!budget) 370 return 0; 371 372 if (block->rx) { 373 work_done = gve_rx_poll(block, budget); 374 375 /* Poll XSK TX as part of RX NAPI. 
Setup re-poll based on max of 376 * TX and RX work done. 377 */ 378 if (priv->xdp_prog) 379 work_done = max_t(int, work_done, 380 gve_xsk_tx_poll(block, budget)); 381 382 reschedule |= work_done == budget; 383 } 384 385 if (reschedule) 386 return budget; 387 388 /* Complete processing - don't unmask irq if busy polling is enabled */ 389 if (likely(napi_complete_done(napi, work_done))) { 390 irq_doorbell = gve_irq_doorbell(priv, block); 391 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 392 393 /* Ensure IRQ ACK is visible before we check pending work. 394 * If queue had issued updates, it would be truly visible. 395 */ 396 mb(); 397 398 if (block->tx) 399 reschedule |= gve_tx_clean_pending(priv, block->tx); 400 if (block->rx) 401 reschedule |= gve_rx_work_pending(block->rx); 402 403 if (reschedule && napi_schedule(napi)) 404 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 405 } 406 return work_done; 407 } 408 409 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 410 { 411 struct gve_notify_block *block = 412 container_of(napi, struct gve_notify_block, napi); 413 struct gve_priv *priv = block->priv; 414 bool reschedule = false; 415 int work_done = 0; 416 417 if (block->tx) 418 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 419 420 if (!budget) 421 return 0; 422 423 if (block->rx) { 424 work_done = gve_rx_poll_dqo(block, budget); 425 reschedule |= work_done == budget; 426 } 427 428 if (reschedule) { 429 /* Reschedule by returning budget only if already on the correct 430 * cpu. 431 */ 432 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 433 return budget; 434 435 /* If not on the cpu with which this queue's irq has affinity 436 * with, we avoid rescheduling napi and arm the irq instead so 437 * that napi gets rescheduled back eventually onto the right 438 * cpu. 439 */ 440 if (work_done == budget) 441 work_done--; 442 } 443 444 if (likely(napi_complete_done(napi, work_done))) { 445 /* Enable interrupts again. 446 * 447 * We don't need to repoll afterwards because HW supports the 448 * PCI MSI-X PBA feature. 449 * 450 * Another interrupt would be triggered if a new event came in 451 * since the last one. 
452 */ 453 gve_write_irq_doorbell_dqo(priv, block, 454 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 455 } 456 457 return work_done; 458 } 459 460 static int gve_alloc_notify_blocks(struct gve_priv *priv) 461 { 462 int num_vecs_requested = priv->num_ntfy_blks + 1; 463 unsigned int active_cpus; 464 int vecs_enabled; 465 int i, j; 466 int err; 467 468 priv->msix_vectors = kvcalloc(num_vecs_requested, 469 sizeof(*priv->msix_vectors), GFP_KERNEL); 470 if (!priv->msix_vectors) 471 return -ENOMEM; 472 for (i = 0; i < num_vecs_requested; i++) 473 priv->msix_vectors[i].entry = i; 474 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 475 GVE_MIN_MSIX, num_vecs_requested); 476 if (vecs_enabled < 0) { 477 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 478 GVE_MIN_MSIX, vecs_enabled); 479 err = vecs_enabled; 480 goto abort_with_msix_vectors; 481 } 482 if (vecs_enabled != num_vecs_requested) { 483 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 484 int vecs_per_type = new_num_ntfy_blks / 2; 485 int vecs_left = new_num_ntfy_blks % 2; 486 487 priv->num_ntfy_blks = new_num_ntfy_blks; 488 priv->mgmt_msix_idx = priv->num_ntfy_blks; 489 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 490 vecs_per_type); 491 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 492 vecs_per_type + vecs_left); 493 dev_err(&priv->pdev->dev, 494 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 495 vecs_enabled, priv->tx_cfg.max_queues, 496 priv->rx_cfg.max_queues); 497 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 498 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 499 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 500 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 501 } 502 /* Half the notification blocks go to TX and half to RX */ 503 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 504 505 /* Setup Management Vector - the last vector */ 506 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 507 pci_name(priv->pdev)); 508 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 509 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 510 if (err) { 511 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 512 goto abort_with_msix_enabled; 513 } 514 priv->irq_db_indices = 515 dma_alloc_coherent(&priv->pdev->dev, 516 priv->num_ntfy_blks * 517 sizeof(*priv->irq_db_indices), 518 &priv->irq_db_indices_bus, GFP_KERNEL); 519 if (!priv->irq_db_indices) { 520 err = -ENOMEM; 521 goto abort_with_mgmt_vector; 522 } 523 524 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 525 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 526 if (!priv->ntfy_blocks) { 527 err = -ENOMEM; 528 goto abort_with_irq_db_indices; 529 } 530 531 /* Setup the other blocks - the first n-1 vectors */ 532 for (i = 0; i < priv->num_ntfy_blks; i++) { 533 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 534 int msix_idx = i; 535 536 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 537 i, pci_name(priv->pdev)); 538 block->priv = priv; 539 err = request_irq(priv->msix_vectors[msix_idx].vector, 540 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 541 0, block->name, block); 542 if (err) { 543 dev_err(&priv->pdev->dev, 544 "Failed to receive msix vector %d\n", i); 545 goto abort_with_some_ntfy_blocks; 546 } 547 block->irq = priv->msix_vectors[msix_idx].vector; 548 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 549 get_cpu_mask(i % active_cpus)); 550 block->irq_db_index = &priv->irq_db_indices[i].index; 551 } 552 return 0; 553 abort_with_some_ntfy_blocks: 554 for (j = 0; j < i; j++) { 555 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 556 int msix_idx = j; 557 558 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 559 NULL); 560 free_irq(priv->msix_vectors[msix_idx].vector, block); 561 block->irq = 0; 562 } 563 kvfree(priv->ntfy_blocks); 564 priv->ntfy_blocks = NULL; 565 abort_with_irq_db_indices: 566 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 567 sizeof(*priv->irq_db_indices), 568 priv->irq_db_indices, priv->irq_db_indices_bus); 569 priv->irq_db_indices = NULL; 570 abort_with_mgmt_vector: 571 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 572 abort_with_msix_enabled: 573 pci_disable_msix(priv->pdev); 574 abort_with_msix_vectors: 575 kvfree(priv->msix_vectors); 576 priv->msix_vectors = NULL; 577 return err; 578 } 579 580 static void gve_free_notify_blocks(struct gve_priv *priv) 581 { 582 int i; 583 584 if (!priv->msix_vectors) 585 return; 586 587 /* Free the irqs */ 588 for (i = 0; i < priv->num_ntfy_blks; i++) { 589 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 590 int msix_idx = i; 591 592 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 593 NULL); 594 free_irq(priv->msix_vectors[msix_idx].vector, block); 595 block->irq = 0; 596 } 597 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 598 kvfree(priv->ntfy_blocks); 599 priv->ntfy_blocks = NULL; 600 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 601 sizeof(*priv->irq_db_indices), 602 priv->irq_db_indices, priv->irq_db_indices_bus); 603 priv->irq_db_indices = NULL; 604 pci_disable_msix(priv->pdev); 605 kvfree(priv->msix_vectors); 606 priv->msix_vectors = NULL; 607 } 608 609 static int gve_setup_device_resources(struct gve_priv *priv) 610 { 611 int err; 612 613 err = gve_alloc_flow_rule_caches(priv); 614 if (err) 615 return err; 616 err = gve_alloc_rss_config_cache(priv); 617 if (err) 618 goto abort_with_flow_rule_caches; 619 err = gve_alloc_counter_array(priv); 620 if (err) 621 goto abort_with_rss_config_cache; 622 err = gve_alloc_notify_blocks(priv); 623 if (err) 624 goto abort_with_counter; 625 err = gve_alloc_stats_report(priv); 626 if (err) 627 goto abort_with_ntfy_blocks; 628 err = gve_adminq_configure_device_resources(priv, 629 priv->counter_array_bus, 630 priv->num_event_counters, 631 priv->irq_db_indices_bus, 632 priv->num_ntfy_blks); 633 if (unlikely(err)) { 634 dev_err(&priv->pdev->dev, 635 "could not setup device_resources: err=%d\n", err); 636 err = -ENXIO; 637 goto abort_with_stats_report; 638 } 639 640 if (!gve_is_gqi(priv)) { 641 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 642 GFP_KERNEL); 643 if (!priv->ptype_lut_dqo) { 644 err = -ENOMEM; 645 goto abort_with_stats_report; 646 } 647 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 648 if (err) { 649 dev_err(&priv->pdev->dev, 650 "Failed to get ptype map: err=%d\n", err); 651 goto abort_with_ptype_lut; 652 } 653 } 654 655 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 656 if (err) { 657 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 658 goto 
abort_with_ptype_lut; 659 } 660 661 err = gve_adminq_report_stats(priv, priv->stats_report_len, 662 priv->stats_report_bus, 663 GVE_STATS_REPORT_TIMER_PERIOD); 664 if (err) 665 dev_err(&priv->pdev->dev, 666 "Failed to report stats: err=%d\n", err); 667 gve_set_device_resources_ok(priv); 668 return 0; 669 670 abort_with_ptype_lut: 671 kvfree(priv->ptype_lut_dqo); 672 priv->ptype_lut_dqo = NULL; 673 abort_with_stats_report: 674 gve_free_stats_report(priv); 675 abort_with_ntfy_blocks: 676 gve_free_notify_blocks(priv); 677 abort_with_counter: 678 gve_free_counter_array(priv); 679 abort_with_rss_config_cache: 680 gve_free_rss_config_cache(priv); 681 abort_with_flow_rule_caches: 682 gve_free_flow_rule_caches(priv); 683 684 return err; 685 } 686 687 static void gve_trigger_reset(struct gve_priv *priv); 688 689 static void gve_teardown_device_resources(struct gve_priv *priv) 690 { 691 int err; 692 693 /* Tell device its resources are being freed */ 694 if (gve_get_device_resources_ok(priv)) { 695 err = gve_flow_rules_reset(priv); 696 if (err) { 697 dev_err(&priv->pdev->dev, 698 "Failed to reset flow rules: err=%d\n", err); 699 gve_trigger_reset(priv); 700 } 701 /* detach the stats report */ 702 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 703 if (err) { 704 dev_err(&priv->pdev->dev, 705 "Failed to detach stats report: err=%d\n", err); 706 gve_trigger_reset(priv); 707 } 708 err = gve_adminq_deconfigure_device_resources(priv); 709 if (err) { 710 dev_err(&priv->pdev->dev, 711 "Could not deconfigure device resources: err=%d\n", 712 err); 713 gve_trigger_reset(priv); 714 } 715 } 716 717 kvfree(priv->ptype_lut_dqo); 718 priv->ptype_lut_dqo = NULL; 719 720 gve_free_flow_rule_caches(priv); 721 gve_free_rss_config_cache(priv); 722 gve_free_counter_array(priv); 723 gve_free_notify_blocks(priv); 724 gve_free_stats_report(priv); 725 gve_clear_device_resources_ok(priv); 726 } 727 728 static int gve_unregister_qpl(struct gve_priv *priv, 729 struct gve_queue_page_list *qpl) 730 { 731 int err; 732 733 if (!qpl) 734 return 0; 735 736 err = gve_adminq_unregister_page_list(priv, qpl->id); 737 if (err) { 738 netif_err(priv, drv, priv->dev, 739 "Failed to unregister queue page list %d\n", 740 qpl->id); 741 return err; 742 } 743 744 priv->num_registered_pages -= qpl->num_entries; 745 return 0; 746 } 747 748 static int gve_register_qpl(struct gve_priv *priv, 749 struct gve_queue_page_list *qpl) 750 { 751 int pages; 752 int err; 753 754 if (!qpl) 755 return 0; 756 757 pages = qpl->num_entries; 758 759 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 760 netif_err(priv, drv, priv->dev, 761 "Reached max number of registered pages %llu > %llu\n", 762 pages + priv->num_registered_pages, 763 priv->max_registered_pages); 764 return -EINVAL; 765 } 766 767 err = gve_adminq_register_page_list(priv, qpl); 768 if (err) { 769 netif_err(priv, drv, priv->dev, 770 "failed to register queue page list %d\n", 771 qpl->id); 772 return err; 773 } 774 775 priv->num_registered_pages += pages; 776 return 0; 777 } 778 779 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 780 { 781 struct gve_tx_ring *tx = &priv->tx[idx]; 782 783 if (gve_is_gqi(priv)) 784 return tx->tx_fifo.qpl; 785 else 786 return tx->dqo.qpl; 787 } 788 789 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 790 { 791 struct gve_rx_ring *rx = &priv->rx[idx]; 792 793 if (gve_is_gqi(priv)) 794 return rx->data.qpl; 795 else 796 return rx->dqo.qpl; 797 } 798 799 static 
int gve_register_qpls(struct gve_priv *priv) 800 { 801 int num_tx_qpls, num_rx_qpls; 802 int err; 803 int i; 804 805 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 807 808 for (i = 0; i < num_tx_qpls; i++) { 809 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 810 if (err) 811 return err; 812 } 813 814 for (i = 0; i < num_rx_qpls; i++) { 815 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 816 if (err) 817 return err; 818 } 819 820 return 0; 821 } 822 823 static int gve_unregister_qpls(struct gve_priv *priv) 824 { 825 int num_tx_qpls, num_rx_qpls; 826 int err; 827 int i; 828 829 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 830 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 831 832 for (i = 0; i < num_tx_qpls; i++) { 833 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 834 /* This failure will trigger a reset - no need to clean */ 835 if (err) 836 return err; 837 } 838 839 for (i = 0; i < num_rx_qpls; i++) { 840 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 841 /* This failure will trigger a reset - no need to clean */ 842 if (err) 843 return err; 844 } 845 return 0; 846 } 847 848 static int gve_create_rings(struct gve_priv *priv) 849 { 850 int num_tx_queues = gve_num_tx_queues(priv); 851 int err; 852 int i; 853 854 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 855 if (err) { 856 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 857 num_tx_queues); 858 /* This failure will trigger a reset - no need to clean 859 * up 860 */ 861 return err; 862 } 863 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 864 num_tx_queues); 865 866 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 867 if (err) { 868 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 869 priv->rx_cfg.num_queues); 870 /* This failure will trigger a reset - no need to clean 871 * up 872 */ 873 return err; 874 } 875 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 876 priv->rx_cfg.num_queues); 877 878 if (gve_is_gqi(priv)) { 879 /* Rx data ring has been prefilled with packet buffers at queue 880 * allocation time. 881 * 882 * Write the doorbell to provide descriptor slots and packet 883 * buffers to the NIC. 884 */ 885 for (i = 0; i < priv->rx_cfg.num_queues; i++) 886 gve_rx_write_doorbell(priv, &priv->rx[i]); 887 } else { 888 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 889 /* Post buffers and ring doorbell. 
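			 * (For DQO, gve_rx_post_buffers_dqo() below both fills the
			 * descriptor ring and writes the RX doorbell.)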
			 */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void init_xdp_sync_stats(struct gve_priv *priv)
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Init stats */
	for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
	}
}

static void gve_init_sync_stats(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		u64_stats_init(&priv->tx[i].statss);

	/* Init stats for XDP TX queues */
	init_xdp_sync_stats(priv);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		u64_stats_init(&priv->rx[i].statss);
}

static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_tx_alloc_rings_cfg *cfg)
{
	cfg->qcfg = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->ring_size = priv->tx_desc_cnt;
	cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
	cfg->tx = priv->tx;
}

static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	if (!priv->tx)
		return;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_stop_ring_gqi(priv, i);
		else
			gve_tx_stop_ring_dqo(priv, i);
	}
}

static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	for (i = 0; i < num_rings; i++) {
		if (gve_is_gqi(priv))
			gve_tx_start_ring_gqi(priv, i);
		else
			gve_tx_start_ring_dqo(priv, i);
	}
}

static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
	else
		err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (err)
		return err;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
	else
		err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (err)
		goto free_tx;

	return 0;

free_tx:
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
	return err;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_queues_mem_free(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_cfg,
				struct gve_rx_alloc_rings_cfg *rx_cfg)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, tx_cfg);
		gve_rx_free_rings_gqi(priv, rx_cfg);
	} else {
		gve_tx_free_rings_dqo(priv, tx_cfg);
		gve_rx_free_rings_dqo(priv, rx_cfg);
	}
}

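/* Page and queue page list (QPL) helpers: gve_alloc_page() allocates and
 * DMA-maps a single page, and gve_alloc_queue_page_list() builds the page
 * list that is later registered with the device by gve_register_qpl().
 */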
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
						      u32 id, int pages)
{
	struct gve_queue_page_list *qpl;
	int err;
	int i;

	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
	if (!qpl)
		return NULL;

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	if (!qpl->pages)
		goto abort;

	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	if (!qpl->page_buses)
		goto abort;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		if (err)
			goto abort;
		qpl->num_entries++;
	}

	return qpl;

abort:
	gve_free_queue_page_list(priv, qpl, id);
	return NULL;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

void gve_free_queue_page_list(struct gve_priv *priv,
			      struct gve_queue_page_list *qpl,
			      u32 id)
{
	int i;

	if (!qpl)
		return;
	if (!qpl->pages)
		goto free_qpl;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
free_qpl:
	kvfree(qpl);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
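 * (See gve_handle_reset(), which performs the reset directly under the
 * rtnl and netdev locks.)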
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->tx_cfg.num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		if (gve_is_qpl(priv))
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
							 NULL);
		else
			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
							 MEM_TYPE_PAGE_POOL,
							 rx->dqo.page_pool);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		page_frag_cache_drain(&priv->rx[i].page_cache);
}

static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_rx_alloc_rings_cfg *cfg)
{
	cfg->qcfg_rx = &priv->rx_cfg;
	cfg->qcfg_tx = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->enable_header_split = priv->header_split_enabled;
	cfg->ring_size = priv->rx_desc_cnt;
	cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
	cfg->rx = priv->rx;
	cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
}

void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}

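/* The ring start/stop helpers below dispatch to the GQI or DQO
 * implementation based on the queue format negotiated with the device.
 */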
static void gve_rx_start_ring(struct gve_priv *priv, int i) 1248 { 1249 if (gve_is_gqi(priv)) 1250 gve_rx_start_ring_gqi(priv, i); 1251 else 1252 gve_rx_start_ring_dqo(priv, i); 1253 } 1254 1255 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1256 { 1257 int i; 1258 1259 for (i = 0; i < num_rings; i++) 1260 gve_rx_start_ring(priv, i); 1261 } 1262 1263 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1264 { 1265 if (gve_is_gqi(priv)) 1266 gve_rx_stop_ring_gqi(priv, i); 1267 else 1268 gve_rx_stop_ring_dqo(priv, i); 1269 } 1270 1271 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1272 { 1273 int i; 1274 1275 if (!priv->rx) 1276 return; 1277 1278 for (i = 0; i < num_rings; i++) 1279 gve_rx_stop_ring(priv, i); 1280 } 1281 1282 static void gve_queues_mem_remove(struct gve_priv *priv) 1283 { 1284 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1285 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1286 1287 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1288 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1289 priv->tx = NULL; 1290 priv->rx = NULL; 1291 } 1292 1293 /* The passed-in queue memory is stored into priv and the queues are made live. 1294 * No memory is allocated. Passed-in memory is freed on errors. 1295 */ 1296 static int gve_queues_start(struct gve_priv *priv, 1297 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1298 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1299 { 1300 struct net_device *dev = priv->dev; 1301 int err; 1302 1303 /* Record new resources into priv */ 1304 priv->tx = tx_alloc_cfg->tx; 1305 priv->rx = rx_alloc_cfg->rx; 1306 1307 /* Record new configs into priv */ 1308 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1309 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1310 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1311 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1312 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1313 1314 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1315 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1316 gve_init_sync_stats(priv); 1317 1318 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1319 if (err) 1320 goto stop_and_free_rings; 1321 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1322 if (err) 1323 goto stop_and_free_rings; 1324 1325 err = gve_reg_xdp_info(priv, dev); 1326 if (err) 1327 goto stop_and_free_rings; 1328 1329 if (rx_alloc_cfg->reset_rss) { 1330 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1331 if (err) 1332 goto reset; 1333 } 1334 1335 err = gve_register_qpls(priv); 1336 if (err) 1337 goto reset; 1338 1339 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1340 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1341 1342 err = gve_create_rings(priv); 1343 if (err) 1344 goto reset; 1345 1346 gve_set_device_rings_ok(priv); 1347 1348 if (gve_get_report_stats(priv)) 1349 mod_timer(&priv->stats_report_timer, 1350 round_jiffies(jiffies + 1351 msecs_to_jiffies(priv->stats_report_timer_period))); 1352 1353 gve_turnup(priv); 1354 queue_work(priv->gve_wq, &priv->service_task); 1355 priv->interface_up_cnt++; 1356 return 0; 1357 1358 reset: 1359 if (gve_get_reset_in_progress(priv)) 1360 goto stop_and_free_rings; 1361 gve_reset_and_teardown(priv, true); 1362 /* if this fails there is nothing we can do so just ignore the return */ 1363 gve_reset_recovery(priv, false); 1364 /* return the original error */ 1365 return err; 1366 stop_and_free_rings: 1367 gve_tx_stop_rings(priv, 
gve_num_tx_queues(priv)); 1368 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1369 gve_queues_mem_remove(priv); 1370 return err; 1371 } 1372 1373 static int gve_open(struct net_device *dev) 1374 { 1375 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1376 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1377 struct gve_priv *priv = netdev_priv(dev); 1378 int err; 1379 1380 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1381 1382 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1383 if (err) 1384 return err; 1385 1386 /* No need to free on error: ownership of resources is lost after 1387 * calling gve_queues_start. 1388 */ 1389 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1390 if (err) 1391 return err; 1392 1393 return 0; 1394 } 1395 1396 static int gve_queues_stop(struct gve_priv *priv) 1397 { 1398 int err; 1399 1400 netif_carrier_off(priv->dev); 1401 if (gve_get_device_rings_ok(priv)) { 1402 gve_turndown(priv); 1403 gve_drain_page_cache(priv); 1404 err = gve_destroy_rings(priv); 1405 if (err) 1406 goto err; 1407 err = gve_unregister_qpls(priv); 1408 if (err) 1409 goto err; 1410 gve_clear_device_rings_ok(priv); 1411 } 1412 timer_delete_sync(&priv->stats_report_timer); 1413 1414 gve_unreg_xdp_info(priv); 1415 1416 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1417 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1418 1419 priv->interface_down_cnt++; 1420 return 0; 1421 1422 err: 1423 /* This must have been called from a reset due to the rtnl lock 1424 * so just return at this point. 1425 */ 1426 if (gve_get_reset_in_progress(priv)) 1427 return err; 1428 /* Otherwise reset before returning */ 1429 gve_reset_and_teardown(priv, true); 1430 return gve_reset_recovery(priv, false); 1431 } 1432 1433 static int gve_close(struct net_device *dev) 1434 { 1435 struct gve_priv *priv = netdev_priv(dev); 1436 int err; 1437 1438 err = gve_queues_stop(priv); 1439 if (err) 1440 return err; 1441 1442 gve_queues_mem_remove(priv); 1443 return 0; 1444 } 1445 1446 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1447 { 1448 if (!gve_get_napi_enabled(priv)) 1449 return; 1450 1451 if (link_status == netif_carrier_ok(priv->dev)) 1452 return; 1453 1454 if (link_status) { 1455 netdev_info(priv->dev, "Device link is up.\n"); 1456 netif_carrier_on(priv->dev); 1457 } else { 1458 netdev_info(priv->dev, "Device link is down.\n"); 1459 netif_carrier_off(priv->dev); 1460 } 1461 } 1462 1463 static int gve_configure_rings_xdp(struct gve_priv *priv, 1464 u16 num_xdp_rings) 1465 { 1466 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1467 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1468 1469 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1470 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1471 1472 rx_alloc_cfg.xdp = !!num_xdp_rings; 1473 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1474 } 1475 1476 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1477 struct netlink_ext_ack *extack) 1478 { 1479 struct bpf_prog *old_prog; 1480 int err = 0; 1481 u32 status; 1482 1483 old_prog = READ_ONCE(priv->xdp_prog); 1484 if (!netif_running(priv->dev)) { 1485 WRITE_ONCE(priv->xdp_prog, prog); 1486 if (old_prog) 1487 bpf_prog_put(old_prog); 1488 1489 /* Update priv XDP queue configuration */ 1490 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
1491 priv->rx_cfg.num_queues : 0; 1492 return 0; 1493 } 1494 1495 if (!old_prog && prog) 1496 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1497 else if (old_prog && !prog) 1498 err = gve_configure_rings_xdp(priv, 0); 1499 1500 if (err) 1501 goto out; 1502 1503 WRITE_ONCE(priv->xdp_prog, prog); 1504 if (old_prog) 1505 bpf_prog_put(old_prog); 1506 1507 out: 1508 status = ioread32be(&priv->reg_bar0->device_status); 1509 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1510 return err; 1511 } 1512 1513 static int gve_xsk_pool_enable(struct net_device *dev, 1514 struct xsk_buff_pool *pool, 1515 u16 qid) 1516 { 1517 struct gve_priv *priv = netdev_priv(dev); 1518 struct napi_struct *napi; 1519 struct gve_rx_ring *rx; 1520 int tx_qid; 1521 int err; 1522 1523 if (qid >= priv->rx_cfg.num_queues) { 1524 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1525 return -EINVAL; 1526 } 1527 if (xsk_pool_get_rx_frame_size(pool) < 1528 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1529 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1530 return -EINVAL; 1531 } 1532 1533 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1534 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1535 if (err) 1536 return err; 1537 1538 /* If XDP prog is not installed or interface is down, return. */ 1539 if (!priv->xdp_prog || !netif_running(dev)) 1540 return 0; 1541 1542 rx = &priv->rx[qid]; 1543 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1544 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1545 if (err) 1546 goto err; 1547 1548 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1549 MEM_TYPE_XSK_BUFF_POOL, NULL); 1550 if (err) 1551 goto err; 1552 1553 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1554 rx->xsk_pool = pool; 1555 1556 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1557 priv->tx[tx_qid].xsk_pool = pool; 1558 1559 return 0; 1560 err: 1561 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1562 xdp_rxq_info_unreg(&rx->xsk_rxq); 1563 1564 xsk_pool_dma_unmap(pool, 1565 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1566 return err; 1567 } 1568 1569 static int gve_xsk_pool_disable(struct net_device *dev, 1570 u16 qid) 1571 { 1572 struct gve_priv *priv = netdev_priv(dev); 1573 struct napi_struct *napi_rx; 1574 struct napi_struct *napi_tx; 1575 struct xsk_buff_pool *pool; 1576 int tx_qid; 1577 1578 pool = xsk_get_pool_from_qid(dev, qid); 1579 if (!pool) 1580 return -EINVAL; 1581 if (qid >= priv->rx_cfg.num_queues) 1582 return -EINVAL; 1583 1584 /* If XDP prog is not installed or interface is down, unmap DMA and 1585 * return. 
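	 * (The pool was DMA-mapped in gve_xsk_pool_enable(), so it must still
	 * be unmapped here even when the queues are not live.)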
1586 */ 1587 if (!priv->xdp_prog || !netif_running(dev)) 1588 goto done; 1589 1590 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1591 napi_disable(napi_rx); /* make sure current rx poll is done */ 1592 1593 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1594 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1595 napi_disable(napi_tx); /* make sure current tx poll is done */ 1596 1597 priv->rx[qid].xsk_pool = NULL; 1598 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1599 priv->tx[tx_qid].xsk_pool = NULL; 1600 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1601 1602 napi_enable(napi_rx); 1603 if (gve_rx_work_pending(&priv->rx[qid])) 1604 napi_schedule(napi_rx); 1605 1606 napi_enable(napi_tx); 1607 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1608 napi_schedule(napi_tx); 1609 1610 done: 1611 xsk_pool_dma_unmap(pool, 1612 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1613 return 0; 1614 } 1615 1616 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1617 { 1618 struct gve_priv *priv = netdev_priv(dev); 1619 struct napi_struct *napi; 1620 1621 if (!gve_get_napi_enabled(priv)) 1622 return -ENETDOWN; 1623 1624 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1625 return -EINVAL; 1626 1627 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1628 if (!napi_if_scheduled_mark_missed(napi)) { 1629 /* Call local_bh_enable to trigger SoftIRQ processing */ 1630 local_bh_disable(); 1631 napi_schedule(napi); 1632 local_bh_enable(); 1633 } 1634 1635 return 0; 1636 } 1637 1638 static int verify_xdp_configuration(struct net_device *dev) 1639 { 1640 struct gve_priv *priv = netdev_priv(dev); 1641 u16 max_xdp_mtu; 1642 1643 if (dev->features & NETIF_F_LRO) { 1644 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1645 return -EOPNOTSUPP; 1646 } 1647 1648 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1649 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1650 priv->queue_format); 1651 return -EOPNOTSUPP; 1652 } 1653 1654 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1655 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1656 max_xdp_mtu -= GVE_RX_PAD; 1657 1658 if (dev->mtu > max_xdp_mtu) { 1659 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1660 dev->mtu); 1661 return -EOPNOTSUPP; 1662 } 1663 1664 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1665 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1666 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1667 priv->rx_cfg.num_queues, 1668 priv->tx_cfg.num_queues, 1669 priv->tx_cfg.max_queues); 1670 return -EINVAL; 1671 } 1672 return 0; 1673 } 1674 1675 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1676 { 1677 struct gve_priv *priv = netdev_priv(dev); 1678 int err; 1679 1680 err = verify_xdp_configuration(dev); 1681 if (err) 1682 return err; 1683 switch (xdp->command) { 1684 case XDP_SETUP_PROG: 1685 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1686 case XDP_SETUP_XSK_POOL: 1687 if (xdp->xsk.pool) 1688 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1689 else 1690 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1691 default: 1692 return -EINVAL; 1693 } 1694 } 1695 1696 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1697 { 1698 struct gve_rss_config 
*rss_config = &priv->rss_config;
	struct ethtool_rxfh_param rxfh = {0};
	u16 i;

	if (!priv->cache_rss_config)
		return 0;

	for (i = 0; i < priv->rss_lut_size; i++)
		rss_config->hash_lut[i] =
			ethtool_rxfh_indir_default(i, num_queues);

	netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);

	rxfh.hfunc = ETH_RSS_HASH_TOP;

	return gve_adminq_configure_rss(priv, &rxfh);
}

int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}

int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
		 */
		gve_turndown(priv);
		return err;
	}

	return 0;
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_rx_queue_config new_rx_config,
		      struct gve_tx_queue_config new_tx_config,
		      bool reset_rss)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg_rx = &new_rx_config;
	rx_alloc_cfg.reset_rss = reset_rss;

	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up.
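	 * (The interface is down at this point, so the new queue configuration
	 * is only recorded in priv and takes effect on the next gve_open().)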
*/ 1784 if (reset_rss) { 1785 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1786 if (err) 1787 return err; 1788 } 1789 priv->tx_cfg = new_tx_config; 1790 priv->rx_cfg = new_rx_config; 1791 1792 return 0; 1793 } 1794 1795 static void gve_turndown(struct gve_priv *priv) 1796 { 1797 int idx; 1798 1799 if (netif_carrier_ok(priv->dev)) 1800 netif_carrier_off(priv->dev); 1801 1802 if (!gve_get_napi_enabled(priv)) 1803 return; 1804 1805 /* Disable napi to prevent more work from coming in */ 1806 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1807 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1808 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1809 1810 if (!gve_tx_was_added_to_block(priv, idx)) 1811 continue; 1812 1813 if (idx < priv->tx_cfg.num_queues) 1814 netif_queue_set_napi(priv->dev, idx, 1815 NETDEV_QUEUE_TYPE_TX, NULL); 1816 1817 napi_disable_locked(&block->napi); 1818 } 1819 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1820 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1821 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1822 1823 if (!gve_rx_was_added_to_block(priv, idx)) 1824 continue; 1825 1826 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1827 NULL); 1828 napi_disable_locked(&block->napi); 1829 } 1830 1831 /* Stop tx queues */ 1832 netif_tx_disable(priv->dev); 1833 1834 xdp_features_clear_redirect_target_locked(priv->dev); 1835 1836 gve_clear_napi_enabled(priv); 1837 gve_clear_report_stats(priv); 1838 1839 /* Make sure that all traffic is finished processing. */ 1840 synchronize_net(); 1841 } 1842 1843 static void gve_turnup(struct gve_priv *priv) 1844 { 1845 int idx; 1846 1847 /* Start the tx queues */ 1848 netif_tx_start_all_queues(priv->dev); 1849 1850 /* Enable napi and unmask interrupts for all queues */ 1851 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1852 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1853 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1854 1855 if (!gve_tx_was_added_to_block(priv, idx)) 1856 continue; 1857 1858 napi_enable_locked(&block->napi); 1859 1860 if (idx < priv->tx_cfg.num_queues) 1861 netif_queue_set_napi(priv->dev, idx, 1862 NETDEV_QUEUE_TYPE_TX, 1863 &block->napi); 1864 1865 if (gve_is_gqi(priv)) { 1866 iowrite32be(0, gve_irq_doorbell(priv, block)); 1867 } else { 1868 gve_set_itr_coalesce_usecs_dqo(priv, block, 1869 priv->tx_coalesce_usecs); 1870 } 1871 1872 /* Any descs written by the NIC before this barrier will be 1873 * handled by the one-off napi schedule below. Whereas any 1874 * descs after the barrier will generate interrupts. 1875 */ 1876 mb(); 1877 napi_schedule(&block->napi); 1878 } 1879 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1880 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1881 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1882 1883 if (!gve_rx_was_added_to_block(priv, idx)) 1884 continue; 1885 1886 napi_enable_locked(&block->napi); 1887 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1888 &block->napi); 1889 1890 if (gve_is_gqi(priv)) { 1891 iowrite32be(0, gve_irq_doorbell(priv, block)); 1892 } else { 1893 gve_set_itr_coalesce_usecs_dqo(priv, block, 1894 priv->rx_coalesce_usecs); 1895 } 1896 1897 /* Any descs written by the NIC before this barrier will be 1898 * handled by the one-off napi schedule below. Whereas any 1899 * descs after the barrier will generate interrupts. 
1900 */ 1901 mb(); 1902 napi_schedule(&block->napi); 1903 } 1904 1905 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1906 xdp_features_set_redirect_target_locked(priv->dev, false); 1907 1908 gve_set_napi_enabled(priv); 1909 } 1910 1911 static void gve_turnup_and_check_status(struct gve_priv *priv) 1912 { 1913 u32 status; 1914 1915 gve_turnup(priv); 1916 status = ioread32be(&priv->reg_bar0->device_status); 1917 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1918 } 1919 1920 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1921 { 1922 struct gve_notify_block *block; 1923 struct gve_tx_ring *tx = NULL; 1924 struct gve_priv *priv; 1925 u32 last_nic_done; 1926 u32 current_time; 1927 u32 ntfy_idx; 1928 1929 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1930 priv = netdev_priv(dev); 1931 if (txqueue > priv->tx_cfg.num_queues) 1932 goto reset; 1933 1934 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1935 if (ntfy_idx >= priv->num_ntfy_blks) 1936 goto reset; 1937 1938 block = &priv->ntfy_blocks[ntfy_idx]; 1939 tx = block->tx; 1940 1941 current_time = jiffies_to_msecs(jiffies); 1942 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1943 goto reset; 1944 1945 /* Check to see if there are missed completions, which will allow us to 1946 * kick the queue. 1947 */ 1948 last_nic_done = gve_tx_load_event_counter(priv, tx); 1949 if (last_nic_done - tx->done) { 1950 netdev_info(dev, "Kicking queue %d", txqueue); 1951 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1952 napi_schedule(&block->napi); 1953 tx->last_kick_msec = current_time; 1954 goto out; 1955 } // Else reset. 1956 1957 reset: 1958 gve_schedule_reset(priv); 1959 1960 out: 1961 if (tx) 1962 tx->queue_timeout++; 1963 priv->tx_timeo_cnt++; 1964 } 1965 1966 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 1967 { 1968 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 1969 return GVE_MAX_RX_BUFFER_SIZE; 1970 else 1971 return GVE_DEFAULT_RX_BUFFER_SIZE; 1972 } 1973 1974 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 1975 bool gve_header_split_supported(const struct gve_priv *priv) 1976 { 1977 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 1978 } 1979 1980 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 1981 { 1982 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1983 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1984 bool enable_hdr_split; 1985 int err = 0; 1986 1987 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 1988 return 0; 1989 1990 if (!gve_header_split_supported(priv)) { 1991 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 1992 return -EOPNOTSUPP; 1993 } 1994 1995 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 1996 enable_hdr_split = true; 1997 else 1998 enable_hdr_split = false; 1999 2000 if (enable_hdr_split == priv->header_split_enabled) 2001 return 0; 2002 2003 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2004 2005 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2006 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2007 2008 if (netif_running(priv->dev)) 2009 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2010 return err; 2011 } 2012 2013 static int gve_set_features(struct net_device *netdev, 2014 netdev_features_t features) 2015 { 2016 const netdev_features_t orig_features = netdev->features; 2017 struct 
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		netdev_lock(priv->dev);
		gve_reset(priv, false);
		netdev_unlock(priv->dev);
		rtnl_unlock();
	}
}

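/* Fill in the stats report buffer for the device: bump the written_count
 * sequence and record per-queue TX and RX counters for the NIC to collect.
 */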
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag_locked(priv->dev, xdp_features);
}

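/* Set up the admin queue and everything derived from the device description:
 * verify driver/device compatibility, size the TX/RX queue limits from the
 * available MSI-X vectors, pick default interrupt coalescing for DQO, and
 * allocate device resources. On failure the admin queue is freed.
 */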
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}
	priv->tx_cfg.num_xdp_queues = 0;

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

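/* Bring the device back after a reset: re-init priv (skipping the device
 * description) and, if the interface was up beforehand, reopen it. If
 * recovery fails, all queues are turned down until the next reset attempt.
 */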
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

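/* PCI probe: enable the device, map the register and doorbell BARs, read the
 * maximum queue counts, allocate and configure the netdev, initialize driver
 * state, and register the interface. The service task is queued at the end
 * to pick up any reset requests that arrived while probe was in progress.
 */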
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * gve_adminq_describe_device).
	 */
2621 */ 2622 dev->hw_features = NETIF_F_HIGHDMA; 2623 dev->hw_features |= NETIF_F_SG; 2624 dev->hw_features |= NETIF_F_HW_CSUM; 2625 dev->hw_features |= NETIF_F_TSO; 2626 dev->hw_features |= NETIF_F_TSO6; 2627 dev->hw_features |= NETIF_F_TSO_ECN; 2628 dev->hw_features |= NETIF_F_RXCSUM; 2629 dev->hw_features |= NETIF_F_RXHASH; 2630 dev->features = dev->hw_features; 2631 dev->watchdog_timeo = 5 * HZ; 2632 dev->min_mtu = ETH_MIN_MTU; 2633 netif_carrier_off(dev); 2634 2635 priv = netdev_priv(dev); 2636 priv->dev = dev; 2637 priv->pdev = pdev; 2638 priv->msg_enable = DEFAULT_MSG_LEVEL; 2639 priv->reg_bar0 = reg_bar; 2640 priv->db_bar2 = db_bar; 2641 priv->service_task_flags = 0x0; 2642 priv->state_flags = 0x0; 2643 priv->ethtool_flags = 0x0; 2644 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2645 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2646 2647 gve_set_probe_in_progress(priv); 2648 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2649 if (!priv->gve_wq) { 2650 dev_err(&pdev->dev, "Could not allocate workqueue"); 2651 err = -ENOMEM; 2652 goto abort_with_netdev; 2653 } 2654 INIT_WORK(&priv->service_task, gve_service_task); 2655 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2656 priv->tx_cfg.max_queues = max_tx_queues; 2657 priv->rx_cfg.max_queues = max_rx_queues; 2658 2659 err = gve_init_priv(priv, false); 2660 if (err) 2661 goto abort_with_wq; 2662 2663 if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) 2664 dev->netmem_tx = true; 2665 2666 err = register_netdev(dev); 2667 if (err) 2668 goto abort_with_gve_init; 2669 2670 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2671 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2672 gve_clear_probe_in_progress(priv); 2673 queue_work(priv->gve_wq, &priv->service_task); 2674 return 0; 2675 2676 abort_with_gve_init: 2677 gve_teardown_priv_resources(priv); 2678 2679 abort_with_wq: 2680 destroy_workqueue(priv->gve_wq); 2681 2682 abort_with_netdev: 2683 free_netdev(dev); 2684 2685 abort_with_db_bar: 2686 pci_iounmap(pdev, db_bar); 2687 2688 abort_with_reg_bar: 2689 pci_iounmap(pdev, reg_bar); 2690 2691 abort_with_pci_region: 2692 pci_release_regions(pdev); 2693 2694 abort_with_enabled: 2695 pci_disable_device(pdev); 2696 return err; 2697 } 2698 2699 static void gve_remove(struct pci_dev *pdev) 2700 { 2701 struct net_device *netdev = pci_get_drvdata(pdev); 2702 struct gve_priv *priv = netdev_priv(netdev); 2703 __be32 __iomem *db_bar = priv->db_bar2; 2704 void __iomem *reg_bar = priv->reg_bar0; 2705 2706 unregister_netdev(netdev); 2707 gve_teardown_priv_resources(priv); 2708 destroy_workqueue(priv->gve_wq); 2709 free_netdev(netdev); 2710 pci_iounmap(pdev, db_bar); 2711 pci_iounmap(pdev, reg_bar); 2712 pci_release_regions(pdev); 2713 pci_disable_device(pdev); 2714 } 2715 2716 static void gve_shutdown(struct pci_dev *pdev) 2717 { 2718 struct net_device *netdev = pci_get_drvdata(pdev); 2719 struct gve_priv *priv = netdev_priv(netdev); 2720 bool was_up = netif_running(priv->dev); 2721 2722 rtnl_lock(); 2723 netdev_lock(netdev); 2724 if (was_up && gve_close(priv->dev)) { 2725 /* If the dev was up, attempt to close, if close fails, reset */ 2726 gve_reset_and_teardown(priv, was_up); 2727 } else { 2728 /* If the dev wasn't up or close worked, finish tearing down */ 2729 gve_teardown_priv_resources(priv); 2730 } 2731 netdev_unlock(netdev); 2732 rtnl_unlock(); 2733 } 2734 2735 #ifdef CONFIG_PM 2736 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2737 { 2738 struct net_device 
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	netdev_unlock(netdev);
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	netdev_unlock(netdev);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);