1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2024 Google LLC 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/irq.h> 13 #include <linux/module.h> 14 #include <linux/pci.h> 15 #include <linux/sched.h> 16 #include <linux/timer.h> 17 #include <linux/workqueue.h> 18 #include <linux/utsname.h> 19 #include <linux/version.h> 20 #include <net/netdev_queues.h> 21 #include <net/sch_generic.h> 22 #include <net/xdp_sock_drv.h> 23 #include "gve.h" 24 #include "gve_dqo.h" 25 #include "gve_adminq.h" 26 #include "gve_register.h" 27 #include "gve_utils.h" 28 29 #define GVE_DEFAULT_RX_COPYBREAK (256) 30 31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 32 #define GVE_VERSION "1.0.0" 33 #define GVE_VERSION_PREFIX "GVE-" 34 35 // Minimum amount of time between queue kicks in msec (10 seconds) 36 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 37 38 char gve_driver_name[] = "gve"; 39 const char gve_version_str[] = GVE_VERSION; 40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 41 42 static int gve_verify_driver_compatibility(struct gve_priv *priv) 43 { 44 int err; 45 struct gve_driver_info *driver_info; 46 dma_addr_t driver_info_bus; 47 48 driver_info = dma_alloc_coherent(&priv->pdev->dev, 49 sizeof(struct gve_driver_info), 50 &driver_info_bus, GFP_KERNEL); 51 if (!driver_info) 52 return -ENOMEM; 53 54 *driver_info = (struct gve_driver_info) { 55 .os_type = 1, /* Linux */ 56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 59 .driver_capability_flags = { 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 64 }, 65 }; 66 strscpy(driver_info->os_version_str1, utsname()->release, 67 sizeof(driver_info->os_version_str1)); 68 strscpy(driver_info->os_version_str2, utsname()->version, 69 sizeof(driver_info->os_version_str2)); 70 71 err = gve_adminq_verify_driver_compatibility(priv, 72 sizeof(struct gve_driver_info), 73 driver_info_bus); 74 75 /* It's ok if the device doesn't support this */ 76 if (err == -EOPNOTSUPP) 77 err = 0; 78 79 dma_free_coherent(&priv->pdev->dev, 80 sizeof(struct gve_driver_info), 81 driver_info, driver_info_bus); 82 return err; 83 } 84 85 static netdev_features_t gve_features_check(struct sk_buff *skb, 86 struct net_device *dev, 87 netdev_features_t features) 88 { 89 struct gve_priv *priv = netdev_priv(dev); 90 91 if (!gve_is_gqi(priv)) 92 return gve_features_check_dqo(skb, dev, features); 93 94 return features; 95 } 96 97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 98 { 99 struct gve_priv *priv = netdev_priv(dev); 100 101 if (gve_is_gqi(priv)) 102 return gve_tx(skb, dev); 103 else 104 return gve_tx_dqo(skb, dev); 105 } 106 107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 108 { 109 struct gve_priv *priv = netdev_priv(dev); 110 unsigned int start; 111 u64 packets, bytes; 112 int num_tx_queues; 113 int ring; 114 115 num_tx_queues = gve_num_tx_queues(priv); 116 if (priv->rx) { 117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 118 do { 119 start = 120 u64_stats_fetch_begin(&priv->rx[ring].statss); 121 packets = priv->rx[ring].rpackets; 122 bytes = priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 188 { 189 struct gve_rss_config *rss_config = &priv->rss_config; 190 191 if (!priv->cache_rss_config) 192 return 0; 193 194 rss_config->hash_key = kcalloc(priv->rss_key_size, 195 sizeof(rss_config->hash_key[0]), 196 GFP_KERNEL); 197 if (!rss_config->hash_key) 198 return -ENOMEM; 199 200 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 201 sizeof(rss_config->hash_lut[0]), 202 GFP_KERNEL); 203 if (!rss_config->hash_lut) 204 goto free_rss_key_cache; 205 206 return 0; 207 208 free_rss_key_cache: 209 kfree(rss_config->hash_key); 210 rss_config->hash_key = NULL; 211 return -ENOMEM; 212 } 213 214 static void gve_free_rss_config_cache(struct gve_priv *priv) 215 { 216 struct gve_rss_config *rss_config = &priv->rss_config; 217 218 kfree(rss_config->hash_key); 219 kfree(rss_config->hash_lut); 220 221 memset(rss_config, 0, sizeof(*rss_config)); 222 } 223 224 static int gve_alloc_counter_array(struct gve_priv *priv) 225 { 226 priv->counter_array = 227 dma_alloc_coherent(&priv->pdev->dev, 228 priv->num_event_counters * 229 sizeof(*priv->counter_array), 230 &priv->counter_array_bus, GFP_KERNEL); 231 if (!priv->counter_array) 232 return -ENOMEM; 233 234 return 0; 235 } 236 237 static void gve_free_counter_array(struct gve_priv *priv) 238 { 239 if (!priv->counter_array) 240 return; 241 242 dma_free_coherent(&priv->pdev->dev, 243 priv->num_event_counters * 244 sizeof(*priv->counter_array), 245 priv->counter_array, priv->counter_array_bus); 246 priv->counter_array = NULL; 247 } 248 249 /* NIC requests to report stats */ 250 static void gve_stats_report_task(struct work_struct *work) 251 { 252 struct gve_priv *priv = container_of(work, struct gve_priv, 253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = timer_container_of(priv, t, 272 stats_report_timer); 273 274 mod_timer(&priv->stats_report_timer, 275 round_jiffies(jiffies + 276 msecs_to_jiffies(priv->stats_report_timer_period))); 277 gve_stats_report_schedule(priv); 278 } 279 280 static int gve_alloc_stats_report(struct gve_priv *priv) 281 { 282 int tx_stats_num, rx_stats_num; 283 284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 285 gve_num_tx_queues(priv); 286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 287 priv->rx_cfg.num_queues; 288 priv->stats_report_len = struct_size(priv->stats_report, stats, 289 size_add(tx_stats_num, rx_stats_num)); 290 priv->stats_report = 291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 292 &priv->stats_report_bus, GFP_KERNEL); 293 if (!priv->stats_report) 294 return -ENOMEM; 295 /* Set up timer for the report-stats task */ 296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 298 return 0; 299 } 300 301 static void gve_free_stats_report(struct gve_priv *priv) 302 { 303 if (!priv->stats_report) 304 return; 305 306 timer_delete_sync(&priv->stats_report_timer); 307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 308 priv->stats_report, priv->stats_report_bus); 309 priv->stats_report = NULL; 310 } 311 312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 313 { 314 struct gve_priv *priv = arg; 315 316 queue_work(priv->gve_wq, &priv->service_task); 317 return IRQ_HANDLED; 318 } 319 320 static irqreturn_t gve_intr(int irq, void *arg) 321 { 322 struct gve_notify_block *block = arg; 323 struct gve_priv *priv = block->priv; 324 325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 326 napi_schedule_irqoff(&block->napi); 327 return IRQ_HANDLED; 328 } 329 330 static irqreturn_t gve_intr_dqo(int irq, void *arg) 331 { 332 struct gve_notify_block *block = arg; 333 334 /* Interrupts are automatically masked */ 335 napi_schedule_irqoff(&block->napi); 336 return IRQ_HANDLED; 337 } 338 339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 340 { 341 int cpu_curr = smp_processor_id(); 342 const struct cpumask *aff_mask; 343 344 aff_mask = irq_get_effective_affinity_mask(irq); 345 if (unlikely(!aff_mask)) 346 return 1; 347 348 return cpumask_test_cpu(cpu_curr, aff_mask); 349 } 350 351 int gve_napi_poll(struct napi_struct *napi, int budget) 352 { 353 struct gve_notify_block *block; 354 __be32 __iomem *irq_doorbell; 355 bool reschedule = false; 356 struct gve_priv *priv; 357 int work_done = 0; 358 359 block = container_of(napi, struct gve_notify_block, napi); 360 priv = block->priv; 361 362 if (block->tx) { 363 if (block->tx->q_num < priv->tx_cfg.num_queues) 364 reschedule |= gve_tx_poll(block, budget); 365 else if (budget) 366 reschedule |= gve_xdp_poll(block, budget); 367 } 368 369 if (!budget) 370 return 0; 371 372 if (block->rx) { 373 work_done = gve_rx_poll(block, budget); 374 375 /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of 376 * TX and RX work done. 377 */ 378 if (priv->xdp_prog) 379 work_done = max_t(int, work_done, 380 gve_xsk_tx_poll(block, budget)); 381 382 reschedule |= work_done == budget; 383 } 384 385 if (reschedule) 386 return budget; 387 388 /* Complete processing - don't unmask irq if busy polling is enabled */ 389 if (likely(napi_complete_done(napi, work_done))) { 390 irq_doorbell = gve_irq_doorbell(priv, block); 391 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 392 393 /* Ensure IRQ ACK is visible before we check pending work. 394 * If queue had issued updates, it would be truly visible. 395 */ 396 mb(); 397 398 if (block->tx) 399 reschedule |= gve_tx_clean_pending(priv, block->tx); 400 if (block->rx) 401 reschedule |= gve_rx_work_pending(block->rx); 402 403 if (reschedule && napi_schedule(napi)) 404 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 405 } 406 return work_done; 407 } 408 409 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 410 { 411 struct gve_notify_block *block = 412 container_of(napi, struct gve_notify_block, napi); 413 struct gve_priv *priv = block->priv; 414 bool reschedule = false; 415 int work_done = 0; 416 417 if (block->tx) 418 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 419 420 if (!budget) 421 return 0; 422 423 if (block->rx) { 424 work_done = gve_rx_poll_dqo(block, budget); 425 reschedule |= work_done == budget; 426 } 427 428 if (reschedule) { 429 /* Reschedule by returning budget only if already on the correct 430 * cpu. 431 */ 432 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 433 return budget; 434 435 /* If not on the cpu with which this queue's irq has affinity 436 * with, we avoid rescheduling napi and arm the irq instead so 437 * that napi gets rescheduled back eventually onto the right 438 * cpu. 439 */ 440 if (work_done == budget) 441 work_done--; 442 } 443 444 if (likely(napi_complete_done(napi, work_done))) { 445 /* Enable interrupts again. 446 * 447 * We don't need to repoll afterwards because HW supports the 448 * PCI MSI-X PBA feature. 449 * 450 * Another interrupt would be triggered if a new event came in 451 * since the last one. 452 */ 453 gve_write_irq_doorbell_dqo(priv, block, 454 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 455 } 456 457 return work_done; 458 } 459 460 static int gve_alloc_notify_blocks(struct gve_priv *priv) 461 { 462 int num_vecs_requested = priv->num_ntfy_blks + 1; 463 unsigned int active_cpus; 464 int vecs_enabled; 465 int i, j; 466 int err; 467 468 priv->msix_vectors = kvcalloc(num_vecs_requested, 469 sizeof(*priv->msix_vectors), GFP_KERNEL); 470 if (!priv->msix_vectors) 471 return -ENOMEM; 472 for (i = 0; i < num_vecs_requested; i++) 473 priv->msix_vectors[i].entry = i; 474 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 475 GVE_MIN_MSIX, num_vecs_requested); 476 if (vecs_enabled < 0) { 477 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 478 GVE_MIN_MSIX, vecs_enabled); 479 err = vecs_enabled; 480 goto abort_with_msix_vectors; 481 } 482 if (vecs_enabled != num_vecs_requested) { 483 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 484 int vecs_per_type = new_num_ntfy_blks / 2; 485 int vecs_left = new_num_ntfy_blks % 2; 486 487 priv->num_ntfy_blks = new_num_ntfy_blks; 488 priv->mgmt_msix_idx = priv->num_ntfy_blks; 489 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 490 vecs_per_type); 491 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 492 vecs_per_type + vecs_left); 493 dev_err(&priv->pdev->dev, 494 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 495 vecs_enabled, priv->tx_cfg.max_queues, 496 priv->rx_cfg.max_queues); 497 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 498 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 499 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 500 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 501 } 502 /* Half the notification blocks go to TX and half to RX */ 503 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 504 505 /* Setup Management Vector - the last vector */ 506 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 507 pci_name(priv->pdev)); 508 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 509 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 510 if (err) { 511 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 512 goto abort_with_msix_enabled; 513 } 514 priv->irq_db_indices = 515 dma_alloc_coherent(&priv->pdev->dev, 516 priv->num_ntfy_blks * 517 sizeof(*priv->irq_db_indices), 518 &priv->irq_db_indices_bus, GFP_KERNEL); 519 if (!priv->irq_db_indices) { 520 err = -ENOMEM; 521 goto abort_with_mgmt_vector; 522 } 523 524 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 525 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 526 if (!priv->ntfy_blocks) { 527 err = -ENOMEM; 528 goto abort_with_irq_db_indices; 529 } 530 531 /* Setup the other blocks - the first n-1 vectors */ 532 for (i = 0; i < priv->num_ntfy_blks; i++) { 533 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 534 int msix_idx = i; 535 536 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 537 i, pci_name(priv->pdev)); 538 block->priv = priv; 539 err = request_irq(priv->msix_vectors[msix_idx].vector, 540 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 541 0, block->name, block); 542 if (err) { 543 dev_err(&priv->pdev->dev, 544 "Failed to receive msix vector %d\n", i); 545 goto abort_with_some_ntfy_blocks; 546 } 547 block->irq = priv->msix_vectors[msix_idx].vector; 548 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 549 get_cpu_mask(i % active_cpus)); 550 block->irq_db_index = &priv->irq_db_indices[i].index; 551 } 552 return 0; 553 abort_with_some_ntfy_blocks: 554 for (j = 0; j < i; j++) { 555 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 556 int msix_idx = j; 557 558 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 559 NULL); 560 free_irq(priv->msix_vectors[msix_idx].vector, block); 561 block->irq = 0; 562 } 563 kvfree(priv->ntfy_blocks); 564 priv->ntfy_blocks = NULL; 565 abort_with_irq_db_indices: 566 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 567 sizeof(*priv->irq_db_indices), 568 priv->irq_db_indices, priv->irq_db_indices_bus); 569 priv->irq_db_indices = NULL; 570 abort_with_mgmt_vector: 571 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 572 abort_with_msix_enabled: 573 pci_disable_msix(priv->pdev); 574 abort_with_msix_vectors: 575 kvfree(priv->msix_vectors); 576 priv->msix_vectors = NULL; 577 return err; 578 } 579 580 static void gve_free_notify_blocks(struct gve_priv *priv) 581 { 582 int i; 583 584 if (!priv->msix_vectors) 585 return; 586 587 /* Free the irqs */ 588 for (i = 0; i < priv->num_ntfy_blks; i++) { 589 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 590 int msix_idx = i; 591 592 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 593 NULL); 594 free_irq(priv->msix_vectors[msix_idx].vector, block); 595 block->irq = 0; 596 } 597 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 598 kvfree(priv->ntfy_blocks); 599 priv->ntfy_blocks = NULL; 600 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 601 sizeof(*priv->irq_db_indices), 602 priv->irq_db_indices, priv->irq_db_indices_bus); 603 priv->irq_db_indices = NULL; 604 pci_disable_msix(priv->pdev); 605 kvfree(priv->msix_vectors); 606 priv->msix_vectors = NULL; 607 } 608 609 static int gve_setup_device_resources(struct gve_priv *priv) 610 { 611 int err; 612 613 err = gve_alloc_flow_rule_caches(priv); 614 if (err) 615 return err; 616 err = gve_alloc_rss_config_cache(priv); 617 if (err) 618 goto abort_with_flow_rule_caches; 619 err = gve_alloc_counter_array(priv); 620 if (err) 621 goto abort_with_rss_config_cache; 622 err = gve_alloc_notify_blocks(priv); 623 if (err) 624 goto abort_with_counter; 625 err = gve_alloc_stats_report(priv); 626 if (err) 627 goto abort_with_ntfy_blocks; 628 err = gve_adminq_configure_device_resources(priv, 629 priv->counter_array_bus, 630 priv->num_event_counters, 631 priv->irq_db_indices_bus, 632 priv->num_ntfy_blks); 633 if (unlikely(err)) { 634 dev_err(&priv->pdev->dev, 635 "could not setup device_resources: err=%d\n", err); 636 err = -ENXIO; 637 goto abort_with_stats_report; 638 } 639 640 if (!gve_is_gqi(priv)) { 641 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 642 GFP_KERNEL); 643 if (!priv->ptype_lut_dqo) { 644 err = -ENOMEM; 645 goto abort_with_stats_report; 646 } 647 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 648 if (err) { 649 dev_err(&priv->pdev->dev, 650 "Failed to get ptype map: err=%d\n", err); 651 goto abort_with_ptype_lut; 652 } 653 } 654 655 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 656 if (err) { 657 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 658 goto abort_with_ptype_lut; 659 } 660 661 err = gve_adminq_report_stats(priv, priv->stats_report_len, 662 priv->stats_report_bus, 663 GVE_STATS_REPORT_TIMER_PERIOD); 664 if (err) 665 dev_err(&priv->pdev->dev, 666 "Failed to report stats: err=%d\n", err); 667 gve_set_device_resources_ok(priv); 668 return 0; 669 670 abort_with_ptype_lut: 671 kvfree(priv->ptype_lut_dqo); 672 priv->ptype_lut_dqo = NULL; 673 abort_with_stats_report: 674 gve_free_stats_report(priv); 675 abort_with_ntfy_blocks: 676 gve_free_notify_blocks(priv); 677 abort_with_counter: 678 gve_free_counter_array(priv); 679 abort_with_rss_config_cache: 680 gve_free_rss_config_cache(priv); 681 abort_with_flow_rule_caches: 682 gve_free_flow_rule_caches(priv); 683 684 return err; 685 } 686 687 static void gve_trigger_reset(struct gve_priv *priv); 688 689 static void gve_teardown_device_resources(struct gve_priv *priv) 690 { 691 int err; 692 693 /* Tell device its resources are being freed */ 694 if (gve_get_device_resources_ok(priv)) { 695 err = gve_flow_rules_reset(priv); 696 if (err) { 697 dev_err(&priv->pdev->dev, 698 "Failed to reset flow rules: err=%d\n", err); 699 gve_trigger_reset(priv); 700 } 701 /* detach the stats report */ 702 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 703 if (err) { 704 dev_err(&priv->pdev->dev, 705 "Failed to detach stats report: err=%d\n", err); 706 gve_trigger_reset(priv); 707 } 708 err = gve_adminq_deconfigure_device_resources(priv); 709 if (err) { 710 dev_err(&priv->pdev->dev, 711 "Could not deconfigure device resources: err=%d\n", 712 err); 713 gve_trigger_reset(priv); 714 } 715 } 716 717 kvfree(priv->ptype_lut_dqo); 718 priv->ptype_lut_dqo = NULL; 719 720 gve_free_flow_rule_caches(priv); 721 gve_free_rss_config_cache(priv); 722 gve_free_counter_array(priv); 723 gve_free_notify_blocks(priv); 724 gve_free_stats_report(priv); 725 gve_clear_device_resources_ok(priv); 726 } 727 728 static int gve_unregister_qpl(struct gve_priv *priv, 729 struct gve_queue_page_list *qpl) 730 { 731 int err; 732 733 if (!qpl) 734 return 0; 735 736 err = gve_adminq_unregister_page_list(priv, qpl->id); 737 if (err) { 738 netif_err(priv, drv, priv->dev, 739 "Failed to unregister queue page list %d\n", 740 qpl->id); 741 return err; 742 } 743 744 priv->num_registered_pages -= qpl->num_entries; 745 return 0; 746 } 747 748 static int gve_register_qpl(struct gve_priv *priv, 749 struct gve_queue_page_list *qpl) 750 { 751 int pages; 752 int err; 753 754 if (!qpl) 755 return 0; 756 757 pages = qpl->num_entries; 758 759 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 760 netif_err(priv, drv, priv->dev, 761 "Reached max number of registered pages %llu > %llu\n", 762 pages + priv->num_registered_pages, 763 priv->max_registered_pages); 764 return -EINVAL; 765 } 766 767 err = gve_adminq_register_page_list(priv, qpl); 768 if (err) { 769 netif_err(priv, drv, priv->dev, 770 "failed to register queue page list %d\n", 771 qpl->id); 772 return err; 773 } 774 775 priv->num_registered_pages += pages; 776 return 0; 777 } 778 779 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 780 { 781 struct gve_tx_ring *tx = &priv->tx[idx]; 782 783 if (gve_is_gqi(priv)) 784 return tx->tx_fifo.qpl; 785 else 786 return tx->dqo.qpl; 787 } 788 789 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 790 { 791 struct gve_rx_ring *rx = &priv->rx[idx]; 792 793 if (gve_is_gqi(priv)) 794 return rx->data.qpl; 795 else 796 return rx->dqo.qpl; 797 } 798 799 static int gve_register_qpls(struct gve_priv *priv) 800 { 801 int num_tx_qpls, num_rx_qpls; 802 int err; 803 int i; 804 805 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 807 808 for (i = 0; i < num_tx_qpls; i++) { 809 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 810 if (err) 811 return err; 812 } 813 814 for (i = 0; i < num_rx_qpls; i++) { 815 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 816 if (err) 817 return err; 818 } 819 820 return 0; 821 } 822 823 static int gve_unregister_qpls(struct gve_priv *priv) 824 { 825 int num_tx_qpls, num_rx_qpls; 826 int err; 827 int i; 828 829 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 830 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 831 832 for (i = 0; i < num_tx_qpls; i++) { 833 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 834 /* This failure will trigger a reset - no need to clean */ 835 if (err) 836 return err; 837 } 838 839 for (i = 0; i < num_rx_qpls; i++) { 840 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 841 /* This failure will trigger a reset - no need to clean */ 842 if (err) 843 return err; 844 } 845 return 0; 846 } 847 848 static int gve_create_rings(struct gve_priv *priv) 849 { 850 int num_tx_queues = gve_num_tx_queues(priv); 851 int err; 852 int i; 853 854 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 855 if (err) { 856 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 857 num_tx_queues); 858 /* This failure will trigger a reset - no need to clean 859 * up 860 */ 861 return err; 862 } 863 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 864 num_tx_queues); 865 866 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 867 if (err) { 868 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 869 priv->rx_cfg.num_queues); 870 /* This failure will trigger a reset - no need to clean 871 * up 872 */ 873 return err; 874 } 875 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 876 priv->rx_cfg.num_queues); 877 878 if (gve_is_gqi(priv)) { 879 /* Rx data ring has been prefilled with packet buffers at queue 880 * allocation time. 881 * 882 * Write the doorbell to provide descriptor slots and packet 883 * buffers to the NIC. 884 */ 885 for (i = 0; i < priv->rx_cfg.num_queues; i++) 886 gve_rx_write_doorbell(priv, &priv->rx[i]); 887 } else { 888 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 889 /* Post buffers and ring doorbell. */ 890 gve_rx_post_buffers_dqo(&priv->rx[i]); 891 } 892 } 893 894 return 0; 895 } 896 897 static void init_xdp_sync_stats(struct gve_priv *priv) 898 { 899 int start_id = gve_xdp_tx_start_queue_id(priv); 900 int i; 901 902 /* Init stats */ 903 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 904 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 905 906 u64_stats_init(&priv->tx[i].statss); 907 priv->tx[i].ntfy_id = ntfy_idx; 908 } 909 } 910 911 static void gve_init_sync_stats(struct gve_priv *priv) 912 { 913 int i; 914 915 for (i = 0; i < priv->tx_cfg.num_queues; i++) 916 u64_stats_init(&priv->tx[i].statss); 917 918 /* Init stats for XDP TX queues */ 919 init_xdp_sync_stats(priv); 920 921 for (i = 0; i < priv->rx_cfg.num_queues; i++) 922 u64_stats_init(&priv->rx[i].statss); 923 } 924 925 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 926 struct gve_tx_alloc_rings_cfg *cfg) 927 { 928 cfg->qcfg = &priv->tx_cfg; 929 cfg->raw_addressing = !gve_is_qpl(priv); 930 cfg->ring_size = priv->tx_desc_cnt; 931 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 932 cfg->tx = priv->tx; 933 } 934 935 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 936 { 937 int i; 938 939 if (!priv->tx) 940 return; 941 942 for (i = 0; i < num_rings; i++) { 943 if (gve_is_gqi(priv)) 944 gve_tx_stop_ring_gqi(priv, i); 945 else 946 gve_tx_stop_ring_dqo(priv, i); 947 } 948 } 949 950 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 951 { 952 int i; 953 954 for (i = 0; i < num_rings; i++) { 955 if (gve_is_gqi(priv)) 956 gve_tx_start_ring_gqi(priv, i); 957 else 958 gve_tx_start_ring_dqo(priv, i); 959 } 960 } 961 962 static int gve_queues_mem_alloc(struct gve_priv *priv, 963 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 964 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 965 { 966 int err; 967 968 if (gve_is_gqi(priv)) 969 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 970 else 971 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 972 if (err) 973 return err; 974 975 if (gve_is_gqi(priv)) 976 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 977 else 978 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 979 if (err) 980 goto free_tx; 981 982 return 0; 983 984 free_tx: 985 if (gve_is_gqi(priv)) 986 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 987 else 988 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 989 return err; 990 } 991 992 static int gve_destroy_rings(struct gve_priv *priv) 993 { 994 int num_tx_queues = gve_num_tx_queues(priv); 995 int err; 996 997 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 998 if (err) { 999 netif_err(priv, drv, priv->dev, 1000 "failed to destroy tx queues\n"); 1001 /* This failure will trigger a reset - no need to clean up */ 1002 return err; 1003 } 1004 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1005 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1006 if (err) { 1007 netif_err(priv, drv, priv->dev, 1008 "failed to destroy rx queues\n"); 1009 /* This failure will trigger a reset - no need to clean up */ 1010 return err; 1011 } 1012 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1013 return 0; 1014 } 1015 1016 static void gve_queues_mem_free(struct gve_priv *priv, 1017 struct gve_tx_alloc_rings_cfg *tx_cfg, 1018 struct gve_rx_alloc_rings_cfg *rx_cfg) 1019 { 1020 if (gve_is_gqi(priv)) { 1021 gve_tx_free_rings_gqi(priv, tx_cfg); 1022 gve_rx_free_rings_gqi(priv, rx_cfg); 1023 } else { 1024 gve_tx_free_rings_dqo(priv, tx_cfg); 1025 gve_rx_free_rings_dqo(priv, rx_cfg); 1026 } 1027 } 1028 1029 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1030 struct page **page, dma_addr_t *dma, 1031 enum dma_data_direction dir, gfp_t gfp_flags) 1032 { 1033 *page = alloc_page(gfp_flags); 1034 if (!*page) { 1035 priv->page_alloc_fail++; 1036 return -ENOMEM; 1037 } 1038 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1039 if (dma_mapping_error(dev, *dma)) { 1040 priv->dma_mapping_error++; 1041 put_page(*page); 1042 return -ENOMEM; 1043 } 1044 return 0; 1045 } 1046 1047 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1048 u32 id, int pages) 1049 { 1050 struct gve_queue_page_list *qpl; 1051 int err; 1052 int i; 1053 1054 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1055 if (!qpl) 1056 return NULL; 1057 1058 qpl->id = id; 1059 qpl->num_entries = 0; 1060 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1061 if (!qpl->pages) 1062 goto abort; 1063 1064 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1065 if (!qpl->page_buses) 1066 goto abort; 1067 1068 for (i = 0; i < pages; i++) { 1069 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1070 &qpl->page_buses[i], 1071 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1072 if (err) 1073 goto abort; 1074 qpl->num_entries++; 1075 } 1076 1077 return qpl; 1078 1079 abort: 1080 gve_free_queue_page_list(priv, qpl, id); 1081 return NULL; 1082 } 1083 1084 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1085 enum dma_data_direction dir) 1086 { 1087 if (!dma_mapping_error(dev, dma)) 1088 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1089 if (page) 1090 put_page(page); 1091 } 1092 1093 void gve_free_queue_page_list(struct gve_priv *priv, 1094 struct gve_queue_page_list *qpl, 1095 u32 id) 1096 { 1097 int i; 1098 1099 if (!qpl) 1100 return; 1101 if (!qpl->pages) 1102 goto free_qpl; 1103 if (!qpl->page_buses) 1104 goto free_pages; 1105 1106 for (i = 0; i < qpl->num_entries; i++) 1107 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1108 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1109 1110 kvfree(qpl->page_buses); 1111 qpl->page_buses = NULL; 1112 free_pages: 1113 kvfree(qpl->pages); 1114 qpl->pages = NULL; 1115 free_qpl: 1116 kvfree(qpl); 1117 } 1118 1119 /* Use this to schedule a reset when the device is capable of continuing 1120 * to handle other requests in its current state. If it is not, do a reset 1121 * in thread instead. 1122 */ 1123 void gve_schedule_reset(struct gve_priv *priv) 1124 { 1125 gve_set_do_reset(priv); 1126 queue_work(priv->gve_wq, &priv->service_task); 1127 } 1128 1129 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1130 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1131 static void gve_turndown(struct gve_priv *priv); 1132 static void gve_turnup(struct gve_priv *priv); 1133 1134 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1135 { 1136 struct napi_struct *napi; 1137 struct gve_rx_ring *rx; 1138 int err = 0; 1139 int i, j; 1140 u32 tx_qid; 1141 1142 if (!priv->tx_cfg.num_xdp_queues) 1143 return 0; 1144 1145 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1146 rx = &priv->rx[i]; 1147 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1148 1149 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1150 napi->napi_id); 1151 if (err) 1152 goto err; 1153 if (gve_is_qpl(priv)) 1154 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1155 MEM_TYPE_PAGE_SHARED, 1156 NULL); 1157 else 1158 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1159 MEM_TYPE_PAGE_POOL, 1160 rx->dqo.page_pool); 1161 if (err) 1162 goto err; 1163 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1164 if (rx->xsk_pool) { 1165 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1166 napi->napi_id); 1167 if (err) 1168 goto err; 1169 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1170 MEM_TYPE_XSK_BUFF_POOL, NULL); 1171 if (err) 1172 goto err; 1173 xsk_pool_set_rxq_info(rx->xsk_pool, 1174 &rx->xsk_rxq); 1175 } 1176 } 1177 1178 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1179 tx_qid = gve_xdp_tx_queue_id(priv, i); 1180 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1181 } 1182 return 0; 1183 1184 err: 1185 for (j = i; j >= 0; j--) { 1186 rx = &priv->rx[j]; 1187 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1188 xdp_rxq_info_unreg(&rx->xdp_rxq); 1189 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1190 xdp_rxq_info_unreg(&rx->xsk_rxq); 1191 } 1192 return err; 1193 } 1194 1195 static void gve_unreg_xdp_info(struct gve_priv *priv) 1196 { 1197 int i, tx_qid; 1198 1199 if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) 1200 return; 1201 1202 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1203 struct gve_rx_ring *rx = &priv->rx[i]; 1204 1205 xdp_rxq_info_unreg(&rx->xdp_rxq); 1206 if (rx->xsk_pool) { 1207 xdp_rxq_info_unreg(&rx->xsk_rxq); 1208 rx->xsk_pool = NULL; 1209 } 1210 } 1211 1212 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1213 tx_qid = gve_xdp_tx_queue_id(priv, i); 1214 priv->tx[tx_qid].xsk_pool = NULL; 1215 } 1216 } 1217 1218 static void gve_drain_page_cache(struct gve_priv *priv) 1219 { 1220 int i; 1221 1222 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1223 page_frag_cache_drain(&priv->rx[i].page_cache); 1224 } 1225 1226 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1227 struct gve_rx_alloc_rings_cfg *cfg) 1228 { 1229 cfg->qcfg_rx = &priv->rx_cfg; 1230 cfg->qcfg_tx = &priv->tx_cfg; 1231 cfg->raw_addressing = !gve_is_qpl(priv); 1232 cfg->enable_header_split = priv->header_split_enabled; 1233 cfg->ring_size = priv->rx_desc_cnt; 1234 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; 1235 cfg->rx = priv->rx; 1236 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1237 } 1238 1239 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1240 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1241 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1242 { 1243 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1244 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1245 } 1246 1247 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1248 { 1249 if (gve_is_gqi(priv)) 1250 gve_rx_start_ring_gqi(priv, i); 1251 else 1252 gve_rx_start_ring_dqo(priv, i); 1253 } 1254 1255 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1256 { 1257 int i; 1258 1259 for (i = 0; i < num_rings; i++) 1260 gve_rx_start_ring(priv, i); 1261 } 1262 1263 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1264 { 1265 if (gve_is_gqi(priv)) 1266 gve_rx_stop_ring_gqi(priv, i); 1267 else 1268 gve_rx_stop_ring_dqo(priv, i); 1269 } 1270 1271 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1272 { 1273 int i; 1274 1275 if (!priv->rx) 1276 return; 1277 1278 for (i = 0; i < num_rings; i++) 1279 gve_rx_stop_ring(priv, i); 1280 } 1281 1282 static void gve_queues_mem_remove(struct gve_priv *priv) 1283 { 1284 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1285 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1286 1287 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1288 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1289 priv->tx = NULL; 1290 priv->rx = NULL; 1291 } 1292 1293 /* The passed-in queue memory is stored into priv and the queues are made live. 1294 * No memory is allocated. Passed-in memory is freed on errors. 1295 */ 1296 static int gve_queues_start(struct gve_priv *priv, 1297 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1298 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1299 { 1300 struct net_device *dev = priv->dev; 1301 int err; 1302 1303 /* Record new resources into priv */ 1304 priv->tx = tx_alloc_cfg->tx; 1305 priv->rx = rx_alloc_cfg->rx; 1306 1307 /* Record new configs into priv */ 1308 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1309 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1310 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1311 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1312 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1313 1314 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1315 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1316 gve_init_sync_stats(priv); 1317 1318 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1319 if (err) 1320 goto stop_and_free_rings; 1321 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1322 if (err) 1323 goto stop_and_free_rings; 1324 1325 err = gve_reg_xdp_info(priv, dev); 1326 if (err) 1327 goto stop_and_free_rings; 1328 1329 if (rx_alloc_cfg->reset_rss) { 1330 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1331 if (err) 1332 goto reset; 1333 } 1334 1335 err = gve_register_qpls(priv); 1336 if (err) 1337 goto reset; 1338 1339 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1340 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1341 1342 err = gve_create_rings(priv); 1343 if (err) 1344 goto reset; 1345 1346 gve_set_device_rings_ok(priv); 1347 1348 if (gve_get_report_stats(priv)) 1349 mod_timer(&priv->stats_report_timer, 1350 round_jiffies(jiffies + 1351 msecs_to_jiffies(priv->stats_report_timer_period))); 1352 1353 gve_turnup(priv); 1354 queue_work(priv->gve_wq, &priv->service_task); 1355 priv->interface_up_cnt++; 1356 return 0; 1357 1358 reset: 1359 if (gve_get_reset_in_progress(priv)) 1360 goto stop_and_free_rings; 1361 gve_reset_and_teardown(priv, true); 1362 /* if this fails there is nothing we can do so just ignore the return */ 1363 gve_reset_recovery(priv, false); 1364 /* return the original error */ 1365 return err; 1366 stop_and_free_rings: 1367 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1368 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1369 gve_queues_mem_remove(priv); 1370 return err; 1371 } 1372 1373 static int gve_open(struct net_device *dev) 1374 { 1375 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1376 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1377 struct gve_priv *priv = netdev_priv(dev); 1378 int err; 1379 1380 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1381 1382 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1383 if (err) 1384 return err; 1385 1386 /* No need to free on error: ownership of resources is lost after 1387 * calling gve_queues_start. 1388 */ 1389 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1390 if (err) 1391 return err; 1392 1393 return 0; 1394 } 1395 1396 static int gve_queues_stop(struct gve_priv *priv) 1397 { 1398 int err; 1399 1400 netif_carrier_off(priv->dev); 1401 if (gve_get_device_rings_ok(priv)) { 1402 gve_turndown(priv); 1403 gve_drain_page_cache(priv); 1404 err = gve_destroy_rings(priv); 1405 if (err) 1406 goto err; 1407 err = gve_unregister_qpls(priv); 1408 if (err) 1409 goto err; 1410 gve_clear_device_rings_ok(priv); 1411 } 1412 timer_delete_sync(&priv->stats_report_timer); 1413 1414 gve_unreg_xdp_info(priv); 1415 1416 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1417 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1418 1419 priv->interface_down_cnt++; 1420 return 0; 1421 1422 err: 1423 /* This must have been called from a reset due to the rtnl lock 1424 * so just return at this point. 1425 */ 1426 if (gve_get_reset_in_progress(priv)) 1427 return err; 1428 /* Otherwise reset before returning */ 1429 gve_reset_and_teardown(priv, true); 1430 return gve_reset_recovery(priv, false); 1431 } 1432 1433 static int gve_close(struct net_device *dev) 1434 { 1435 struct gve_priv *priv = netdev_priv(dev); 1436 int err; 1437 1438 err = gve_queues_stop(priv); 1439 if (err) 1440 return err; 1441 1442 gve_queues_mem_remove(priv); 1443 return 0; 1444 } 1445 1446 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1447 { 1448 if (!gve_get_napi_enabled(priv)) 1449 return; 1450 1451 if (link_status == netif_carrier_ok(priv->dev)) 1452 return; 1453 1454 if (link_status) { 1455 netdev_info(priv->dev, "Device link is up.\n"); 1456 netif_carrier_on(priv->dev); 1457 } else { 1458 netdev_info(priv->dev, "Device link is down.\n"); 1459 netif_carrier_off(priv->dev); 1460 } 1461 } 1462 1463 static int gve_configure_rings_xdp(struct gve_priv *priv, 1464 u16 num_xdp_rings) 1465 { 1466 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1467 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1468 1469 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1470 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1471 1472 rx_alloc_cfg.xdp = !!num_xdp_rings; 1473 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1474 } 1475 1476 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1477 struct netlink_ext_ack *extack) 1478 { 1479 struct bpf_prog *old_prog; 1480 int err = 0; 1481 u32 status; 1482 1483 old_prog = READ_ONCE(priv->xdp_prog); 1484 if (!netif_running(priv->dev)) { 1485 WRITE_ONCE(priv->xdp_prog, prog); 1486 if (old_prog) 1487 bpf_prog_put(old_prog); 1488 1489 /* Update priv XDP queue configuration */ 1490 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 1491 priv->rx_cfg.num_queues : 0; 1492 return 0; 1493 } 1494 1495 if (!old_prog && prog) 1496 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1497 else if (old_prog && !prog) 1498 err = gve_configure_rings_xdp(priv, 0); 1499 1500 if (err) 1501 goto out; 1502 1503 WRITE_ONCE(priv->xdp_prog, prog); 1504 if (old_prog) 1505 bpf_prog_put(old_prog); 1506 1507 out: 1508 status = ioread32be(&priv->reg_bar0->device_status); 1509 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1510 return err; 1511 } 1512 1513 static int gve_xsk_pool_enable(struct net_device *dev, 1514 struct xsk_buff_pool *pool, 1515 u16 qid) 1516 { 1517 struct gve_priv *priv = netdev_priv(dev); 1518 struct napi_struct *napi; 1519 struct gve_rx_ring *rx; 1520 int tx_qid; 1521 int err; 1522 1523 if (qid >= priv->rx_cfg.num_queues) { 1524 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1525 return -EINVAL; 1526 } 1527 if (xsk_pool_get_rx_frame_size(pool) < 1528 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1529 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1530 return -EINVAL; 1531 } 1532 1533 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1534 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1535 if (err) 1536 return err; 1537 1538 /* If XDP prog is not installed or interface is down, return. */ 1539 if (!priv->xdp_prog || !netif_running(dev)) 1540 return 0; 1541 1542 rx = &priv->rx[qid]; 1543 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1544 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1545 if (err) 1546 goto err; 1547 1548 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1549 MEM_TYPE_XSK_BUFF_POOL, NULL); 1550 if (err) 1551 goto err; 1552 1553 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1554 rx->xsk_pool = pool; 1555 1556 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1557 priv->tx[tx_qid].xsk_pool = pool; 1558 1559 return 0; 1560 err: 1561 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1562 xdp_rxq_info_unreg(&rx->xsk_rxq); 1563 1564 xsk_pool_dma_unmap(pool, 1565 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1566 return err; 1567 } 1568 1569 static int gve_xsk_pool_disable(struct net_device *dev, 1570 u16 qid) 1571 { 1572 struct gve_priv *priv = netdev_priv(dev); 1573 struct napi_struct *napi_rx; 1574 struct napi_struct *napi_tx; 1575 struct xsk_buff_pool *pool; 1576 int tx_qid; 1577 1578 pool = xsk_get_pool_from_qid(dev, qid); 1579 if (!pool) 1580 return -EINVAL; 1581 if (qid >= priv->rx_cfg.num_queues) 1582 return -EINVAL; 1583 1584 /* If XDP prog is not installed or interface is down, unmap DMA and 1585 * return. 1586 */ 1587 if (!priv->xdp_prog || !netif_running(dev)) 1588 goto done; 1589 1590 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1591 napi_disable(napi_rx); /* make sure current rx poll is done */ 1592 1593 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1594 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1595 napi_disable(napi_tx); /* make sure current tx poll is done */ 1596 1597 priv->rx[qid].xsk_pool = NULL; 1598 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1599 priv->tx[tx_qid].xsk_pool = NULL; 1600 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1601 1602 napi_enable(napi_rx); 1603 if (gve_rx_work_pending(&priv->rx[qid])) 1604 napi_schedule(napi_rx); 1605 1606 napi_enable(napi_tx); 1607 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1608 napi_schedule(napi_tx); 1609 1610 done: 1611 xsk_pool_dma_unmap(pool, 1612 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1613 return 0; 1614 } 1615 1616 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1617 { 1618 struct gve_priv *priv = netdev_priv(dev); 1619 struct napi_struct *napi; 1620 1621 if (!gve_get_napi_enabled(priv)) 1622 return -ENETDOWN; 1623 1624 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1625 return -EINVAL; 1626 1627 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1628 if (!napi_if_scheduled_mark_missed(napi)) { 1629 /* Call local_bh_enable to trigger SoftIRQ processing */ 1630 local_bh_disable(); 1631 napi_schedule(napi); 1632 local_bh_enable(); 1633 } 1634 1635 return 0; 1636 } 1637 1638 static int verify_xdp_configuration(struct net_device *dev) 1639 { 1640 struct gve_priv *priv = netdev_priv(dev); 1641 u16 max_xdp_mtu; 1642 1643 if (dev->features & NETIF_F_LRO) { 1644 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1645 return -EOPNOTSUPP; 1646 } 1647 1648 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1649 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1650 priv->queue_format); 1651 return -EOPNOTSUPP; 1652 } 1653 1654 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1655 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1656 max_xdp_mtu -= GVE_RX_PAD; 1657 1658 if (dev->mtu > max_xdp_mtu) { 1659 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1660 dev->mtu); 1661 return -EOPNOTSUPP; 1662 } 1663 1664 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1665 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1666 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1667 priv->rx_cfg.num_queues, 1668 priv->tx_cfg.num_queues, 1669 priv->tx_cfg.max_queues); 1670 return -EINVAL; 1671 } 1672 return 0; 1673 } 1674 1675 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1676 { 1677 struct gve_priv *priv = netdev_priv(dev); 1678 int err; 1679 1680 err = verify_xdp_configuration(dev); 1681 if (err) 1682 return err; 1683 switch (xdp->command) { 1684 case XDP_SETUP_PROG: 1685 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1686 case XDP_SETUP_XSK_POOL: 1687 if (xdp->xsk.pool) 1688 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1689 else 1690 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1691 default: 1692 return -EINVAL; 1693 } 1694 } 1695 1696 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1697 { 1698 struct gve_rss_config *rss_config = &priv->rss_config; 1699 struct ethtool_rxfh_param rxfh = {0}; 1700 u16 i; 1701 1702 if (!priv->cache_rss_config) 1703 return 0; 1704 1705 for (i = 0; i < priv->rss_lut_size; i++) 1706 rss_config->hash_lut[i] = 1707 ethtool_rxfh_indir_default(i, num_queues); 1708 1709 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1710 1711 rxfh.hfunc = ETH_RSS_HASH_TOP; 1712 1713 return gve_adminq_configure_rss(priv, &rxfh); 1714 } 1715 1716 int gve_flow_rules_reset(struct gve_priv *priv) 1717 { 1718 if (!priv->max_flow_rules) 1719 return 0; 1720 1721 return gve_adminq_reset_flow_rules(priv); 1722 } 1723 1724 int gve_adjust_config(struct gve_priv *priv, 1725 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1726 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1727 { 1728 int err; 1729 1730 /* Allocate resources for the new confiugration */ 1731 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1732 if (err) { 1733 netif_err(priv, drv, priv->dev, 1734 "Adjust config failed to alloc new queues"); 1735 return err; 1736 } 1737 1738 /* Teardown the device and free existing resources */ 1739 err = gve_close(priv->dev); 1740 if (err) { 1741 netif_err(priv, drv, priv->dev, 1742 "Adjust config failed to close old queues"); 1743 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1744 return err; 1745 } 1746 1747 /* Bring the device back up again with the new resources. */ 1748 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1749 if (err) { 1750 netif_err(priv, drv, priv->dev, 1751 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1752 /* No need to free on error: ownership of resources is lost after 1753 * calling gve_queues_start. 1754 */ 1755 gve_turndown(priv); 1756 return err; 1757 } 1758 1759 return 0; 1760 } 1761 1762 int gve_adjust_queues(struct gve_priv *priv, 1763 struct gve_rx_queue_config new_rx_config, 1764 struct gve_tx_queue_config new_tx_config, 1765 bool reset_rss) 1766 { 1767 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1768 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1769 int err; 1770 1771 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1772 1773 /* Relay the new config from ethtool */ 1774 tx_alloc_cfg.qcfg = &new_tx_config; 1775 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1776 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1777 rx_alloc_cfg.reset_rss = reset_rss; 1778 1779 if (netif_running(priv->dev)) { 1780 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1781 return err; 1782 } 1783 /* Set the config for the next up. */ 1784 if (reset_rss) { 1785 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1786 if (err) 1787 return err; 1788 } 1789 priv->tx_cfg = new_tx_config; 1790 priv->rx_cfg = new_rx_config; 1791 1792 return 0; 1793 } 1794 1795 static void gve_turndown(struct gve_priv *priv) 1796 { 1797 int idx; 1798 1799 if (netif_carrier_ok(priv->dev)) 1800 netif_carrier_off(priv->dev); 1801 1802 if (!gve_get_napi_enabled(priv)) 1803 return; 1804 1805 /* Disable napi to prevent more work from coming in */ 1806 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1807 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1808 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1809 1810 if (!gve_tx_was_added_to_block(priv, idx)) 1811 continue; 1812 1813 if (idx < priv->tx_cfg.num_queues) 1814 netif_queue_set_napi(priv->dev, idx, 1815 NETDEV_QUEUE_TYPE_TX, NULL); 1816 1817 napi_disable_locked(&block->napi); 1818 } 1819 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1820 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1821 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1822 1823 if (!gve_rx_was_added_to_block(priv, idx)) 1824 continue; 1825 1826 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1827 NULL); 1828 napi_disable_locked(&block->napi); 1829 } 1830 1831 /* Stop tx queues */ 1832 netif_tx_disable(priv->dev); 1833 1834 xdp_features_clear_redirect_target_locked(priv->dev); 1835 1836 gve_clear_napi_enabled(priv); 1837 gve_clear_report_stats(priv); 1838 1839 /* Make sure that all traffic is finished processing. */ 1840 synchronize_net(); 1841 } 1842 1843 static void gve_turnup(struct gve_priv *priv) 1844 { 1845 int idx; 1846 1847 /* Start the tx queues */ 1848 netif_tx_start_all_queues(priv->dev); 1849 1850 /* Enable napi and unmask interrupts for all queues */ 1851 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1852 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1853 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1854 1855 if (!gve_tx_was_added_to_block(priv, idx)) 1856 continue; 1857 1858 napi_enable_locked(&block->napi); 1859 1860 if (idx < priv->tx_cfg.num_queues) 1861 netif_queue_set_napi(priv->dev, idx, 1862 NETDEV_QUEUE_TYPE_TX, 1863 &block->napi); 1864 1865 if (gve_is_gqi(priv)) { 1866 iowrite32be(0, gve_irq_doorbell(priv, block)); 1867 } else { 1868 gve_set_itr_coalesce_usecs_dqo(priv, block, 1869 priv->tx_coalesce_usecs); 1870 } 1871 1872 /* Any descs written by the NIC before this barrier will be 1873 * handled by the one-off napi schedule below. Whereas any 1874 * descs after the barrier will generate interrupts. 1875 */ 1876 mb(); 1877 napi_schedule(&block->napi); 1878 } 1879 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1880 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1881 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1882 1883 if (!gve_rx_was_added_to_block(priv, idx)) 1884 continue; 1885 1886 napi_enable_locked(&block->napi); 1887 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1888 &block->napi); 1889 1890 if (gve_is_gqi(priv)) { 1891 iowrite32be(0, gve_irq_doorbell(priv, block)); 1892 } else { 1893 gve_set_itr_coalesce_usecs_dqo(priv, block, 1894 priv->rx_coalesce_usecs); 1895 } 1896 1897 /* Any descs written by the NIC before this barrier will be 1898 * handled by the one-off napi schedule below. Whereas any 1899 * descs after the barrier will generate interrupts. 1900 */ 1901 mb(); 1902 napi_schedule(&block->napi); 1903 } 1904 1905 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1906 xdp_features_set_redirect_target_locked(priv->dev, false); 1907 1908 gve_set_napi_enabled(priv); 1909 } 1910 1911 static void gve_turnup_and_check_status(struct gve_priv *priv) 1912 { 1913 u32 status; 1914 1915 gve_turnup(priv); 1916 status = ioread32be(&priv->reg_bar0->device_status); 1917 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1918 } 1919 1920 static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, 1921 unsigned int txqueue) 1922 { 1923 u32 ntfy_idx; 1924 1925 if (txqueue > priv->tx_cfg.num_queues) 1926 return NULL; 1927 1928 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1929 if (ntfy_idx >= priv->num_ntfy_blks) 1930 return NULL; 1931 1932 return &priv->ntfy_blocks[ntfy_idx]; 1933 } 1934 1935 static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, 1936 unsigned int txqueue) 1937 { 1938 struct gve_notify_block *block; 1939 u32 current_time; 1940 1941 block = gve_get_tx_notify_block(priv, txqueue); 1942 1943 if (!block) 1944 return false; 1945 1946 current_time = jiffies_to_msecs(jiffies); 1947 if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1948 return false; 1949 1950 netdev_info(priv->dev, "Kicking queue %d", txqueue); 1951 napi_schedule(&block->napi); 1952 block->tx->last_kick_msec = current_time; 1953 return true; 1954 } 1955 1956 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1957 { 1958 struct gve_notify_block *block; 1959 struct gve_priv *priv; 1960 1961 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1962 priv = netdev_priv(dev); 1963 1964 if (!gve_tx_timeout_try_q_kick(priv, txqueue)) 1965 gve_schedule_reset(priv); 1966 1967 block = gve_get_tx_notify_block(priv, txqueue); 1968 if (block) 1969 block->tx->queue_timeout++; 1970 priv->tx_timeo_cnt++; 1971 } 1972 1973 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 1974 { 1975 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 1976 return GVE_MAX_RX_BUFFER_SIZE; 1977 else 1978 return GVE_DEFAULT_RX_BUFFER_SIZE; 1979 } 1980 1981 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 1982 bool gve_header_split_supported(const struct gve_priv *priv) 1983 { 1984 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 1985 } 1986 1987 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 1988 { 1989 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1990 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1991 bool enable_hdr_split; 1992 int err = 0; 1993 1994 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 1995 return 0; 1996 1997 if (!gve_header_split_supported(priv)) { 1998 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 1999 return -EOPNOTSUPP; 2000 } 2001 2002 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2003 enable_hdr_split = true; 2004 else 2005 enable_hdr_split = false; 2006 2007 if (enable_hdr_split == priv->header_split_enabled) 2008 return 0; 2009 2010 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2011 2012 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2013 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2014 2015 if (netif_running(priv->dev)) 2016 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2017 return err; 2018 } 2019 2020 static int gve_set_features(struct net_device *netdev, 2021 netdev_features_t features) 2022 { 2023 const netdev_features_t orig_features = netdev->features; 2024 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2025 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2026 struct gve_priv *priv = netdev_priv(netdev); 2027 int err; 2028 2029 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2030 2031 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2032 netdev->features ^= NETIF_F_LRO; 2033 if (netif_running(netdev)) { 2034 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2035 if (err) 2036 goto revert_features; 2037 } 2038 } 2039 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2040 err = gve_flow_rules_reset(priv); 2041 if (err) 2042 goto revert_features; 2043 } 2044 2045 return 0; 2046 2047 revert_features: 2048 netdev->features = orig_features; 2049 return err; 2050 } 2051 2052 static const struct net_device_ops gve_netdev_ops = { 2053 .ndo_start_xmit = gve_start_xmit, 2054 .ndo_features_check = gve_features_check, 2055 .ndo_open = gve_open, 2056 .ndo_stop = gve_close, 2057 .ndo_get_stats64 = gve_get_stats, 2058 .ndo_tx_timeout = gve_tx_timeout, 2059 .ndo_set_features = gve_set_features, 2060 .ndo_bpf = gve_xdp, 2061 .ndo_xdp_xmit = gve_xdp_xmit, 2062 .ndo_xsk_wakeup = gve_xsk_wakeup, 2063 }; 2064 2065 static void gve_handle_status(struct gve_priv *priv, u32 status) 2066 { 2067 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2068 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2069 gve_set_do_reset(priv); 2070 } 2071 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2072 priv->stats_report_trigger_cnt++; 2073 gve_set_do_report_stats(priv); 2074 } 2075 } 2076 2077 static void gve_handle_reset(struct gve_priv *priv) 2078 { 2079 /* A service task will be scheduled at the end of probe to catch any 2080 * resets that need to happen, and we don't want to reset until 2081 * probe is done. 2082 */ 2083 if (gve_get_probe_in_progress(priv)) 2084 return; 2085 2086 if (gve_get_do_reset(priv)) { 2087 rtnl_lock(); 2088 netdev_lock(priv->dev); 2089 gve_reset(priv, false); 2090 netdev_unlock(priv->dev); 2091 rtnl_unlock(); 2092 } 2093 } 2094 2095 void gve_handle_report_stats(struct gve_priv *priv) 2096 { 2097 struct stats *stats = priv->stats_report->stats; 2098 int idx, stats_idx = 0; 2099 unsigned int start = 0; 2100 u64 tx_bytes; 2101 2102 if (!gve_get_report_stats(priv)) 2103 return; 2104 2105 be64_add_cpu(&priv->stats_report->written_count, 1); 2106 /* tx stats */ 2107 if (priv->tx) { 2108 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2109 u32 last_completion = 0; 2110 u32 tx_frames = 0; 2111 2112 /* DQO doesn't currently support these metrics. */ 2113 if (gve_is_gqi(priv)) { 2114 last_completion = priv->tx[idx].done; 2115 tx_frames = priv->tx[idx].req; 2116 } 2117 2118 do { 2119 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2120 tx_bytes = priv->tx[idx].bytes_done; 2121 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2122 stats[stats_idx++] = (struct stats) { 2123 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2124 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2125 .queue_id = cpu_to_be32(idx), 2126 }; 2127 stats[stats_idx++] = (struct stats) { 2128 .stat_name = cpu_to_be32(TX_STOP_CNT), 2129 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2130 .queue_id = cpu_to_be32(idx), 2131 }; 2132 stats[stats_idx++] = (struct stats) { 2133 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2134 .value = cpu_to_be64(tx_frames), 2135 .queue_id = cpu_to_be32(idx), 2136 }; 2137 stats[stats_idx++] = (struct stats) { 2138 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2139 .value = cpu_to_be64(tx_bytes), 2140 .queue_id = cpu_to_be32(idx), 2141 }; 2142 stats[stats_idx++] = (struct stats) { 2143 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2144 .value = cpu_to_be64(last_completion), 2145 .queue_id = cpu_to_be32(idx), 2146 }; 2147 stats[stats_idx++] = (struct stats) { 2148 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2149 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2150 .queue_id = cpu_to_be32(idx), 2151 }; 2152 } 2153 } 2154 /* rx stats */ 2155 if (priv->rx) { 2156 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2157 stats[stats_idx++] = (struct stats) { 2158 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2159 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2160 .queue_id = cpu_to_be32(idx), 2161 }; 2162 stats[stats_idx++] = (struct stats) { 2163 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2164 .value = cpu_to_be64(priv->rx[idx].fill_cnt), 2165 .queue_id = cpu_to_be32(idx), 2166 }; 2167 } 2168 } 2169 } 2170 2171 /* Handle NIC status register changes, reset requests and report stats */ 2172 static void gve_service_task(struct work_struct *work) 2173 { 2174 struct gve_priv *priv = container_of(work, struct gve_priv, 2175 service_task); 2176 u32 status = ioread32be(&priv->reg_bar0->device_status); 2177 2178 gve_handle_status(priv, status); 2179 2180 gve_handle_reset(priv); 2181 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2182 } 2183 2184 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2185 { 2186 xdp_features_t xdp_features; 2187 2188 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2189 xdp_features = NETDEV_XDP_ACT_BASIC; 2190 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2191 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2192 } else { 2193 xdp_features = 0; 2194 } 2195 2196 xdp_set_features_flag_locked(priv->dev, xdp_features); 2197 } 2198 2199 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2200 { 2201 int num_ntfy; 2202 int err; 2203 2204 /* Set up the adminq */ 2205 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2206 if (err) { 2207 dev_err(&priv->pdev->dev, 2208 "Failed to alloc admin queue: err=%d\n", err); 2209 return err; 2210 } 2211 2212 err = gve_verify_driver_compatibility(priv); 2213 if (err) { 2214 dev_err(&priv->pdev->dev, 2215 "Could not verify driver compatibility: err=%d\n", err); 2216 goto err; 2217 } 2218 2219 priv->num_registered_pages = 0; 2220 2221 if (skip_describe_device) 2222 goto setup_device; 2223 2224 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2225 /* Get the initial information we need from the device */ 2226 err = gve_adminq_describe_device(priv); 2227 if (err) { 2228 dev_err(&priv->pdev->dev, 2229 "Could not get device information: err=%d\n", err); 2230 goto err; 2231 } 2232 priv->dev->mtu = priv->dev->max_mtu; 2233 num_ntfy = pci_msix_vec_count(priv->pdev); 2234 if (num_ntfy <= 0) { 2235 dev_err(&priv->pdev->dev, 2236 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2237 err = num_ntfy; 2238 goto err; 2239 } else if (num_ntfy < GVE_MIN_MSIX) { 2240 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2241 GVE_MIN_MSIX, num_ntfy); 2242 err = -EINVAL; 2243 goto err; 2244 } 2245 2246 /* Big TCP is only supported on DQ*/ 2247 if (!gve_is_gqi(priv)) 2248 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2249 2250 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2251 /* gvnic has one Notification Block per MSI-x vector, except for the 2252 * management vector 2253 */ 2254 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2255 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2256 2257 priv->tx_cfg.max_queues = 2258 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2259 priv->rx_cfg.max_queues = 2260 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2261 2262 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2263 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2264 if (priv->default_num_queues > 0) { 2265 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2266 priv->tx_cfg.num_queues); 2267 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2268 priv->rx_cfg.num_queues); 2269 } 2270 priv->tx_cfg.num_xdp_queues = 0; 2271 2272 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2273 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2274 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2275 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2276 2277 if (!gve_is_gqi(priv)) { 2278 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2279 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2280 } 2281 2282 setup_device: 2283 gve_set_netdev_xdp_features(priv); 2284 err = gve_setup_device_resources(priv); 2285 if (!err) 2286 return 0; 2287 err: 2288 gve_adminq_free(&priv->pdev->dev, priv); 2289 return err; 2290 } 2291 2292 static void gve_teardown_priv_resources(struct gve_priv *priv) 2293 { 2294 gve_teardown_device_resources(priv); 2295 gve_adminq_free(&priv->pdev->dev, priv); 2296 } 2297 2298 static void gve_trigger_reset(struct gve_priv *priv) 2299 { 2300 /* Reset the device by releasing the AQ */ 2301 gve_adminq_release(priv); 2302 } 2303 2304 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2305 { 2306 gve_trigger_reset(priv); 2307 /* With the reset having already happened, close cannot fail */ 2308 if (was_up) 2309 gve_close(priv->dev); 2310 gve_teardown_priv_resources(priv); 2311 } 2312 2313 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2314 { 2315 int err; 2316 2317 err = gve_init_priv(priv, true); 2318 if (err) 2319 goto err; 2320 if (was_up) { 2321 err = gve_open(priv->dev); 2322 if (err) 2323 goto err; 2324 } 2325 return 0; 2326 err: 2327 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2328 gve_turndown(priv); 2329 return err; 2330 } 2331 2332 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2333 { 2334 bool was_up = netif_running(priv->dev); 2335 int err; 2336 2337 dev_info(&priv->pdev->dev, "Performing reset\n"); 2338 gve_clear_do_reset(priv); 2339 gve_set_reset_in_progress(priv); 2340 /* If we aren't attempting to teardown normally, just go turndown and 2341 * reset right away. 2342 */ 2343 if (!attempt_teardown) { 2344 gve_turndown(priv); 2345 gve_reset_and_teardown(priv, was_up); 2346 } else { 2347 /* Otherwise attempt to close normally */ 2348 if (was_up) { 2349 err = gve_close(priv->dev); 2350 /* If that fails reset as we did above */ 2351 if (err) 2352 gve_reset_and_teardown(priv, was_up); 2353 } 2354 /* Clean up any remaining resources */ 2355 gve_teardown_priv_resources(priv); 2356 } 2357 2358 /* Set it all back up */ 2359 err = gve_reset_recovery(priv, was_up); 2360 gve_clear_reset_in_progress(priv); 2361 priv->reset_cnt++; 2362 priv->interface_up_cnt = 0; 2363 priv->interface_down_cnt = 0; 2364 priv->stats_report_trigger_cnt = 0; 2365 return err; 2366 } 2367 2368 static void gve_write_version(u8 __iomem *driver_version_register) 2369 { 2370 const char *c = gve_version_prefix; 2371 2372 while (*c) { 2373 writeb(*c, driver_version_register); 2374 c++; 2375 } 2376 2377 c = gve_version_str; 2378 while (*c) { 2379 writeb(*c, driver_version_register); 2380 c++; 2381 } 2382 writeb('\n', driver_version_register); 2383 } 2384 2385 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2386 { 2387 struct gve_priv *priv = netdev_priv(dev); 2388 struct gve_rx_ring *gve_per_q_mem; 2389 int err; 2390 2391 if (!priv->rx) 2392 return -EAGAIN; 2393 2394 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2395 if (!gve_is_gqi(priv) && idx == 0) 2396 return -ERANGE; 2397 2398 /* Single-queue destruction requires quiescence on all queues */ 2399 gve_turndown(priv); 2400 2401 /* This failure will trigger a reset - no need to clean up */ 2402 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2403 if (err) 2404 return err; 2405 2406 if (gve_is_qpl(priv)) { 2407 /* This failure will trigger a reset - no need to clean up */ 2408 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2409 if (err) 2410 return err; 2411 } 2412 2413 gve_rx_stop_ring(priv, idx); 2414 2415 /* Turn the unstopped queues back up */ 2416 gve_turnup_and_check_status(priv); 2417 2418 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2419 *gve_per_q_mem = priv->rx[idx]; 2420 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2421 return 0; 2422 } 2423 2424 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2425 { 2426 struct gve_priv *priv = netdev_priv(dev); 2427 struct gve_rx_alloc_rings_cfg cfg = {0}; 2428 struct gve_rx_ring *gve_per_q_mem; 2429 2430 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2431 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2432 2433 if (gve_is_gqi(priv)) 2434 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2435 else 2436 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2437 } 2438 2439 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2440 int idx) 2441 { 2442 struct gve_priv *priv = netdev_priv(dev); 2443 struct gve_rx_alloc_rings_cfg cfg = {0}; 2444 struct gve_rx_ring *gve_per_q_mem; 2445 int err; 2446 2447 if (!priv->rx) 2448 return -EAGAIN; 2449 2450 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2451 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2452 2453 if (gve_is_gqi(priv)) 2454 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2455 else 2456 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2457 2458 return err; 2459 } 2460 2461 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2462 { 2463 struct gve_priv *priv = netdev_priv(dev); 2464 struct gve_rx_ring *gve_per_q_mem; 2465 int err; 2466 2467 if (!priv->rx) 2468 return -EAGAIN; 2469 2470 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2471 priv->rx[idx] = *gve_per_q_mem; 2472 2473 /* Single-queue creation requires quiescence on all queues */ 2474 gve_turndown(priv); 2475 2476 gve_rx_start_ring(priv, idx); 2477 2478 if (gve_is_qpl(priv)) { 2479 /* This failure will trigger a reset - no need to clean up */ 2480 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2481 if (err) 2482 goto abort; 2483 } 2484 2485 /* This failure will trigger a reset - no need to clean up */ 2486 err = gve_adminq_create_single_rx_queue(priv, idx); 2487 if (err) 2488 goto abort; 2489 2490 if (gve_is_gqi(priv)) 2491 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2492 else 2493 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2494 2495 /* Turn the unstopped queues back up */ 2496 gve_turnup_and_check_status(priv); 2497 return 0; 2498 2499 abort: 2500 gve_rx_stop_ring(priv, idx); 2501 2502 /* All failures in this func result in a reset, by clearing the struct 2503 * at idx, we prevent a double free when that reset runs. The reset, 2504 * which needs the rtnl lock, will not run till this func returns and 2505 * its caller gives up the lock. 2506 */ 2507 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2508 return err; 2509 } 2510 2511 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2512 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2513 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2514 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2515 .ndo_queue_start = gve_rx_queue_start, 2516 .ndo_queue_stop = gve_rx_queue_stop, 2517 }; 2518 2519 static void gve_get_rx_queue_stats(struct net_device *dev, int idx, 2520 struct netdev_queue_stats_rx *rx_stats) 2521 { 2522 struct gve_priv *priv = netdev_priv(dev); 2523 struct gve_rx_ring *rx = &priv->rx[idx]; 2524 unsigned int start; 2525 2526 do { 2527 start = u64_stats_fetch_begin(&rx->statss); 2528 rx_stats->packets = rx->rpackets; 2529 rx_stats->bytes = rx->rbytes; 2530 rx_stats->alloc_fail = rx->rx_skb_alloc_fail + 2531 rx->rx_buf_alloc_fail; 2532 } while (u64_stats_fetch_retry(&rx->statss, start)); 2533 } 2534 2535 static void gve_get_tx_queue_stats(struct net_device *dev, int idx, 2536 struct netdev_queue_stats_tx *tx_stats) 2537 { 2538 struct gve_priv *priv = netdev_priv(dev); 2539 struct gve_tx_ring *tx = &priv->tx[idx]; 2540 unsigned int start; 2541 2542 do { 2543 start = u64_stats_fetch_begin(&tx->statss); 2544 tx_stats->packets = tx->pkt_done; 2545 tx_stats->bytes = tx->bytes_done; 2546 } while (u64_stats_fetch_retry(&tx->statss, start)); 2547 } 2548 2549 static void gve_get_base_stats(struct net_device *dev, 2550 struct netdev_queue_stats_rx *rx, 2551 struct netdev_queue_stats_tx *tx) 2552 { 2553 rx->packets = 0; 2554 rx->bytes = 0; 2555 rx->alloc_fail = 0; 2556 2557 tx->packets = 0; 2558 tx->bytes = 0; 2559 } 2560 2561 static const struct netdev_stat_ops gve_stat_ops = { 2562 .get_queue_stats_rx = gve_get_rx_queue_stats, 2563 .get_queue_stats_tx = gve_get_tx_queue_stats, 2564 .get_base_stats = gve_get_base_stats, 2565 }; 2566 2567 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2568 { 2569 int max_tx_queues, max_rx_queues; 2570 struct net_device *dev; 2571 __be32 __iomem *db_bar; 2572 struct gve_registers __iomem *reg_bar; 2573 struct gve_priv *priv; 2574 int err; 2575 2576 err = pci_enable_device(pdev); 2577 if (err) 2578 return err; 2579 2580 err = pci_request_regions(pdev, gve_driver_name); 2581 if (err) 2582 goto abort_with_enabled; 2583 2584 pci_set_master(pdev); 2585 2586 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2587 if (err) { 2588 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2589 goto abort_with_pci_region; 2590 } 2591 2592 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2593 if (!reg_bar) { 2594 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2595 err = -ENOMEM; 2596 goto abort_with_pci_region; 2597 } 2598 2599 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2600 if (!db_bar) { 2601 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2602 err = -ENOMEM; 2603 goto abort_with_reg_bar; 2604 } 2605 2606 gve_write_version(®_bar->driver_version); 2607 /* Get max queues to alloc etherdev */ 2608 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2609 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2610 /* Alloc and setup the netdev and priv */ 2611 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2612 if (!dev) { 2613 dev_err(&pdev->dev, "could not allocate netdev\n"); 2614 err = -ENOMEM; 2615 goto abort_with_db_bar; 2616 } 2617 SET_NETDEV_DEV(dev, &pdev->dev); 2618 pci_set_drvdata(pdev, dev); 2619 dev->ethtool_ops = &gve_ethtool_ops; 2620 dev->netdev_ops = &gve_netdev_ops; 2621 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2622 dev->stat_ops = &gve_stat_ops; 2623 2624 /* Set default and supported features. 2625 * 2626 * Features might be set in other locations as well (such as 2627 * `gve_adminq_describe_device`). 2628 */ 2629 dev->hw_features = NETIF_F_HIGHDMA; 2630 dev->hw_features |= NETIF_F_SG; 2631 dev->hw_features |= NETIF_F_HW_CSUM; 2632 dev->hw_features |= NETIF_F_TSO; 2633 dev->hw_features |= NETIF_F_TSO6; 2634 dev->hw_features |= NETIF_F_TSO_ECN; 2635 dev->hw_features |= NETIF_F_RXCSUM; 2636 dev->hw_features |= NETIF_F_RXHASH; 2637 dev->features = dev->hw_features; 2638 dev->watchdog_timeo = 5 * HZ; 2639 dev->min_mtu = ETH_MIN_MTU; 2640 netif_carrier_off(dev); 2641 2642 priv = netdev_priv(dev); 2643 priv->dev = dev; 2644 priv->pdev = pdev; 2645 priv->msg_enable = DEFAULT_MSG_LEVEL; 2646 priv->reg_bar0 = reg_bar; 2647 priv->db_bar2 = db_bar; 2648 priv->service_task_flags = 0x0; 2649 priv->state_flags = 0x0; 2650 priv->ethtool_flags = 0x0; 2651 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2652 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2653 2654 gve_set_probe_in_progress(priv); 2655 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2656 if (!priv->gve_wq) { 2657 dev_err(&pdev->dev, "Could not allocate workqueue"); 2658 err = -ENOMEM; 2659 goto abort_with_netdev; 2660 } 2661 INIT_WORK(&priv->service_task, gve_service_task); 2662 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2663 priv->tx_cfg.max_queues = max_tx_queues; 2664 priv->rx_cfg.max_queues = max_rx_queues; 2665 2666 err = gve_init_priv(priv, false); 2667 if (err) 2668 goto abort_with_wq; 2669 2670 if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) 2671 dev->netmem_tx = true; 2672 2673 err = register_netdev(dev); 2674 if (err) 2675 goto abort_with_gve_init; 2676 2677 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2678 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2679 gve_clear_probe_in_progress(priv); 2680 queue_work(priv->gve_wq, &priv->service_task); 2681 return 0; 2682 2683 abort_with_gve_init: 2684 gve_teardown_priv_resources(priv); 2685 2686 abort_with_wq: 2687 destroy_workqueue(priv->gve_wq); 2688 2689 abort_with_netdev: 2690 free_netdev(dev); 2691 2692 abort_with_db_bar: 2693 pci_iounmap(pdev, db_bar); 2694 2695 abort_with_reg_bar: 2696 pci_iounmap(pdev, reg_bar); 2697 2698 abort_with_pci_region: 2699 pci_release_regions(pdev); 2700 2701 abort_with_enabled: 2702 pci_disable_device(pdev); 2703 return err; 2704 } 2705 2706 static void gve_remove(struct pci_dev *pdev) 2707 { 2708 struct net_device *netdev = pci_get_drvdata(pdev); 2709 struct gve_priv *priv = netdev_priv(netdev); 2710 __be32 __iomem *db_bar = priv->db_bar2; 2711 void __iomem *reg_bar = priv->reg_bar0; 2712 2713 unregister_netdev(netdev); 2714 gve_teardown_priv_resources(priv); 2715 destroy_workqueue(priv->gve_wq); 2716 free_netdev(netdev); 2717 pci_iounmap(pdev, db_bar); 2718 pci_iounmap(pdev, reg_bar); 2719 pci_release_regions(pdev); 2720 pci_disable_device(pdev); 2721 } 2722 2723 static void gve_shutdown(struct pci_dev *pdev) 2724 { 2725 struct net_device *netdev = pci_get_drvdata(pdev); 2726 struct gve_priv *priv = netdev_priv(netdev); 2727 bool was_up = netif_running(priv->dev); 2728 2729 rtnl_lock(); 2730 netdev_lock(netdev); 2731 if (was_up && gve_close(priv->dev)) { 2732 /* If the dev was up, attempt to close, if close fails, reset */ 2733 gve_reset_and_teardown(priv, was_up); 2734 } else { 2735 /* If the dev wasn't up or close worked, finish tearing down */ 2736 gve_teardown_priv_resources(priv); 2737 } 2738 netdev_unlock(netdev); 2739 rtnl_unlock(); 2740 } 2741 2742 #ifdef CONFIG_PM 2743 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2744 { 2745 struct net_device *netdev = pci_get_drvdata(pdev); 2746 struct gve_priv *priv = netdev_priv(netdev); 2747 bool was_up = netif_running(priv->dev); 2748 2749 priv->suspend_cnt++; 2750 rtnl_lock(); 2751 netdev_lock(netdev); 2752 if (was_up && gve_close(priv->dev)) { 2753 /* If the dev was up, attempt to close, if close fails, reset */ 2754 gve_reset_and_teardown(priv, was_up); 2755 } else { 2756 /* If the dev wasn't up or close worked, finish tearing down */ 2757 gve_teardown_priv_resources(priv); 2758 } 2759 priv->up_before_suspend = was_up; 2760 netdev_unlock(netdev); 2761 rtnl_unlock(); 2762 return 0; 2763 } 2764 2765 static int gve_resume(struct pci_dev *pdev) 2766 { 2767 struct net_device *netdev = pci_get_drvdata(pdev); 2768 struct gve_priv *priv = netdev_priv(netdev); 2769 int err; 2770 2771 priv->resume_cnt++; 2772 rtnl_lock(); 2773 netdev_lock(netdev); 2774 err = gve_reset_recovery(priv, priv->up_before_suspend); 2775 netdev_unlock(netdev); 2776 rtnl_unlock(); 2777 return err; 2778 } 2779 #endif /* CONFIG_PM */ 2780 2781 static const struct pci_device_id gve_id_table[] = { 2782 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2783 { } 2784 }; 2785 2786 static struct pci_driver gve_driver = { 2787 .name = gve_driver_name, 2788 .id_table = gve_id_table, 2789 .probe = gve_probe, 2790 .remove = gve_remove, 2791 .shutdown = gve_shutdown, 2792 #ifdef CONFIG_PM 2793 .suspend = gve_suspend, 2794 .resume = gve_resume, 2795 #endif 2796 }; 2797 2798 module_pci_driver(gve_driver); 2799 2800 MODULE_DEVICE_TABLE(pci, gve_id_table); 2801 MODULE_AUTHOR("Google, Inc."); 2802 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2803 MODULE_LICENSE("Dual MIT/GPL"); 2804 MODULE_VERSION(GVE_VERSION); 2805