// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK (256)

#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION "1.0.0"
#define GVE_VERSION_PREFIX "GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

static int gve_alloc_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	if (!priv->cache_rss_config)
		return 0;

	rss_config->hash_key = kcalloc(priv->rss_key_size,
				       sizeof(rss_config->hash_key[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_key)
		return -ENOMEM;

	rss_config->hash_lut = kcalloc(priv->rss_lut_size,
				       sizeof(rss_config->hash_lut[0]),
				       GFP_KERNEL);
	if (!rss_config->hash_lut)
		goto free_rss_key_cache;

	return 0;

free_rss_key_cache:
	kfree(rss_config->hash_key);
	rss_config->hash_key = NULL;
	return -ENOMEM;
}

static void gve_free_rss_config_cache(struct gve_priv *priv)
{
	struct gve_rss_config *rss_config = &priv->rss_config;

	kfree(rss_config->hash_key);
	kfree(rss_config->hash_lut);

	memset(rss_config, 0, sizeof(*rss_config));
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 272 273 mod_timer(&priv->stats_report_timer, 274 round_jiffies(jiffies + 275 msecs_to_jiffies(priv->stats_report_timer_period))); 276 gve_stats_report_schedule(priv); 277 } 278 279 static int gve_alloc_stats_report(struct gve_priv *priv) 280 { 281 int tx_stats_num, rx_stats_num; 282 283 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 284 gve_num_tx_queues(priv); 285 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 286 priv->rx_cfg.num_queues; 287 priv->stats_report_len = struct_size(priv->stats_report, stats, 288 size_add(tx_stats_num, rx_stats_num)); 289 priv->stats_report = 290 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 291 &priv->stats_report_bus, GFP_KERNEL); 292 if (!priv->stats_report) 293 return -ENOMEM; 294 /* Set up timer for the report-stats task */ 295 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 296 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 297 return 0; 298 } 299 300 static void gve_free_stats_report(struct gve_priv *priv) 301 { 302 if (!priv->stats_report) 303 return; 304 305 timer_delete_sync(&priv->stats_report_timer); 306 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 307 priv->stats_report, priv->stats_report_bus); 308 priv->stats_report = NULL; 309 } 310 311 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 312 { 313 struct gve_priv *priv = arg; 314 315 queue_work(priv->gve_wq, &priv->service_task); 316 return IRQ_HANDLED; 317 } 318 319 static irqreturn_t gve_intr(int irq, void *arg) 320 { 321 struct gve_notify_block *block = arg; 322 struct gve_priv *priv = block->priv; 323 324 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 325 napi_schedule_irqoff(&block->napi); 326 return IRQ_HANDLED; 327 } 328 329 static irqreturn_t gve_intr_dqo(int irq, void *arg) 330 { 331 struct gve_notify_block *block = arg; 332 333 /* Interrupts are automatically masked */ 334 napi_schedule_irqoff(&block->napi); 335 return IRQ_HANDLED; 336 } 337 338 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 339 { 340 int cpu_curr = smp_processor_id(); 341 const struct cpumask *aff_mask; 342 343 aff_mask = irq_get_effective_affinity_mask(irq); 344 if (unlikely(!aff_mask)) 345 return 1; 346 347 return cpumask_test_cpu(cpu_curr, aff_mask); 348 } 349 350 int gve_napi_poll(struct napi_struct *napi, int budget) 351 { 352 struct gve_notify_block *block; 353 __be32 __iomem *irq_doorbell; 354 bool reschedule = false; 355 struct gve_priv *priv; 356 int work_done = 0; 357 358 block = container_of(napi, struct gve_notify_block, napi); 359 priv = block->priv; 360 361 if (block->tx) { 362 if (block->tx->q_num < priv->tx_cfg.num_queues) 363 reschedule |= gve_tx_poll(block, budget); 364 else if (budget) 365 reschedule |= gve_xdp_poll(block, budget); 366 } 367 368 if (!budget) 369 return 0; 370 371 if (block->rx) { 372 work_done = gve_rx_poll(block, budget); 373 374 /* Poll XSK TX as part of RX NAPI. 
Setup re-poll based on max of
		 * TX and RX work done.
		 */
		if (priv->xdp_prog)
			work_done = max_t(int, work_done,
					  gve_xsk_tx_poll(block, budget));

		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure the IRQ ACK is visible before we check pending work.
		 * If the queue had issued updates, they will now be visible.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_schedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule) {
		/* Reschedule by returning budget only if already on the correct
		 * cpu.
		 */
		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
			return budget;

		/* If we are not on the cpu this queue's irq has affinity with,
		 * avoid rescheduling napi and arm the irq instead, so that napi
		 * eventually gets rescheduled back onto the right cpu.
		 */
		if (work_done == budget)
			work_done--;
	}

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
451 */ 452 gve_write_irq_doorbell_dqo(priv, block, 453 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 454 } 455 456 return work_done; 457 } 458 459 static int gve_alloc_notify_blocks(struct gve_priv *priv) 460 { 461 int num_vecs_requested = priv->num_ntfy_blks + 1; 462 unsigned int active_cpus; 463 int vecs_enabled; 464 int i, j; 465 int err; 466 467 priv->msix_vectors = kvcalloc(num_vecs_requested, 468 sizeof(*priv->msix_vectors), GFP_KERNEL); 469 if (!priv->msix_vectors) 470 return -ENOMEM; 471 for (i = 0; i < num_vecs_requested; i++) 472 priv->msix_vectors[i].entry = i; 473 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 474 GVE_MIN_MSIX, num_vecs_requested); 475 if (vecs_enabled < 0) { 476 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 477 GVE_MIN_MSIX, vecs_enabled); 478 err = vecs_enabled; 479 goto abort_with_msix_vectors; 480 } 481 if (vecs_enabled != num_vecs_requested) { 482 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 483 int vecs_per_type = new_num_ntfy_blks / 2; 484 int vecs_left = new_num_ntfy_blks % 2; 485 486 priv->num_ntfy_blks = new_num_ntfy_blks; 487 priv->mgmt_msix_idx = priv->num_ntfy_blks; 488 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 489 vecs_per_type); 490 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 491 vecs_per_type + vecs_left); 492 dev_err(&priv->pdev->dev, 493 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 494 vecs_enabled, priv->tx_cfg.max_queues, 495 priv->rx_cfg.max_queues); 496 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 497 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 498 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 499 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 500 } 501 /* Half the notification blocks go to TX and half to RX */ 502 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 503 504 /* Setup Management Vector - the last vector */ 505 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 506 pci_name(priv->pdev)); 507 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 508 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 509 if (err) { 510 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 511 goto abort_with_msix_enabled; 512 } 513 priv->irq_db_indices = 514 dma_alloc_coherent(&priv->pdev->dev, 515 priv->num_ntfy_blks * 516 sizeof(*priv->irq_db_indices), 517 &priv->irq_db_indices_bus, GFP_KERNEL); 518 if (!priv->irq_db_indices) { 519 err = -ENOMEM; 520 goto abort_with_mgmt_vector; 521 } 522 523 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 524 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 525 if (!priv->ntfy_blocks) { 526 err = -ENOMEM; 527 goto abort_with_irq_db_indices; 528 } 529 530 /* Setup the other blocks - the first n-1 vectors */ 531 for (i = 0; i < priv->num_ntfy_blks; i++) { 532 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 533 int msix_idx = i; 534 535 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 536 i, pci_name(priv->pdev)); 537 block->priv = priv; 538 err = request_irq(priv->msix_vectors[msix_idx].vector, 539 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 540 0, block->name, block); 541 if (err) { 542 dev_err(&priv->pdev->dev, 543 "Failed to receive msix vector %d\n", i); 544 goto abort_with_some_ntfy_blocks; 545 } 546 block->irq = priv->msix_vectors[msix_idx].vector; 547 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 548 get_cpu_mask(i % active_cpus)); 549 block->irq_db_index = &priv->irq_db_indices[i].index; 550 } 551 return 0; 552 abort_with_some_ntfy_blocks: 553 for (j = 0; j < i; j++) { 554 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 555 int msix_idx = j; 556 557 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 558 NULL); 559 free_irq(priv->msix_vectors[msix_idx].vector, block); 560 block->irq = 0; 561 } 562 kvfree(priv->ntfy_blocks); 563 priv->ntfy_blocks = NULL; 564 abort_with_irq_db_indices: 565 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 566 sizeof(*priv->irq_db_indices), 567 priv->irq_db_indices, priv->irq_db_indices_bus); 568 priv->irq_db_indices = NULL; 569 abort_with_mgmt_vector: 570 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 571 abort_with_msix_enabled: 572 pci_disable_msix(priv->pdev); 573 abort_with_msix_vectors: 574 kvfree(priv->msix_vectors); 575 priv->msix_vectors = NULL; 576 return err; 577 } 578 579 static void gve_free_notify_blocks(struct gve_priv *priv) 580 { 581 int i; 582 583 if (!priv->msix_vectors) 584 return; 585 586 /* Free the irqs */ 587 for (i = 0; i < priv->num_ntfy_blks; i++) { 588 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 589 int msix_idx = i; 590 591 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 592 NULL); 593 free_irq(priv->msix_vectors[msix_idx].vector, block); 594 block->irq = 0; 595 } 596 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 597 kvfree(priv->ntfy_blocks); 598 priv->ntfy_blocks = NULL; 599 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 600 sizeof(*priv->irq_db_indices), 601 priv->irq_db_indices, priv->irq_db_indices_bus); 602 priv->irq_db_indices = NULL; 603 pci_disable_msix(priv->pdev); 604 kvfree(priv->msix_vectors); 605 priv->msix_vectors = NULL; 606 } 607 608 static int gve_setup_device_resources(struct gve_priv *priv) 609 { 610 int err; 611 612 err = gve_alloc_flow_rule_caches(priv); 613 if (err) 614 return err; 615 err = gve_alloc_rss_config_cache(priv); 616 if (err) 617 goto abort_with_flow_rule_caches; 618 err = gve_alloc_counter_array(priv); 619 if (err) 620 goto abort_with_rss_config_cache; 621 err = gve_alloc_notify_blocks(priv); 622 if (err) 623 goto abort_with_counter; 624 err = gve_alloc_stats_report(priv); 625 if (err) 626 goto abort_with_ntfy_blocks; 627 err = gve_adminq_configure_device_resources(priv, 628 priv->counter_array_bus, 629 priv->num_event_counters, 630 priv->irq_db_indices_bus, 631 priv->num_ntfy_blks); 632 if (unlikely(err)) { 633 dev_err(&priv->pdev->dev, 634 "could not setup device_resources: err=%d\n", err); 635 err = -ENXIO; 636 goto abort_with_stats_report; 637 } 638 639 if (!gve_is_gqi(priv)) { 640 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 641 GFP_KERNEL); 642 if (!priv->ptype_lut_dqo) { 643 err = -ENOMEM; 644 goto abort_with_stats_report; 645 } 646 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 647 if (err) { 648 dev_err(&priv->pdev->dev, 649 "Failed to get ptype map: err=%d\n", err); 650 goto abort_with_ptype_lut; 651 } 652 } 653 654 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 655 if (err) { 656 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 657 goto 
abort_with_ptype_lut; 658 } 659 660 err = gve_adminq_report_stats(priv, priv->stats_report_len, 661 priv->stats_report_bus, 662 GVE_STATS_REPORT_TIMER_PERIOD); 663 if (err) 664 dev_err(&priv->pdev->dev, 665 "Failed to report stats: err=%d\n", err); 666 gve_set_device_resources_ok(priv); 667 return 0; 668 669 abort_with_ptype_lut: 670 kvfree(priv->ptype_lut_dqo); 671 priv->ptype_lut_dqo = NULL; 672 abort_with_stats_report: 673 gve_free_stats_report(priv); 674 abort_with_ntfy_blocks: 675 gve_free_notify_blocks(priv); 676 abort_with_counter: 677 gve_free_counter_array(priv); 678 abort_with_rss_config_cache: 679 gve_free_rss_config_cache(priv); 680 abort_with_flow_rule_caches: 681 gve_free_flow_rule_caches(priv); 682 683 return err; 684 } 685 686 static void gve_trigger_reset(struct gve_priv *priv); 687 688 static void gve_teardown_device_resources(struct gve_priv *priv) 689 { 690 int err; 691 692 /* Tell device its resources are being freed */ 693 if (gve_get_device_resources_ok(priv)) { 694 err = gve_flow_rules_reset(priv); 695 if (err) { 696 dev_err(&priv->pdev->dev, 697 "Failed to reset flow rules: err=%d\n", err); 698 gve_trigger_reset(priv); 699 } 700 /* detach the stats report */ 701 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 702 if (err) { 703 dev_err(&priv->pdev->dev, 704 "Failed to detach stats report: err=%d\n", err); 705 gve_trigger_reset(priv); 706 } 707 err = gve_adminq_deconfigure_device_resources(priv); 708 if (err) { 709 dev_err(&priv->pdev->dev, 710 "Could not deconfigure device resources: err=%d\n", 711 err); 712 gve_trigger_reset(priv); 713 } 714 } 715 716 kvfree(priv->ptype_lut_dqo); 717 priv->ptype_lut_dqo = NULL; 718 719 gve_free_flow_rule_caches(priv); 720 gve_free_rss_config_cache(priv); 721 gve_free_counter_array(priv); 722 gve_free_notify_blocks(priv); 723 gve_free_stats_report(priv); 724 gve_clear_device_resources_ok(priv); 725 } 726 727 static int gve_unregister_qpl(struct gve_priv *priv, 728 struct gve_queue_page_list *qpl) 729 { 730 int err; 731 732 if (!qpl) 733 return 0; 734 735 err = gve_adminq_unregister_page_list(priv, qpl->id); 736 if (err) { 737 netif_err(priv, drv, priv->dev, 738 "Failed to unregister queue page list %d\n", 739 qpl->id); 740 return err; 741 } 742 743 priv->num_registered_pages -= qpl->num_entries; 744 return 0; 745 } 746 747 static int gve_register_qpl(struct gve_priv *priv, 748 struct gve_queue_page_list *qpl) 749 { 750 int pages; 751 int err; 752 753 if (!qpl) 754 return 0; 755 756 pages = qpl->num_entries; 757 758 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 759 netif_err(priv, drv, priv->dev, 760 "Reached max number of registered pages %llu > %llu\n", 761 pages + priv->num_registered_pages, 762 priv->max_registered_pages); 763 return -EINVAL; 764 } 765 766 err = gve_adminq_register_page_list(priv, qpl); 767 if (err) { 768 netif_err(priv, drv, priv->dev, 769 "failed to register queue page list %d\n", 770 qpl->id); 771 return err; 772 } 773 774 priv->num_registered_pages += pages; 775 return 0; 776 } 777 778 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 779 { 780 struct gve_tx_ring *tx = &priv->tx[idx]; 781 782 if (gve_is_gqi(priv)) 783 return tx->tx_fifo.qpl; 784 else 785 return tx->dqo.qpl; 786 } 787 788 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 789 { 790 struct gve_rx_ring *rx = &priv->rx[idx]; 791 792 if (gve_is_gqi(priv)) 793 return rx->data.qpl; 794 else 795 return rx->dqo.qpl; 796 } 797 798 static 
int gve_register_qpls(struct gve_priv *priv) 799 { 800 int num_tx_qpls, num_rx_qpls; 801 int err; 802 int i; 803 804 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 805 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 806 807 for (i = 0; i < num_tx_qpls; i++) { 808 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 809 if (err) 810 return err; 811 } 812 813 for (i = 0; i < num_rx_qpls; i++) { 814 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 815 if (err) 816 return err; 817 } 818 819 return 0; 820 } 821 822 static int gve_unregister_qpls(struct gve_priv *priv) 823 { 824 int num_tx_qpls, num_rx_qpls; 825 int err; 826 int i; 827 828 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 829 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 830 831 for (i = 0; i < num_tx_qpls; i++) { 832 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 833 /* This failure will trigger a reset - no need to clean */ 834 if (err) 835 return err; 836 } 837 838 for (i = 0; i < num_rx_qpls; i++) { 839 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 840 /* This failure will trigger a reset - no need to clean */ 841 if (err) 842 return err; 843 } 844 return 0; 845 } 846 847 static int gve_create_rings(struct gve_priv *priv) 848 { 849 int num_tx_queues = gve_num_tx_queues(priv); 850 int err; 851 int i; 852 853 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 854 if (err) { 855 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 856 num_tx_queues); 857 /* This failure will trigger a reset - no need to clean 858 * up 859 */ 860 return err; 861 } 862 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 863 num_tx_queues); 864 865 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 866 if (err) { 867 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 868 priv->rx_cfg.num_queues); 869 /* This failure will trigger a reset - no need to clean 870 * up 871 */ 872 return err; 873 } 874 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 875 priv->rx_cfg.num_queues); 876 877 if (gve_is_gqi(priv)) { 878 /* Rx data ring has been prefilled with packet buffers at queue 879 * allocation time. 880 * 881 * Write the doorbell to provide descriptor slots and packet 882 * buffers to the NIC. 883 */ 884 for (i = 0; i < priv->rx_cfg.num_queues; i++) 885 gve_rx_write_doorbell(priv, &priv->rx[i]); 886 } else { 887 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 888 /* Post buffers and ring doorbell. 
*/ 889 gve_rx_post_buffers_dqo(&priv->rx[i]); 890 } 891 } 892 893 return 0; 894 } 895 896 static void init_xdp_sync_stats(struct gve_priv *priv) 897 { 898 int start_id = gve_xdp_tx_start_queue_id(priv); 899 int i; 900 901 /* Init stats */ 902 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 903 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 904 905 u64_stats_init(&priv->tx[i].statss); 906 priv->tx[i].ntfy_id = ntfy_idx; 907 } 908 } 909 910 static void gve_init_sync_stats(struct gve_priv *priv) 911 { 912 int i; 913 914 for (i = 0; i < priv->tx_cfg.num_queues; i++) 915 u64_stats_init(&priv->tx[i].statss); 916 917 /* Init stats for XDP TX queues */ 918 init_xdp_sync_stats(priv); 919 920 for (i = 0; i < priv->rx_cfg.num_queues; i++) 921 u64_stats_init(&priv->rx[i].statss); 922 } 923 924 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 925 struct gve_tx_alloc_rings_cfg *cfg) 926 { 927 cfg->qcfg = &priv->tx_cfg; 928 cfg->raw_addressing = !gve_is_qpl(priv); 929 cfg->ring_size = priv->tx_desc_cnt; 930 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 931 cfg->tx = priv->tx; 932 } 933 934 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 935 { 936 int i; 937 938 if (!priv->tx) 939 return; 940 941 for (i = 0; i < num_rings; i++) { 942 if (gve_is_gqi(priv)) 943 gve_tx_stop_ring_gqi(priv, i); 944 else 945 gve_tx_stop_ring_dqo(priv, i); 946 } 947 } 948 949 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 950 { 951 int i; 952 953 for (i = 0; i < num_rings; i++) { 954 if (gve_is_gqi(priv)) 955 gve_tx_start_ring_gqi(priv, i); 956 else 957 gve_tx_start_ring_dqo(priv, i); 958 } 959 } 960 961 static int gve_queues_mem_alloc(struct gve_priv *priv, 962 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 963 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 964 { 965 int err; 966 967 if (gve_is_gqi(priv)) 968 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 969 else 970 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 971 if (err) 972 return err; 973 974 if (gve_is_gqi(priv)) 975 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 976 else 977 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 978 if (err) 979 goto free_tx; 980 981 return 0; 982 983 free_tx: 984 if (gve_is_gqi(priv)) 985 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 986 else 987 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 988 return err; 989 } 990 991 static int gve_destroy_rings(struct gve_priv *priv) 992 { 993 int num_tx_queues = gve_num_tx_queues(priv); 994 int err; 995 996 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 997 if (err) { 998 netif_err(priv, drv, priv->dev, 999 "failed to destroy tx queues\n"); 1000 /* This failure will trigger a reset - no need to clean up */ 1001 return err; 1002 } 1003 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1004 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1005 if (err) { 1006 netif_err(priv, drv, priv->dev, 1007 "failed to destroy rx queues\n"); 1008 /* This failure will trigger a reset - no need to clean up */ 1009 return err; 1010 } 1011 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1012 return 0; 1013 } 1014 1015 static void gve_queues_mem_free(struct gve_priv *priv, 1016 struct gve_tx_alloc_rings_cfg *tx_cfg, 1017 struct gve_rx_alloc_rings_cfg *rx_cfg) 1018 { 1019 if (gve_is_gqi(priv)) { 1020 gve_tx_free_rings_gqi(priv, tx_cfg); 1021 gve_rx_free_rings_gqi(priv, rx_cfg); 1022 } else { 1023 gve_tx_free_rings_dqo(priv, tx_cfg); 1024 gve_rx_free_rings_dqo(priv, rx_cfg); 1025 } 1026 } 1027 1028 
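/* gve_alloc_page - allocate a page and DMA-map it for device use.
 * On failure the corresponding priv counter (page_alloc_fail or
 * dma_mapping_error) is incremented and -ENOMEM is returned.
 */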
int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1029 struct page **page, dma_addr_t *dma, 1030 enum dma_data_direction dir, gfp_t gfp_flags) 1031 { 1032 *page = alloc_page(gfp_flags); 1033 if (!*page) { 1034 priv->page_alloc_fail++; 1035 return -ENOMEM; 1036 } 1037 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1038 if (dma_mapping_error(dev, *dma)) { 1039 priv->dma_mapping_error++; 1040 put_page(*page); 1041 return -ENOMEM; 1042 } 1043 return 0; 1044 } 1045 1046 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1047 u32 id, int pages) 1048 { 1049 struct gve_queue_page_list *qpl; 1050 int err; 1051 int i; 1052 1053 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1054 if (!qpl) 1055 return NULL; 1056 1057 qpl->id = id; 1058 qpl->num_entries = 0; 1059 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1060 if (!qpl->pages) 1061 goto abort; 1062 1063 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1064 if (!qpl->page_buses) 1065 goto abort; 1066 1067 for (i = 0; i < pages; i++) { 1068 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1069 &qpl->page_buses[i], 1070 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1071 if (err) 1072 goto abort; 1073 qpl->num_entries++; 1074 } 1075 1076 return qpl; 1077 1078 abort: 1079 gve_free_queue_page_list(priv, qpl, id); 1080 return NULL; 1081 } 1082 1083 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1084 enum dma_data_direction dir) 1085 { 1086 if (!dma_mapping_error(dev, dma)) 1087 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1088 if (page) 1089 put_page(page); 1090 } 1091 1092 void gve_free_queue_page_list(struct gve_priv *priv, 1093 struct gve_queue_page_list *qpl, 1094 u32 id) 1095 { 1096 int i; 1097 1098 if (!qpl) 1099 return; 1100 if (!qpl->pages) 1101 goto free_qpl; 1102 if (!qpl->page_buses) 1103 goto free_pages; 1104 1105 for (i = 0; i < qpl->num_entries; i++) 1106 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1107 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1108 1109 kvfree(qpl->page_buses); 1110 qpl->page_buses = NULL; 1111 free_pages: 1112 kvfree(qpl->pages); 1113 qpl->pages = NULL; 1114 free_qpl: 1115 kvfree(qpl); 1116 } 1117 1118 /* Use this to schedule a reset when the device is capable of continuing 1119 * to handle other requests in its current state. If it is not, do a reset 1120 * in thread instead. 
1121 */ 1122 void gve_schedule_reset(struct gve_priv *priv) 1123 { 1124 gve_set_do_reset(priv); 1125 queue_work(priv->gve_wq, &priv->service_task); 1126 } 1127 1128 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1129 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1130 static void gve_turndown(struct gve_priv *priv); 1131 static void gve_turnup(struct gve_priv *priv); 1132 1133 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1134 { 1135 struct napi_struct *napi; 1136 struct gve_rx_ring *rx; 1137 int err = 0; 1138 int i, j; 1139 u32 tx_qid; 1140 1141 if (!priv->tx_cfg.num_xdp_queues) 1142 return 0; 1143 1144 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1145 rx = &priv->rx[i]; 1146 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1147 1148 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1149 napi->napi_id); 1150 if (err) 1151 goto err; 1152 if (gve_is_qpl(priv)) 1153 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1154 MEM_TYPE_PAGE_SHARED, 1155 NULL); 1156 else 1157 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1158 MEM_TYPE_PAGE_POOL, 1159 rx->dqo.page_pool); 1160 if (err) 1161 goto err; 1162 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1163 if (rx->xsk_pool) { 1164 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1165 napi->napi_id); 1166 if (err) 1167 goto err; 1168 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1169 MEM_TYPE_XSK_BUFF_POOL, NULL); 1170 if (err) 1171 goto err; 1172 xsk_pool_set_rxq_info(rx->xsk_pool, 1173 &rx->xsk_rxq); 1174 } 1175 } 1176 1177 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1178 tx_qid = gve_xdp_tx_queue_id(priv, i); 1179 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1180 } 1181 return 0; 1182 1183 err: 1184 for (j = i; j >= 0; j--) { 1185 rx = &priv->rx[j]; 1186 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1187 xdp_rxq_info_unreg(&rx->xdp_rxq); 1188 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1189 xdp_rxq_info_unreg(&rx->xsk_rxq); 1190 } 1191 return err; 1192 } 1193 1194 static void gve_unreg_xdp_info(struct gve_priv *priv) 1195 { 1196 int i, tx_qid; 1197 1198 if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) 1199 return; 1200 1201 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1202 struct gve_rx_ring *rx = &priv->rx[i]; 1203 1204 xdp_rxq_info_unreg(&rx->xdp_rxq); 1205 if (rx->xsk_pool) { 1206 xdp_rxq_info_unreg(&rx->xsk_rxq); 1207 rx->xsk_pool = NULL; 1208 } 1209 } 1210 1211 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1212 tx_qid = gve_xdp_tx_queue_id(priv, i); 1213 priv->tx[tx_qid].xsk_pool = NULL; 1214 } 1215 } 1216 1217 static void gve_drain_page_cache(struct gve_priv *priv) 1218 { 1219 int i; 1220 1221 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1222 page_frag_cache_drain(&priv->rx[i].page_cache); 1223 } 1224 1225 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1226 struct gve_rx_alloc_rings_cfg *cfg) 1227 { 1228 cfg->qcfg_rx = &priv->rx_cfg; 1229 cfg->qcfg_tx = &priv->tx_cfg; 1230 cfg->raw_addressing = !gve_is_qpl(priv); 1231 cfg->enable_header_split = priv->header_split_enabled; 1232 cfg->ring_size = priv->rx_desc_cnt; 1233 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; 1234 cfg->rx = priv->rx; 1235 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1236 } 1237 1238 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1239 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1240 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1241 { 1242 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1243 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1244 } 1245 1246 
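/* The ring start/stop helpers below dispatch to the GQI or DQO
 * implementation based on the queue format in use.
 */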
static void gve_rx_start_ring(struct gve_priv *priv, int i) 1247 { 1248 if (gve_is_gqi(priv)) 1249 gve_rx_start_ring_gqi(priv, i); 1250 else 1251 gve_rx_start_ring_dqo(priv, i); 1252 } 1253 1254 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1255 { 1256 int i; 1257 1258 for (i = 0; i < num_rings; i++) 1259 gve_rx_start_ring(priv, i); 1260 } 1261 1262 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1263 { 1264 if (gve_is_gqi(priv)) 1265 gve_rx_stop_ring_gqi(priv, i); 1266 else 1267 gve_rx_stop_ring_dqo(priv, i); 1268 } 1269 1270 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1271 { 1272 int i; 1273 1274 if (!priv->rx) 1275 return; 1276 1277 for (i = 0; i < num_rings; i++) 1278 gve_rx_stop_ring(priv, i); 1279 } 1280 1281 static void gve_queues_mem_remove(struct gve_priv *priv) 1282 { 1283 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1284 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1285 1286 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1287 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1288 priv->tx = NULL; 1289 priv->rx = NULL; 1290 } 1291 1292 /* The passed-in queue memory is stored into priv and the queues are made live. 1293 * No memory is allocated. Passed-in memory is freed on errors. 1294 */ 1295 static int gve_queues_start(struct gve_priv *priv, 1296 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1297 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1298 { 1299 struct net_device *dev = priv->dev; 1300 int err; 1301 1302 /* Record new resources into priv */ 1303 priv->tx = tx_alloc_cfg->tx; 1304 priv->rx = rx_alloc_cfg->rx; 1305 1306 /* Record new configs into priv */ 1307 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1308 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1309 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1310 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1311 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1312 1313 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1314 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1315 gve_init_sync_stats(priv); 1316 1317 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1318 if (err) 1319 goto stop_and_free_rings; 1320 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1321 if (err) 1322 goto stop_and_free_rings; 1323 1324 err = gve_reg_xdp_info(priv, dev); 1325 if (err) 1326 goto stop_and_free_rings; 1327 1328 if (rx_alloc_cfg->reset_rss) { 1329 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1330 if (err) 1331 goto reset; 1332 } 1333 1334 err = gve_register_qpls(priv); 1335 if (err) 1336 goto reset; 1337 1338 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1339 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1340 1341 err = gve_create_rings(priv); 1342 if (err) 1343 goto reset; 1344 1345 gve_set_device_rings_ok(priv); 1346 1347 if (gve_get_report_stats(priv)) 1348 mod_timer(&priv->stats_report_timer, 1349 round_jiffies(jiffies + 1350 msecs_to_jiffies(priv->stats_report_timer_period))); 1351 1352 gve_turnup(priv); 1353 queue_work(priv->gve_wq, &priv->service_task); 1354 priv->interface_up_cnt++; 1355 return 0; 1356 1357 reset: 1358 if (gve_get_reset_in_progress(priv)) 1359 goto stop_and_free_rings; 1360 gve_reset_and_teardown(priv, true); 1361 /* if this fails there is nothing we can do so just ignore the return */ 1362 gve_reset_recovery(priv, false); 1363 /* return the original error */ 1364 return err; 1365 stop_and_free_rings: 1366 gve_tx_stop_rings(priv, 
gve_num_tx_queues(priv)); 1367 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1368 gve_queues_mem_remove(priv); 1369 return err; 1370 } 1371 1372 static int gve_open(struct net_device *dev) 1373 { 1374 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1375 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1376 struct gve_priv *priv = netdev_priv(dev); 1377 int err; 1378 1379 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1380 1381 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1382 if (err) 1383 return err; 1384 1385 /* No need to free on error: ownership of resources is lost after 1386 * calling gve_queues_start. 1387 */ 1388 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1389 if (err) 1390 return err; 1391 1392 return 0; 1393 } 1394 1395 static int gve_queues_stop(struct gve_priv *priv) 1396 { 1397 int err; 1398 1399 netif_carrier_off(priv->dev); 1400 if (gve_get_device_rings_ok(priv)) { 1401 gve_turndown(priv); 1402 gve_drain_page_cache(priv); 1403 err = gve_destroy_rings(priv); 1404 if (err) 1405 goto err; 1406 err = gve_unregister_qpls(priv); 1407 if (err) 1408 goto err; 1409 gve_clear_device_rings_ok(priv); 1410 } 1411 timer_delete_sync(&priv->stats_report_timer); 1412 1413 gve_unreg_xdp_info(priv); 1414 1415 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1416 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1417 1418 priv->interface_down_cnt++; 1419 return 0; 1420 1421 err: 1422 /* This must have been called from a reset due to the rtnl lock 1423 * so just return at this point. 1424 */ 1425 if (gve_get_reset_in_progress(priv)) 1426 return err; 1427 /* Otherwise reset before returning */ 1428 gve_reset_and_teardown(priv, true); 1429 return gve_reset_recovery(priv, false); 1430 } 1431 1432 static int gve_close(struct net_device *dev) 1433 { 1434 struct gve_priv *priv = netdev_priv(dev); 1435 int err; 1436 1437 err = gve_queues_stop(priv); 1438 if (err) 1439 return err; 1440 1441 gve_queues_mem_remove(priv); 1442 return 0; 1443 } 1444 1445 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1446 { 1447 if (!gve_get_napi_enabled(priv)) 1448 return; 1449 1450 if (link_status == netif_carrier_ok(priv->dev)) 1451 return; 1452 1453 if (link_status) { 1454 netdev_info(priv->dev, "Device link is up.\n"); 1455 netif_carrier_on(priv->dev); 1456 } else { 1457 netdev_info(priv->dev, "Device link is down.\n"); 1458 netif_carrier_off(priv->dev); 1459 } 1460 } 1461 1462 static int gve_configure_rings_xdp(struct gve_priv *priv, 1463 u16 num_xdp_rings) 1464 { 1465 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1466 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1467 1468 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1469 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1470 1471 rx_alloc_cfg.xdp = !!num_xdp_rings; 1472 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1473 } 1474 1475 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1476 struct netlink_ext_ack *extack) 1477 { 1478 struct bpf_prog *old_prog; 1479 int err = 0; 1480 u32 status; 1481 1482 old_prog = READ_ONCE(priv->xdp_prog); 1483 if (!netif_running(priv->dev)) { 1484 WRITE_ONCE(priv->xdp_prog, prog); 1485 if (old_prog) 1486 bpf_prog_put(old_prog); 1487 1488 /* Update priv XDP queue configuration */ 1489 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
1490 priv->rx_cfg.num_queues : 0; 1491 return 0; 1492 } 1493 1494 if (!old_prog && prog) 1495 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1496 else if (old_prog && !prog) 1497 err = gve_configure_rings_xdp(priv, 0); 1498 1499 if (err) 1500 goto out; 1501 1502 WRITE_ONCE(priv->xdp_prog, prog); 1503 if (old_prog) 1504 bpf_prog_put(old_prog); 1505 1506 out: 1507 status = ioread32be(&priv->reg_bar0->device_status); 1508 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1509 return err; 1510 } 1511 1512 static int gve_xsk_pool_enable(struct net_device *dev, 1513 struct xsk_buff_pool *pool, 1514 u16 qid) 1515 { 1516 struct gve_priv *priv = netdev_priv(dev); 1517 struct napi_struct *napi; 1518 struct gve_rx_ring *rx; 1519 int tx_qid; 1520 int err; 1521 1522 if (qid >= priv->rx_cfg.num_queues) { 1523 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1524 return -EINVAL; 1525 } 1526 if (xsk_pool_get_rx_frame_size(pool) < 1527 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1528 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1529 return -EINVAL; 1530 } 1531 1532 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1533 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1534 if (err) 1535 return err; 1536 1537 /* If XDP prog is not installed or interface is down, return. */ 1538 if (!priv->xdp_prog || !netif_running(dev)) 1539 return 0; 1540 1541 rx = &priv->rx[qid]; 1542 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1543 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1544 if (err) 1545 goto err; 1546 1547 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1548 MEM_TYPE_XSK_BUFF_POOL, NULL); 1549 if (err) 1550 goto err; 1551 1552 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1553 rx->xsk_pool = pool; 1554 1555 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1556 priv->tx[tx_qid].xsk_pool = pool; 1557 1558 return 0; 1559 err: 1560 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1561 xdp_rxq_info_unreg(&rx->xsk_rxq); 1562 1563 xsk_pool_dma_unmap(pool, 1564 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1565 return err; 1566 } 1567 1568 static int gve_xsk_pool_disable(struct net_device *dev, 1569 u16 qid) 1570 { 1571 struct gve_priv *priv = netdev_priv(dev); 1572 struct napi_struct *napi_rx; 1573 struct napi_struct *napi_tx; 1574 struct xsk_buff_pool *pool; 1575 int tx_qid; 1576 1577 pool = xsk_get_pool_from_qid(dev, qid); 1578 if (!pool) 1579 return -EINVAL; 1580 if (qid >= priv->rx_cfg.num_queues) 1581 return -EINVAL; 1582 1583 /* If XDP prog is not installed or interface is down, unmap DMA and 1584 * return. 
1585 */ 1586 if (!priv->xdp_prog || !netif_running(dev)) 1587 goto done; 1588 1589 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1590 napi_disable(napi_rx); /* make sure current rx poll is done */ 1591 1592 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1593 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1594 napi_disable(napi_tx); /* make sure current tx poll is done */ 1595 1596 priv->rx[qid].xsk_pool = NULL; 1597 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1598 priv->tx[tx_qid].xsk_pool = NULL; 1599 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1600 1601 napi_enable(napi_rx); 1602 if (gve_rx_work_pending(&priv->rx[qid])) 1603 napi_schedule(napi_rx); 1604 1605 napi_enable(napi_tx); 1606 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1607 napi_schedule(napi_tx); 1608 1609 done: 1610 xsk_pool_dma_unmap(pool, 1611 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1612 return 0; 1613 } 1614 1615 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1616 { 1617 struct gve_priv *priv = netdev_priv(dev); 1618 struct napi_struct *napi; 1619 1620 if (!gve_get_napi_enabled(priv)) 1621 return -ENETDOWN; 1622 1623 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1624 return -EINVAL; 1625 1626 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1627 if (!napi_if_scheduled_mark_missed(napi)) { 1628 /* Call local_bh_enable to trigger SoftIRQ processing */ 1629 local_bh_disable(); 1630 napi_schedule(napi); 1631 local_bh_enable(); 1632 } 1633 1634 return 0; 1635 } 1636 1637 static int verify_xdp_configuration(struct net_device *dev) 1638 { 1639 struct gve_priv *priv = netdev_priv(dev); 1640 u16 max_xdp_mtu; 1641 1642 if (dev->features & NETIF_F_LRO) { 1643 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1644 return -EOPNOTSUPP; 1645 } 1646 1647 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1648 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1649 priv->queue_format); 1650 return -EOPNOTSUPP; 1651 } 1652 1653 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1654 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1655 max_xdp_mtu -= GVE_RX_PAD; 1656 1657 if (dev->mtu > max_xdp_mtu) { 1658 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1659 dev->mtu); 1660 return -EOPNOTSUPP; 1661 } 1662 1663 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1664 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1665 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1666 priv->rx_cfg.num_queues, 1667 priv->tx_cfg.num_queues, 1668 priv->tx_cfg.max_queues); 1669 return -EINVAL; 1670 } 1671 return 0; 1672 } 1673 1674 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1675 { 1676 struct gve_priv *priv = netdev_priv(dev); 1677 int err; 1678 1679 err = verify_xdp_configuration(dev); 1680 if (err) 1681 return err; 1682 switch (xdp->command) { 1683 case XDP_SETUP_PROG: 1684 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1685 case XDP_SETUP_XSK_POOL: 1686 if (xdp->xsk.pool) 1687 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1688 else 1689 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1690 default: 1691 return -EINVAL; 1692 } 1693 } 1694 1695 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1696 { 1697 struct gve_rss_config 
*rss_config = &priv->rss_config;
	struct ethtool_rxfh_param rxfh = {0};
	u16 i;

	if (!priv->cache_rss_config)
		return 0;

	for (i = 0; i < priv->rss_lut_size; i++)
		rss_config->hash_lut[i] =
			ethtool_rxfh_indir_default(i, num_queues);

	netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);

	rxfh.hfunc = ETH_RSS_HASH_TOP;

	return gve_adminq_configure_rss(priv, &rxfh);
}

int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}

int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
		 */
		gve_turndown(priv);
		return err;
	}

	return 0;
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_rx_queue_config new_rx_config,
		      struct gve_tx_queue_config new_tx_config,
		      bool reset_rss)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg_rx = &new_rx_config;
	rx_alloc_cfg.reset_rss = reset_rss;

	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up.
*/ 1783 if (reset_rss) { 1784 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1785 if (err) 1786 return err; 1787 } 1788 priv->tx_cfg = new_tx_config; 1789 priv->rx_cfg = new_rx_config; 1790 1791 return 0; 1792 } 1793 1794 static void gve_turndown(struct gve_priv *priv) 1795 { 1796 int idx; 1797 1798 if (netif_carrier_ok(priv->dev)) 1799 netif_carrier_off(priv->dev); 1800 1801 if (!gve_get_napi_enabled(priv)) 1802 return; 1803 1804 /* Disable napi to prevent more work from coming in */ 1805 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1806 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1807 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1808 1809 if (!gve_tx_was_added_to_block(priv, idx)) 1810 continue; 1811 1812 if (idx < priv->tx_cfg.num_queues) 1813 netif_queue_set_napi(priv->dev, idx, 1814 NETDEV_QUEUE_TYPE_TX, NULL); 1815 1816 napi_disable_locked(&block->napi); 1817 } 1818 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1819 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1820 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1821 1822 if (!gve_rx_was_added_to_block(priv, idx)) 1823 continue; 1824 1825 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1826 NULL); 1827 napi_disable_locked(&block->napi); 1828 } 1829 1830 /* Stop tx queues */ 1831 netif_tx_disable(priv->dev); 1832 1833 xdp_features_clear_redirect_target(priv->dev); 1834 1835 gve_clear_napi_enabled(priv); 1836 gve_clear_report_stats(priv); 1837 1838 /* Make sure that all traffic is finished processing. */ 1839 synchronize_net(); 1840 } 1841 1842 static void gve_turnup(struct gve_priv *priv) 1843 { 1844 int idx; 1845 1846 /* Start the tx queues */ 1847 netif_tx_start_all_queues(priv->dev); 1848 1849 /* Enable napi and unmask interrupts for all queues */ 1850 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1851 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1852 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1853 1854 if (!gve_tx_was_added_to_block(priv, idx)) 1855 continue; 1856 1857 napi_enable_locked(&block->napi); 1858 1859 if (idx < priv->tx_cfg.num_queues) 1860 netif_queue_set_napi(priv->dev, idx, 1861 NETDEV_QUEUE_TYPE_TX, 1862 &block->napi); 1863 1864 if (gve_is_gqi(priv)) { 1865 iowrite32be(0, gve_irq_doorbell(priv, block)); 1866 } else { 1867 gve_set_itr_coalesce_usecs_dqo(priv, block, 1868 priv->tx_coalesce_usecs); 1869 } 1870 1871 /* Any descs written by the NIC before this barrier will be 1872 * handled by the one-off napi schedule below. Whereas any 1873 * descs after the barrier will generate interrupts. 1874 */ 1875 mb(); 1876 napi_schedule(&block->napi); 1877 } 1878 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1879 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1880 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1881 1882 if (!gve_rx_was_added_to_block(priv, idx)) 1883 continue; 1884 1885 napi_enable_locked(&block->napi); 1886 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1887 &block->napi); 1888 1889 if (gve_is_gqi(priv)) { 1890 iowrite32be(0, gve_irq_doorbell(priv, block)); 1891 } else { 1892 gve_set_itr_coalesce_usecs_dqo(priv, block, 1893 priv->rx_coalesce_usecs); 1894 } 1895 1896 /* Any descs written by the NIC before this barrier will be 1897 * handled by the one-off napi schedule below. Whereas any 1898 * descs after the barrier will generate interrupts. 
1899 */ 1900 mb(); 1901 napi_schedule(&block->napi); 1902 } 1903 1904 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1905 xdp_features_set_redirect_target(priv->dev, false); 1906 1907 gve_set_napi_enabled(priv); 1908 } 1909 1910 static void gve_turnup_and_check_status(struct gve_priv *priv) 1911 { 1912 u32 status; 1913 1914 gve_turnup(priv); 1915 status = ioread32be(&priv->reg_bar0->device_status); 1916 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1917 } 1918 1919 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1920 { 1921 struct gve_notify_block *block; 1922 struct gve_tx_ring *tx = NULL; 1923 struct gve_priv *priv; 1924 u32 last_nic_done; 1925 u32 current_time; 1926 u32 ntfy_idx; 1927 1928 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1929 priv = netdev_priv(dev); 1930 if (txqueue > priv->tx_cfg.num_queues) 1931 goto reset; 1932 1933 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1934 if (ntfy_idx >= priv->num_ntfy_blks) 1935 goto reset; 1936 1937 block = &priv->ntfy_blocks[ntfy_idx]; 1938 tx = block->tx; 1939 1940 current_time = jiffies_to_msecs(jiffies); 1941 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1942 goto reset; 1943 1944 /* Check to see if there are missed completions, which will allow us to 1945 * kick the queue. 1946 */ 1947 last_nic_done = gve_tx_load_event_counter(priv, tx); 1948 if (last_nic_done - tx->done) { 1949 netdev_info(dev, "Kicking queue %d", txqueue); 1950 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1951 napi_schedule(&block->napi); 1952 tx->last_kick_msec = current_time; 1953 goto out; 1954 } // Else reset. 1955 1956 reset: 1957 gve_schedule_reset(priv); 1958 1959 out: 1960 if (tx) 1961 tx->queue_timeout++; 1962 priv->tx_timeo_cnt++; 1963 } 1964 1965 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 1966 { 1967 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 1968 return GVE_MAX_RX_BUFFER_SIZE; 1969 else 1970 return GVE_DEFAULT_RX_BUFFER_SIZE; 1971 } 1972 1973 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 1974 bool gve_header_split_supported(const struct gve_priv *priv) 1975 { 1976 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 1977 } 1978 1979 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 1980 { 1981 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1982 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1983 bool enable_hdr_split; 1984 int err = 0; 1985 1986 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 1987 return 0; 1988 1989 if (!gve_header_split_supported(priv)) { 1990 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 1991 return -EOPNOTSUPP; 1992 } 1993 1994 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 1995 enable_hdr_split = true; 1996 else 1997 enable_hdr_split = false; 1998 1999 if (enable_hdr_split == priv->header_split_enabled) 2000 return 0; 2001 2002 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2003 2004 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2005 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2006 2007 if (netif_running(priv->dev)) 2008 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2009 return err; 2010 } 2011 2012 static int gve_set_features(struct net_device *netdev, 2013 netdev_features_t features) 2014 { 2015 const netdev_features_t orig_features = netdev->features; 2016 struct gve_tx_alloc_rings_cfg 
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		netdev_lock(priv->dev);
		gve_reset(priv, false);
		netdev_unlock(priv->dev);
		rtnl_unlock();
	}
}

void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics.
			 */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag(priv->dev, xdp_features);
}

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}
	priv->tx_cfg.num_xdp_queues = 0;

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

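	/* As in gve_rx_queue_mem_free() above, ring allocation is dispatched on
	 * the negotiated queue format: GQI and DQO rings use different
	 * descriptor layouts, so each format has its own alloc/free helpers.
	 */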
	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;
	priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	netdev_unlock(netdev);
	rtnl_unlock();
}

#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
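	/* Record whether the interface was up so gve_resume() can bring it
	 * back up through gve_reset_recovery() (see priv->up_before_suspend
	 * below).
	 */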
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	netdev_unlock(netdev);
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	netdev_lock(netdev);
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	netdev_unlock(netdev);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);