1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2024 Google LLC 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/irq.h> 13 #include <linux/module.h> 14 #include <linux/pci.h> 15 #include <linux/sched.h> 16 #include <linux/timer.h> 17 #include <linux/workqueue.h> 18 #include <linux/utsname.h> 19 #include <linux/version.h> 20 #include <net/netdev_queues.h> 21 #include <net/sch_generic.h> 22 #include <net/xdp_sock_drv.h> 23 #include "gve.h" 24 #include "gve_dqo.h" 25 #include "gve_adminq.h" 26 #include "gve_register.h" 27 #include "gve_utils.h" 28 29 #define GVE_DEFAULT_RX_COPYBREAK (256) 30 31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 32 #define GVE_VERSION "1.0.0" 33 #define GVE_VERSION_PREFIX "GVE-" 34 35 // Minimum amount of time between queue kicks in msec (10 seconds) 36 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 37 38 char gve_driver_name[] = "gve"; 39 const char gve_version_str[] = GVE_VERSION; 40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 41 42 static int gve_verify_driver_compatibility(struct gve_priv *priv) 43 { 44 int err; 45 struct gve_driver_info *driver_info; 46 dma_addr_t driver_info_bus; 47 48 driver_info = dma_alloc_coherent(&priv->pdev->dev, 49 sizeof(struct gve_driver_info), 50 &driver_info_bus, GFP_KERNEL); 51 if (!driver_info) 52 return -ENOMEM; 53 54 *driver_info = (struct gve_driver_info) { 55 .os_type = 1, /* Linux */ 56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 59 .driver_capability_flags = { 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 64 }, 65 }; 66 strscpy(driver_info->os_version_str1, utsname()->release, 67 sizeof(driver_info->os_version_str1)); 68 strscpy(driver_info->os_version_str2, utsname()->version, 69 sizeof(driver_info->os_version_str2)); 70 71 err = gve_adminq_verify_driver_compatibility(priv, 72 sizeof(struct gve_driver_info), 73 driver_info_bus); 74 75 /* It's ok if the device doesn't support this */ 76 if (err == -EOPNOTSUPP) 77 err = 0; 78 79 dma_free_coherent(&priv->pdev->dev, 80 sizeof(struct gve_driver_info), 81 driver_info, driver_info_bus); 82 return err; 83 } 84 85 static netdev_features_t gve_features_check(struct sk_buff *skb, 86 struct net_device *dev, 87 netdev_features_t features) 88 { 89 struct gve_priv *priv = netdev_priv(dev); 90 91 if (!gve_is_gqi(priv)) 92 return gve_features_check_dqo(skb, dev, features); 93 94 return features; 95 } 96 97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 98 { 99 struct gve_priv *priv = netdev_priv(dev); 100 101 if (gve_is_gqi(priv)) 102 return gve_tx(skb, dev); 103 else 104 return gve_tx_dqo(skb, dev); 105 } 106 107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 108 { 109 struct gve_priv *priv = netdev_priv(dev); 110 unsigned int start; 111 u64 packets, bytes; 112 int num_tx_queues; 113 int ring; 114 115 num_tx_queues = gve_num_tx_queues(priv); 116 if (priv->rx) { 117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 118 do { 119 start = 120 u64_stats_fetch_begin(&priv->rx[ring].statss); 121 packets = priv->rx[ring].rpackets; 122 bytes = 
priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_rss_config_cache(struct gve_priv *priv) 188 { 189 struct gve_rss_config *rss_config = &priv->rss_config; 190 191 if (!priv->cache_rss_config) 192 return 0; 193 194 rss_config->hash_key = kcalloc(priv->rss_key_size, 195 sizeof(rss_config->hash_key[0]), 196 GFP_KERNEL); 197 if (!rss_config->hash_key) 198 return -ENOMEM; 199 200 rss_config->hash_lut = kcalloc(priv->rss_lut_size, 201 sizeof(rss_config->hash_lut[0]), 202 GFP_KERNEL); 203 if (!rss_config->hash_lut) 204 goto free_rss_key_cache; 205 206 return 0; 207 208 free_rss_key_cache: 209 kfree(rss_config->hash_key); 210 rss_config->hash_key = NULL; 211 return -ENOMEM; 212 } 213 214 static void gve_free_rss_config_cache(struct gve_priv *priv) 215 { 216 struct gve_rss_config *rss_config = &priv->rss_config; 217 218 kfree(rss_config->hash_key); 219 kfree(rss_config->hash_lut); 220 221 memset(rss_config, 0, sizeof(*rss_config)); 222 } 223 224 static int gve_alloc_counter_array(struct gve_priv *priv) 225 { 226 priv->counter_array = 227 dma_alloc_coherent(&priv->pdev->dev, 228 priv->num_event_counters * 229 sizeof(*priv->counter_array), 230 &priv->counter_array_bus, GFP_KERNEL); 231 if (!priv->counter_array) 232 return -ENOMEM; 233 234 return 0; 235 } 236 237 static void gve_free_counter_array(struct gve_priv *priv) 238 { 239 if (!priv->counter_array) 240 return; 241 242 dma_free_coherent(&priv->pdev->dev, 243 priv->num_event_counters * 244 sizeof(*priv->counter_array), 245 priv->counter_array, priv->counter_array_bus); 246 priv->counter_array = NULL; 247 } 248 249 /* NIC requests to report stats */ 250 static void gve_stats_report_task(struct work_struct *work) 251 { 252 struct gve_priv *priv = container_of(work, struct gve_priv, 
253 stats_report_task); 254 if (gve_get_do_report_stats(priv)) { 255 gve_handle_report_stats(priv); 256 gve_clear_do_report_stats(priv); 257 } 258 } 259 260 static void gve_stats_report_schedule(struct gve_priv *priv) 261 { 262 if (!gve_get_probe_in_progress(priv) && 263 !gve_get_reset_in_progress(priv)) { 264 gve_set_do_report_stats(priv); 265 queue_work(priv->gve_wq, &priv->stats_report_task); 266 } 267 } 268 269 static void gve_stats_report_timer(struct timer_list *t) 270 { 271 struct gve_priv *priv = timer_container_of(priv, t, 272 stats_report_timer); 273 274 mod_timer(&priv->stats_report_timer, 275 round_jiffies(jiffies + 276 msecs_to_jiffies(priv->stats_report_timer_period))); 277 gve_stats_report_schedule(priv); 278 } 279 280 static int gve_alloc_stats_report(struct gve_priv *priv) 281 { 282 int tx_stats_num, rx_stats_num; 283 284 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 285 gve_num_tx_queues(priv); 286 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 287 priv->rx_cfg.num_queues; 288 priv->stats_report_len = struct_size(priv->stats_report, stats, 289 size_add(tx_stats_num, rx_stats_num)); 290 priv->stats_report = 291 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 292 &priv->stats_report_bus, GFP_KERNEL); 293 if (!priv->stats_report) 294 return -ENOMEM; 295 /* Set up timer for the report-stats task */ 296 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 297 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 298 return 0; 299 } 300 301 static void gve_free_stats_report(struct gve_priv *priv) 302 { 303 if (!priv->stats_report) 304 return; 305 306 timer_delete_sync(&priv->stats_report_timer); 307 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 308 priv->stats_report, priv->stats_report_bus); 309 priv->stats_report = NULL; 310 } 311 312 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 313 { 314 struct gve_priv *priv = arg; 315 316 queue_work(priv->gve_wq, &priv->service_task); 317 return IRQ_HANDLED; 318 } 319 320 static irqreturn_t gve_intr(int irq, void *arg) 321 { 322 struct gve_notify_block *block = arg; 323 struct gve_priv *priv = block->priv; 324 325 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 326 napi_schedule_irqoff(&block->napi); 327 return IRQ_HANDLED; 328 } 329 330 static irqreturn_t gve_intr_dqo(int irq, void *arg) 331 { 332 struct gve_notify_block *block = arg; 333 334 /* Interrupts are automatically masked */ 335 napi_schedule_irqoff(&block->napi); 336 return IRQ_HANDLED; 337 } 338 339 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 340 { 341 int cpu_curr = smp_processor_id(); 342 const struct cpumask *aff_mask; 343 344 aff_mask = irq_get_effective_affinity_mask(irq); 345 if (unlikely(!aff_mask)) 346 return 1; 347 348 return cpumask_test_cpu(cpu_curr, aff_mask); 349 } 350 351 int gve_napi_poll(struct napi_struct *napi, int budget) 352 { 353 struct gve_notify_block *block; 354 __be32 __iomem *irq_doorbell; 355 bool reschedule = false; 356 struct gve_priv *priv; 357 int work_done = 0; 358 359 block = container_of(napi, struct gve_notify_block, napi); 360 priv = block->priv; 361 362 if (block->tx) { 363 if (block->tx->q_num < priv->tx_cfg.num_queues) 364 reschedule |= gve_tx_poll(block, budget); 365 else if (budget) 366 reschedule |= gve_xdp_poll(block, budget); 367 } 368 369 if (!budget) 370 return 0; 371 372 if (block->rx) { 373 work_done = gve_rx_poll(block, budget); 374 375 /* Poll XSK TX as part of RX NAPI. 
Setup re-poll based on max of 376 * TX and RX work done. 377 */ 378 if (priv->xdp_prog) 379 work_done = max_t(int, work_done, 380 gve_xsk_tx_poll(block, budget)); 381 382 reschedule |= work_done == budget; 383 } 384 385 if (reschedule) 386 return budget; 387 388 /* Complete processing - don't unmask irq if busy polling is enabled */ 389 if (likely(napi_complete_done(napi, work_done))) { 390 irq_doorbell = gve_irq_doorbell(priv, block); 391 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 392 393 /* Ensure IRQ ACK is visible before we check pending work. 394 * If queue had issued updates, it would be truly visible. 395 */ 396 mb(); 397 398 if (block->tx) 399 reschedule |= gve_tx_clean_pending(priv, block->tx); 400 if (block->rx) 401 reschedule |= gve_rx_work_pending(block->rx); 402 403 if (reschedule && napi_schedule(napi)) 404 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 405 } 406 return work_done; 407 } 408 409 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 410 { 411 struct gve_notify_block *block = 412 container_of(napi, struct gve_notify_block, napi); 413 struct gve_priv *priv = block->priv; 414 bool reschedule = false; 415 int work_done = 0; 416 417 if (block->tx) { 418 if (block->tx->q_num < priv->tx_cfg.num_queues) 419 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 420 else 421 reschedule |= gve_xdp_poll_dqo(block); 422 } 423 424 if (!budget) 425 return 0; 426 427 if (block->rx) { 428 work_done = gve_rx_poll_dqo(block, budget); 429 reschedule |= work_done == budget; 430 } 431 432 if (reschedule) { 433 /* Reschedule by returning budget only if already on the correct 434 * cpu. 435 */ 436 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 437 return budget; 438 439 /* If not on the cpu with which this queue's irq has affinity 440 * with, we avoid rescheduling napi and arm the irq instead so 441 * that napi gets rescheduled back eventually onto the right 442 * cpu. 443 */ 444 if (work_done == budget) 445 work_done--; 446 } 447 448 if (likely(napi_complete_done(napi, work_done))) { 449 /* Enable interrupts again. 450 * 451 * We don't need to repoll afterwards because HW supports the 452 * PCI MSI-X PBA feature. 453 * 454 * Another interrupt would be triggered if a new event came in 455 * since the last one. 
456 */ 457 gve_write_irq_doorbell_dqo(priv, block, 458 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 459 } 460 461 return work_done; 462 } 463 464 static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) 465 { 466 if (priv->numa_node == NUMA_NO_NODE) 467 return cpu_all_mask; 468 else 469 return cpumask_of_node(priv->numa_node); 470 } 471 472 static int gve_alloc_notify_blocks(struct gve_priv *priv) 473 { 474 int num_vecs_requested = priv->num_ntfy_blks + 1; 475 const struct cpumask *node_mask; 476 unsigned int cur_cpu; 477 int vecs_enabled; 478 int i, j; 479 int err; 480 481 priv->msix_vectors = kvcalloc(num_vecs_requested, 482 sizeof(*priv->msix_vectors), GFP_KERNEL); 483 if (!priv->msix_vectors) 484 return -ENOMEM; 485 for (i = 0; i < num_vecs_requested; i++) 486 priv->msix_vectors[i].entry = i; 487 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 488 GVE_MIN_MSIX, num_vecs_requested); 489 if (vecs_enabled < 0) { 490 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 491 GVE_MIN_MSIX, vecs_enabled); 492 err = vecs_enabled; 493 goto abort_with_msix_vectors; 494 } 495 if (vecs_enabled != num_vecs_requested) { 496 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 497 int vecs_per_type = new_num_ntfy_blks / 2; 498 int vecs_left = new_num_ntfy_blks % 2; 499 500 priv->num_ntfy_blks = new_num_ntfy_blks; 501 priv->mgmt_msix_idx = priv->num_ntfy_blks; 502 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 503 vecs_per_type); 504 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 505 vecs_per_type + vecs_left); 506 dev_err(&priv->pdev->dev, 507 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 508 vecs_enabled, priv->tx_cfg.max_queues, 509 priv->rx_cfg.max_queues); 510 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 511 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 512 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 513 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 514 } 515 516 /* Setup Management Vector - the last vector */ 517 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 518 pci_name(priv->pdev)); 519 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 520 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 521 if (err) { 522 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 523 goto abort_with_msix_enabled; 524 } 525 priv->irq_db_indices = 526 dma_alloc_coherent(&priv->pdev->dev, 527 priv->num_ntfy_blks * 528 sizeof(*priv->irq_db_indices), 529 &priv->irq_db_indices_bus, GFP_KERNEL); 530 if (!priv->irq_db_indices) { 531 err = -ENOMEM; 532 goto abort_with_mgmt_vector; 533 } 534 535 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 536 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 537 if (!priv->ntfy_blocks) { 538 err = -ENOMEM; 539 goto abort_with_irq_db_indices; 540 } 541 542 /* Setup the other blocks - the first n-1 vectors */ 543 node_mask = gve_get_node_mask(priv); 544 cur_cpu = cpumask_first(node_mask); 545 for (i = 0; i < priv->num_ntfy_blks; i++) { 546 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 547 int msix_idx = i; 548 549 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 550 i, pci_name(priv->pdev)); 551 block->priv = priv; 552 err = request_irq(priv->msix_vectors[msix_idx].vector, 553 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 554 0, block->name, block); 555 if (err) { 556 dev_err(&priv->pdev->dev, 557 "Failed to receive msix vector %d\n", i); 558 goto abort_with_some_ntfy_blocks; 559 } 560 block->irq = priv->msix_vectors[msix_idx].vector; 561 irq_set_affinity_and_hint(block->irq, 562 cpumask_of(cur_cpu)); 563 block->irq_db_index = &priv->irq_db_indices[i].index; 564 565 cur_cpu = cpumask_next(cur_cpu, node_mask); 566 /* Wrap once CPUs in the node have been exhausted, or when 567 * starting RX queue affinities. TX and RX queues of the same 568 * index share affinity. 569 */ 570 if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) 571 cur_cpu = cpumask_first(node_mask); 572 } 573 return 0; 574 abort_with_some_ntfy_blocks: 575 for (j = 0; j < i; j++) { 576 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 577 int msix_idx = j; 578 579 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 580 NULL); 581 free_irq(priv->msix_vectors[msix_idx].vector, block); 582 block->irq = 0; 583 } 584 kvfree(priv->ntfy_blocks); 585 priv->ntfy_blocks = NULL; 586 abort_with_irq_db_indices: 587 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 588 sizeof(*priv->irq_db_indices), 589 priv->irq_db_indices, priv->irq_db_indices_bus); 590 priv->irq_db_indices = NULL; 591 abort_with_mgmt_vector: 592 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 593 abort_with_msix_enabled: 594 pci_disable_msix(priv->pdev); 595 abort_with_msix_vectors: 596 kvfree(priv->msix_vectors); 597 priv->msix_vectors = NULL; 598 return err; 599 } 600 601 static void gve_free_notify_blocks(struct gve_priv *priv) 602 { 603 int i; 604 605 if (!priv->msix_vectors) 606 return; 607 608 /* Free the irqs */ 609 for (i = 0; i < priv->num_ntfy_blks; i++) { 610 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 611 int msix_idx = i; 612 613 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 614 NULL); 615 free_irq(priv->msix_vectors[msix_idx].vector, block); 616 block->irq = 0; 617 } 618 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 619 kvfree(priv->ntfy_blocks); 620 priv->ntfy_blocks = NULL; 621 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 622 sizeof(*priv->irq_db_indices), 623 priv->irq_db_indices, priv->irq_db_indices_bus); 624 priv->irq_db_indices = NULL; 625 pci_disable_msix(priv->pdev); 626 kvfree(priv->msix_vectors); 627 priv->msix_vectors = NULL; 628 } 629 630 static int gve_setup_device_resources(struct gve_priv *priv) 631 { 632 int err; 633 634 err = gve_alloc_flow_rule_caches(priv); 635 if (err) 636 return err; 637 err = gve_alloc_rss_config_cache(priv); 638 if (err) 639 goto abort_with_flow_rule_caches; 640 err = gve_alloc_counter_array(priv); 641 if (err) 642 goto abort_with_rss_config_cache; 643 err = gve_init_clock(priv); 644 if (err) 645 goto abort_with_counter; 646 err = gve_alloc_notify_blocks(priv); 647 if (err) 648 goto abort_with_clock; 649 err = gve_alloc_stats_report(priv); 650 if (err) 651 goto abort_with_ntfy_blocks; 652 err = gve_adminq_configure_device_resources(priv, 653 priv->counter_array_bus, 654 priv->num_event_counters, 655 priv->irq_db_indices_bus, 656 priv->num_ntfy_blks); 657 if (unlikely(err)) { 658 dev_err(&priv->pdev->dev, 659 "could not setup device_resources: err=%d\n", err); 660 err = -ENXIO; 661 goto abort_with_stats_report; 662 } 663 664 if (!gve_is_gqi(priv)) { 665 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 666 GFP_KERNEL); 667 if (!priv->ptype_lut_dqo) { 668 err = -ENOMEM; 669 goto 
abort_with_stats_report; 670 } 671 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 672 if (err) { 673 dev_err(&priv->pdev->dev, 674 "Failed to get ptype map: err=%d\n", err); 675 goto abort_with_ptype_lut; 676 } 677 } 678 679 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 680 if (err) { 681 dev_err(&priv->pdev->dev, "Failed to init RSS config"); 682 goto abort_with_ptype_lut; 683 } 684 685 err = gve_adminq_report_stats(priv, priv->stats_report_len, 686 priv->stats_report_bus, 687 GVE_STATS_REPORT_TIMER_PERIOD); 688 if (err) 689 dev_err(&priv->pdev->dev, 690 "Failed to report stats: err=%d\n", err); 691 gve_set_device_resources_ok(priv); 692 return 0; 693 694 abort_with_ptype_lut: 695 kvfree(priv->ptype_lut_dqo); 696 priv->ptype_lut_dqo = NULL; 697 abort_with_stats_report: 698 gve_free_stats_report(priv); 699 abort_with_ntfy_blocks: 700 gve_free_notify_blocks(priv); 701 abort_with_clock: 702 gve_teardown_clock(priv); 703 abort_with_counter: 704 gve_free_counter_array(priv); 705 abort_with_rss_config_cache: 706 gve_free_rss_config_cache(priv); 707 abort_with_flow_rule_caches: 708 gve_free_flow_rule_caches(priv); 709 710 return err; 711 } 712 713 static void gve_trigger_reset(struct gve_priv *priv); 714 715 static void gve_teardown_device_resources(struct gve_priv *priv) 716 { 717 int err; 718 719 /* Tell device its resources are being freed */ 720 if (gve_get_device_resources_ok(priv)) { 721 err = gve_flow_rules_reset(priv); 722 if (err) { 723 dev_err(&priv->pdev->dev, 724 "Failed to reset flow rules: err=%d\n", err); 725 gve_trigger_reset(priv); 726 } 727 /* detach the stats report */ 728 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 729 if (err) { 730 dev_err(&priv->pdev->dev, 731 "Failed to detach stats report: err=%d\n", err); 732 gve_trigger_reset(priv); 733 } 734 err = gve_adminq_deconfigure_device_resources(priv); 735 if (err) { 736 dev_err(&priv->pdev->dev, 737 "Could not deconfigure device resources: err=%d\n", 738 err); 739 gve_trigger_reset(priv); 740 } 741 } 742 743 kvfree(priv->ptype_lut_dqo); 744 priv->ptype_lut_dqo = NULL; 745 746 gve_free_flow_rule_caches(priv); 747 gve_free_rss_config_cache(priv); 748 gve_free_counter_array(priv); 749 gve_free_notify_blocks(priv); 750 gve_free_stats_report(priv); 751 gve_teardown_clock(priv); 752 gve_clear_device_resources_ok(priv); 753 } 754 755 static int gve_unregister_qpl(struct gve_priv *priv, 756 struct gve_queue_page_list *qpl) 757 { 758 int err; 759 760 if (!qpl) 761 return 0; 762 763 err = gve_adminq_unregister_page_list(priv, qpl->id); 764 if (err) { 765 netif_err(priv, drv, priv->dev, 766 "Failed to unregister queue page list %d\n", 767 qpl->id); 768 return err; 769 } 770 771 priv->num_registered_pages -= qpl->num_entries; 772 return 0; 773 } 774 775 static int gve_register_qpl(struct gve_priv *priv, 776 struct gve_queue_page_list *qpl) 777 { 778 int pages; 779 int err; 780 781 if (!qpl) 782 return 0; 783 784 pages = qpl->num_entries; 785 786 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 787 netif_err(priv, drv, priv->dev, 788 "Reached max number of registered pages %llu > %llu\n", 789 pages + priv->num_registered_pages, 790 priv->max_registered_pages); 791 return -EINVAL; 792 } 793 794 err = gve_adminq_register_page_list(priv, qpl); 795 if (err) { 796 netif_err(priv, drv, priv->dev, 797 "failed to register queue page list %d\n", 798 qpl->id); 799 return err; 800 } 801 802 priv->num_registered_pages += pages; 803 return 0; 804 } 805 806 static struct 
gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 807 { 808 struct gve_tx_ring *tx = &priv->tx[idx]; 809 810 if (gve_is_gqi(priv)) 811 return tx->tx_fifo.qpl; 812 else 813 return tx->dqo.qpl; 814 } 815 816 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 817 { 818 struct gve_rx_ring *rx = &priv->rx[idx]; 819 820 if (gve_is_gqi(priv)) 821 return rx->data.qpl; 822 else 823 return rx->dqo.qpl; 824 } 825 826 static int gve_register_qpls(struct gve_priv *priv) 827 { 828 int num_tx_qpls, num_rx_qpls; 829 int err; 830 int i; 831 832 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 833 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 834 835 for (i = 0; i < num_tx_qpls; i++) { 836 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 837 if (err) 838 return err; 839 } 840 841 for (i = 0; i < num_rx_qpls; i++) { 842 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 843 if (err) 844 return err; 845 } 846 847 return 0; 848 } 849 850 static int gve_unregister_qpls(struct gve_priv *priv) 851 { 852 int num_tx_qpls, num_rx_qpls; 853 int err; 854 int i; 855 856 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); 857 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 858 859 for (i = 0; i < num_tx_qpls; i++) { 860 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 861 /* This failure will trigger a reset - no need to clean */ 862 if (err) 863 return err; 864 } 865 866 for (i = 0; i < num_rx_qpls; i++) { 867 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 868 /* This failure will trigger a reset - no need to clean */ 869 if (err) 870 return err; 871 } 872 return 0; 873 } 874 875 static int gve_create_rings(struct gve_priv *priv) 876 { 877 int num_tx_queues = gve_num_tx_queues(priv); 878 int err; 879 int i; 880 881 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 882 if (err) { 883 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 884 num_tx_queues); 885 /* This failure will trigger a reset - no need to clean 886 * up 887 */ 888 return err; 889 } 890 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 891 num_tx_queues); 892 893 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 894 if (err) { 895 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 896 priv->rx_cfg.num_queues); 897 /* This failure will trigger a reset - no need to clean 898 * up 899 */ 900 return err; 901 } 902 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 903 priv->rx_cfg.num_queues); 904 905 if (gve_is_gqi(priv)) { 906 /* Rx data ring has been prefilled with packet buffers at queue 907 * allocation time. 908 * 909 * Write the doorbell to provide descriptor slots and packet 910 * buffers to the NIC. 911 */ 912 for (i = 0; i < priv->rx_cfg.num_queues; i++) 913 gve_rx_write_doorbell(priv, &priv->rx[i]); 914 } else { 915 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 916 /* Post buffers and ring doorbell. 
*/ 917 gve_rx_post_buffers_dqo(&priv->rx[i]); 918 } 919 } 920 921 return 0; 922 } 923 924 static void init_xdp_sync_stats(struct gve_priv *priv) 925 { 926 int start_id = gve_xdp_tx_start_queue_id(priv); 927 int i; 928 929 /* Init stats */ 930 for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { 931 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 932 933 u64_stats_init(&priv->tx[i].statss); 934 priv->tx[i].ntfy_id = ntfy_idx; 935 } 936 } 937 938 static void gve_init_sync_stats(struct gve_priv *priv) 939 { 940 int i; 941 942 for (i = 0; i < priv->tx_cfg.num_queues; i++) 943 u64_stats_init(&priv->tx[i].statss); 944 945 /* Init stats for XDP TX queues */ 946 init_xdp_sync_stats(priv); 947 948 for (i = 0; i < priv->rx_cfg.num_queues; i++) 949 u64_stats_init(&priv->rx[i].statss); 950 } 951 952 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 953 struct gve_tx_alloc_rings_cfg *cfg) 954 { 955 cfg->qcfg = &priv->tx_cfg; 956 cfg->raw_addressing = !gve_is_qpl(priv); 957 cfg->ring_size = priv->tx_desc_cnt; 958 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 959 cfg->tx = priv->tx; 960 } 961 962 static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) 963 { 964 int i; 965 966 if (!priv->tx) 967 return; 968 969 for (i = 0; i < num_rings; i++) { 970 if (gve_is_gqi(priv)) 971 gve_tx_stop_ring_gqi(priv, i); 972 else 973 gve_tx_stop_ring_dqo(priv, i); 974 } 975 } 976 977 static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) 978 { 979 int i; 980 981 for (i = 0; i < num_rings; i++) { 982 if (gve_is_gqi(priv)) 983 gve_tx_start_ring_gqi(priv, i); 984 else 985 gve_tx_start_ring_dqo(priv, i); 986 } 987 } 988 989 static int gve_queues_mem_alloc(struct gve_priv *priv, 990 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 991 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 992 { 993 int err; 994 995 if (gve_is_gqi(priv)) 996 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 997 else 998 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 999 if (err) 1000 return err; 1001 1002 if (gve_is_gqi(priv)) 1003 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1004 else 1005 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1006 if (err) 1007 goto free_tx; 1008 1009 return 0; 1010 1011 free_tx: 1012 if (gve_is_gqi(priv)) 1013 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1014 else 1015 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1016 return err; 1017 } 1018 1019 static int gve_destroy_rings(struct gve_priv *priv) 1020 { 1021 int num_tx_queues = gve_num_tx_queues(priv); 1022 int err; 1023 1024 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1025 if (err) { 1026 netif_err(priv, drv, priv->dev, 1027 "failed to destroy tx queues\n"); 1028 /* This failure will trigger a reset - no need to clean up */ 1029 return err; 1030 } 1031 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1032 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1033 if (err) { 1034 netif_err(priv, drv, priv->dev, 1035 "failed to destroy rx queues\n"); 1036 /* This failure will trigger a reset - no need to clean up */ 1037 return err; 1038 } 1039 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1040 return 0; 1041 } 1042 1043 static void gve_queues_mem_free(struct gve_priv *priv, 1044 struct gve_tx_alloc_rings_cfg *tx_cfg, 1045 struct gve_rx_alloc_rings_cfg *rx_cfg) 1046 { 1047 if (gve_is_gqi(priv)) { 1048 gve_tx_free_rings_gqi(priv, tx_cfg); 1049 gve_rx_free_rings_gqi(priv, rx_cfg); 1050 } else { 1051 gve_tx_free_rings_dqo(priv, tx_cfg); 1052 gve_rx_free_rings_dqo(priv, 
rx_cfg); 1053 } 1054 } 1055 1056 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1057 struct page **page, dma_addr_t *dma, 1058 enum dma_data_direction dir, gfp_t gfp_flags) 1059 { 1060 *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); 1061 if (!*page) { 1062 priv->page_alloc_fail++; 1063 return -ENOMEM; 1064 } 1065 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1066 if (dma_mapping_error(dev, *dma)) { 1067 priv->dma_mapping_error++; 1068 put_page(*page); 1069 return -ENOMEM; 1070 } 1071 return 0; 1072 } 1073 1074 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1075 u32 id, int pages) 1076 { 1077 struct gve_queue_page_list *qpl; 1078 int err; 1079 int i; 1080 1081 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1082 if (!qpl) 1083 return NULL; 1084 1085 qpl->id = id; 1086 qpl->num_entries = 0; 1087 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1088 if (!qpl->pages) 1089 goto abort; 1090 1091 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1092 if (!qpl->page_buses) 1093 goto abort; 1094 1095 for (i = 0; i < pages; i++) { 1096 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1097 &qpl->page_buses[i], 1098 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1099 if (err) 1100 goto abort; 1101 qpl->num_entries++; 1102 } 1103 1104 return qpl; 1105 1106 abort: 1107 gve_free_queue_page_list(priv, qpl, id); 1108 return NULL; 1109 } 1110 1111 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1112 enum dma_data_direction dir) 1113 { 1114 if (!dma_mapping_error(dev, dma)) 1115 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1116 if (page) 1117 put_page(page); 1118 } 1119 1120 void gve_free_queue_page_list(struct gve_priv *priv, 1121 struct gve_queue_page_list *qpl, 1122 u32 id) 1123 { 1124 int i; 1125 1126 if (!qpl) 1127 return; 1128 if (!qpl->pages) 1129 goto free_qpl; 1130 if (!qpl->page_buses) 1131 goto free_pages; 1132 1133 for (i = 0; i < qpl->num_entries; i++) 1134 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1135 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1136 1137 kvfree(qpl->page_buses); 1138 qpl->page_buses = NULL; 1139 free_pages: 1140 kvfree(qpl->pages); 1141 qpl->pages = NULL; 1142 free_qpl: 1143 kvfree(qpl); 1144 } 1145 1146 /* Use this to schedule a reset when the device is capable of continuing 1147 * to handle other requests in its current state. If it is not, do a reset 1148 * in thread instead. 
1149 */ 1150 void gve_schedule_reset(struct gve_priv *priv) 1151 { 1152 gve_set_do_reset(priv); 1153 queue_work(priv->gve_wq, &priv->service_task); 1154 } 1155 1156 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1157 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1158 static void gve_turndown(struct gve_priv *priv); 1159 static void gve_turnup(struct gve_priv *priv); 1160 1161 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1162 { 1163 struct napi_struct *napi; 1164 struct gve_rx_ring *rx; 1165 int err = 0; 1166 int i, j; 1167 u32 tx_qid; 1168 1169 if (!priv->tx_cfg.num_xdp_queues) 1170 return 0; 1171 1172 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1173 rx = &priv->rx[i]; 1174 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1175 1176 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1177 napi->napi_id); 1178 if (err) 1179 goto err; 1180 if (gve_is_qpl(priv)) 1181 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1182 MEM_TYPE_PAGE_SHARED, 1183 NULL); 1184 else 1185 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1186 MEM_TYPE_PAGE_POOL, 1187 rx->dqo.page_pool); 1188 if (err) 1189 goto err; 1190 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1191 if (rx->xsk_pool) { 1192 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1193 napi->napi_id); 1194 if (err) 1195 goto err; 1196 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1197 MEM_TYPE_XSK_BUFF_POOL, NULL); 1198 if (err) 1199 goto err; 1200 xsk_pool_set_rxq_info(rx->xsk_pool, 1201 &rx->xsk_rxq); 1202 } 1203 } 1204 1205 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1206 tx_qid = gve_xdp_tx_queue_id(priv, i); 1207 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1208 } 1209 return 0; 1210 1211 err: 1212 for (j = i; j >= 0; j--) { 1213 rx = &priv->rx[j]; 1214 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1215 xdp_rxq_info_unreg(&rx->xdp_rxq); 1216 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1217 xdp_rxq_info_unreg(&rx->xsk_rxq); 1218 } 1219 return err; 1220 } 1221 1222 static void gve_unreg_xdp_info(struct gve_priv *priv) 1223 { 1224 int i, tx_qid; 1225 1226 if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) 1227 return; 1228 1229 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1230 struct gve_rx_ring *rx = &priv->rx[i]; 1231 1232 xdp_rxq_info_unreg(&rx->xdp_rxq); 1233 if (rx->xsk_pool) { 1234 xdp_rxq_info_unreg(&rx->xsk_rxq); 1235 rx->xsk_pool = NULL; 1236 } 1237 } 1238 1239 for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1240 tx_qid = gve_xdp_tx_queue_id(priv, i); 1241 priv->tx[tx_qid].xsk_pool = NULL; 1242 } 1243 } 1244 1245 static void gve_drain_page_cache(struct gve_priv *priv) 1246 { 1247 int i; 1248 1249 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1250 page_frag_cache_drain(&priv->rx[i].page_cache); 1251 } 1252 1253 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1254 struct gve_rx_alloc_rings_cfg *cfg) 1255 { 1256 cfg->qcfg_rx = &priv->rx_cfg; 1257 cfg->qcfg_tx = &priv->tx_cfg; 1258 cfg->raw_addressing = !gve_is_qpl(priv); 1259 cfg->enable_header_split = priv->header_split_enabled; 1260 cfg->ring_size = priv->rx_desc_cnt; 1261 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; 1262 cfg->rx = priv->rx; 1263 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; 1264 } 1265 1266 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1267 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1268 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1269 { 1270 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1271 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1272 } 1273 1274 
static void gve_rx_start_ring(struct gve_priv *priv, int i) 1275 { 1276 if (gve_is_gqi(priv)) 1277 gve_rx_start_ring_gqi(priv, i); 1278 else 1279 gve_rx_start_ring_dqo(priv, i); 1280 } 1281 1282 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1283 { 1284 int i; 1285 1286 for (i = 0; i < num_rings; i++) 1287 gve_rx_start_ring(priv, i); 1288 } 1289 1290 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1291 { 1292 if (gve_is_gqi(priv)) 1293 gve_rx_stop_ring_gqi(priv, i); 1294 else 1295 gve_rx_stop_ring_dqo(priv, i); 1296 } 1297 1298 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1299 { 1300 int i; 1301 1302 if (!priv->rx) 1303 return; 1304 1305 for (i = 0; i < num_rings; i++) 1306 gve_rx_stop_ring(priv, i); 1307 } 1308 1309 static void gve_queues_mem_remove(struct gve_priv *priv) 1310 { 1311 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1312 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1313 1314 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1315 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1316 priv->tx = NULL; 1317 priv->rx = NULL; 1318 } 1319 1320 /* The passed-in queue memory is stored into priv and the queues are made live. 1321 * No memory is allocated. Passed-in memory is freed on errors. 1322 */ 1323 static int gve_queues_start(struct gve_priv *priv, 1324 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1325 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1326 { 1327 struct net_device *dev = priv->dev; 1328 int err; 1329 1330 /* Record new resources into priv */ 1331 priv->tx = tx_alloc_cfg->tx; 1332 priv->rx = rx_alloc_cfg->rx; 1333 1334 /* Record new configs into priv */ 1335 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1336 priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; 1337 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1338 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1339 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1340 1341 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1342 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); 1343 gve_init_sync_stats(priv); 1344 1345 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1346 if (err) 1347 goto stop_and_free_rings; 1348 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1349 if (err) 1350 goto stop_and_free_rings; 1351 1352 err = gve_reg_xdp_info(priv, dev); 1353 if (err) 1354 goto stop_and_free_rings; 1355 1356 if (rx_alloc_cfg->reset_rss) { 1357 err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); 1358 if (err) 1359 goto reset; 1360 } 1361 1362 err = gve_register_qpls(priv); 1363 if (err) 1364 goto reset; 1365 1366 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1367 priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; 1368 1369 err = gve_create_rings(priv); 1370 if (err) 1371 goto reset; 1372 1373 gve_set_device_rings_ok(priv); 1374 1375 if (gve_get_report_stats(priv)) 1376 mod_timer(&priv->stats_report_timer, 1377 round_jiffies(jiffies + 1378 msecs_to_jiffies(priv->stats_report_timer_period))); 1379 1380 gve_turnup(priv); 1381 queue_work(priv->gve_wq, &priv->service_task); 1382 priv->interface_up_cnt++; 1383 return 0; 1384 1385 reset: 1386 if (gve_get_reset_in_progress(priv)) 1387 goto stop_and_free_rings; 1388 gve_reset_and_teardown(priv, true); 1389 /* if this fails there is nothing we can do so just ignore the return */ 1390 gve_reset_recovery(priv, false); 1391 /* return the original error */ 1392 return err; 1393 stop_and_free_rings: 1394 gve_tx_stop_rings(priv, 
gve_num_tx_queues(priv)); 1395 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1396 gve_queues_mem_remove(priv); 1397 return err; 1398 } 1399 1400 static int gve_open(struct net_device *dev) 1401 { 1402 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1403 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1404 struct gve_priv *priv = netdev_priv(dev); 1405 int err; 1406 1407 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1408 1409 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1410 if (err) 1411 return err; 1412 1413 /* No need to free on error: ownership of resources is lost after 1414 * calling gve_queues_start. 1415 */ 1416 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1417 if (err) 1418 return err; 1419 1420 return 0; 1421 } 1422 1423 static int gve_queues_stop(struct gve_priv *priv) 1424 { 1425 int err; 1426 1427 netif_carrier_off(priv->dev); 1428 if (gve_get_device_rings_ok(priv)) { 1429 gve_turndown(priv); 1430 gve_drain_page_cache(priv); 1431 err = gve_destroy_rings(priv); 1432 if (err) 1433 goto err; 1434 err = gve_unregister_qpls(priv); 1435 if (err) 1436 goto err; 1437 gve_clear_device_rings_ok(priv); 1438 } 1439 timer_delete_sync(&priv->stats_report_timer); 1440 1441 gve_unreg_xdp_info(priv); 1442 1443 gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); 1444 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1445 1446 priv->interface_down_cnt++; 1447 return 0; 1448 1449 err: 1450 /* This must have been called from a reset due to the rtnl lock 1451 * so just return at this point. 1452 */ 1453 if (gve_get_reset_in_progress(priv)) 1454 return err; 1455 /* Otherwise reset before returning */ 1456 gve_reset_and_teardown(priv, true); 1457 return gve_reset_recovery(priv, false); 1458 } 1459 1460 static int gve_close(struct net_device *dev) 1461 { 1462 struct gve_priv *priv = netdev_priv(dev); 1463 int err; 1464 1465 err = gve_queues_stop(priv); 1466 if (err) 1467 return err; 1468 1469 gve_queues_mem_remove(priv); 1470 return 0; 1471 } 1472 1473 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1474 { 1475 if (!gve_get_napi_enabled(priv)) 1476 return; 1477 1478 if (link_status == netif_carrier_ok(priv->dev)) 1479 return; 1480 1481 if (link_status) { 1482 netdev_info(priv->dev, "Device link is up.\n"); 1483 netif_carrier_on(priv->dev); 1484 } else { 1485 netdev_info(priv->dev, "Device link is down.\n"); 1486 netif_carrier_off(priv->dev); 1487 } 1488 } 1489 1490 static int gve_configure_rings_xdp(struct gve_priv *priv, 1491 u16 num_xdp_rings) 1492 { 1493 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1494 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1495 1496 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1497 tx_alloc_cfg.num_xdp_rings = num_xdp_rings; 1498 1499 rx_alloc_cfg.xdp = !!num_xdp_rings; 1500 return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1501 } 1502 1503 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1504 struct netlink_ext_ack *extack) 1505 { 1506 struct bpf_prog *old_prog; 1507 int err = 0; 1508 u32 status; 1509 1510 old_prog = READ_ONCE(priv->xdp_prog); 1511 if (!netif_running(priv->dev)) { 1512 WRITE_ONCE(priv->xdp_prog, prog); 1513 if (old_prog) 1514 bpf_prog_put(old_prog); 1515 1516 /* Update priv XDP queue configuration */ 1517 priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
1518 priv->rx_cfg.num_queues : 0; 1519 return 0; 1520 } 1521 1522 if (!old_prog && prog) 1523 err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1524 else if (old_prog && !prog) 1525 err = gve_configure_rings_xdp(priv, 0); 1526 1527 if (err) 1528 goto out; 1529 1530 WRITE_ONCE(priv->xdp_prog, prog); 1531 if (old_prog) 1532 bpf_prog_put(old_prog); 1533 1534 out: 1535 status = ioread32be(&priv->reg_bar0->device_status); 1536 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1537 return err; 1538 } 1539 1540 static int gve_xdp_xmit(struct net_device *dev, int n, 1541 struct xdp_frame **frames, u32 flags) 1542 { 1543 struct gve_priv *priv = netdev_priv(dev); 1544 1545 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1546 return gve_xdp_xmit_gqi(dev, n, frames, flags); 1547 else if (priv->queue_format == GVE_DQO_RDA_FORMAT) 1548 return gve_xdp_xmit_dqo(dev, n, frames, flags); 1549 1550 return -EOPNOTSUPP; 1551 } 1552 1553 static int gve_xsk_pool_enable(struct net_device *dev, 1554 struct xsk_buff_pool *pool, 1555 u16 qid) 1556 { 1557 struct gve_priv *priv = netdev_priv(dev); 1558 struct napi_struct *napi; 1559 struct gve_rx_ring *rx; 1560 int tx_qid; 1561 int err; 1562 1563 if (qid >= priv->rx_cfg.num_queues) { 1564 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1565 return -EINVAL; 1566 } 1567 if (xsk_pool_get_rx_frame_size(pool) < 1568 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1569 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1570 return -EINVAL; 1571 } 1572 1573 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1574 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1575 if (err) 1576 return err; 1577 1578 /* If XDP prog is not installed or interface is down, return. */ 1579 if (!priv->xdp_prog || !netif_running(dev)) 1580 return 0; 1581 1582 rx = &priv->rx[qid]; 1583 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1584 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1585 if (err) 1586 goto err; 1587 1588 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1589 MEM_TYPE_XSK_BUFF_POOL, NULL); 1590 if (err) 1591 goto err; 1592 1593 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1594 rx->xsk_pool = pool; 1595 1596 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1597 priv->tx[tx_qid].xsk_pool = pool; 1598 1599 return 0; 1600 err: 1601 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1602 xdp_rxq_info_unreg(&rx->xsk_rxq); 1603 1604 xsk_pool_dma_unmap(pool, 1605 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1606 return err; 1607 } 1608 1609 static int gve_xsk_pool_disable(struct net_device *dev, 1610 u16 qid) 1611 { 1612 struct gve_priv *priv = netdev_priv(dev); 1613 struct napi_struct *napi_rx; 1614 struct napi_struct *napi_tx; 1615 struct xsk_buff_pool *pool; 1616 int tx_qid; 1617 1618 pool = xsk_get_pool_from_qid(dev, qid); 1619 if (!pool) 1620 return -EINVAL; 1621 if (qid >= priv->rx_cfg.num_queues) 1622 return -EINVAL; 1623 1624 /* If XDP prog is not installed or interface is down, unmap DMA and 1625 * return. 
1626 */ 1627 if (!priv->xdp_prog || !netif_running(dev)) 1628 goto done; 1629 1630 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1631 napi_disable(napi_rx); /* make sure current rx poll is done */ 1632 1633 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1634 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1635 napi_disable(napi_tx); /* make sure current tx poll is done */ 1636 1637 priv->rx[qid].xsk_pool = NULL; 1638 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1639 priv->tx[tx_qid].xsk_pool = NULL; 1640 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1641 1642 napi_enable(napi_rx); 1643 if (gve_rx_work_pending(&priv->rx[qid])) 1644 napi_schedule(napi_rx); 1645 1646 napi_enable(napi_tx); 1647 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1648 napi_schedule(napi_tx); 1649 1650 done: 1651 xsk_pool_dma_unmap(pool, 1652 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1653 return 0; 1654 } 1655 1656 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1657 { 1658 struct gve_priv *priv = netdev_priv(dev); 1659 struct napi_struct *napi; 1660 1661 if (!gve_get_napi_enabled(priv)) 1662 return -ENETDOWN; 1663 1664 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1665 return -EINVAL; 1666 1667 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1668 if (!napi_if_scheduled_mark_missed(napi)) { 1669 /* Call local_bh_enable to trigger SoftIRQ processing */ 1670 local_bh_disable(); 1671 napi_schedule(napi); 1672 local_bh_enable(); 1673 } 1674 1675 return 0; 1676 } 1677 1678 static int verify_xdp_configuration(struct net_device *dev) 1679 { 1680 struct gve_priv *priv = netdev_priv(dev); 1681 u16 max_xdp_mtu; 1682 1683 if (dev->features & NETIF_F_LRO) { 1684 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1685 return -EOPNOTSUPP; 1686 } 1687 1688 if (priv->header_split_enabled) { 1689 netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); 1690 return -EOPNOTSUPP; 1691 } 1692 1693 max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); 1694 if (priv->queue_format == GVE_GQI_QPL_FORMAT) 1695 max_xdp_mtu -= GVE_RX_PAD; 1696 1697 if (dev->mtu > max_xdp_mtu) { 1698 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1699 dev->mtu); 1700 return -EOPNOTSUPP; 1701 } 1702 1703 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1704 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1705 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1706 priv->rx_cfg.num_queues, 1707 priv->tx_cfg.num_queues, 1708 priv->tx_cfg.max_queues); 1709 return -EINVAL; 1710 } 1711 return 0; 1712 } 1713 1714 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1715 { 1716 struct gve_priv *priv = netdev_priv(dev); 1717 int err; 1718 1719 err = verify_xdp_configuration(dev); 1720 if (err) 1721 return err; 1722 switch (xdp->command) { 1723 case XDP_SETUP_PROG: 1724 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1725 case XDP_SETUP_XSK_POOL: 1726 if (xdp->xsk.pool) 1727 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1728 else 1729 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1730 default: 1731 return -EINVAL; 1732 } 1733 } 1734 1735 int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) 1736 { 1737 struct gve_rss_config *rss_config = 
&priv->rss_config; 1738 struct ethtool_rxfh_param rxfh = {0}; 1739 u16 i; 1740 1741 if (!priv->cache_rss_config) 1742 return 0; 1743 1744 for (i = 0; i < priv->rss_lut_size; i++) 1745 rss_config->hash_lut[i] = 1746 ethtool_rxfh_indir_default(i, num_queues); 1747 1748 netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); 1749 1750 rxfh.hfunc = ETH_RSS_HASH_TOP; 1751 1752 return gve_adminq_configure_rss(priv, &rxfh); 1753 } 1754 1755 int gve_flow_rules_reset(struct gve_priv *priv) 1756 { 1757 if (!priv->max_flow_rules) 1758 return 0; 1759 1760 return gve_adminq_reset_flow_rules(priv); 1761 } 1762 1763 int gve_adjust_config(struct gve_priv *priv, 1764 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1765 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1766 { 1767 int err; 1768 1769 /* Allocate resources for the new configuration */ 1770 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1771 if (err) { 1772 netif_err(priv, drv, priv->dev, 1773 "Adjust config failed to alloc new queues"); 1774 return err; 1775 } 1776 1777 /* Teardown the device and free existing resources */ 1778 err = gve_close(priv->dev); 1779 if (err) { 1780 netif_err(priv, drv, priv->dev, 1781 "Adjust config failed to close old queues"); 1782 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1783 return err; 1784 } 1785 1786 /* Bring the device back up again with the new resources. */ 1787 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1788 if (err) { 1789 netif_err(priv, drv, priv->dev, 1790 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1791 /* No need to free on error: ownership of resources is lost after 1792 * calling gve_queues_start. 1793 */ 1794 gve_turndown(priv); 1795 return err; 1796 } 1797 1798 return 0; 1799 } 1800 1801 int gve_adjust_queues(struct gve_priv *priv, 1802 struct gve_rx_queue_config new_rx_config, 1803 struct gve_tx_queue_config new_tx_config, 1804 bool reset_rss) 1805 { 1806 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1807 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1808 int err; 1809 1810 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1811 1812 /* Relay the new config from ethtool */ 1813 tx_alloc_cfg.qcfg = &new_tx_config; 1814 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1815 rx_alloc_cfg.qcfg_rx = &new_rx_config; 1816 rx_alloc_cfg.reset_rss = reset_rss; 1817 1818 if (netif_running(priv->dev)) { 1819 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1820 return err; 1821 } 1822 /* Set the config for the next up. 
*/ 1823 if (reset_rss) { 1824 err = gve_init_rss_config(priv, new_rx_config.num_queues); 1825 if (err) 1826 return err; 1827 } 1828 priv->tx_cfg = new_tx_config; 1829 priv->rx_cfg = new_rx_config; 1830 1831 return 0; 1832 } 1833 1834 static void gve_turndown(struct gve_priv *priv) 1835 { 1836 int idx; 1837 1838 if (netif_carrier_ok(priv->dev)) 1839 netif_carrier_off(priv->dev); 1840 1841 if (!gve_get_napi_enabled(priv)) 1842 return; 1843 1844 /* Disable napi to prevent more work from coming in */ 1845 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1846 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1847 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1848 1849 if (!gve_tx_was_added_to_block(priv, idx)) 1850 continue; 1851 1852 if (idx < priv->tx_cfg.num_queues) 1853 netif_queue_set_napi(priv->dev, idx, 1854 NETDEV_QUEUE_TYPE_TX, NULL); 1855 1856 napi_disable_locked(&block->napi); 1857 } 1858 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1859 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1860 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1861 1862 if (!gve_rx_was_added_to_block(priv, idx)) 1863 continue; 1864 1865 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1866 NULL); 1867 napi_disable_locked(&block->napi); 1868 } 1869 1870 /* Stop tx queues */ 1871 netif_tx_disable(priv->dev); 1872 1873 xdp_features_clear_redirect_target_locked(priv->dev); 1874 1875 gve_clear_napi_enabled(priv); 1876 gve_clear_report_stats(priv); 1877 1878 /* Make sure that all traffic is finished processing. */ 1879 synchronize_net(); 1880 } 1881 1882 static void gve_turnup(struct gve_priv *priv) 1883 { 1884 int idx; 1885 1886 /* Start the tx queues */ 1887 netif_tx_start_all_queues(priv->dev); 1888 1889 /* Enable napi and unmask interrupts for all queues */ 1890 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1891 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1892 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1893 1894 if (!gve_tx_was_added_to_block(priv, idx)) 1895 continue; 1896 1897 napi_enable_locked(&block->napi); 1898 1899 if (idx < priv->tx_cfg.num_queues) 1900 netif_queue_set_napi(priv->dev, idx, 1901 NETDEV_QUEUE_TYPE_TX, 1902 &block->napi); 1903 1904 if (gve_is_gqi(priv)) { 1905 iowrite32be(0, gve_irq_doorbell(priv, block)); 1906 } else { 1907 gve_set_itr_coalesce_usecs_dqo(priv, block, 1908 priv->tx_coalesce_usecs); 1909 } 1910 1911 /* Any descs written by the NIC before this barrier will be 1912 * handled by the one-off napi schedule below. Whereas any 1913 * descs after the barrier will generate interrupts. 1914 */ 1915 mb(); 1916 napi_schedule(&block->napi); 1917 } 1918 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1919 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1920 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1921 1922 if (!gve_rx_was_added_to_block(priv, idx)) 1923 continue; 1924 1925 napi_enable_locked(&block->napi); 1926 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1927 &block->napi); 1928 1929 if (gve_is_gqi(priv)) { 1930 iowrite32be(0, gve_irq_doorbell(priv, block)); 1931 } else { 1932 gve_set_itr_coalesce_usecs_dqo(priv, block, 1933 priv->rx_coalesce_usecs); 1934 } 1935 1936 /* Any descs written by the NIC before this barrier will be 1937 * handled by the one-off napi schedule below. Whereas any 1938 * descs after the barrier will generate interrupts. 
1939 */ 1940 mb(); 1941 napi_schedule(&block->napi); 1942 } 1943 1944 if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) 1945 xdp_features_set_redirect_target_locked(priv->dev, false); 1946 1947 gve_set_napi_enabled(priv); 1948 } 1949 1950 static void gve_turnup_and_check_status(struct gve_priv *priv) 1951 { 1952 u32 status; 1953 1954 gve_turnup(priv); 1955 status = ioread32be(&priv->reg_bar0->device_status); 1956 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1957 } 1958 1959 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1960 { 1961 struct gve_notify_block *block; 1962 struct gve_tx_ring *tx = NULL; 1963 struct gve_priv *priv; 1964 u32 last_nic_done; 1965 u32 current_time; 1966 u32 ntfy_idx; 1967 1968 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1969 priv = netdev_priv(dev); 1970 if (txqueue > priv->tx_cfg.num_queues) 1971 goto reset; 1972 1973 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1974 if (ntfy_idx >= priv->num_ntfy_blks) 1975 goto reset; 1976 1977 block = &priv->ntfy_blocks[ntfy_idx]; 1978 tx = block->tx; 1979 1980 current_time = jiffies_to_msecs(jiffies); 1981 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1982 goto reset; 1983 1984 /* Check to see if there are missed completions, which will allow us to 1985 * kick the queue. 1986 */ 1987 last_nic_done = gve_tx_load_event_counter(priv, tx); 1988 if (last_nic_done - tx->done) { 1989 netdev_info(dev, "Kicking queue %d", txqueue); 1990 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1991 napi_schedule(&block->napi); 1992 tx->last_kick_msec = current_time; 1993 goto out; 1994 } // Else reset. 1995 1996 reset: 1997 gve_schedule_reset(priv); 1998 1999 out: 2000 if (tx) 2001 tx->queue_timeout++; 2002 priv->tx_timeo_cnt++; 2003 } 2004 2005 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2006 { 2007 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2008 return GVE_MAX_RX_BUFFER_SIZE; 2009 else 2010 return GVE_DEFAULT_RX_BUFFER_SIZE; 2011 } 2012 2013 /* Header split is only supported on DQ RDA queue format. If XDP is enabled, 2014 * header split is not allowed. 
2015 */ 2016 bool gve_header_split_supported(const struct gve_priv *priv) 2017 { 2018 return priv->header_buf_size && 2019 priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; 2020 } 2021 2022 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2023 { 2024 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2025 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2026 bool enable_hdr_split; 2027 int err = 0; 2028 2029 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2030 return 0; 2031 2032 if (!gve_header_split_supported(priv)) { 2033 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2034 return -EOPNOTSUPP; 2035 } 2036 2037 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2038 enable_hdr_split = true; 2039 else 2040 enable_hdr_split = false; 2041 2042 if (enable_hdr_split == priv->header_split_enabled) 2043 return 0; 2044 2045 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2046 2047 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2048 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2049 2050 if (netif_running(priv->dev)) 2051 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2052 return err; 2053 } 2054 2055 static int gve_set_features(struct net_device *netdev, 2056 netdev_features_t features) 2057 { 2058 const netdev_features_t orig_features = netdev->features; 2059 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2060 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2061 struct gve_priv *priv = netdev_priv(netdev); 2062 int err; 2063 2064 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2065 2066 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2067 netdev->features ^= NETIF_F_LRO; 2068 if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { 2069 netdev_warn(netdev, 2070 "XDP is not supported when LRO is on.\n"); 2071 err = -EOPNOTSUPP; 2072 goto revert_features; 2073 } 2074 if (netif_running(netdev)) { 2075 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2076 if (err) 2077 goto revert_features; 2078 } 2079 } 2080 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2081 err = gve_flow_rules_reset(priv); 2082 if (err) 2083 goto revert_features; 2084 } 2085 2086 return 0; 2087 2088 revert_features: 2089 netdev->features = orig_features; 2090 return err; 2091 } 2092 2093 static int gve_get_ts_config(struct net_device *dev, 2094 struct kernel_hwtstamp_config *kernel_config) 2095 { 2096 struct gve_priv *priv = netdev_priv(dev); 2097 2098 *kernel_config = priv->ts_config; 2099 return 0; 2100 } 2101 2102 static int gve_set_ts_config(struct net_device *dev, 2103 struct kernel_hwtstamp_config *kernel_config, 2104 struct netlink_ext_ack *extack) 2105 { 2106 struct gve_priv *priv = netdev_priv(dev); 2107 2108 if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { 2109 NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); 2110 return -ERANGE; 2111 } 2112 2113 if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { 2114 if (!priv->nic_ts_report) { 2115 NL_SET_ERR_MSG_MOD(extack, 2116 "RX timestamping is not supported"); 2117 kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; 2118 return -EOPNOTSUPP; 2119 } 2120 2121 kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; 2122 gve_clock_nic_ts_read(priv); 2123 ptp_schedule_worker(priv->ptp->clock, 0); 2124 } else { 2125 ptp_cancel_worker_sync(priv->ptp->clock); 2126 } 2127 2128 priv->ts_config.rx_filter = kernel_config->rx_filter; 2129 2130 return 0; 2131 } 2132 2133 static const 
struct net_device_ops gve_netdev_ops = { 2134 .ndo_start_xmit = gve_start_xmit, 2135 .ndo_features_check = gve_features_check, 2136 .ndo_open = gve_open, 2137 .ndo_stop = gve_close, 2138 .ndo_get_stats64 = gve_get_stats, 2139 .ndo_tx_timeout = gve_tx_timeout, 2140 .ndo_set_features = gve_set_features, 2141 .ndo_bpf = gve_xdp, 2142 .ndo_xdp_xmit = gve_xdp_xmit, 2143 .ndo_xsk_wakeup = gve_xsk_wakeup, 2144 .ndo_hwtstamp_get = gve_get_ts_config, 2145 .ndo_hwtstamp_set = gve_set_ts_config, 2146 }; 2147 2148 static void gve_handle_status(struct gve_priv *priv, u32 status) 2149 { 2150 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2151 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2152 gve_set_do_reset(priv); 2153 } 2154 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2155 priv->stats_report_trigger_cnt++; 2156 gve_set_do_report_stats(priv); 2157 } 2158 } 2159 2160 static void gve_handle_reset(struct gve_priv *priv) 2161 { 2162 /* A service task will be scheduled at the end of probe to catch any 2163 * resets that need to happen, and we don't want to reset until 2164 * probe is done. 2165 */ 2166 if (gve_get_probe_in_progress(priv)) 2167 return; 2168 2169 if (gve_get_do_reset(priv)) { 2170 rtnl_lock(); 2171 netdev_lock(priv->dev); 2172 gve_reset(priv, false); 2173 netdev_unlock(priv->dev); 2174 rtnl_unlock(); 2175 } 2176 } 2177 2178 void gve_handle_report_stats(struct gve_priv *priv) 2179 { 2180 struct stats *stats = priv->stats_report->stats; 2181 int idx, stats_idx = 0; 2182 unsigned int start = 0; 2183 u64 tx_bytes; 2184 2185 if (!gve_get_report_stats(priv)) 2186 return; 2187 2188 be64_add_cpu(&priv->stats_report->written_count, 1); 2189 /* tx stats */ 2190 if (priv->tx) { 2191 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2192 u32 last_completion = 0; 2193 u32 tx_frames = 0; 2194 2195 /* DQO doesn't currently support these metrics. 
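 * last_completion and tx_frames are therefore reported as zero for DQO queues.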
*/ 2196 if (gve_is_gqi(priv)) { 2197 last_completion = priv->tx[idx].done; 2198 tx_frames = priv->tx[idx].req; 2199 } 2200 2201 do { 2202 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2203 tx_bytes = priv->tx[idx].bytes_done; 2204 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2205 stats[stats_idx++] = (struct stats) { 2206 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2207 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2208 .queue_id = cpu_to_be32(idx), 2209 }; 2210 stats[stats_idx++] = (struct stats) { 2211 .stat_name = cpu_to_be32(TX_STOP_CNT), 2212 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2213 .queue_id = cpu_to_be32(idx), 2214 }; 2215 stats[stats_idx++] = (struct stats) { 2216 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2217 .value = cpu_to_be64(tx_frames), 2218 .queue_id = cpu_to_be32(idx), 2219 }; 2220 stats[stats_idx++] = (struct stats) { 2221 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2222 .value = cpu_to_be64(tx_bytes), 2223 .queue_id = cpu_to_be32(idx), 2224 }; 2225 stats[stats_idx++] = (struct stats) { 2226 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2227 .value = cpu_to_be64(last_completion), 2228 .queue_id = cpu_to_be32(idx), 2229 }; 2230 stats[stats_idx++] = (struct stats) { 2231 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2232 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2233 .queue_id = cpu_to_be32(idx), 2234 }; 2235 } 2236 } 2237 /* rx stats */ 2238 if (priv->rx) { 2239 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2240 stats[stats_idx++] = (struct stats) { 2241 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2242 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2243 .queue_id = cpu_to_be32(idx), 2244 }; 2245 stats[stats_idx++] = (struct stats) { 2246 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2247 .value = cpu_to_be64(priv->rx[idx].fill_cnt), 2248 .queue_id = cpu_to_be32(idx), 2249 }; 2250 } 2251 } 2252 } 2253 2254 /* Handle NIC status register changes, reset requests and report stats */ 2255 static void gve_service_task(struct work_struct *work) 2256 { 2257 struct gve_priv *priv = container_of(work, struct gve_priv, 2258 service_task); 2259 u32 status = ioread32be(&priv->reg_bar0->device_status); 2260 2261 gve_handle_status(priv, status); 2262 2263 gve_handle_reset(priv); 2264 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2265 } 2266 2267 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2268 { 2269 xdp_features_t xdp_features; 2270 2271 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2272 xdp_features = NETDEV_XDP_ACT_BASIC; 2273 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2274 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2275 } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 2276 xdp_features = NETDEV_XDP_ACT_BASIC; 2277 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2278 } else { 2279 xdp_features = 0; 2280 } 2281 2282 xdp_set_features_flag_locked(priv->dev, xdp_features); 2283 } 2284 2285 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2286 { 2287 int num_ntfy; 2288 int err; 2289 2290 /* Set up the adminq */ 2291 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2292 if (err) { 2293 dev_err(&priv->pdev->dev, 2294 "Failed to alloc admin queue: err=%d\n", err); 2295 return err; 2296 } 2297 2298 err = gve_verify_driver_compatibility(priv); 2299 if (err) { 2300 dev_err(&priv->pdev->dev, 2301 "Could not verify driver compatibility: err=%d\n", err); 2302 goto err; 2303 } 2304 2305 priv->num_registered_pages = 0; 2306 2307 if (skip_describe_device) 2308 goto 
setup_device; 2309 2310 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2311 /* Get the initial information we need from the device */ 2312 err = gve_adminq_describe_device(priv); 2313 if (err) { 2314 dev_err(&priv->pdev->dev, 2315 "Could not get device information: err=%d\n", err); 2316 goto err; 2317 } 2318 priv->dev->mtu = priv->dev->max_mtu; 2319 num_ntfy = pci_msix_vec_count(priv->pdev); 2320 if (num_ntfy <= 0) { 2321 dev_err(&priv->pdev->dev, 2322 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2323 err = num_ntfy; 2324 goto err; 2325 } else if (num_ntfy < GVE_MIN_MSIX) { 2326 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2327 GVE_MIN_MSIX, num_ntfy); 2328 err = -EINVAL; 2329 goto err; 2330 } 2331 2332 /* Big TCP is only supported on DQO */ 2333 if (!gve_is_gqi(priv)) 2334 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2335 2336 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2337 /* gvnic has one Notification Block per MSI-x vector, except for the 2338 * management vector 2339 */ 2340 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2341 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2342 priv->numa_node = dev_to_node(&priv->pdev->dev); 2343 2344 priv->tx_cfg.max_queues = 2345 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2346 priv->rx_cfg.max_queues = 2347 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2348 2349 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2350 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2351 if (priv->default_num_queues > 0) { 2352 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2353 priv->tx_cfg.num_queues); 2354 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2355 priv->rx_cfg.num_queues); 2356 } 2357 priv->tx_cfg.num_xdp_queues = 0; 2358 2359 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2360 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2361 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2362 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2363 2364 if (!gve_is_gqi(priv)) { 2365 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2366 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2367 } 2368 2369 priv->ts_config.tx_type = HWTSTAMP_TX_OFF; 2370 priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; 2371 2372 setup_device: 2373 gve_set_netdev_xdp_features(priv); 2374 err = gve_setup_device_resources(priv); 2375 if (!err) 2376 return 0; 2377 err: 2378 gve_adminq_free(&priv->pdev->dev, priv); 2379 return err; 2380 } 2381 2382 static void gve_teardown_priv_resources(struct gve_priv *priv) 2383 { 2384 gve_teardown_device_resources(priv); 2385 gve_adminq_free(&priv->pdev->dev, priv); 2386 } 2387 2388 static void gve_trigger_reset(struct gve_priv *priv) 2389 { 2390 /* Reset the device by releasing the AQ */ 2391 gve_adminq_release(priv); 2392 } 2393 2394 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2395 { 2396 gve_trigger_reset(priv); 2397 /* With the reset having already happened, close cannot fail */ 2398 if (was_up) 2399 gve_close(priv->dev); 2400 gve_teardown_priv_resources(priv); 2401 } 2402 2403 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2404 { 2405 int err; 2406 2407 err = gve_init_priv(priv, true); 2408 if (err) 2409 goto err; 2410 if (was_up) { 2411 err = gve_open(priv->dev); 2412 if (err) 2413 goto err; 2414 } 2415 return 0; 2416 err: 2417 dev_err(&priv->pdev->dev, "Reset failed! !!! 
DISABLING ALL QUEUES !!!\n"); 2418 gve_turndown(priv); 2419 return err; 2420 } 2421 2422 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2423 { 2424 bool was_up = netif_running(priv->dev); 2425 int err; 2426 2427 dev_info(&priv->pdev->dev, "Performing reset\n"); 2428 gve_clear_do_reset(priv); 2429 gve_set_reset_in_progress(priv); 2430 /* If we aren't attempting to teardown normally, just go turndown and 2431 * reset right away. 2432 */ 2433 if (!attempt_teardown) { 2434 gve_turndown(priv); 2435 gve_reset_and_teardown(priv, was_up); 2436 } else { 2437 /* Otherwise attempt to close normally */ 2438 if (was_up) { 2439 err = gve_close(priv->dev); 2440 /* If that fails reset as we did above */ 2441 if (err) 2442 gve_reset_and_teardown(priv, was_up); 2443 } 2444 /* Clean up any remaining resources */ 2445 gve_teardown_priv_resources(priv); 2446 } 2447 2448 /* Set it all back up */ 2449 err = gve_reset_recovery(priv, was_up); 2450 gve_clear_reset_in_progress(priv); 2451 priv->reset_cnt++; 2452 priv->interface_up_cnt = 0; 2453 priv->interface_down_cnt = 0; 2454 priv->stats_report_trigger_cnt = 0; 2455 return err; 2456 } 2457 2458 static void gve_write_version(u8 __iomem *driver_version_register) 2459 { 2460 const char *c = gve_version_prefix; 2461 2462 while (*c) { 2463 writeb(*c, driver_version_register); 2464 c++; 2465 } 2466 2467 c = gve_version_str; 2468 while (*c) { 2469 writeb(*c, driver_version_register); 2470 c++; 2471 } 2472 writeb('\n', driver_version_register); 2473 } 2474 2475 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2476 { 2477 struct gve_priv *priv = netdev_priv(dev); 2478 struct gve_rx_ring *gve_per_q_mem; 2479 int err; 2480 2481 if (!priv->rx) 2482 return -EAGAIN; 2483 2484 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2485 if (!gve_is_gqi(priv) && idx == 0) 2486 return -ERANGE; 2487 2488 /* Single-queue destruction requires quiescence on all queues */ 2489 gve_turndown(priv); 2490 2491 /* This failure will trigger a reset - no need to clean up */ 2492 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2493 if (err) 2494 return err; 2495 2496 if (gve_is_qpl(priv)) { 2497 /* This failure will trigger a reset - no need to clean up */ 2498 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2499 if (err) 2500 return err; 2501 } 2502 2503 gve_rx_stop_ring(priv, idx); 2504 2505 /* Turn the unstopped queues back up */ 2506 gve_turnup_and_check_status(priv); 2507 2508 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2509 *gve_per_q_mem = priv->rx[idx]; 2510 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2511 return 0; 2512 } 2513 2514 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2515 { 2516 struct gve_priv *priv = netdev_priv(dev); 2517 struct gve_rx_alloc_rings_cfg cfg = {0}; 2518 struct gve_rx_ring *gve_per_q_mem; 2519 2520 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2521 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2522 2523 if (gve_is_gqi(priv)) 2524 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2525 else 2526 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2527 } 2528 2529 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2530 int idx) 2531 { 2532 struct gve_priv *priv = netdev_priv(dev); 2533 struct gve_rx_alloc_rings_cfg cfg = {0}; 2534 struct gve_rx_ring *gve_per_q_mem; 2535 int err; 2536 2537 if (!priv->rx) 2538 return -EAGAIN; 2539 2540 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2541 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2542 
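	/* Allocate the replacement ring's memory with the current ring config; the queue format below selects the GQI or DQO allocator. */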
2543 if (gve_is_gqi(priv)) 2544 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2545 else 2546 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2547 2548 return err; 2549 } 2550 2551 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2552 { 2553 struct gve_priv *priv = netdev_priv(dev); 2554 struct gve_rx_ring *gve_per_q_mem; 2555 int err; 2556 2557 if (!priv->rx) 2558 return -EAGAIN; 2559 2560 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2561 priv->rx[idx] = *gve_per_q_mem; 2562 2563 /* Single-queue creation requires quiescence on all queues */ 2564 gve_turndown(priv); 2565 2566 gve_rx_start_ring(priv, idx); 2567 2568 if (gve_is_qpl(priv)) { 2569 /* This failure will trigger a reset - no need to clean up */ 2570 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2571 if (err) 2572 goto abort; 2573 } 2574 2575 /* This failure will trigger a reset - no need to clean up */ 2576 err = gve_adminq_create_single_rx_queue(priv, idx); 2577 if (err) 2578 goto abort; 2579 2580 if (gve_is_gqi(priv)) 2581 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2582 else 2583 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2584 2585 /* Turn the unstopped queues back up */ 2586 gve_turnup_and_check_status(priv); 2587 return 0; 2588 2589 abort: 2590 gve_rx_stop_ring(priv, idx); 2591 2592 /* All failures in this func result in a reset, by clearing the struct 2593 * at idx, we prevent a double free when that reset runs. The reset, 2594 * which needs the rtnl lock, will not run till this func returns and 2595 * its caller gives up the lock. 2596 */ 2597 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2598 return err; 2599 } 2600 2601 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2602 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2603 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2604 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2605 .ndo_queue_start = gve_rx_queue_start, 2606 .ndo_queue_stop = gve_rx_queue_stop, 2607 }; 2608 2609 static void gve_get_rx_queue_stats(struct net_device *dev, int idx, 2610 struct netdev_queue_stats_rx *rx_stats) 2611 { 2612 struct gve_priv *priv = netdev_priv(dev); 2613 struct gve_rx_ring *rx = &priv->rx[idx]; 2614 unsigned int start; 2615 2616 do { 2617 start = u64_stats_fetch_begin(&rx->statss); 2618 rx_stats->packets = rx->rpackets; 2619 rx_stats->bytes = rx->rbytes; 2620 rx_stats->alloc_fail = rx->rx_skb_alloc_fail + 2621 rx->rx_buf_alloc_fail; 2622 } while (u64_stats_fetch_retry(&rx->statss, start)); 2623 } 2624 2625 static void gve_get_tx_queue_stats(struct net_device *dev, int idx, 2626 struct netdev_queue_stats_tx *tx_stats) 2627 { 2628 struct gve_priv *priv = netdev_priv(dev); 2629 struct gve_tx_ring *tx = &priv->tx[idx]; 2630 unsigned int start; 2631 2632 do { 2633 start = u64_stats_fetch_begin(&tx->statss); 2634 tx_stats->packets = tx->pkt_done; 2635 tx_stats->bytes = tx->bytes_done; 2636 } while (u64_stats_fetch_retry(&tx->statss, start)); 2637 } 2638 2639 static void gve_get_base_stats(struct net_device *dev, 2640 struct netdev_queue_stats_rx *rx, 2641 struct netdev_queue_stats_tx *tx) 2642 { 2643 rx->packets = 0; 2644 rx->bytes = 0; 2645 rx->alloc_fail = 0; 2646 2647 tx->packets = 0; 2648 tx->bytes = 0; 2649 } 2650 2651 static const struct netdev_stat_ops gve_stat_ops = { 2652 .get_queue_stats_rx = gve_get_rx_queue_stats, 2653 .get_queue_stats_tx = gve_get_tx_queue_stats, 2654 .get_base_stats = gve_get_base_stats, 2655 }; 2656 2657 static int gve_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent) 2658 { 2659 int max_tx_queues, max_rx_queues; 2660 struct net_device *dev; 2661 __be32 __iomem *db_bar; 2662 struct gve_registers __iomem *reg_bar; 2663 struct gve_priv *priv; 2664 int err; 2665 2666 err = pci_enable_device(pdev); 2667 if (err) 2668 return err; 2669 2670 err = pci_request_regions(pdev, gve_driver_name); 2671 if (err) 2672 goto abort_with_enabled; 2673 2674 pci_set_master(pdev); 2675 2676 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2677 if (err) { 2678 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2679 goto abort_with_pci_region; 2680 } 2681 2682 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2683 if (!reg_bar) { 2684 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2685 err = -ENOMEM; 2686 goto abort_with_pci_region; 2687 } 2688 2689 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2690 if (!db_bar) { 2691 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2692 err = -ENOMEM; 2693 goto abort_with_reg_bar; 2694 } 2695 2696 gve_write_version(&reg_bar->driver_version); 2697 /* Get max queues to alloc etherdev */ 2698 max_tx_queues = ioread32be(&reg_bar->max_tx_queues); 2699 max_rx_queues = ioread32be(&reg_bar->max_rx_queues); 2700 /* Alloc and setup the netdev and priv */ 2701 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2702 if (!dev) { 2703 dev_err(&pdev->dev, "could not allocate netdev\n"); 2704 err = -ENOMEM; 2705 goto abort_with_db_bar; 2706 } 2707 SET_NETDEV_DEV(dev, &pdev->dev); 2708 pci_set_drvdata(pdev, dev); 2709 dev->ethtool_ops = &gve_ethtool_ops; 2710 dev->netdev_ops = &gve_netdev_ops; 2711 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2712 dev->stat_ops = &gve_stat_ops; 2713 2714 /* Set default and supported features. 2715 * 2716 * Features might be set in other locations as well (such as 2717 * `gve_adminq_describe_device`).
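 * (LRO, for example, may only be advertised after the device describes its queue format.)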
2718 */ 2719 dev->hw_features = NETIF_F_HIGHDMA; 2720 dev->hw_features |= NETIF_F_SG; 2721 dev->hw_features |= NETIF_F_HW_CSUM; 2722 dev->hw_features |= NETIF_F_TSO; 2723 dev->hw_features |= NETIF_F_TSO6; 2724 dev->hw_features |= NETIF_F_TSO_ECN; 2725 dev->hw_features |= NETIF_F_RXCSUM; 2726 dev->hw_features |= NETIF_F_RXHASH; 2727 dev->features = dev->hw_features; 2728 dev->watchdog_timeo = 5 * HZ; 2729 dev->min_mtu = ETH_MIN_MTU; 2730 netif_carrier_off(dev); 2731 2732 priv = netdev_priv(dev); 2733 priv->dev = dev; 2734 priv->pdev = pdev; 2735 priv->msg_enable = DEFAULT_MSG_LEVEL; 2736 priv->reg_bar0 = reg_bar; 2737 priv->db_bar2 = db_bar; 2738 priv->service_task_flags = 0x0; 2739 priv->state_flags = 0x0; 2740 priv->ethtool_flags = 0x0; 2741 priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2742 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2743 2744 gve_set_probe_in_progress(priv); 2745 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2746 if (!priv->gve_wq) { 2747 dev_err(&pdev->dev, "Could not allocate workqueue"); 2748 err = -ENOMEM; 2749 goto abort_with_netdev; 2750 } 2751 INIT_WORK(&priv->service_task, gve_service_task); 2752 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2753 priv->tx_cfg.max_queues = max_tx_queues; 2754 priv->rx_cfg.max_queues = max_rx_queues; 2755 2756 err = gve_init_priv(priv, false); 2757 if (err) 2758 goto abort_with_wq; 2759 2760 if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) 2761 dev->netmem_tx = true; 2762 2763 err = register_netdev(dev); 2764 if (err) 2765 goto abort_with_gve_init; 2766 2767 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2768 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2769 gve_clear_probe_in_progress(priv); 2770 queue_work(priv->gve_wq, &priv->service_task); 2771 return 0; 2772 2773 abort_with_gve_init: 2774 gve_teardown_priv_resources(priv); 2775 2776 abort_with_wq: 2777 destroy_workqueue(priv->gve_wq); 2778 2779 abort_with_netdev: 2780 free_netdev(dev); 2781 2782 abort_with_db_bar: 2783 pci_iounmap(pdev, db_bar); 2784 2785 abort_with_reg_bar: 2786 pci_iounmap(pdev, reg_bar); 2787 2788 abort_with_pci_region: 2789 pci_release_regions(pdev); 2790 2791 abort_with_enabled: 2792 pci_disable_device(pdev); 2793 return err; 2794 } 2795 2796 static void gve_remove(struct pci_dev *pdev) 2797 { 2798 struct net_device *netdev = pci_get_drvdata(pdev); 2799 struct gve_priv *priv = netdev_priv(netdev); 2800 __be32 __iomem *db_bar = priv->db_bar2; 2801 void __iomem *reg_bar = priv->reg_bar0; 2802 2803 unregister_netdev(netdev); 2804 gve_teardown_priv_resources(priv); 2805 destroy_workqueue(priv->gve_wq); 2806 free_netdev(netdev); 2807 pci_iounmap(pdev, db_bar); 2808 pci_iounmap(pdev, reg_bar); 2809 pci_release_regions(pdev); 2810 pci_disable_device(pdev); 2811 } 2812 2813 static void gve_shutdown(struct pci_dev *pdev) 2814 { 2815 struct net_device *netdev = pci_get_drvdata(pdev); 2816 struct gve_priv *priv = netdev_priv(netdev); 2817 bool was_up = netif_running(priv->dev); 2818 2819 rtnl_lock(); 2820 netdev_lock(netdev); 2821 if (was_up && gve_close(priv->dev)) { 2822 /* If the dev was up, attempt to close, if close fails, reset */ 2823 gve_reset_and_teardown(priv, was_up); 2824 } else { 2825 /* If the dev wasn't up or close worked, finish tearing down */ 2826 gve_teardown_priv_resources(priv); 2827 } 2828 netdev_unlock(netdev); 2829 rtnl_unlock(); 2830 } 2831 2832 #ifdef CONFIG_PM 2833 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2834 { 2835 struct net_device 
*netdev = pci_get_drvdata(pdev); 2836 struct gve_priv *priv = netdev_priv(netdev); 2837 bool was_up = netif_running(priv->dev); 2838 2839 priv->suspend_cnt++; 2840 rtnl_lock(); 2841 netdev_lock(netdev); 2842 if (was_up && gve_close(priv->dev)) { 2843 /* If the dev was up, attempt to close, if close fails, reset */ 2844 gve_reset_and_teardown(priv, was_up); 2845 } else { 2846 /* If the dev wasn't up or close worked, finish tearing down */ 2847 gve_teardown_priv_resources(priv); 2848 } 2849 priv->up_before_suspend = was_up; 2850 netdev_unlock(netdev); 2851 rtnl_unlock(); 2852 return 0; 2853 } 2854 2855 static int gve_resume(struct pci_dev *pdev) 2856 { 2857 struct net_device *netdev = pci_get_drvdata(pdev); 2858 struct gve_priv *priv = netdev_priv(netdev); 2859 int err; 2860 2861 priv->resume_cnt++; 2862 rtnl_lock(); 2863 netdev_lock(netdev); 2864 err = gve_reset_recovery(priv, priv->up_before_suspend); 2865 netdev_unlock(netdev); 2866 rtnl_unlock(); 2867 return err; 2868 } 2869 #endif /* CONFIG_PM */ 2870 2871 static const struct pci_device_id gve_id_table[] = { 2872 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2873 { } 2874 }; 2875 2876 static struct pci_driver gve_driver = { 2877 .name = gve_driver_name, 2878 .id_table = gve_id_table, 2879 .probe = gve_probe, 2880 .remove = gve_remove, 2881 .shutdown = gve_shutdown, 2882 #ifdef CONFIG_PM 2883 .suspend = gve_suspend, 2884 .resume = gve_resume, 2885 #endif 2886 }; 2887 2888 module_pci_driver(gve_driver); 2889 2890 MODULE_DEVICE_TABLE(pci, gve_id_table); 2891 MODULE_AUTHOR("Google, Inc."); 2892 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2893 MODULE_LICENSE("Dual MIT/GPL"); 2894 MODULE_VERSION(GVE_VERSION); 2895