1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2024 Google LLC 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/irq.h> 13 #include <linux/module.h> 14 #include <linux/pci.h> 15 #include <linux/sched.h> 16 #include <linux/timer.h> 17 #include <linux/workqueue.h> 18 #include <linux/utsname.h> 19 #include <linux/version.h> 20 #include <net/netdev_queues.h> 21 #include <net/sch_generic.h> 22 #include <net/xdp_sock_drv.h> 23 #include "gve.h" 24 #include "gve_dqo.h" 25 #include "gve_adminq.h" 26 #include "gve_register.h" 27 #include "gve_utils.h" 28 29 #define GVE_DEFAULT_RX_COPYBREAK (256) 30 31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 32 #define GVE_VERSION "1.0.0" 33 #define GVE_VERSION_PREFIX "GVE-" 34 35 // Minimum amount of time between queue kicks in msec (10 seconds) 36 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 37 38 char gve_driver_name[] = "gve"; 39 const char gve_version_str[] = GVE_VERSION; 40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 41 42 static int gve_verify_driver_compatibility(struct gve_priv *priv) 43 { 44 int err; 45 struct gve_driver_info *driver_info; 46 dma_addr_t driver_info_bus; 47 48 driver_info = dma_alloc_coherent(&priv->pdev->dev, 49 sizeof(struct gve_driver_info), 50 &driver_info_bus, GFP_KERNEL); 51 if (!driver_info) 52 return -ENOMEM; 53 54 *driver_info = (struct gve_driver_info) { 55 .os_type = 1, /* Linux */ 56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 59 .driver_capability_flags = { 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 64 }, 65 }; 66 strscpy(driver_info->os_version_str1, utsname()->release, 67 sizeof(driver_info->os_version_str1)); 68 strscpy(driver_info->os_version_str2, utsname()->version, 69 sizeof(driver_info->os_version_str2)); 70 71 err = gve_adminq_verify_driver_compatibility(priv, 72 sizeof(struct gve_driver_info), 73 driver_info_bus); 74 75 /* It's ok if the device doesn't support this */ 76 if (err == -EOPNOTSUPP) 77 err = 0; 78 79 dma_free_coherent(&priv->pdev->dev, 80 sizeof(struct gve_driver_info), 81 driver_info, driver_info_bus); 82 return err; 83 } 84 85 static netdev_features_t gve_features_check(struct sk_buff *skb, 86 struct net_device *dev, 87 netdev_features_t features) 88 { 89 struct gve_priv *priv = netdev_priv(dev); 90 91 if (!gve_is_gqi(priv)) 92 return gve_features_check_dqo(skb, dev, features); 93 94 return features; 95 } 96 97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 98 { 99 struct gve_priv *priv = netdev_priv(dev); 100 101 if (gve_is_gqi(priv)) 102 return gve_tx(skb, dev); 103 else 104 return gve_tx_dqo(skb, dev); 105 } 106 107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 108 { 109 struct gve_priv *priv = netdev_priv(dev); 110 unsigned int start; 111 u64 packets, bytes; 112 int num_tx_queues; 113 int ring; 114 115 num_tx_queues = gve_num_tx_queues(priv); 116 if (priv->rx) { 117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 118 do { 119 start = 120 u64_stats_fetch_begin(&priv->rx[ring].statss); 121 packets = priv->rx[ring].rpackets; 122 bytes = priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_counter_array(struct gve_priv *priv) 188 { 189 priv->counter_array = 190 dma_alloc_coherent(&priv->pdev->dev, 191 priv->num_event_counters * 192 sizeof(*priv->counter_array), 193 &priv->counter_array_bus, GFP_KERNEL); 194 if (!priv->counter_array) 195 return -ENOMEM; 196 197 return 0; 198 } 199 200 static void gve_free_counter_array(struct gve_priv *priv) 201 { 202 if (!priv->counter_array) 203 return; 204 205 dma_free_coherent(&priv->pdev->dev, 206 priv->num_event_counters * 207 sizeof(*priv->counter_array), 208 priv->counter_array, priv->counter_array_bus); 209 priv->counter_array = NULL; 210 } 211 212 /* NIC requests to report stats */ 213 static void gve_stats_report_task(struct work_struct *work) 214 { 215 struct gve_priv *priv = container_of(work, struct gve_priv, 216 stats_report_task); 217 if (gve_get_do_report_stats(priv)) { 218 gve_handle_report_stats(priv); 219 gve_clear_do_report_stats(priv); 220 } 221 } 222 223 static void gve_stats_report_schedule(struct gve_priv *priv) 224 { 225 if (!gve_get_probe_in_progress(priv) && 226 !gve_get_reset_in_progress(priv)) { 227 gve_set_do_report_stats(priv); 228 queue_work(priv->gve_wq, &priv->stats_report_task); 229 } 230 } 231 232 static void gve_stats_report_timer(struct timer_list *t) 233 { 234 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 235 236 mod_timer(&priv->stats_report_timer, 237 round_jiffies(jiffies + 238 msecs_to_jiffies(priv->stats_report_timer_period))); 239 gve_stats_report_schedule(priv); 240 } 241 242 static int gve_alloc_stats_report(struct gve_priv *priv) 243 { 244 int tx_stats_num, rx_stats_num; 245 246 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 247 gve_num_tx_queues(priv); 248 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 249 priv->rx_cfg.num_queues; 250 priv->stats_report_len = struct_size(priv->stats_report, stats, 251 size_add(tx_stats_num, rx_stats_num)); 252 priv->stats_report = 253 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 254 &priv->stats_report_bus, GFP_KERNEL); 255 if (!priv->stats_report) 256 return -ENOMEM; 257 /* Set up timer for the report-stats task */ 258 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 259 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 260 return 0; 261 } 262 263 static void gve_free_stats_report(struct gve_priv *priv) 264 { 265 if (!priv->stats_report) 266 return; 267 268 del_timer_sync(&priv->stats_report_timer); 269 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 270 priv->stats_report, priv->stats_report_bus); 271 priv->stats_report = NULL; 272 } 273 274 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 275 { 276 struct gve_priv *priv = arg; 277 278 queue_work(priv->gve_wq, &priv->service_task); 279 return IRQ_HANDLED; 280 } 281 282 static irqreturn_t gve_intr(int irq, void *arg) 283 { 284 struct gve_notify_block *block = arg; 285 struct gve_priv *priv = block->priv; 286 287 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 288 napi_schedule_irqoff(&block->napi); 289 return IRQ_HANDLED; 290 } 291 292 static irqreturn_t gve_intr_dqo(int irq, void *arg) 293 { 294 struct gve_notify_block *block = arg; 295 296 /* Interrupts are automatically masked */ 297 napi_schedule_irqoff(&block->napi); 298 return IRQ_HANDLED; 299 } 300 301 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 302 { 303 int cpu_curr = smp_processor_id(); 304 const struct cpumask *aff_mask; 305 306 aff_mask = irq_get_effective_affinity_mask(irq); 307 if (unlikely(!aff_mask)) 308 return 1; 309 310 return cpumask_test_cpu(cpu_curr, aff_mask); 311 } 312 313 int gve_napi_poll(struct napi_struct *napi, int budget) 314 { 315 struct gve_notify_block *block; 316 __be32 __iomem *irq_doorbell; 317 bool reschedule = false; 318 struct gve_priv *priv; 319 int work_done = 0; 320 321 block = container_of(napi, struct gve_notify_block, napi); 322 priv = block->priv; 323 324 if (block->tx) { 325 if (block->tx->q_num < priv->tx_cfg.num_queues) 326 reschedule |= gve_tx_poll(block, budget); 327 else if (budget) 328 reschedule |= gve_xdp_poll(block, budget); 329 } 330 331 if (!budget) 332 return 0; 333 334 if (block->rx) { 335 work_done = gve_rx_poll(block, budget); 336 337 /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of 338 * TX and RX work done. 339 */ 340 if (priv->xdp_prog) 341 work_done = max_t(int, work_done, 342 gve_xsk_tx_poll(block, budget)); 343 344 reschedule |= work_done == budget; 345 } 346 347 if (reschedule) 348 return budget; 349 350 /* Complete processing - don't unmask irq if busy polling is enabled */ 351 if (likely(napi_complete_done(napi, work_done))) { 352 irq_doorbell = gve_irq_doorbell(priv, block); 353 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 354 355 /* Ensure IRQ ACK is visible before we check pending work. 356 * If queue had issued updates, it would be truly visible. 357 */ 358 mb(); 359 360 if (block->tx) 361 reschedule |= gve_tx_clean_pending(priv, block->tx); 362 if (block->rx) 363 reschedule |= gve_rx_work_pending(block->rx); 364 365 if (reschedule && napi_schedule(napi)) 366 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 367 } 368 return work_done; 369 } 370 371 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 372 { 373 struct gve_notify_block *block = 374 container_of(napi, struct gve_notify_block, napi); 375 struct gve_priv *priv = block->priv; 376 bool reschedule = false; 377 int work_done = 0; 378 379 if (block->tx) 380 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 381 382 if (!budget) 383 return 0; 384 385 if (block->rx) { 386 work_done = gve_rx_poll_dqo(block, budget); 387 reschedule |= work_done == budget; 388 } 389 390 if (reschedule) { 391 /* Reschedule by returning budget only if already on the correct 392 * cpu. 393 */ 394 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 395 return budget; 396 397 /* If not on the cpu with which this queue's irq has affinity 398 * with, we avoid rescheduling napi and arm the irq instead so 399 * that napi gets rescheduled back eventually onto the right 400 * cpu. 401 */ 402 if (work_done == budget) 403 work_done--; 404 } 405 406 if (likely(napi_complete_done(napi, work_done))) { 407 /* Enable interrupts again. 408 * 409 * We don't need to repoll afterwards because HW supports the 410 * PCI MSI-X PBA feature. 411 * 412 * Another interrupt would be triggered if a new event came in 413 * since the last one. 414 */ 415 gve_write_irq_doorbell_dqo(priv, block, 416 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 417 } 418 419 return work_done; 420 } 421 422 static int gve_alloc_notify_blocks(struct gve_priv *priv) 423 { 424 int num_vecs_requested = priv->num_ntfy_blks + 1; 425 unsigned int active_cpus; 426 int vecs_enabled; 427 int i, j; 428 int err; 429 430 priv->msix_vectors = kvcalloc(num_vecs_requested, 431 sizeof(*priv->msix_vectors), GFP_KERNEL); 432 if (!priv->msix_vectors) 433 return -ENOMEM; 434 for (i = 0; i < num_vecs_requested; i++) 435 priv->msix_vectors[i].entry = i; 436 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 437 GVE_MIN_MSIX, num_vecs_requested); 438 if (vecs_enabled < 0) { 439 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 440 GVE_MIN_MSIX, vecs_enabled); 441 err = vecs_enabled; 442 goto abort_with_msix_vectors; 443 } 444 if (vecs_enabled != num_vecs_requested) { 445 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 446 int vecs_per_type = new_num_ntfy_blks / 2; 447 int vecs_left = new_num_ntfy_blks % 2; 448 449 priv->num_ntfy_blks = new_num_ntfy_blks; 450 priv->mgmt_msix_idx = priv->num_ntfy_blks; 451 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 452 vecs_per_type); 453 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 454 vecs_per_type + vecs_left); 455 dev_err(&priv->pdev->dev, 456 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 457 vecs_enabled, priv->tx_cfg.max_queues, 458 priv->rx_cfg.max_queues); 459 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 460 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 461 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 462 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 463 } 464 /* Half the notification blocks go to TX and half to RX */ 465 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 466 467 /* Setup Management Vector - the last vector */ 468 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 469 pci_name(priv->pdev)); 470 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 471 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 472 if (err) { 473 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 474 goto abort_with_msix_enabled; 475 } 476 priv->irq_db_indices = 477 dma_alloc_coherent(&priv->pdev->dev, 478 priv->num_ntfy_blks * 479 sizeof(*priv->irq_db_indices), 480 &priv->irq_db_indices_bus, GFP_KERNEL); 481 if (!priv->irq_db_indices) { 482 err = -ENOMEM; 483 goto abort_with_mgmt_vector; 484 } 485 486 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 487 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 488 if (!priv->ntfy_blocks) { 489 err = -ENOMEM; 490 goto abort_with_irq_db_indices; 491 } 492 493 /* Setup the other blocks - the first n-1 vectors */ 494 for (i = 0; i < priv->num_ntfy_blks; i++) { 495 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 496 int msix_idx = i; 497 498 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 499 i, pci_name(priv->pdev)); 500 block->priv = priv; 501 err = request_irq(priv->msix_vectors[msix_idx].vector, 502 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 503 0, block->name, block); 504 if (err) { 505 dev_err(&priv->pdev->dev, 506 "Failed to receive msix vector %d\n", i); 507 goto abort_with_some_ntfy_blocks; 508 } 509 block->irq = priv->msix_vectors[msix_idx].vector; 510 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 511 get_cpu_mask(i % active_cpus)); 512 block->irq_db_index = &priv->irq_db_indices[i].index; 513 } 514 return 0; 515 abort_with_some_ntfy_blocks: 516 for (j = 0; j < i; j++) { 517 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 518 int msix_idx = j; 519 520 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 521 NULL); 522 free_irq(priv->msix_vectors[msix_idx].vector, block); 523 block->irq = 0; 524 } 525 kvfree(priv->ntfy_blocks); 526 priv->ntfy_blocks = NULL; 527 abort_with_irq_db_indices: 528 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 529 sizeof(*priv->irq_db_indices), 530 priv->irq_db_indices, priv->irq_db_indices_bus); 531 priv->irq_db_indices = NULL; 532 abort_with_mgmt_vector: 533 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 534 abort_with_msix_enabled: 535 pci_disable_msix(priv->pdev); 536 abort_with_msix_vectors: 537 kvfree(priv->msix_vectors); 538 priv->msix_vectors = NULL; 539 return err; 540 } 541 542 static void gve_free_notify_blocks(struct gve_priv *priv) 543 { 544 int i; 545 546 if (!priv->msix_vectors) 547 return; 548 549 /* Free the irqs */ 550 for (i = 0; i < priv->num_ntfy_blks; i++) { 551 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 552 int msix_idx = i; 553 554 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 555 NULL); 556 free_irq(priv->msix_vectors[msix_idx].vector, block); 557 block->irq = 0; 558 } 559 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 560 kvfree(priv->ntfy_blocks); 561 priv->ntfy_blocks = NULL; 562 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 563 sizeof(*priv->irq_db_indices), 564 priv->irq_db_indices, priv->irq_db_indices_bus); 565 priv->irq_db_indices = NULL; 566 pci_disable_msix(priv->pdev); 567 kvfree(priv->msix_vectors); 568 priv->msix_vectors = NULL; 569 } 570 571 static int gve_setup_device_resources(struct gve_priv *priv) 572 { 573 int err; 574 575 err = gve_alloc_flow_rule_caches(priv); 576 if (err) 577 return err; 578 err = gve_alloc_counter_array(priv); 579 if (err) 580 goto abort_with_flow_rule_caches; 581 err = gve_alloc_notify_blocks(priv); 582 if (err) 583 goto abort_with_counter; 584 err = gve_alloc_stats_report(priv); 585 if (err) 586 goto abort_with_ntfy_blocks; 587 err = gve_adminq_configure_device_resources(priv, 588 priv->counter_array_bus, 589 priv->num_event_counters, 590 priv->irq_db_indices_bus, 591 priv->num_ntfy_blks); 592 if (unlikely(err)) { 593 dev_err(&priv->pdev->dev, 594 "could not setup device_resources: err=%d\n", err); 595 err = -ENXIO; 596 goto abort_with_stats_report; 597 } 598 599 if (!gve_is_gqi(priv)) { 600 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 601 GFP_KERNEL); 602 if (!priv->ptype_lut_dqo) { 603 err = -ENOMEM; 604 goto abort_with_stats_report; 605 } 606 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 607 if (err) { 608 dev_err(&priv->pdev->dev, 609 "Failed to get ptype map: err=%d\n", err); 610 goto abort_with_ptype_lut; 611 } 612 } 613 614 err = gve_adminq_report_stats(priv, priv->stats_report_len, 615 priv->stats_report_bus, 616 GVE_STATS_REPORT_TIMER_PERIOD); 617 if (err) 618 dev_err(&priv->pdev->dev, 619 "Failed to report stats: err=%d\n", err); 620 gve_set_device_resources_ok(priv); 621 return 0; 622 623 abort_with_ptype_lut: 624 kvfree(priv->ptype_lut_dqo); 625 priv->ptype_lut_dqo = NULL; 626 abort_with_stats_report: 627 gve_free_stats_report(priv); 628 abort_with_ntfy_blocks: 629 gve_free_notify_blocks(priv); 630 abort_with_counter: 631 gve_free_counter_array(priv); 632 abort_with_flow_rule_caches: 633 gve_free_flow_rule_caches(priv); 634 635 return err; 636 } 637 638 static void gve_trigger_reset(struct gve_priv *priv); 639 640 static void gve_teardown_device_resources(struct gve_priv *priv) 641 { 642 int err; 643 644 /* Tell device its resources are being freed */ 645 if (gve_get_device_resources_ok(priv)) { 646 err = gve_flow_rules_reset(priv); 647 if (err) { 648 dev_err(&priv->pdev->dev, 649 "Failed to reset flow rules: err=%d\n", err); 650 gve_trigger_reset(priv); 651 } 652 /* detach the stats report */ 653 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 654 if (err) { 655 dev_err(&priv->pdev->dev, 656 "Failed to detach stats report: err=%d\n", err); 657 gve_trigger_reset(priv); 658 } 659 err = gve_adminq_deconfigure_device_resources(priv); 660 if (err) { 661 dev_err(&priv->pdev->dev, 662 "Could not deconfigure device resources: err=%d\n", 663 err); 664 gve_trigger_reset(priv); 665 } 666 } 667 668 kvfree(priv->ptype_lut_dqo); 669 priv->ptype_lut_dqo = NULL; 670 671 gve_free_flow_rule_caches(priv); 672 gve_free_counter_array(priv); 673 gve_free_notify_blocks(priv); 674 gve_free_stats_report(priv); 675 gve_clear_device_resources_ok(priv); 676 } 677 678 static int gve_unregister_qpl(struct gve_priv *priv, 679 struct gve_queue_page_list *qpl) 680 { 681 int err; 682 683 if (!qpl) 684 return 0; 685 686 err = gve_adminq_unregister_page_list(priv, qpl->id); 687 if (err) { 688 netif_err(priv, drv, priv->dev, 689 "Failed to unregister queue page list %d\n", 690 qpl->id); 691 return err; 692 } 693 694 priv->num_registered_pages -= qpl->num_entries; 695 return 0; 696 } 697 698 static int gve_register_qpl(struct gve_priv *priv, 699 struct gve_queue_page_list *qpl) 700 { 701 int pages; 702 int err; 703 704 if (!qpl) 705 return 0; 706 707 pages = qpl->num_entries; 708 709 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 710 netif_err(priv, drv, priv->dev, 711 "Reached max number of registered pages %llu > %llu\n", 712 pages + priv->num_registered_pages, 713 priv->max_registered_pages); 714 return -EINVAL; 715 } 716 717 err = gve_adminq_register_page_list(priv, qpl); 718 if (err) { 719 netif_err(priv, drv, priv->dev, 720 "failed to register queue page list %d\n", 721 qpl->id); 722 return err; 723 } 724 725 priv->num_registered_pages += pages; 726 return 0; 727 } 728 729 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 730 { 731 struct gve_tx_ring *tx = &priv->tx[idx]; 732 733 if (gve_is_gqi(priv)) 734 return tx->tx_fifo.qpl; 735 else 736 return tx->dqo.qpl; 737 } 738 739 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 740 { 741 struct gve_rx_ring *rx = &priv->rx[idx]; 742 743 if (gve_is_gqi(priv)) 744 return rx->data.qpl; 745 else 746 return rx->dqo.qpl; 747 } 748 749 static int gve_register_xdp_qpls(struct gve_priv *priv) 750 { 751 int start_id; 752 int err; 753 int i; 754 755 start_id = gve_xdp_tx_start_queue_id(priv); 756 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 757 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 758 /* This failure will trigger a reset - no need to clean up */ 759 if (err) 760 return err; 761 } 762 return 0; 763 } 764 765 static int gve_register_qpls(struct gve_priv *priv) 766 { 767 int num_tx_qpls, num_rx_qpls; 768 int err; 769 int i; 770 771 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 772 gve_is_qpl(priv)); 773 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 774 775 for (i = 0; i < num_tx_qpls; i++) { 776 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 777 if (err) 778 return err; 779 } 780 781 for (i = 0; i < num_rx_qpls; i++) { 782 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 783 if (err) 784 return err; 785 } 786 787 return 0; 788 } 789 790 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 791 { 792 int start_id; 793 int err; 794 int i; 795 796 start_id = gve_xdp_tx_start_queue_id(priv); 797 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 798 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 799 /* This failure will trigger a reset - no need to clean */ 800 if (err) 801 return err; 802 } 803 return 0; 804 } 805 806 static int gve_unregister_qpls(struct gve_priv *priv) 807 { 808 int num_tx_qpls, num_rx_qpls; 809 int err; 810 int i; 811 812 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 813 gve_is_qpl(priv)); 814 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 815 816 for (i = 0; i < num_tx_qpls; i++) { 817 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 818 /* This failure will trigger a reset - no need to clean */ 819 if (err) 820 return err; 821 } 822 823 for (i = 0; i < num_rx_qpls; i++) { 824 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 825 /* This failure will trigger a reset - no need to clean */ 826 if (err) 827 return err; 828 } 829 return 0; 830 } 831 832 static int gve_create_xdp_rings(struct gve_priv *priv) 833 { 834 int err; 835 836 err = gve_adminq_create_tx_queues(priv, 837 gve_xdp_tx_start_queue_id(priv), 838 priv->num_xdp_queues); 839 if (err) { 840 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 841 priv->num_xdp_queues); 842 /* This failure will trigger a reset - no need to clean 843 * up 844 */ 845 return err; 846 } 847 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 848 priv->num_xdp_queues); 849 850 return 0; 851 } 852 853 static int gve_create_rings(struct gve_priv *priv) 854 { 855 int num_tx_queues = gve_num_tx_queues(priv); 856 int err; 857 int i; 858 859 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 860 if (err) { 861 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 862 num_tx_queues); 863 /* This failure will trigger a reset - no need to clean 864 * up 865 */ 866 return err; 867 } 868 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 869 num_tx_queues); 870 871 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 872 if (err) { 873 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 874 priv->rx_cfg.num_queues); 875 /* This failure will trigger a reset - no need to clean 876 * up 877 */ 878 return err; 879 } 880 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 881 priv->rx_cfg.num_queues); 882 883 if (gve_is_gqi(priv)) { 884 /* Rx data ring has been prefilled with packet buffers at queue 885 * allocation time. 886 * 887 * Write the doorbell to provide descriptor slots and packet 888 * buffers to the NIC. 889 */ 890 for (i = 0; i < priv->rx_cfg.num_queues; i++) 891 gve_rx_write_doorbell(priv, &priv->rx[i]); 892 } else { 893 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 894 /* Post buffers and ring doorbell. */ 895 gve_rx_post_buffers_dqo(&priv->rx[i]); 896 } 897 } 898 899 return 0; 900 } 901 902 static void init_xdp_sync_stats(struct gve_priv *priv) 903 { 904 int start_id = gve_xdp_tx_start_queue_id(priv); 905 int i; 906 907 /* Init stats */ 908 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 909 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 910 911 u64_stats_init(&priv->tx[i].statss); 912 priv->tx[i].ntfy_id = ntfy_idx; 913 } 914 } 915 916 static void gve_init_sync_stats(struct gve_priv *priv) 917 { 918 int i; 919 920 for (i = 0; i < priv->tx_cfg.num_queues; i++) 921 u64_stats_init(&priv->tx[i].statss); 922 923 /* Init stats for XDP TX queues */ 924 init_xdp_sync_stats(priv); 925 926 for (i = 0; i < priv->rx_cfg.num_queues; i++) 927 u64_stats_init(&priv->rx[i].statss); 928 } 929 930 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 931 struct gve_tx_alloc_rings_cfg *cfg) 932 { 933 int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0; 934 935 cfg->qcfg = &priv->tx_cfg; 936 cfg->raw_addressing = !gve_is_qpl(priv); 937 cfg->ring_size = priv->tx_desc_cnt; 938 cfg->start_idx = 0; 939 cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues; 940 cfg->tx = priv->tx; 941 } 942 943 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) 944 { 945 int i; 946 947 if (!priv->tx) 948 return; 949 950 for (i = start_id; i < start_id + num_rings; i++) { 951 if (gve_is_gqi(priv)) 952 gve_tx_stop_ring_gqi(priv, i); 953 else 954 gve_tx_stop_ring_dqo(priv, i); 955 } 956 } 957 958 static void gve_tx_start_rings(struct gve_priv *priv, int start_id, 959 int num_rings) 960 { 961 int i; 962 963 for (i = start_id; i < start_id + num_rings; i++) { 964 if (gve_is_gqi(priv)) 965 gve_tx_start_ring_gqi(priv, i); 966 else 967 gve_tx_start_ring_dqo(priv, i); 968 } 969 } 970 971 static int gve_alloc_xdp_rings(struct gve_priv *priv) 972 { 973 struct gve_tx_alloc_rings_cfg cfg = {0}; 974 int err = 0; 975 976 if (!priv->num_xdp_queues) 977 return 0; 978 979 gve_tx_get_curr_alloc_cfg(priv, &cfg); 980 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 981 cfg.num_rings = priv->num_xdp_queues; 982 983 err = gve_tx_alloc_rings_gqi(priv, &cfg); 984 if (err) 985 return err; 986 987 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); 988 init_xdp_sync_stats(priv); 989 990 return 0; 991 } 992 993 static int gve_queues_mem_alloc(struct gve_priv *priv, 994 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 995 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 996 { 997 int err; 998 999 if (gve_is_gqi(priv)) 1000 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 1001 else 1002 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 1003 if (err) 1004 return err; 1005 1006 if (gve_is_gqi(priv)) 1007 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1008 else 1009 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1010 if (err) 1011 goto free_tx; 1012 1013 return 0; 1014 1015 free_tx: 1016 if (gve_is_gqi(priv)) 1017 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1018 else 1019 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1020 return err; 1021 } 1022 1023 static int gve_destroy_xdp_rings(struct gve_priv *priv) 1024 { 1025 int start_id; 1026 int err; 1027 1028 start_id = gve_xdp_tx_start_queue_id(priv); 1029 err = gve_adminq_destroy_tx_queues(priv, 1030 start_id, 1031 priv->num_xdp_queues); 1032 if (err) { 1033 netif_err(priv, drv, priv->dev, 1034 "failed to destroy XDP queues\n"); 1035 /* This failure will trigger a reset - no need to clean up */ 1036 return err; 1037 } 1038 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 1039 1040 return 0; 1041 } 1042 1043 static int gve_destroy_rings(struct gve_priv *priv) 1044 { 1045 int num_tx_queues = gve_num_tx_queues(priv); 1046 int err; 1047 1048 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1049 if (err) { 1050 netif_err(priv, drv, priv->dev, 1051 "failed to destroy tx queues\n"); 1052 /* This failure will trigger a reset - no need to clean up */ 1053 return err; 1054 } 1055 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1056 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1057 if (err) { 1058 netif_err(priv, drv, priv->dev, 1059 "failed to destroy rx queues\n"); 1060 /* This failure will trigger a reset - no need to clean up */ 1061 return err; 1062 } 1063 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1064 return 0; 1065 } 1066 1067 static void gve_free_xdp_rings(struct gve_priv *priv) 1068 { 1069 struct gve_tx_alloc_rings_cfg cfg = {0}; 1070 1071 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1072 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1073 cfg.num_rings = priv->num_xdp_queues; 1074 1075 if (priv->tx) { 1076 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); 1077 gve_tx_free_rings_gqi(priv, &cfg); 1078 } 1079 } 1080 1081 static void gve_queues_mem_free(struct gve_priv *priv, 1082 struct gve_tx_alloc_rings_cfg *tx_cfg, 1083 struct gve_rx_alloc_rings_cfg *rx_cfg) 1084 { 1085 if (gve_is_gqi(priv)) { 1086 gve_tx_free_rings_gqi(priv, tx_cfg); 1087 gve_rx_free_rings_gqi(priv, rx_cfg); 1088 } else { 1089 gve_tx_free_rings_dqo(priv, tx_cfg); 1090 gve_rx_free_rings_dqo(priv, rx_cfg); 1091 } 1092 } 1093 1094 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1095 struct page **page, dma_addr_t *dma, 1096 enum dma_data_direction dir, gfp_t gfp_flags) 1097 { 1098 *page = alloc_page(gfp_flags); 1099 if (!*page) { 1100 priv->page_alloc_fail++; 1101 return -ENOMEM; 1102 } 1103 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1104 if (dma_mapping_error(dev, *dma)) { 1105 priv->dma_mapping_error++; 1106 put_page(*page); 1107 return -ENOMEM; 1108 } 1109 return 0; 1110 } 1111 1112 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1113 u32 id, int pages) 1114 { 1115 struct gve_queue_page_list *qpl; 1116 int err; 1117 int i; 1118 1119 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1120 if (!qpl) 1121 return NULL; 1122 1123 qpl->id = id; 1124 qpl->num_entries = 0; 1125 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1126 if (!qpl->pages) 1127 goto abort; 1128 1129 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1130 if (!qpl->page_buses) 1131 goto abort; 1132 1133 for (i = 0; i < pages; i++) { 1134 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1135 &qpl->page_buses[i], 1136 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1137 if (err) 1138 goto abort; 1139 qpl->num_entries++; 1140 } 1141 1142 return qpl; 1143 1144 abort: 1145 gve_free_queue_page_list(priv, qpl, id); 1146 return NULL; 1147 } 1148 1149 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1150 enum dma_data_direction dir) 1151 { 1152 if (!dma_mapping_error(dev, dma)) 1153 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1154 if (page) 1155 put_page(page); 1156 } 1157 1158 void gve_free_queue_page_list(struct gve_priv *priv, 1159 struct gve_queue_page_list *qpl, 1160 u32 id) 1161 { 1162 int i; 1163 1164 if (!qpl) 1165 return; 1166 if (!qpl->pages) 1167 goto free_qpl; 1168 if (!qpl->page_buses) 1169 goto free_pages; 1170 1171 for (i = 0; i < qpl->num_entries; i++) 1172 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1173 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1174 1175 kvfree(qpl->page_buses); 1176 qpl->page_buses = NULL; 1177 free_pages: 1178 kvfree(qpl->pages); 1179 qpl->pages = NULL; 1180 free_qpl: 1181 kvfree(qpl); 1182 } 1183 1184 /* Use this to schedule a reset when the device is capable of continuing 1185 * to handle other requests in its current state. If it is not, do a reset 1186 * in thread instead. 1187 */ 1188 void gve_schedule_reset(struct gve_priv *priv) 1189 { 1190 gve_set_do_reset(priv); 1191 queue_work(priv->gve_wq, &priv->service_task); 1192 } 1193 1194 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1195 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1196 static void gve_turndown(struct gve_priv *priv); 1197 static void gve_turnup(struct gve_priv *priv); 1198 1199 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1200 { 1201 struct napi_struct *napi; 1202 struct gve_rx_ring *rx; 1203 int err = 0; 1204 int i, j; 1205 u32 tx_qid; 1206 1207 if (!priv->num_xdp_queues) 1208 return 0; 1209 1210 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1211 rx = &priv->rx[i]; 1212 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1213 1214 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1215 napi->napi_id); 1216 if (err) 1217 goto err; 1218 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1219 MEM_TYPE_PAGE_SHARED, NULL); 1220 if (err) 1221 goto err; 1222 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1223 if (rx->xsk_pool) { 1224 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1225 napi->napi_id); 1226 if (err) 1227 goto err; 1228 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1229 MEM_TYPE_XSK_BUFF_POOL, NULL); 1230 if (err) 1231 goto err; 1232 xsk_pool_set_rxq_info(rx->xsk_pool, 1233 &rx->xsk_rxq); 1234 } 1235 } 1236 1237 for (i = 0; i < priv->num_xdp_queues; i++) { 1238 tx_qid = gve_xdp_tx_queue_id(priv, i); 1239 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1240 } 1241 return 0; 1242 1243 err: 1244 for (j = i; j >= 0; j--) { 1245 rx = &priv->rx[j]; 1246 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1247 xdp_rxq_info_unreg(&rx->xdp_rxq); 1248 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1249 xdp_rxq_info_unreg(&rx->xsk_rxq); 1250 } 1251 return err; 1252 } 1253 1254 static void gve_unreg_xdp_info(struct gve_priv *priv) 1255 { 1256 int i, tx_qid; 1257 1258 if (!priv->num_xdp_queues) 1259 return; 1260 1261 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1262 struct gve_rx_ring *rx = &priv->rx[i]; 1263 1264 xdp_rxq_info_unreg(&rx->xdp_rxq); 1265 if (rx->xsk_pool) { 1266 xdp_rxq_info_unreg(&rx->xsk_rxq); 1267 rx->xsk_pool = NULL; 1268 } 1269 } 1270 1271 for (i = 0; i < priv->num_xdp_queues; i++) { 1272 tx_qid = gve_xdp_tx_queue_id(priv, i); 1273 priv->tx[tx_qid].xsk_pool = NULL; 1274 } 1275 } 1276 1277 static void gve_drain_page_cache(struct gve_priv *priv) 1278 { 1279 int i; 1280 1281 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1282 page_frag_cache_drain(&priv->rx[i].page_cache); 1283 } 1284 1285 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1286 struct gve_rx_alloc_rings_cfg *cfg) 1287 { 1288 cfg->qcfg = &priv->rx_cfg; 1289 cfg->qcfg_tx = &priv->tx_cfg; 1290 cfg->raw_addressing = !gve_is_qpl(priv); 1291 cfg->enable_header_split = priv->header_split_enabled; 1292 cfg->ring_size = priv->rx_desc_cnt; 1293 cfg->packet_buffer_size = gve_is_gqi(priv) ? 1294 GVE_DEFAULT_RX_BUFFER_SIZE : 1295 priv->data_buffer_size_dqo; 1296 cfg->rx = priv->rx; 1297 } 1298 1299 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1300 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1301 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1302 { 1303 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1304 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1305 } 1306 1307 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1308 { 1309 if (gve_is_gqi(priv)) 1310 gve_rx_start_ring_gqi(priv, i); 1311 else 1312 gve_rx_start_ring_dqo(priv, i); 1313 } 1314 1315 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1316 { 1317 int i; 1318 1319 for (i = 0; i < num_rings; i++) 1320 gve_rx_start_ring(priv, i); 1321 } 1322 1323 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1324 { 1325 if (gve_is_gqi(priv)) 1326 gve_rx_stop_ring_gqi(priv, i); 1327 else 1328 gve_rx_stop_ring_dqo(priv, i); 1329 } 1330 1331 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1332 { 1333 int i; 1334 1335 if (!priv->rx) 1336 return; 1337 1338 for (i = 0; i < num_rings; i++) 1339 gve_rx_stop_ring(priv, i); 1340 } 1341 1342 static void gve_queues_mem_remove(struct gve_priv *priv) 1343 { 1344 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1345 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1346 1347 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1348 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1349 priv->tx = NULL; 1350 priv->rx = NULL; 1351 } 1352 1353 /* The passed-in queue memory is stored into priv and the queues are made live. 1354 * No memory is allocated. Passed-in memory is freed on errors. 1355 */ 1356 static int gve_queues_start(struct gve_priv *priv, 1357 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1358 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1359 { 1360 struct net_device *dev = priv->dev; 1361 int err; 1362 1363 /* Record new resources into priv */ 1364 priv->tx = tx_alloc_cfg->tx; 1365 priv->rx = rx_alloc_cfg->rx; 1366 1367 /* Record new configs into priv */ 1368 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1369 priv->rx_cfg = *rx_alloc_cfg->qcfg; 1370 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1371 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1372 1373 if (priv->xdp_prog) 1374 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1375 else 1376 priv->num_xdp_queues = 0; 1377 1378 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); 1379 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); 1380 gve_init_sync_stats(priv); 1381 1382 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1383 if (err) 1384 goto stop_and_free_rings; 1385 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1386 if (err) 1387 goto stop_and_free_rings; 1388 1389 err = gve_reg_xdp_info(priv, dev); 1390 if (err) 1391 goto stop_and_free_rings; 1392 1393 err = gve_register_qpls(priv); 1394 if (err) 1395 goto reset; 1396 1397 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1398 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; 1399 1400 err = gve_create_rings(priv); 1401 if (err) 1402 goto reset; 1403 1404 gve_set_device_rings_ok(priv); 1405 1406 if (gve_get_report_stats(priv)) 1407 mod_timer(&priv->stats_report_timer, 1408 round_jiffies(jiffies + 1409 msecs_to_jiffies(priv->stats_report_timer_period))); 1410 1411 gve_turnup(priv); 1412 queue_work(priv->gve_wq, &priv->service_task); 1413 priv->interface_up_cnt++; 1414 return 0; 1415 1416 reset: 1417 if (gve_get_reset_in_progress(priv)) 1418 goto stop_and_free_rings; 1419 gve_reset_and_teardown(priv, true); 1420 /* if this fails there is nothing we can do so just ignore the return */ 1421 gve_reset_recovery(priv, false); 1422 /* return the original error */ 1423 return err; 1424 stop_and_free_rings: 1425 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1426 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1427 gve_queues_mem_remove(priv); 1428 return err; 1429 } 1430 1431 static int gve_open(struct net_device *dev) 1432 { 1433 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1434 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1435 struct gve_priv *priv = netdev_priv(dev); 1436 int err; 1437 1438 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1439 1440 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1441 if (err) 1442 return err; 1443 1444 /* No need to free on error: ownership of resources is lost after 1445 * calling gve_queues_start. 1446 */ 1447 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1448 if (err) 1449 return err; 1450 1451 return 0; 1452 } 1453 1454 static int gve_queues_stop(struct gve_priv *priv) 1455 { 1456 int err; 1457 1458 netif_carrier_off(priv->dev); 1459 if (gve_get_device_rings_ok(priv)) { 1460 gve_turndown(priv); 1461 gve_drain_page_cache(priv); 1462 err = gve_destroy_rings(priv); 1463 if (err) 1464 goto err; 1465 err = gve_unregister_qpls(priv); 1466 if (err) 1467 goto err; 1468 gve_clear_device_rings_ok(priv); 1469 } 1470 del_timer_sync(&priv->stats_report_timer); 1471 1472 gve_unreg_xdp_info(priv); 1473 1474 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1475 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1476 1477 priv->interface_down_cnt++; 1478 return 0; 1479 1480 err: 1481 /* This must have been called from a reset due to the rtnl lock 1482 * so just return at this point. 1483 */ 1484 if (gve_get_reset_in_progress(priv)) 1485 return err; 1486 /* Otherwise reset before returning */ 1487 gve_reset_and_teardown(priv, true); 1488 return gve_reset_recovery(priv, false); 1489 } 1490 1491 static int gve_close(struct net_device *dev) 1492 { 1493 struct gve_priv *priv = netdev_priv(dev); 1494 int err; 1495 1496 err = gve_queues_stop(priv); 1497 if (err) 1498 return err; 1499 1500 gve_queues_mem_remove(priv); 1501 return 0; 1502 } 1503 1504 static int gve_remove_xdp_queues(struct gve_priv *priv) 1505 { 1506 int err; 1507 1508 err = gve_destroy_xdp_rings(priv); 1509 if (err) 1510 return err; 1511 1512 err = gve_unregister_xdp_qpls(priv); 1513 if (err) 1514 return err; 1515 1516 gve_unreg_xdp_info(priv); 1517 gve_free_xdp_rings(priv); 1518 1519 priv->num_xdp_queues = 0; 1520 return 0; 1521 } 1522 1523 static int gve_add_xdp_queues(struct gve_priv *priv) 1524 { 1525 int err; 1526 1527 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1528 1529 err = gve_alloc_xdp_rings(priv); 1530 if (err) 1531 goto err; 1532 1533 err = gve_reg_xdp_info(priv, priv->dev); 1534 if (err) 1535 goto free_xdp_rings; 1536 1537 err = gve_register_xdp_qpls(priv); 1538 if (err) 1539 goto free_xdp_rings; 1540 1541 err = gve_create_xdp_rings(priv); 1542 if (err) 1543 goto free_xdp_rings; 1544 1545 return 0; 1546 1547 free_xdp_rings: 1548 gve_free_xdp_rings(priv); 1549 err: 1550 priv->num_xdp_queues = 0; 1551 return err; 1552 } 1553 1554 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1555 { 1556 if (!gve_get_napi_enabled(priv)) 1557 return; 1558 1559 if (link_status == netif_carrier_ok(priv->dev)) 1560 return; 1561 1562 if (link_status) { 1563 netdev_info(priv->dev, "Device link is up.\n"); 1564 netif_carrier_on(priv->dev); 1565 } else { 1566 netdev_info(priv->dev, "Device link is down.\n"); 1567 netif_carrier_off(priv->dev); 1568 } 1569 } 1570 1571 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1572 struct netlink_ext_ack *extack) 1573 { 1574 struct bpf_prog *old_prog; 1575 int err = 0; 1576 u32 status; 1577 1578 old_prog = READ_ONCE(priv->xdp_prog); 1579 if (!netif_running(priv->dev)) { 1580 WRITE_ONCE(priv->xdp_prog, prog); 1581 if (old_prog) 1582 bpf_prog_put(old_prog); 1583 return 0; 1584 } 1585 1586 gve_turndown(priv); 1587 if (!old_prog && prog) { 1588 // Allocate XDP TX queues if an XDP program is 1589 // being installed 1590 err = gve_add_xdp_queues(priv); 1591 if (err) 1592 goto out; 1593 } else if (old_prog && !prog) { 1594 // Remove XDP TX queues if an XDP program is 1595 // being uninstalled 1596 err = gve_remove_xdp_queues(priv); 1597 if (err) 1598 goto out; 1599 } 1600 WRITE_ONCE(priv->xdp_prog, prog); 1601 if (old_prog) 1602 bpf_prog_put(old_prog); 1603 1604 out: 1605 gve_turnup(priv); 1606 status = ioread32be(&priv->reg_bar0->device_status); 1607 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1608 return err; 1609 } 1610 1611 static int gve_xsk_pool_enable(struct net_device *dev, 1612 struct xsk_buff_pool *pool, 1613 u16 qid) 1614 { 1615 struct gve_priv *priv = netdev_priv(dev); 1616 struct napi_struct *napi; 1617 struct gve_rx_ring *rx; 1618 int tx_qid; 1619 int err; 1620 1621 if (qid >= priv->rx_cfg.num_queues) { 1622 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1623 return -EINVAL; 1624 } 1625 if (xsk_pool_get_rx_frame_size(pool) < 1626 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1627 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1628 return -EINVAL; 1629 } 1630 1631 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1632 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1633 if (err) 1634 return err; 1635 1636 /* If XDP prog is not installed or interface is down, return. */ 1637 if (!priv->xdp_prog || !netif_running(dev)) 1638 return 0; 1639 1640 rx = &priv->rx[qid]; 1641 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1642 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1643 if (err) 1644 goto err; 1645 1646 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1647 MEM_TYPE_XSK_BUFF_POOL, NULL); 1648 if (err) 1649 goto err; 1650 1651 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1652 rx->xsk_pool = pool; 1653 1654 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1655 priv->tx[tx_qid].xsk_pool = pool; 1656 1657 return 0; 1658 err: 1659 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1660 xdp_rxq_info_unreg(&rx->xsk_rxq); 1661 1662 xsk_pool_dma_unmap(pool, 1663 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1664 return err; 1665 } 1666 1667 static int gve_xsk_pool_disable(struct net_device *dev, 1668 u16 qid) 1669 { 1670 struct gve_priv *priv = netdev_priv(dev); 1671 struct napi_struct *napi_rx; 1672 struct napi_struct *napi_tx; 1673 struct xsk_buff_pool *pool; 1674 int tx_qid; 1675 1676 pool = xsk_get_pool_from_qid(dev, qid); 1677 if (!pool) 1678 return -EINVAL; 1679 if (qid >= priv->rx_cfg.num_queues) 1680 return -EINVAL; 1681 1682 /* If XDP prog is not installed or interface is down, unmap DMA and 1683 * return. 1684 */ 1685 if (!priv->xdp_prog || !netif_running(dev)) 1686 goto done; 1687 1688 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1689 napi_disable(napi_rx); /* make sure current rx poll is done */ 1690 1691 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1692 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1693 napi_disable(napi_tx); /* make sure current tx poll is done */ 1694 1695 priv->rx[qid].xsk_pool = NULL; 1696 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1697 priv->tx[tx_qid].xsk_pool = NULL; 1698 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1699 1700 napi_enable(napi_rx); 1701 if (gve_rx_work_pending(&priv->rx[qid])) 1702 napi_schedule(napi_rx); 1703 1704 napi_enable(napi_tx); 1705 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1706 napi_schedule(napi_tx); 1707 1708 done: 1709 xsk_pool_dma_unmap(pool, 1710 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1711 return 0; 1712 } 1713 1714 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1715 { 1716 struct gve_priv *priv = netdev_priv(dev); 1717 struct napi_struct *napi; 1718 1719 if (!gve_get_napi_enabled(priv)) 1720 return -ENETDOWN; 1721 1722 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1723 return -EINVAL; 1724 1725 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1726 if (!napi_if_scheduled_mark_missed(napi)) { 1727 /* Call local_bh_enable to trigger SoftIRQ processing */ 1728 local_bh_disable(); 1729 napi_schedule(napi); 1730 local_bh_enable(); 1731 } 1732 1733 return 0; 1734 } 1735 1736 static int verify_xdp_configuration(struct net_device *dev) 1737 { 1738 struct gve_priv *priv = netdev_priv(dev); 1739 1740 if (dev->features & NETIF_F_LRO) { 1741 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1742 return -EOPNOTSUPP; 1743 } 1744 1745 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1746 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1747 priv->queue_format); 1748 return -EOPNOTSUPP; 1749 } 1750 1751 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { 1752 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1753 dev->mtu); 1754 return -EOPNOTSUPP; 1755 } 1756 1757 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1758 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1759 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1760 priv->rx_cfg.num_queues, 1761 priv->tx_cfg.num_queues, 1762 priv->tx_cfg.max_queues); 1763 return -EINVAL; 1764 } 1765 return 0; 1766 } 1767 1768 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1769 { 1770 struct gve_priv *priv = netdev_priv(dev); 1771 int err; 1772 1773 err = verify_xdp_configuration(dev); 1774 if (err) 1775 return err; 1776 switch (xdp->command) { 1777 case XDP_SETUP_PROG: 1778 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1779 case XDP_SETUP_XSK_POOL: 1780 if (xdp->xsk.pool) 1781 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1782 else 1783 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1784 default: 1785 return -EINVAL; 1786 } 1787 } 1788 1789 int gve_flow_rules_reset(struct gve_priv *priv) 1790 { 1791 if (!priv->max_flow_rules) 1792 return 0; 1793 1794 return gve_adminq_reset_flow_rules(priv); 1795 } 1796 1797 int gve_adjust_config(struct gve_priv *priv, 1798 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1799 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1800 { 1801 int err; 1802 1803 /* Allocate resources for the new confiugration */ 1804 err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); 1805 if (err) { 1806 netif_err(priv, drv, priv->dev, 1807 "Adjust config failed to alloc new queues"); 1808 return err; 1809 } 1810 1811 /* Teardown the device and free existing resources */ 1812 err = gve_close(priv->dev); 1813 if (err) { 1814 netif_err(priv, drv, priv->dev, 1815 "Adjust config failed to close old queues"); 1816 gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); 1817 return err; 1818 } 1819 1820 /* Bring the device back up again with the new resources. */ 1821 err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); 1822 if (err) { 1823 netif_err(priv, drv, priv->dev, 1824 "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); 1825 /* No need to free on error: ownership of resources is lost after 1826 * calling gve_queues_start. 1827 */ 1828 gve_turndown(priv); 1829 return err; 1830 } 1831 1832 return 0; 1833 } 1834 1835 int gve_adjust_queues(struct gve_priv *priv, 1836 struct gve_queue_config new_rx_config, 1837 struct gve_queue_config new_tx_config) 1838 { 1839 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1840 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1841 int num_xdp_queues; 1842 int err; 1843 1844 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1845 1846 /* Relay the new config from ethtool */ 1847 tx_alloc_cfg.qcfg = &new_tx_config; 1848 rx_alloc_cfg.qcfg_tx = &new_tx_config; 1849 rx_alloc_cfg.qcfg = &new_rx_config; 1850 tx_alloc_cfg.num_rings = new_tx_config.num_queues; 1851 1852 /* Add dedicated XDP TX queues if enabled. */ 1853 num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0; 1854 tx_alloc_cfg.num_rings += num_xdp_queues; 1855 1856 if (netif_running(priv->dev)) { 1857 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1858 return err; 1859 } 1860 /* Set the config for the next up. */ 1861 priv->tx_cfg = new_tx_config; 1862 priv->rx_cfg = new_rx_config; 1863 1864 return 0; 1865 } 1866 1867 static void gve_turndown(struct gve_priv *priv) 1868 { 1869 int idx; 1870 1871 if (netif_carrier_ok(priv->dev)) 1872 netif_carrier_off(priv->dev); 1873 1874 if (!gve_get_napi_enabled(priv)) 1875 return; 1876 1877 /* Disable napi to prevent more work from coming in */ 1878 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1879 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1880 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1881 1882 if (!gve_tx_was_added_to_block(priv, idx)) 1883 continue; 1884 1885 if (idx < priv->tx_cfg.num_queues) 1886 netif_queue_set_napi(priv->dev, idx, 1887 NETDEV_QUEUE_TYPE_TX, NULL); 1888 1889 napi_disable(&block->napi); 1890 } 1891 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1892 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1893 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1894 1895 if (!gve_rx_was_added_to_block(priv, idx)) 1896 continue; 1897 1898 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1899 NULL); 1900 napi_disable(&block->napi); 1901 } 1902 1903 /* Stop tx queues */ 1904 netif_tx_disable(priv->dev); 1905 1906 gve_clear_napi_enabled(priv); 1907 gve_clear_report_stats(priv); 1908 1909 /* Make sure that all traffic is finished processing. */ 1910 synchronize_net(); 1911 } 1912 1913 static void gve_turnup(struct gve_priv *priv) 1914 { 1915 int idx; 1916 1917 /* Start the tx queues */ 1918 netif_tx_start_all_queues(priv->dev); 1919 1920 /* Enable napi and unmask interrupts for all queues */ 1921 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1922 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1923 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1924 1925 if (!gve_tx_was_added_to_block(priv, idx)) 1926 continue; 1927 1928 napi_enable(&block->napi); 1929 1930 if (idx < priv->tx_cfg.num_queues) 1931 netif_queue_set_napi(priv->dev, idx, 1932 NETDEV_QUEUE_TYPE_TX, 1933 &block->napi); 1934 1935 if (gve_is_gqi(priv)) { 1936 iowrite32be(0, gve_irq_doorbell(priv, block)); 1937 } else { 1938 gve_set_itr_coalesce_usecs_dqo(priv, block, 1939 priv->tx_coalesce_usecs); 1940 } 1941 1942 /* Any descs written by the NIC before this barrier will be 1943 * handled by the one-off napi schedule below. Whereas any 1944 * descs after the barrier will generate interrupts. 1945 */ 1946 mb(); 1947 napi_schedule(&block->napi); 1948 } 1949 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1950 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1951 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1952 1953 if (!gve_rx_was_added_to_block(priv, idx)) 1954 continue; 1955 1956 napi_enable(&block->napi); 1957 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1958 &block->napi); 1959 1960 if (gve_is_gqi(priv)) { 1961 iowrite32be(0, gve_irq_doorbell(priv, block)); 1962 } else { 1963 gve_set_itr_coalesce_usecs_dqo(priv, block, 1964 priv->rx_coalesce_usecs); 1965 } 1966 1967 /* Any descs written by the NIC before this barrier will be 1968 * handled by the one-off napi schedule below. Whereas any 1969 * descs after the barrier will generate interrupts. 1970 */ 1971 mb(); 1972 napi_schedule(&block->napi); 1973 } 1974 1975 gve_set_napi_enabled(priv); 1976 } 1977 1978 static void gve_turnup_and_check_status(struct gve_priv *priv) 1979 { 1980 u32 status; 1981 1982 gve_turnup(priv); 1983 status = ioread32be(&priv->reg_bar0->device_status); 1984 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1985 } 1986 1987 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1988 { 1989 struct gve_notify_block *block; 1990 struct gve_tx_ring *tx = NULL; 1991 struct gve_priv *priv; 1992 u32 last_nic_done; 1993 u32 current_time; 1994 u32 ntfy_idx; 1995 1996 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1997 priv = netdev_priv(dev); 1998 if (txqueue > priv->tx_cfg.num_queues) 1999 goto reset; 2000 2001 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 2002 if (ntfy_idx >= priv->num_ntfy_blks) 2003 goto reset; 2004 2005 block = &priv->ntfy_blocks[ntfy_idx]; 2006 tx = block->tx; 2007 2008 current_time = jiffies_to_msecs(jiffies); 2009 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2010 goto reset; 2011 2012 /* Check to see if there are missed completions, which will allow us to 2013 * kick the queue. 2014 */ 2015 last_nic_done = gve_tx_load_event_counter(priv, tx); 2016 if (last_nic_done - tx->done) { 2017 netdev_info(dev, "Kicking queue %d", txqueue); 2018 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 2019 napi_schedule(&block->napi); 2020 tx->last_kick_msec = current_time; 2021 goto out; 2022 } // Else reset. 2023 2024 reset: 2025 gve_schedule_reset(priv); 2026 2027 out: 2028 if (tx) 2029 tx->queue_timeout++; 2030 priv->tx_timeo_cnt++; 2031 } 2032 2033 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2034 { 2035 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2036 return GVE_MAX_RX_BUFFER_SIZE; 2037 else 2038 return GVE_DEFAULT_RX_BUFFER_SIZE; 2039 } 2040 2041 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 2042 bool gve_header_split_supported(const struct gve_priv *priv) 2043 { 2044 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 2045 } 2046 2047 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2048 { 2049 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2050 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2051 bool enable_hdr_split; 2052 int err = 0; 2053 2054 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2055 return 0; 2056 2057 if (!gve_header_split_supported(priv)) { 2058 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2059 return -EOPNOTSUPP; 2060 } 2061 2062 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2063 enable_hdr_split = true; 2064 else 2065 enable_hdr_split = false; 2066 2067 if (enable_hdr_split == priv->header_split_enabled) 2068 return 0; 2069 2070 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2071 2072 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2073 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2074 2075 if (netif_running(priv->dev)) 2076 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2077 return err; 2078 } 2079 2080 static int gve_set_features(struct net_device *netdev, 2081 netdev_features_t features) 2082 { 2083 const netdev_features_t orig_features = netdev->features; 2084 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2085 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2086 struct gve_priv *priv = netdev_priv(netdev); 2087 int err; 2088 2089 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2090 2091 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 2092 netdev->features ^= NETIF_F_LRO; 2093 if (netif_running(netdev)) { 2094 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2095 if (err) 2096 goto revert_features; 2097 } 2098 } 2099 if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { 2100 err = gve_flow_rules_reset(priv); 2101 if (err) 2102 goto revert_features; 2103 } 2104 2105 return 0; 2106 2107 revert_features: 2108 netdev->features = orig_features; 2109 return err; 2110 } 2111 2112 static const struct net_device_ops gve_netdev_ops = { 2113 .ndo_start_xmit = gve_start_xmit, 2114 .ndo_features_check = gve_features_check, 2115 .ndo_open = gve_open, 2116 .ndo_stop = gve_close, 2117 .ndo_get_stats64 = gve_get_stats, 2118 .ndo_tx_timeout = gve_tx_timeout, 2119 .ndo_set_features = gve_set_features, 2120 .ndo_bpf = gve_xdp, 2121 .ndo_xdp_xmit = gve_xdp_xmit, 2122 .ndo_xsk_wakeup = gve_xsk_wakeup, 2123 }; 2124 2125 static void gve_handle_status(struct gve_priv *priv, u32 status) 2126 { 2127 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 2128 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 2129 gve_set_do_reset(priv); 2130 } 2131 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 2132 priv->stats_report_trigger_cnt++; 2133 gve_set_do_report_stats(priv); 2134 } 2135 } 2136 2137 static void gve_handle_reset(struct gve_priv *priv) 2138 { 2139 /* A service task will be scheduled at the end of probe to catch any 2140 * resets that need to happen, and we don't want to reset until 2141 * probe is done. 2142 */ 2143 if (gve_get_probe_in_progress(priv)) 2144 return; 2145 2146 if (gve_get_do_reset(priv)) { 2147 rtnl_lock(); 2148 gve_reset(priv, false); 2149 rtnl_unlock(); 2150 } 2151 } 2152 2153 void gve_handle_report_stats(struct gve_priv *priv) 2154 { 2155 struct stats *stats = priv->stats_report->stats; 2156 int idx, stats_idx = 0; 2157 unsigned int start = 0; 2158 u64 tx_bytes; 2159 2160 if (!gve_get_report_stats(priv)) 2161 return; 2162 2163 be64_add_cpu(&priv->stats_report->written_count, 1); 2164 /* tx stats */ 2165 if (priv->tx) { 2166 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 2167 u32 last_completion = 0; 2168 u32 tx_frames = 0; 2169 2170 /* DQO doesn't currently support these metrics. */ 2171 if (gve_is_gqi(priv)) { 2172 last_completion = priv->tx[idx].done; 2173 tx_frames = priv->tx[idx].req; 2174 } 2175 2176 do { 2177 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 2178 tx_bytes = priv->tx[idx].bytes_done; 2179 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 2180 stats[stats_idx++] = (struct stats) { 2181 .stat_name = cpu_to_be32(TX_WAKE_CNT), 2182 .value = cpu_to_be64(priv->tx[idx].wake_queue), 2183 .queue_id = cpu_to_be32(idx), 2184 }; 2185 stats[stats_idx++] = (struct stats) { 2186 .stat_name = cpu_to_be32(TX_STOP_CNT), 2187 .value = cpu_to_be64(priv->tx[idx].stop_queue), 2188 .queue_id = cpu_to_be32(idx), 2189 }; 2190 stats[stats_idx++] = (struct stats) { 2191 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 2192 .value = cpu_to_be64(tx_frames), 2193 .queue_id = cpu_to_be32(idx), 2194 }; 2195 stats[stats_idx++] = (struct stats) { 2196 .stat_name = cpu_to_be32(TX_BYTES_SENT), 2197 .value = cpu_to_be64(tx_bytes), 2198 .queue_id = cpu_to_be32(idx), 2199 }; 2200 stats[stats_idx++] = (struct stats) { 2201 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 2202 .value = cpu_to_be64(last_completion), 2203 .queue_id = cpu_to_be32(idx), 2204 }; 2205 stats[stats_idx++] = (struct stats) { 2206 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 2207 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2208 .queue_id = cpu_to_be32(idx), 2209 }; 2210 } 2211 } 2212 /* rx stats */ 2213 if (priv->rx) { 2214 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2215 stats[stats_idx++] = (struct stats) { 2216 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2217 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2218 .queue_id = cpu_to_be32(idx), 2219 }; 2220 stats[stats_idx++] = (struct stats) { 2221 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2222 .value = cpu_to_be64(priv->rx[0].fill_cnt), 2223 .queue_id = cpu_to_be32(idx), 2224 }; 2225 } 2226 } 2227 } 2228 2229 /* Handle NIC status register changes, reset requests and report stats */ 2230 static void gve_service_task(struct work_struct *work) 2231 { 2232 struct gve_priv *priv = container_of(work, struct gve_priv, 2233 service_task); 2234 u32 status = ioread32be(&priv->reg_bar0->device_status); 2235 2236 gve_handle_status(priv, status); 2237 2238 gve_handle_reset(priv); 2239 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2240 } 2241 2242 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2243 { 2244 xdp_features_t xdp_features; 2245 2246 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2247 xdp_features = NETDEV_XDP_ACT_BASIC; 2248 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2249 xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2250 xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2251 } else { 2252 xdp_features = 0; 2253 } 2254 2255 xdp_set_features_flag(priv->dev, xdp_features); 2256 } 2257 2258 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2259 { 2260 int num_ntfy; 2261 int err; 2262 2263 /* Set up the adminq */ 2264 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2265 if (err) { 2266 dev_err(&priv->pdev->dev, 2267 "Failed to alloc admin queue: err=%d\n", err); 2268 return err; 2269 } 2270 2271 err = gve_verify_driver_compatibility(priv); 2272 if (err) { 2273 dev_err(&priv->pdev->dev, 2274 "Could not verify driver compatibility: err=%d\n", err); 2275 goto err; 2276 } 2277 2278 priv->num_registered_pages = 0; 2279 2280 if (skip_describe_device) 2281 goto setup_device; 2282 2283 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2284 /* Get the initial information we need from the device */ 2285 err = gve_adminq_describe_device(priv); 2286 if (err) { 2287 dev_err(&priv->pdev->dev, 2288 "Could not get device information: err=%d\n", err); 2289 goto err; 2290 } 2291 priv->dev->mtu = priv->dev->max_mtu; 2292 num_ntfy = pci_msix_vec_count(priv->pdev); 2293 if (num_ntfy <= 0) { 2294 dev_err(&priv->pdev->dev, 2295 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2296 err = num_ntfy; 2297 goto err; 2298 } else if (num_ntfy < GVE_MIN_MSIX) { 2299 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2300 GVE_MIN_MSIX, num_ntfy); 2301 err = -EINVAL; 2302 goto err; 2303 } 2304 2305 /* Big TCP is only supported on DQ*/ 2306 if (!gve_is_gqi(priv)) 2307 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2308 2309 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2310 /* gvnic has one Notification Block per MSI-x vector, except for the 2311 * management vector 2312 */ 2313 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2314 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2315 2316 priv->tx_cfg.max_queues = 2317 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2318 priv->rx_cfg.max_queues = 2319 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2320 2321 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2322 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2323 if (priv->default_num_queues > 0) { 2324 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2325 priv->tx_cfg.num_queues); 2326 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2327 priv->rx_cfg.num_queues); 2328 } 2329 2330 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2331 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2332 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2333 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2334 2335 if (!gve_is_gqi(priv)) { 2336 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2337 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2338 } 2339 2340 setup_device: 2341 gve_set_netdev_xdp_features(priv); 2342 err = gve_setup_device_resources(priv); 2343 if (!err) 2344 return 0; 2345 err: 2346 gve_adminq_free(&priv->pdev->dev, priv); 2347 return err; 2348 } 2349 2350 static void gve_teardown_priv_resources(struct gve_priv *priv) 2351 { 2352 gve_teardown_device_resources(priv); 2353 gve_adminq_free(&priv->pdev->dev, priv); 2354 } 2355 2356 static void gve_trigger_reset(struct gve_priv *priv) 2357 { 2358 /* Reset the device by releasing the AQ */ 2359 gve_adminq_release(priv); 2360 } 2361 2362 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2363 { 2364 gve_trigger_reset(priv); 2365 /* With the reset having already happened, close cannot fail */ 2366 if (was_up) 2367 gve_close(priv->dev); 2368 gve_teardown_priv_resources(priv); 2369 } 2370 2371 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2372 { 2373 int err; 2374 2375 err = gve_init_priv(priv, true); 2376 if (err) 2377 goto err; 2378 if (was_up) { 2379 err = gve_open(priv->dev); 2380 if (err) 2381 goto err; 2382 } 2383 return 0; 2384 err: 2385 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2386 gve_turndown(priv); 2387 return err; 2388 } 2389 2390 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2391 { 2392 bool was_up = netif_running(priv->dev); 2393 int err; 2394 2395 dev_info(&priv->pdev->dev, "Performing reset\n"); 2396 gve_clear_do_reset(priv); 2397 gve_set_reset_in_progress(priv); 2398 /* If we aren't attempting to teardown normally, just go turndown and 2399 * reset right away. 2400 */ 2401 if (!attempt_teardown) { 2402 gve_turndown(priv); 2403 gve_reset_and_teardown(priv, was_up); 2404 } else { 2405 /* Otherwise attempt to close normally */ 2406 if (was_up) { 2407 err = gve_close(priv->dev); 2408 /* If that fails reset as we did above */ 2409 if (err) 2410 gve_reset_and_teardown(priv, was_up); 2411 } 2412 /* Clean up any remaining resources */ 2413 gve_teardown_priv_resources(priv); 2414 } 2415 2416 /* Set it all back up */ 2417 err = gve_reset_recovery(priv, was_up); 2418 gve_clear_reset_in_progress(priv); 2419 priv->reset_cnt++; 2420 priv->interface_up_cnt = 0; 2421 priv->interface_down_cnt = 0; 2422 priv->stats_report_trigger_cnt = 0; 2423 return err; 2424 } 2425 2426 static void gve_write_version(u8 __iomem *driver_version_register) 2427 { 2428 const char *c = gve_version_prefix; 2429 2430 while (*c) { 2431 writeb(*c, driver_version_register); 2432 c++; 2433 } 2434 2435 c = gve_version_str; 2436 while (*c) { 2437 writeb(*c, driver_version_register); 2438 c++; 2439 } 2440 writeb('\n', driver_version_register); 2441 } 2442 2443 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2444 { 2445 struct gve_priv *priv = netdev_priv(dev); 2446 struct gve_rx_ring *gve_per_q_mem; 2447 int err; 2448 2449 if (!priv->rx) 2450 return -EAGAIN; 2451 2452 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2453 if (!gve_is_gqi(priv) && idx == 0) 2454 return -ERANGE; 2455 2456 /* Single-queue destruction requires quiescence on all queues */ 2457 gve_turndown(priv); 2458 2459 /* This failure will trigger a reset - no need to clean up */ 2460 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2461 if (err) 2462 return err; 2463 2464 if (gve_is_qpl(priv)) { 2465 /* This failure will trigger a reset - no need to clean up */ 2466 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2467 if (err) 2468 return err; 2469 } 2470 2471 gve_rx_stop_ring(priv, idx); 2472 2473 /* Turn the unstopped queues back up */ 2474 gve_turnup_and_check_status(priv); 2475 2476 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2477 *gve_per_q_mem = priv->rx[idx]; 2478 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2479 return 0; 2480 } 2481 2482 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2483 { 2484 struct gve_priv *priv = netdev_priv(dev); 2485 struct gve_rx_alloc_rings_cfg cfg = {0}; 2486 struct gve_rx_ring *gve_per_q_mem; 2487 2488 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2489 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2490 2491 if (gve_is_gqi(priv)) 2492 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2493 else 2494 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2495 } 2496 2497 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2498 int idx) 2499 { 2500 struct gve_priv *priv = netdev_priv(dev); 2501 struct gve_rx_alloc_rings_cfg cfg = {0}; 2502 struct gve_rx_ring *gve_per_q_mem; 2503 int err; 2504 2505 if (!priv->rx) 2506 return -EAGAIN; 2507 2508 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2509 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2510 2511 if (gve_is_gqi(priv)) 2512 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2513 else 2514 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2515 2516 return err; 2517 } 2518 2519 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2520 { 2521 struct gve_priv *priv = netdev_priv(dev); 2522 struct gve_rx_ring *gve_per_q_mem; 2523 int err; 2524 2525 if (!priv->rx) 2526 return -EAGAIN; 2527 2528 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2529 priv->rx[idx] = *gve_per_q_mem; 2530 2531 /* Single-queue creation requires quiescence on all queues */ 2532 gve_turndown(priv); 2533 2534 gve_rx_start_ring(priv, idx); 2535 2536 if (gve_is_qpl(priv)) { 2537 /* This failure will trigger a reset - no need to clean up */ 2538 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2539 if (err) 2540 goto abort; 2541 } 2542 2543 /* This failure will trigger a reset - no need to clean up */ 2544 err = gve_adminq_create_single_rx_queue(priv, idx); 2545 if (err) 2546 goto abort; 2547 2548 if (gve_is_gqi(priv)) 2549 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2550 else 2551 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2552 2553 /* Turn the unstopped queues back up */ 2554 gve_turnup_and_check_status(priv); 2555 return 0; 2556 2557 abort: 2558 gve_rx_stop_ring(priv, idx); 2559 2560 /* All failures in this func result in a reset, by clearing the struct 2561 * at idx, we prevent a double free when that reset runs. The reset, 2562 * which needs the rtnl lock, will not run till this func returns and 2563 * its caller gives up the lock. 2564 */ 2565 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2566 return err; 2567 } 2568 2569 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2570 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2571 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2572 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2573 .ndo_queue_start = gve_rx_queue_start, 2574 .ndo_queue_stop = gve_rx_queue_stop, 2575 }; 2576 2577 static void gve_get_rx_queue_stats(struct net_device *dev, int idx, 2578 struct netdev_queue_stats_rx *rx_stats) 2579 { 2580 struct gve_priv *priv = netdev_priv(dev); 2581 struct gve_rx_ring *rx = &priv->rx[idx]; 2582 unsigned int start; 2583 2584 do { 2585 start = u64_stats_fetch_begin(&rx->statss); 2586 rx_stats->packets = rx->rpackets; 2587 rx_stats->bytes = rx->rbytes; 2588 rx_stats->alloc_fail = rx->rx_skb_alloc_fail + 2589 rx->rx_buf_alloc_fail; 2590 } while (u64_stats_fetch_retry(&rx->statss, start)); 2591 } 2592 2593 static void gve_get_tx_queue_stats(struct net_device *dev, int idx, 2594 struct netdev_queue_stats_tx *tx_stats) 2595 { 2596 struct gve_priv *priv = netdev_priv(dev); 2597 struct gve_tx_ring *tx = &priv->tx[idx]; 2598 unsigned int start; 2599 2600 do { 2601 start = u64_stats_fetch_begin(&tx->statss); 2602 tx_stats->packets = tx->pkt_done; 2603 tx_stats->bytes = tx->bytes_done; 2604 } while (u64_stats_fetch_retry(&tx->statss, start)); 2605 } 2606 2607 static void gve_get_base_stats(struct net_device *dev, 2608 struct netdev_queue_stats_rx *rx, 2609 struct netdev_queue_stats_tx *tx) 2610 { 2611 rx->packets = 0; 2612 rx->bytes = 0; 2613 rx->alloc_fail = 0; 2614 2615 tx->packets = 0; 2616 tx->bytes = 0; 2617 } 2618 2619 static const struct netdev_stat_ops gve_stat_ops = { 2620 .get_queue_stats_rx = gve_get_rx_queue_stats, 2621 .get_queue_stats_tx = gve_get_tx_queue_stats, 2622 .get_base_stats = gve_get_base_stats, 2623 }; 2624 2625 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2626 { 2627 int max_tx_queues, max_rx_queues; 2628 struct net_device *dev; 2629 __be32 __iomem *db_bar; 2630 struct gve_registers __iomem *reg_bar; 2631 struct gve_priv *priv; 2632 int err; 2633 2634 err = pci_enable_device(pdev); 2635 if (err) 2636 return err; 2637 2638 err = pci_request_regions(pdev, gve_driver_name); 2639 if (err) 2640 goto abort_with_enabled; 2641 2642 pci_set_master(pdev); 2643 2644 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2645 if (err) { 2646 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2647 goto abort_with_pci_region; 2648 } 2649 2650 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2651 if (!reg_bar) { 2652 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2653 err = -ENOMEM; 2654 goto abort_with_pci_region; 2655 } 2656 2657 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2658 if (!db_bar) { 2659 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2660 err = -ENOMEM; 2661 goto abort_with_reg_bar; 2662 } 2663 2664 gve_write_version(®_bar->driver_version); 2665 /* Get max queues to alloc etherdev */ 2666 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2667 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2668 /* Alloc and setup the netdev and priv */ 2669 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2670 if (!dev) { 2671 dev_err(&pdev->dev, "could not allocate netdev\n"); 2672 err = -ENOMEM; 2673 goto abort_with_db_bar; 2674 } 2675 SET_NETDEV_DEV(dev, &pdev->dev); 2676 pci_set_drvdata(pdev, dev); 2677 dev->ethtool_ops = &gve_ethtool_ops; 2678 dev->netdev_ops = &gve_netdev_ops; 2679 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2680 dev->stat_ops = &gve_stat_ops; 2681 2682 /* Set default and supported features. 2683 * 2684 * Features might be set in other locations as well (such as 2685 * `gve_adminq_describe_device`). 2686 */ 2687 dev->hw_features = NETIF_F_HIGHDMA; 2688 dev->hw_features |= NETIF_F_SG; 2689 dev->hw_features |= NETIF_F_HW_CSUM; 2690 dev->hw_features |= NETIF_F_TSO; 2691 dev->hw_features |= NETIF_F_TSO6; 2692 dev->hw_features |= NETIF_F_TSO_ECN; 2693 dev->hw_features |= NETIF_F_RXCSUM; 2694 dev->hw_features |= NETIF_F_RXHASH; 2695 dev->features = dev->hw_features; 2696 dev->watchdog_timeo = 5 * HZ; 2697 dev->min_mtu = ETH_MIN_MTU; 2698 netif_carrier_off(dev); 2699 2700 priv = netdev_priv(dev); 2701 priv->dev = dev; 2702 priv->pdev = pdev; 2703 priv->msg_enable = DEFAULT_MSG_LEVEL; 2704 priv->reg_bar0 = reg_bar; 2705 priv->db_bar2 = db_bar; 2706 priv->service_task_flags = 0x0; 2707 priv->state_flags = 0x0; 2708 priv->ethtool_flags = 0x0; 2709 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 2710 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2711 2712 gve_set_probe_in_progress(priv); 2713 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2714 if (!priv->gve_wq) { 2715 dev_err(&pdev->dev, "Could not allocate workqueue"); 2716 err = -ENOMEM; 2717 goto abort_with_netdev; 2718 } 2719 INIT_WORK(&priv->service_task, gve_service_task); 2720 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2721 priv->tx_cfg.max_queues = max_tx_queues; 2722 priv->rx_cfg.max_queues = max_rx_queues; 2723 2724 err = gve_init_priv(priv, false); 2725 if (err) 2726 goto abort_with_wq; 2727 2728 err = register_netdev(dev); 2729 if (err) 2730 goto abort_with_gve_init; 2731 2732 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2733 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2734 gve_clear_probe_in_progress(priv); 2735 queue_work(priv->gve_wq, &priv->service_task); 2736 return 0; 2737 2738 abort_with_gve_init: 2739 gve_teardown_priv_resources(priv); 2740 2741 abort_with_wq: 2742 destroy_workqueue(priv->gve_wq); 2743 2744 abort_with_netdev: 2745 free_netdev(dev); 2746 2747 abort_with_db_bar: 2748 pci_iounmap(pdev, db_bar); 2749 2750 abort_with_reg_bar: 2751 pci_iounmap(pdev, reg_bar); 2752 2753 abort_with_pci_region: 2754 pci_release_regions(pdev); 2755 2756 abort_with_enabled: 2757 pci_disable_device(pdev); 2758 return err; 2759 } 2760 2761 static void gve_remove(struct pci_dev *pdev) 2762 { 2763 struct net_device *netdev = pci_get_drvdata(pdev); 2764 struct gve_priv *priv = netdev_priv(netdev); 2765 __be32 __iomem *db_bar = priv->db_bar2; 2766 void __iomem *reg_bar = priv->reg_bar0; 2767 2768 unregister_netdev(netdev); 2769 gve_teardown_priv_resources(priv); 2770 destroy_workqueue(priv->gve_wq); 2771 free_netdev(netdev); 2772 pci_iounmap(pdev, db_bar); 2773 pci_iounmap(pdev, reg_bar); 2774 pci_release_regions(pdev); 2775 pci_disable_device(pdev); 2776 } 2777 2778 static void gve_shutdown(struct pci_dev *pdev) 2779 { 2780 struct net_device *netdev = pci_get_drvdata(pdev); 2781 struct gve_priv *priv = netdev_priv(netdev); 2782 bool was_up = netif_running(priv->dev); 2783 2784 rtnl_lock(); 2785 if (was_up && gve_close(priv->dev)) { 2786 /* If the dev was up, attempt to close, if close fails, reset */ 2787 gve_reset_and_teardown(priv, was_up); 2788 } else { 2789 /* If the dev wasn't up or close worked, finish tearing down */ 2790 gve_teardown_priv_resources(priv); 2791 } 2792 rtnl_unlock(); 2793 } 2794 2795 #ifdef CONFIG_PM 2796 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2797 { 2798 struct net_device *netdev = pci_get_drvdata(pdev); 2799 struct gve_priv *priv = netdev_priv(netdev); 2800 bool was_up = netif_running(priv->dev); 2801 2802 priv->suspend_cnt++; 2803 rtnl_lock(); 2804 if (was_up && gve_close(priv->dev)) { 2805 /* If the dev was up, attempt to close, if close fails, reset */ 2806 gve_reset_and_teardown(priv, was_up); 2807 } else { 2808 /* If the dev wasn't up or close worked, finish tearing down */ 2809 gve_teardown_priv_resources(priv); 2810 } 2811 priv->up_before_suspend = was_up; 2812 rtnl_unlock(); 2813 return 0; 2814 } 2815 2816 static int gve_resume(struct pci_dev *pdev) 2817 { 2818 struct net_device *netdev = pci_get_drvdata(pdev); 2819 struct gve_priv *priv = netdev_priv(netdev); 2820 int err; 2821 2822 priv->resume_cnt++; 2823 rtnl_lock(); 2824 err = gve_reset_recovery(priv, priv->up_before_suspend); 2825 rtnl_unlock(); 2826 return err; 2827 } 2828 #endif /* CONFIG_PM */ 2829 2830 static const struct pci_device_id gve_id_table[] = { 2831 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2832 { } 2833 }; 2834 2835 static struct pci_driver gve_driver = { 2836 .name = gve_driver_name, 2837 .id_table = gve_id_table, 2838 .probe = gve_probe, 2839 .remove = gve_remove, 2840 .shutdown = gve_shutdown, 2841 #ifdef CONFIG_PM 2842 .suspend = gve_suspend, 2843 .resume = gve_resume, 2844 #endif 2845 }; 2846 2847 module_pci_driver(gve_driver); 2848 2849 MODULE_DEVICE_TABLE(pci, gve_id_table); 2850 MODULE_AUTHOR("Google, Inc."); 2851 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2852 MODULE_LICENSE("Dual MIT/GPL"); 2853 MODULE_VERSION(GVE_VERSION); 2854