// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2024 Google LLC
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
#include "gve_utils.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_features_t gve_features_check(struct sk_buff *skb,
					    struct net_device *dev,
					    netdev_features_t features)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (!gve_is_gqi(priv))
		return gve_features_check_dqo(skb, dev, features);

	return features;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
	int err = 0;

	if (!priv->max_flow_rules)
		return 0;

	flow_rules_cache->rules_cache =
		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rules_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
		return -ENOMEM;
	}

	flow_rules_cache->rule_ids_cache =
		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
			 GFP_KERNEL);
	if (!flow_rules_cache->rule_ids_cache) {
		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
		err = -ENOMEM;
		goto free_rules_cache;
	}

	return 0;

free_rules_cache:
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
	return err;
}

static void gve_free_flow_rule_caches(struct gve_priv *priv)
{
	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;

	kvfree(flow_rules_cache->rule_ids_cache);
	flow_rules_cache->rule_ids_cache = NULL;
	kvfree(flow_rules_cache->rules_cache);
	flow_rules_cache->rules_cache = NULL;
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
gve_num_tx_queues(priv); 248 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 249 priv->rx_cfg.num_queues; 250 priv->stats_report_len = struct_size(priv->stats_report, stats, 251 size_add(tx_stats_num, rx_stats_num)); 252 priv->stats_report = 253 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 254 &priv->stats_report_bus, GFP_KERNEL); 255 if (!priv->stats_report) 256 return -ENOMEM; 257 /* Set up timer for the report-stats task */ 258 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 259 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 260 return 0; 261 } 262 263 static void gve_free_stats_report(struct gve_priv *priv) 264 { 265 if (!priv->stats_report) 266 return; 267 268 del_timer_sync(&priv->stats_report_timer); 269 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 270 priv->stats_report, priv->stats_report_bus); 271 priv->stats_report = NULL; 272 } 273 274 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 275 { 276 struct gve_priv *priv = arg; 277 278 queue_work(priv->gve_wq, &priv->service_task); 279 return IRQ_HANDLED; 280 } 281 282 static irqreturn_t gve_intr(int irq, void *arg) 283 { 284 struct gve_notify_block *block = arg; 285 struct gve_priv *priv = block->priv; 286 287 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 288 napi_schedule_irqoff(&block->napi); 289 return IRQ_HANDLED; 290 } 291 292 static irqreturn_t gve_intr_dqo(int irq, void *arg) 293 { 294 struct gve_notify_block *block = arg; 295 296 /* Interrupts are automatically masked */ 297 napi_schedule_irqoff(&block->napi); 298 return IRQ_HANDLED; 299 } 300 301 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 302 { 303 int cpu_curr = smp_processor_id(); 304 const struct cpumask *aff_mask; 305 306 aff_mask = irq_get_effective_affinity_mask(irq); 307 if (unlikely(!aff_mask)) 308 return 1; 309 310 return cpumask_test_cpu(cpu_curr, aff_mask); 311 } 312 313 int gve_napi_poll(struct napi_struct *napi, int budget) 314 { 315 struct gve_notify_block *block; 316 __be32 __iomem *irq_doorbell; 317 bool reschedule = false; 318 struct gve_priv *priv; 319 int work_done = 0; 320 321 block = container_of(napi, struct gve_notify_block, napi); 322 priv = block->priv; 323 324 if (block->tx) { 325 if (block->tx->q_num < priv->tx_cfg.num_queues) 326 reschedule |= gve_tx_poll(block, budget); 327 else if (budget) 328 reschedule |= gve_xdp_poll(block, budget); 329 } 330 331 if (!budget) 332 return 0; 333 334 if (block->rx) { 335 work_done = gve_rx_poll(block, budget); 336 337 /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of 338 * TX and RX work done. 339 */ 340 if (priv->xdp_prog) 341 work_done = max_t(int, work_done, 342 gve_xsk_tx_poll(block, budget)); 343 344 reschedule |= work_done == budget; 345 } 346 347 if (reschedule) 348 return budget; 349 350 /* Complete processing - don't unmask irq if busy polling is enabled */ 351 if (likely(napi_complete_done(napi, work_done))) { 352 irq_doorbell = gve_irq_doorbell(priv, block); 353 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 354 355 /* Ensure IRQ ACK is visible before we check pending work. 356 * If queue had issued updates, it would be truly visible. 
357 */ 358 mb(); 359 360 if (block->tx) 361 reschedule |= gve_tx_clean_pending(priv, block->tx); 362 if (block->rx) 363 reschedule |= gve_rx_work_pending(block->rx); 364 365 if (reschedule && napi_schedule(napi)) 366 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 367 } 368 return work_done; 369 } 370 371 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 372 { 373 struct gve_notify_block *block = 374 container_of(napi, struct gve_notify_block, napi); 375 struct gve_priv *priv = block->priv; 376 bool reschedule = false; 377 int work_done = 0; 378 379 if (block->tx) 380 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 381 382 if (!budget) 383 return 0; 384 385 if (block->rx) { 386 work_done = gve_rx_poll_dqo(block, budget); 387 reschedule |= work_done == budget; 388 } 389 390 if (reschedule) { 391 /* Reschedule by returning budget only if already on the correct 392 * cpu. 393 */ 394 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 395 return budget; 396 397 /* If not on the cpu with which this queue's irq has affinity 398 * with, we avoid rescheduling napi and arm the irq instead so 399 * that napi gets rescheduled back eventually onto the right 400 * cpu. 401 */ 402 if (work_done == budget) 403 work_done--; 404 } 405 406 if (likely(napi_complete_done(napi, work_done))) { 407 /* Enable interrupts again. 408 * 409 * We don't need to repoll afterwards because HW supports the 410 * PCI MSI-X PBA feature. 411 * 412 * Another interrupt would be triggered if a new event came in 413 * since the last one. 414 */ 415 gve_write_irq_doorbell_dqo(priv, block, 416 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 417 } 418 419 return work_done; 420 } 421 422 static int gve_alloc_notify_blocks(struct gve_priv *priv) 423 { 424 int num_vecs_requested = priv->num_ntfy_blks + 1; 425 unsigned int active_cpus; 426 int vecs_enabled; 427 int i, j; 428 int err; 429 430 priv->msix_vectors = kvcalloc(num_vecs_requested, 431 sizeof(*priv->msix_vectors), GFP_KERNEL); 432 if (!priv->msix_vectors) 433 return -ENOMEM; 434 for (i = 0; i < num_vecs_requested; i++) 435 priv->msix_vectors[i].entry = i; 436 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 437 GVE_MIN_MSIX, num_vecs_requested); 438 if (vecs_enabled < 0) { 439 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 440 GVE_MIN_MSIX, vecs_enabled); 441 err = vecs_enabled; 442 goto abort_with_msix_vectors; 443 } 444 if (vecs_enabled != num_vecs_requested) { 445 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 446 int vecs_per_type = new_num_ntfy_blks / 2; 447 int vecs_left = new_num_ntfy_blks % 2; 448 449 priv->num_ntfy_blks = new_num_ntfy_blks; 450 priv->mgmt_msix_idx = priv->num_ntfy_blks; 451 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 452 vecs_per_type); 453 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 454 vecs_per_type + vecs_left); 455 dev_err(&priv->pdev->dev, 456 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 457 vecs_enabled, priv->tx_cfg.max_queues, 458 priv->rx_cfg.max_queues); 459 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 460 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 461 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 462 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 463 } 464 /* Half the notification blocks go to TX and half to RX */ 465 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 466 467 /* Setup Management Vector - the last vector */ 468 
snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 469 pci_name(priv->pdev)); 470 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 471 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 472 if (err) { 473 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 474 goto abort_with_msix_enabled; 475 } 476 priv->irq_db_indices = 477 dma_alloc_coherent(&priv->pdev->dev, 478 priv->num_ntfy_blks * 479 sizeof(*priv->irq_db_indices), 480 &priv->irq_db_indices_bus, GFP_KERNEL); 481 if (!priv->irq_db_indices) { 482 err = -ENOMEM; 483 goto abort_with_mgmt_vector; 484 } 485 486 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 487 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 488 if (!priv->ntfy_blocks) { 489 err = -ENOMEM; 490 goto abort_with_irq_db_indices; 491 } 492 493 /* Setup the other blocks - the first n-1 vectors */ 494 for (i = 0; i < priv->num_ntfy_blks; i++) { 495 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 496 int msix_idx = i; 497 498 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 499 i, pci_name(priv->pdev)); 500 block->priv = priv; 501 err = request_irq(priv->msix_vectors[msix_idx].vector, 502 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, 503 0, block->name, block); 504 if (err) { 505 dev_err(&priv->pdev->dev, 506 "Failed to receive msix vector %d\n", i); 507 goto abort_with_some_ntfy_blocks; 508 } 509 block->irq = priv->msix_vectors[msix_idx].vector; 510 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 511 get_cpu_mask(i % active_cpus)); 512 block->irq_db_index = &priv->irq_db_indices[i].index; 513 } 514 return 0; 515 abort_with_some_ntfy_blocks: 516 for (j = 0; j < i; j++) { 517 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 518 int msix_idx = j; 519 520 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 521 NULL); 522 free_irq(priv->msix_vectors[msix_idx].vector, block); 523 block->irq = 0; 524 } 525 kvfree(priv->ntfy_blocks); 526 priv->ntfy_blocks = NULL; 527 abort_with_irq_db_indices: 528 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 529 sizeof(*priv->irq_db_indices), 530 priv->irq_db_indices, priv->irq_db_indices_bus); 531 priv->irq_db_indices = NULL; 532 abort_with_mgmt_vector: 533 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 534 abort_with_msix_enabled: 535 pci_disable_msix(priv->pdev); 536 abort_with_msix_vectors: 537 kvfree(priv->msix_vectors); 538 priv->msix_vectors = NULL; 539 return err; 540 } 541 542 static void gve_free_notify_blocks(struct gve_priv *priv) 543 { 544 int i; 545 546 if (!priv->msix_vectors) 547 return; 548 549 /* Free the irqs */ 550 for (i = 0; i < priv->num_ntfy_blks; i++) { 551 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 552 int msix_idx = i; 553 554 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 555 NULL); 556 free_irq(priv->msix_vectors[msix_idx].vector, block); 557 block->irq = 0; 558 } 559 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 560 kvfree(priv->ntfy_blocks); 561 priv->ntfy_blocks = NULL; 562 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 563 sizeof(*priv->irq_db_indices), 564 priv->irq_db_indices, priv->irq_db_indices_bus); 565 priv->irq_db_indices = NULL; 566 pci_disable_msix(priv->pdev); 567 kvfree(priv->msix_vectors); 568 priv->msix_vectors = NULL; 569 } 570 571 static int gve_setup_device_resources(struct gve_priv *priv) 572 { 573 int err; 574 575 err = gve_alloc_flow_rule_caches(priv); 576 if (err) 577 return err; 578 err = 
gve_alloc_counter_array(priv); 579 if (err) 580 goto abort_with_flow_rule_caches; 581 err = gve_alloc_notify_blocks(priv); 582 if (err) 583 goto abort_with_counter; 584 err = gve_alloc_stats_report(priv); 585 if (err) 586 goto abort_with_ntfy_blocks; 587 err = gve_adminq_configure_device_resources(priv, 588 priv->counter_array_bus, 589 priv->num_event_counters, 590 priv->irq_db_indices_bus, 591 priv->num_ntfy_blks); 592 if (unlikely(err)) { 593 dev_err(&priv->pdev->dev, 594 "could not setup device_resources: err=%d\n", err); 595 err = -ENXIO; 596 goto abort_with_stats_report; 597 } 598 599 if (!gve_is_gqi(priv)) { 600 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 601 GFP_KERNEL); 602 if (!priv->ptype_lut_dqo) { 603 err = -ENOMEM; 604 goto abort_with_stats_report; 605 } 606 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 607 if (err) { 608 dev_err(&priv->pdev->dev, 609 "Failed to get ptype map: err=%d\n", err); 610 goto abort_with_ptype_lut; 611 } 612 } 613 614 err = gve_adminq_report_stats(priv, priv->stats_report_len, 615 priv->stats_report_bus, 616 GVE_STATS_REPORT_TIMER_PERIOD); 617 if (err) 618 dev_err(&priv->pdev->dev, 619 "Failed to report stats: err=%d\n", err); 620 gve_set_device_resources_ok(priv); 621 return 0; 622 623 abort_with_ptype_lut: 624 kvfree(priv->ptype_lut_dqo); 625 priv->ptype_lut_dqo = NULL; 626 abort_with_stats_report: 627 gve_free_stats_report(priv); 628 abort_with_ntfy_blocks: 629 gve_free_notify_blocks(priv); 630 abort_with_counter: 631 gve_free_counter_array(priv); 632 abort_with_flow_rule_caches: 633 gve_free_flow_rule_caches(priv); 634 635 return err; 636 } 637 638 static void gve_trigger_reset(struct gve_priv *priv); 639 640 static void gve_teardown_device_resources(struct gve_priv *priv) 641 { 642 int err; 643 644 /* Tell device its resources are being freed */ 645 if (gve_get_device_resources_ok(priv)) { 646 err = gve_flow_rules_reset(priv); 647 if (err) { 648 dev_err(&priv->pdev->dev, 649 "Failed to reset flow rules: err=%d\n", err); 650 gve_trigger_reset(priv); 651 } 652 /* detach the stats report */ 653 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 654 if (err) { 655 dev_err(&priv->pdev->dev, 656 "Failed to detach stats report: err=%d\n", err); 657 gve_trigger_reset(priv); 658 } 659 err = gve_adminq_deconfigure_device_resources(priv); 660 if (err) { 661 dev_err(&priv->pdev->dev, 662 "Could not deconfigure device resources: err=%d\n", 663 err); 664 gve_trigger_reset(priv); 665 } 666 } 667 668 kvfree(priv->ptype_lut_dqo); 669 priv->ptype_lut_dqo = NULL; 670 671 gve_free_flow_rule_caches(priv); 672 gve_free_counter_array(priv); 673 gve_free_notify_blocks(priv); 674 gve_free_stats_report(priv); 675 gve_clear_device_resources_ok(priv); 676 } 677 678 static int gve_unregister_qpl(struct gve_priv *priv, 679 struct gve_queue_page_list *qpl) 680 { 681 int err; 682 683 if (!qpl) 684 return 0; 685 686 err = gve_adminq_unregister_page_list(priv, qpl->id); 687 if (err) { 688 netif_err(priv, drv, priv->dev, 689 "Failed to unregister queue page list %d\n", 690 qpl->id); 691 return err; 692 } 693 694 priv->num_registered_pages -= qpl->num_entries; 695 return 0; 696 } 697 698 static int gve_register_qpl(struct gve_priv *priv, 699 struct gve_queue_page_list *qpl) 700 { 701 int pages; 702 int err; 703 704 if (!qpl) 705 return 0; 706 707 pages = qpl->num_entries; 708 709 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 710 netif_err(priv, drv, priv->dev, 711 "Reached max number of registered 
pages %llu > %llu\n", 712 pages + priv->num_registered_pages, 713 priv->max_registered_pages); 714 return -EINVAL; 715 } 716 717 err = gve_adminq_register_page_list(priv, qpl); 718 if (err) { 719 netif_err(priv, drv, priv->dev, 720 "failed to register queue page list %d\n", 721 qpl->id); 722 return err; 723 } 724 725 priv->num_registered_pages += pages; 726 return 0; 727 } 728 729 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 730 { 731 struct gve_tx_ring *tx = &priv->tx[idx]; 732 733 if (gve_is_gqi(priv)) 734 return tx->tx_fifo.qpl; 735 else 736 return tx->dqo.qpl; 737 } 738 739 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 740 { 741 struct gve_rx_ring *rx = &priv->rx[idx]; 742 743 if (gve_is_gqi(priv)) 744 return rx->data.qpl; 745 else 746 return rx->dqo.qpl; 747 } 748 749 static int gve_register_xdp_qpls(struct gve_priv *priv) 750 { 751 int start_id; 752 int err; 753 int i; 754 755 start_id = gve_xdp_tx_start_queue_id(priv); 756 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 757 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 758 /* This failure will trigger a reset - no need to clean up */ 759 if (err) 760 return err; 761 } 762 return 0; 763 } 764 765 static int gve_register_qpls(struct gve_priv *priv) 766 { 767 int num_tx_qpls, num_rx_qpls; 768 int err; 769 int i; 770 771 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 772 gve_is_qpl(priv)); 773 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 774 775 for (i = 0; i < num_tx_qpls; i++) { 776 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 777 if (err) 778 return err; 779 } 780 781 for (i = 0; i < num_rx_qpls; i++) { 782 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 783 if (err) 784 return err; 785 } 786 787 return 0; 788 } 789 790 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 791 { 792 int start_id; 793 int err; 794 int i; 795 796 start_id = gve_xdp_tx_start_queue_id(priv); 797 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 798 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 799 /* This failure will trigger a reset - no need to clean */ 800 if (err) 801 return err; 802 } 803 return 0; 804 } 805 806 static int gve_unregister_qpls(struct gve_priv *priv) 807 { 808 int num_tx_qpls, num_rx_qpls; 809 int err; 810 int i; 811 812 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 813 gve_is_qpl(priv)); 814 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 815 816 for (i = 0; i < num_tx_qpls; i++) { 817 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 818 /* This failure will trigger a reset - no need to clean */ 819 if (err) 820 return err; 821 } 822 823 for (i = 0; i < num_rx_qpls; i++) { 824 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 825 /* This failure will trigger a reset - no need to clean */ 826 if (err) 827 return err; 828 } 829 return 0; 830 } 831 832 static int gve_create_xdp_rings(struct gve_priv *priv) 833 { 834 int err; 835 836 err = gve_adminq_create_tx_queues(priv, 837 gve_xdp_tx_start_queue_id(priv), 838 priv->num_xdp_queues); 839 if (err) { 840 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 841 priv->num_xdp_queues); 842 /* This failure will trigger a reset - no need to clean 843 * up 844 */ 845 return err; 846 } 847 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 848 priv->num_xdp_queues); 849 850 return 0; 851 } 852 853 static int 
gve_create_rings(struct gve_priv *priv) 854 { 855 int num_tx_queues = gve_num_tx_queues(priv); 856 int err; 857 int i; 858 859 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 860 if (err) { 861 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 862 num_tx_queues); 863 /* This failure will trigger a reset - no need to clean 864 * up 865 */ 866 return err; 867 } 868 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 869 num_tx_queues); 870 871 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 872 if (err) { 873 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 874 priv->rx_cfg.num_queues); 875 /* This failure will trigger a reset - no need to clean 876 * up 877 */ 878 return err; 879 } 880 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 881 priv->rx_cfg.num_queues); 882 883 if (gve_is_gqi(priv)) { 884 /* Rx data ring has been prefilled with packet buffers at queue 885 * allocation time. 886 * 887 * Write the doorbell to provide descriptor slots and packet 888 * buffers to the NIC. 889 */ 890 for (i = 0; i < priv->rx_cfg.num_queues; i++) 891 gve_rx_write_doorbell(priv, &priv->rx[i]); 892 } else { 893 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 894 /* Post buffers and ring doorbell. */ 895 gve_rx_post_buffers_dqo(&priv->rx[i]); 896 } 897 } 898 899 return 0; 900 } 901 902 static void init_xdp_sync_stats(struct gve_priv *priv) 903 { 904 int start_id = gve_xdp_tx_start_queue_id(priv); 905 int i; 906 907 /* Init stats */ 908 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 909 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 910 911 u64_stats_init(&priv->tx[i].statss); 912 priv->tx[i].ntfy_id = ntfy_idx; 913 } 914 } 915 916 static void gve_init_sync_stats(struct gve_priv *priv) 917 { 918 int i; 919 920 for (i = 0; i < priv->tx_cfg.num_queues; i++) 921 u64_stats_init(&priv->tx[i].statss); 922 923 /* Init stats for XDP TX queues */ 924 init_xdp_sync_stats(priv); 925 926 for (i = 0; i < priv->rx_cfg.num_queues; i++) 927 u64_stats_init(&priv->rx[i].statss); 928 } 929 930 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 931 struct gve_tx_alloc_rings_cfg *cfg) 932 { 933 int num_xdp_queues = priv->xdp_prog ? 
priv->rx_cfg.num_queues : 0; 934 935 cfg->qcfg = &priv->tx_cfg; 936 cfg->raw_addressing = !gve_is_qpl(priv); 937 cfg->ring_size = priv->tx_desc_cnt; 938 cfg->start_idx = 0; 939 cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues; 940 cfg->tx = priv->tx; 941 } 942 943 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) 944 { 945 int i; 946 947 if (!priv->tx) 948 return; 949 950 for (i = start_id; i < start_id + num_rings; i++) { 951 if (gve_is_gqi(priv)) 952 gve_tx_stop_ring_gqi(priv, i); 953 else 954 gve_tx_stop_ring_dqo(priv, i); 955 } 956 } 957 958 static void gve_tx_start_rings(struct gve_priv *priv, int start_id, 959 int num_rings) 960 { 961 int i; 962 963 for (i = start_id; i < start_id + num_rings; i++) { 964 if (gve_is_gqi(priv)) 965 gve_tx_start_ring_gqi(priv, i); 966 else 967 gve_tx_start_ring_dqo(priv, i); 968 } 969 } 970 971 static int gve_alloc_xdp_rings(struct gve_priv *priv) 972 { 973 struct gve_tx_alloc_rings_cfg cfg = {0}; 974 int err = 0; 975 976 if (!priv->num_xdp_queues) 977 return 0; 978 979 gve_tx_get_curr_alloc_cfg(priv, &cfg); 980 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 981 cfg.num_rings = priv->num_xdp_queues; 982 983 err = gve_tx_alloc_rings_gqi(priv, &cfg); 984 if (err) 985 return err; 986 987 gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); 988 init_xdp_sync_stats(priv); 989 990 return 0; 991 } 992 993 static int gve_queues_mem_alloc(struct gve_priv *priv, 994 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 995 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 996 { 997 int err; 998 999 if (gve_is_gqi(priv)) 1000 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); 1001 else 1002 err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); 1003 if (err) 1004 return err; 1005 1006 if (gve_is_gqi(priv)) 1007 err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); 1008 else 1009 err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); 1010 if (err) 1011 goto free_tx; 1012 1013 return 0; 1014 1015 free_tx: 1016 if (gve_is_gqi(priv)) 1017 gve_tx_free_rings_gqi(priv, tx_alloc_cfg); 1018 else 1019 gve_tx_free_rings_dqo(priv, tx_alloc_cfg); 1020 return err; 1021 } 1022 1023 static int gve_destroy_xdp_rings(struct gve_priv *priv) 1024 { 1025 int start_id; 1026 int err; 1027 1028 start_id = gve_xdp_tx_start_queue_id(priv); 1029 err = gve_adminq_destroy_tx_queues(priv, 1030 start_id, 1031 priv->num_xdp_queues); 1032 if (err) { 1033 netif_err(priv, drv, priv->dev, 1034 "failed to destroy XDP queues\n"); 1035 /* This failure will trigger a reset - no need to clean up */ 1036 return err; 1037 } 1038 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 1039 1040 return 0; 1041 } 1042 1043 static int gve_destroy_rings(struct gve_priv *priv) 1044 { 1045 int num_tx_queues = gve_num_tx_queues(priv); 1046 int err; 1047 1048 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1049 if (err) { 1050 netif_err(priv, drv, priv->dev, 1051 "failed to destroy tx queues\n"); 1052 /* This failure will trigger a reset - no need to clean up */ 1053 return err; 1054 } 1055 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1056 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1057 if (err) { 1058 netif_err(priv, drv, priv->dev, 1059 "failed to destroy rx queues\n"); 1060 /* This failure will trigger a reset - no need to clean up */ 1061 return err; 1062 } 1063 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1064 return 0; 1065 } 1066 1067 static void gve_free_xdp_rings(struct gve_priv *priv) 1068 { 1069 struct 
gve_tx_alloc_rings_cfg cfg = {0}; 1070 1071 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1072 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1073 cfg.num_rings = priv->num_xdp_queues; 1074 1075 if (priv->tx) { 1076 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); 1077 gve_tx_free_rings_gqi(priv, &cfg); 1078 } 1079 } 1080 1081 static void gve_queues_mem_free(struct gve_priv *priv, 1082 struct gve_tx_alloc_rings_cfg *tx_cfg, 1083 struct gve_rx_alloc_rings_cfg *rx_cfg) 1084 { 1085 if (gve_is_gqi(priv)) { 1086 gve_tx_free_rings_gqi(priv, tx_cfg); 1087 gve_rx_free_rings_gqi(priv, rx_cfg); 1088 } else { 1089 gve_tx_free_rings_dqo(priv, tx_cfg); 1090 gve_rx_free_rings_dqo(priv, rx_cfg); 1091 } 1092 } 1093 1094 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1095 struct page **page, dma_addr_t *dma, 1096 enum dma_data_direction dir, gfp_t gfp_flags) 1097 { 1098 *page = alloc_page(gfp_flags); 1099 if (!*page) { 1100 priv->page_alloc_fail++; 1101 return -ENOMEM; 1102 } 1103 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1104 if (dma_mapping_error(dev, *dma)) { 1105 priv->dma_mapping_error++; 1106 put_page(*page); 1107 return -ENOMEM; 1108 } 1109 return 0; 1110 } 1111 1112 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1113 u32 id, int pages) 1114 { 1115 struct gve_queue_page_list *qpl; 1116 int err; 1117 int i; 1118 1119 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1120 if (!qpl) 1121 return NULL; 1122 1123 qpl->id = id; 1124 qpl->num_entries = 0; 1125 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1126 if (!qpl->pages) 1127 goto abort; 1128 1129 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1130 if (!qpl->page_buses) 1131 goto abort; 1132 1133 for (i = 0; i < pages; i++) { 1134 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1135 &qpl->page_buses[i], 1136 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1137 if (err) 1138 goto abort; 1139 qpl->num_entries++; 1140 } 1141 1142 return qpl; 1143 1144 abort: 1145 gve_free_queue_page_list(priv, qpl, id); 1146 return NULL; 1147 } 1148 1149 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1150 enum dma_data_direction dir) 1151 { 1152 if (!dma_mapping_error(dev, dma)) 1153 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1154 if (page) 1155 put_page(page); 1156 } 1157 1158 void gve_free_queue_page_list(struct gve_priv *priv, 1159 struct gve_queue_page_list *qpl, 1160 u32 id) 1161 { 1162 int i; 1163 1164 if (!qpl) 1165 return; 1166 if (!qpl->pages) 1167 goto free_qpl; 1168 if (!qpl->page_buses) 1169 goto free_pages; 1170 1171 for (i = 0; i < qpl->num_entries; i++) 1172 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1173 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1174 1175 kvfree(qpl->page_buses); 1176 qpl->page_buses = NULL; 1177 free_pages: 1178 kvfree(qpl->pages); 1179 qpl->pages = NULL; 1180 free_qpl: 1181 kvfree(qpl); 1182 } 1183 1184 /* Use this to schedule a reset when the device is capable of continuing 1185 * to handle other requests in its current state. If it is not, do a reset 1186 * in thread instead. 
1187 */ 1188 void gve_schedule_reset(struct gve_priv *priv) 1189 { 1190 gve_set_do_reset(priv); 1191 queue_work(priv->gve_wq, &priv->service_task); 1192 } 1193 1194 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1195 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1196 static void gve_turndown(struct gve_priv *priv); 1197 static void gve_turnup(struct gve_priv *priv); 1198 1199 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1200 { 1201 struct napi_struct *napi; 1202 struct gve_rx_ring *rx; 1203 int err = 0; 1204 int i, j; 1205 u32 tx_qid; 1206 1207 if (!priv->num_xdp_queues) 1208 return 0; 1209 1210 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1211 rx = &priv->rx[i]; 1212 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1213 1214 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1215 napi->napi_id); 1216 if (err) 1217 goto err; 1218 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1219 MEM_TYPE_PAGE_SHARED, NULL); 1220 if (err) 1221 goto err; 1222 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1223 if (rx->xsk_pool) { 1224 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1225 napi->napi_id); 1226 if (err) 1227 goto err; 1228 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1229 MEM_TYPE_XSK_BUFF_POOL, NULL); 1230 if (err) 1231 goto err; 1232 xsk_pool_set_rxq_info(rx->xsk_pool, 1233 &rx->xsk_rxq); 1234 } 1235 } 1236 1237 for (i = 0; i < priv->num_xdp_queues; i++) { 1238 tx_qid = gve_xdp_tx_queue_id(priv, i); 1239 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1240 } 1241 return 0; 1242 1243 err: 1244 for (j = i; j >= 0; j--) { 1245 rx = &priv->rx[j]; 1246 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1247 xdp_rxq_info_unreg(&rx->xdp_rxq); 1248 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1249 xdp_rxq_info_unreg(&rx->xsk_rxq); 1250 } 1251 return err; 1252 } 1253 1254 static void gve_unreg_xdp_info(struct gve_priv *priv) 1255 { 1256 int i, tx_qid; 1257 1258 if (!priv->num_xdp_queues) 1259 return; 1260 1261 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1262 struct gve_rx_ring *rx = &priv->rx[i]; 1263 1264 xdp_rxq_info_unreg(&rx->xdp_rxq); 1265 if (rx->xsk_pool) { 1266 xdp_rxq_info_unreg(&rx->xsk_rxq); 1267 rx->xsk_pool = NULL; 1268 } 1269 } 1270 1271 for (i = 0; i < priv->num_xdp_queues; i++) { 1272 tx_qid = gve_xdp_tx_queue_id(priv, i); 1273 priv->tx[tx_qid].xsk_pool = NULL; 1274 } 1275 } 1276 1277 static void gve_drain_page_cache(struct gve_priv *priv) 1278 { 1279 int i; 1280 1281 for (i = 0; i < priv->rx_cfg.num_queues; i++) 1282 page_frag_cache_drain(&priv->rx[i].page_cache); 1283 } 1284 1285 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, 1286 struct gve_rx_alloc_rings_cfg *cfg) 1287 { 1288 cfg->qcfg = &priv->rx_cfg; 1289 cfg->qcfg_tx = &priv->tx_cfg; 1290 cfg->raw_addressing = !gve_is_qpl(priv); 1291 cfg->enable_header_split = priv->header_split_enabled; 1292 cfg->ring_size = priv->rx_desc_cnt; 1293 cfg->packet_buffer_size = gve_is_gqi(priv) ? 
1294 GVE_DEFAULT_RX_BUFFER_SIZE : 1295 priv->data_buffer_size_dqo; 1296 cfg->rx = priv->rx; 1297 } 1298 1299 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1300 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1301 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1302 { 1303 gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); 1304 gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); 1305 } 1306 1307 static void gve_rx_start_ring(struct gve_priv *priv, int i) 1308 { 1309 if (gve_is_gqi(priv)) 1310 gve_rx_start_ring_gqi(priv, i); 1311 else 1312 gve_rx_start_ring_dqo(priv, i); 1313 } 1314 1315 static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) 1316 { 1317 int i; 1318 1319 for (i = 0; i < num_rings; i++) 1320 gve_rx_start_ring(priv, i); 1321 } 1322 1323 static void gve_rx_stop_ring(struct gve_priv *priv, int i) 1324 { 1325 if (gve_is_gqi(priv)) 1326 gve_rx_stop_ring_gqi(priv, i); 1327 else 1328 gve_rx_stop_ring_dqo(priv, i); 1329 } 1330 1331 static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) 1332 { 1333 int i; 1334 1335 if (!priv->rx) 1336 return; 1337 1338 for (i = 0; i < num_rings; i++) 1339 gve_rx_stop_ring(priv, i); 1340 } 1341 1342 static void gve_queues_mem_remove(struct gve_priv *priv) 1343 { 1344 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1345 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1346 1347 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1348 gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1349 priv->tx = NULL; 1350 priv->rx = NULL; 1351 } 1352 1353 /* The passed-in queue memory is stored into priv and the queues are made live. 1354 * No memory is allocated. Passed-in memory is freed on errors. 1355 */ 1356 static int gve_queues_start(struct gve_priv *priv, 1357 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1358 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1359 { 1360 struct net_device *dev = priv->dev; 1361 int err; 1362 1363 /* Record new resources into priv */ 1364 priv->tx = tx_alloc_cfg->tx; 1365 priv->rx = rx_alloc_cfg->rx; 1366 1367 /* Record new configs into priv */ 1368 priv->tx_cfg = *tx_alloc_cfg->qcfg; 1369 priv->rx_cfg = *rx_alloc_cfg->qcfg; 1370 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1371 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1372 1373 if (priv->xdp_prog) 1374 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1375 else 1376 priv->num_xdp_queues = 0; 1377 1378 gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); 1379 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); 1380 gve_init_sync_stats(priv); 1381 1382 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1383 if (err) 1384 goto stop_and_free_rings; 1385 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1386 if (err) 1387 goto stop_and_free_rings; 1388 1389 err = gve_reg_xdp_info(priv, dev); 1390 if (err) 1391 goto stop_and_free_rings; 1392 1393 err = gve_register_qpls(priv); 1394 if (err) 1395 goto reset; 1396 1397 priv->header_split_enabled = rx_alloc_cfg->enable_header_split; 1398 priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; 1399 1400 err = gve_create_rings(priv); 1401 if (err) 1402 goto reset; 1403 1404 gve_set_device_rings_ok(priv); 1405 1406 if (gve_get_report_stats(priv)) 1407 mod_timer(&priv->stats_report_timer, 1408 round_jiffies(jiffies + 1409 msecs_to_jiffies(priv->stats_report_timer_period))); 1410 1411 gve_turnup(priv); 1412 queue_work(priv->gve_wq, &priv->service_task); 1413 priv->interface_up_cnt++; 1414 return 0; 1415 1416 reset: 1417 if (gve_get_reset_in_progress(priv)) 1418 
goto stop_and_free_rings; 1419 gve_reset_and_teardown(priv, true); 1420 /* if this fails there is nothing we can do so just ignore the return */ 1421 gve_reset_recovery(priv, false); 1422 /* return the original error */ 1423 return err; 1424 stop_and_free_rings: 1425 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1426 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1427 gve_queues_mem_remove(priv); 1428 return err; 1429 } 1430 1431 static int gve_open(struct net_device *dev) 1432 { 1433 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 1434 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 1435 struct gve_priv *priv = netdev_priv(dev); 1436 int err; 1437 1438 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1439 1440 err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1441 if (err) 1442 return err; 1443 1444 /* No need to free on error: ownership of resources is lost after 1445 * calling gve_queues_start. 1446 */ 1447 err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); 1448 if (err) 1449 return err; 1450 1451 return 0; 1452 } 1453 1454 static int gve_queues_stop(struct gve_priv *priv) 1455 { 1456 int err; 1457 1458 netif_carrier_off(priv->dev); 1459 if (gve_get_device_rings_ok(priv)) { 1460 gve_turndown(priv); 1461 gve_drain_page_cache(priv); 1462 err = gve_destroy_rings(priv); 1463 if (err) 1464 goto err; 1465 err = gve_unregister_qpls(priv); 1466 if (err) 1467 goto err; 1468 gve_clear_device_rings_ok(priv); 1469 } 1470 del_timer_sync(&priv->stats_report_timer); 1471 1472 gve_unreg_xdp_info(priv); 1473 1474 gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); 1475 gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); 1476 1477 priv->interface_down_cnt++; 1478 return 0; 1479 1480 err: 1481 /* This must have been called from a reset due to the rtnl lock 1482 * so just return at this point. 
1483 */ 1484 if (gve_get_reset_in_progress(priv)) 1485 return err; 1486 /* Otherwise reset before returning */ 1487 gve_reset_and_teardown(priv, true); 1488 return gve_reset_recovery(priv, false); 1489 } 1490 1491 static int gve_close(struct net_device *dev) 1492 { 1493 struct gve_priv *priv = netdev_priv(dev); 1494 int err; 1495 1496 err = gve_queues_stop(priv); 1497 if (err) 1498 return err; 1499 1500 gve_queues_mem_remove(priv); 1501 return 0; 1502 } 1503 1504 static int gve_remove_xdp_queues(struct gve_priv *priv) 1505 { 1506 int err; 1507 1508 err = gve_destroy_xdp_rings(priv); 1509 if (err) 1510 return err; 1511 1512 err = gve_unregister_xdp_qpls(priv); 1513 if (err) 1514 return err; 1515 1516 gve_unreg_xdp_info(priv); 1517 gve_free_xdp_rings(priv); 1518 1519 priv->num_xdp_queues = 0; 1520 return 0; 1521 } 1522 1523 static int gve_add_xdp_queues(struct gve_priv *priv) 1524 { 1525 int err; 1526 1527 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1528 1529 err = gve_alloc_xdp_rings(priv); 1530 if (err) 1531 goto err; 1532 1533 err = gve_reg_xdp_info(priv, priv->dev); 1534 if (err) 1535 goto free_xdp_rings; 1536 1537 err = gve_register_xdp_qpls(priv); 1538 if (err) 1539 goto free_xdp_rings; 1540 1541 err = gve_create_xdp_rings(priv); 1542 if (err) 1543 goto free_xdp_rings; 1544 1545 return 0; 1546 1547 free_xdp_rings: 1548 gve_free_xdp_rings(priv); 1549 err: 1550 priv->num_xdp_queues = 0; 1551 return err; 1552 } 1553 1554 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1555 { 1556 if (!gve_get_napi_enabled(priv)) 1557 return; 1558 1559 if (link_status == netif_carrier_ok(priv->dev)) 1560 return; 1561 1562 if (link_status) { 1563 netdev_info(priv->dev, "Device link is up.\n"); 1564 netif_carrier_on(priv->dev); 1565 } else { 1566 netdev_info(priv->dev, "Device link is down.\n"); 1567 netif_carrier_off(priv->dev); 1568 } 1569 } 1570 1571 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1572 struct netlink_ext_ack *extack) 1573 { 1574 struct bpf_prog *old_prog; 1575 int err = 0; 1576 u32 status; 1577 1578 old_prog = READ_ONCE(priv->xdp_prog); 1579 if (!netif_running(priv->dev)) { 1580 WRITE_ONCE(priv->xdp_prog, prog); 1581 if (old_prog) 1582 bpf_prog_put(old_prog); 1583 return 0; 1584 } 1585 1586 gve_turndown(priv); 1587 if (!old_prog && prog) { 1588 // Allocate XDP TX queues if an XDP program is 1589 // being installed 1590 err = gve_add_xdp_queues(priv); 1591 if (err) 1592 goto out; 1593 } else if (old_prog && !prog) { 1594 // Remove XDP TX queues if an XDP program is 1595 // being uninstalled 1596 err = gve_remove_xdp_queues(priv); 1597 if (err) 1598 goto out; 1599 } 1600 WRITE_ONCE(priv->xdp_prog, prog); 1601 if (old_prog) 1602 bpf_prog_put(old_prog); 1603 1604 out: 1605 gve_turnup(priv); 1606 status = ioread32be(&priv->reg_bar0->device_status); 1607 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1608 return err; 1609 } 1610 1611 static int gve_xsk_pool_enable(struct net_device *dev, 1612 struct xsk_buff_pool *pool, 1613 u16 qid) 1614 { 1615 struct gve_priv *priv = netdev_priv(dev); 1616 struct napi_struct *napi; 1617 struct gve_rx_ring *rx; 1618 int tx_qid; 1619 int err; 1620 1621 if (qid >= priv->rx_cfg.num_queues) { 1622 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1623 return -EINVAL; 1624 } 1625 if (xsk_pool_get_rx_frame_size(pool) < 1626 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1627 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1628 return -EINVAL; 1629 } 1630 1631 err = 
xsk_pool_dma_map(pool, &priv->pdev->dev, 1632 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1633 if (err) 1634 return err; 1635 1636 /* If XDP prog is not installed or interface is down, return. */ 1637 if (!priv->xdp_prog || !netif_running(dev)) 1638 return 0; 1639 1640 rx = &priv->rx[qid]; 1641 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1642 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1643 if (err) 1644 goto err; 1645 1646 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1647 MEM_TYPE_XSK_BUFF_POOL, NULL); 1648 if (err) 1649 goto err; 1650 1651 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1652 rx->xsk_pool = pool; 1653 1654 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1655 priv->tx[tx_qid].xsk_pool = pool; 1656 1657 return 0; 1658 err: 1659 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1660 xdp_rxq_info_unreg(&rx->xsk_rxq); 1661 1662 xsk_pool_dma_unmap(pool, 1663 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1664 return err; 1665 } 1666 1667 static int gve_xsk_pool_disable(struct net_device *dev, 1668 u16 qid) 1669 { 1670 struct gve_priv *priv = netdev_priv(dev); 1671 struct napi_struct *napi_rx; 1672 struct napi_struct *napi_tx; 1673 struct xsk_buff_pool *pool; 1674 int tx_qid; 1675 1676 pool = xsk_get_pool_from_qid(dev, qid); 1677 if (!pool) 1678 return -EINVAL; 1679 if (qid >= priv->rx_cfg.num_queues) 1680 return -EINVAL; 1681 1682 /* If XDP prog is not installed or interface is down, unmap DMA and 1683 * return. 1684 */ 1685 if (!priv->xdp_prog || !netif_running(dev)) 1686 goto done; 1687 1688 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1689 napi_disable(napi_rx); /* make sure current rx poll is done */ 1690 1691 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1692 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1693 napi_disable(napi_tx); /* make sure current tx poll is done */ 1694 1695 priv->rx[qid].xsk_pool = NULL; 1696 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1697 priv->tx[tx_qid].xsk_pool = NULL; 1698 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1699 1700 napi_enable(napi_rx); 1701 if (gve_rx_work_pending(&priv->rx[qid])) 1702 napi_schedule(napi_rx); 1703 1704 napi_enable(napi_tx); 1705 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1706 napi_schedule(napi_tx); 1707 1708 done: 1709 xsk_pool_dma_unmap(pool, 1710 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1711 return 0; 1712 } 1713 1714 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1715 { 1716 struct gve_priv *priv = netdev_priv(dev); 1717 struct napi_struct *napi; 1718 1719 if (!gve_get_napi_enabled(priv)) 1720 return -ENETDOWN; 1721 1722 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1723 return -EINVAL; 1724 1725 napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; 1726 if (!napi_if_scheduled_mark_missed(napi)) { 1727 /* Call local_bh_enable to trigger SoftIRQ processing */ 1728 local_bh_disable(); 1729 napi_schedule(napi); 1730 local_bh_enable(); 1731 } 1732 1733 return 0; 1734 } 1735 1736 static int verify_xdp_configuration(struct net_device *dev) 1737 { 1738 struct gve_priv *priv = netdev_priv(dev); 1739 1740 if (dev->features & NETIF_F_LRO) { 1741 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1742 return -EOPNOTSUPP; 1743 } 1744 1745 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1746 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1747 priv->queue_format); 1748 return -EOPNOTSUPP; 1749 } 1750 1751 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) 
{
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}

static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}

int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
		 */
		gve_turndown(priv);
		return err;
	}

	return 0;
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int num_xdp_queues;
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg = &new_rx_config;
	tx_alloc_cfg.num_rings = new_tx_config.num_queues;

	/* Add dedicated XDP TX queues if enabled. */
	num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0;
	tx_alloc_cfg.num_rings += num_xdp_queues;

	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up.
*/ 1861 priv->tx_cfg = new_tx_config; 1862 priv->rx_cfg = new_rx_config; 1863 1864 return 0; 1865 } 1866 1867 static void gve_turndown(struct gve_priv *priv) 1868 { 1869 int idx; 1870 1871 if (netif_carrier_ok(priv->dev)) 1872 netif_carrier_off(priv->dev); 1873 1874 if (!gve_get_napi_enabled(priv)) 1875 return; 1876 1877 /* Disable napi to prevent more work from coming in */ 1878 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1879 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1880 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1881 1882 if (!gve_tx_was_added_to_block(priv, idx)) 1883 continue; 1884 1885 if (idx < priv->tx_cfg.num_queues) 1886 netif_queue_set_napi(priv->dev, idx, 1887 NETDEV_QUEUE_TYPE_TX, NULL); 1888 1889 napi_disable(&block->napi); 1890 } 1891 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1892 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1893 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1894 1895 if (!gve_rx_was_added_to_block(priv, idx)) 1896 continue; 1897 1898 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1899 NULL); 1900 napi_disable(&block->napi); 1901 } 1902 1903 /* Stop tx queues */ 1904 netif_tx_disable(priv->dev); 1905 1906 xdp_features_clear_redirect_target(priv->dev); 1907 1908 gve_clear_napi_enabled(priv); 1909 gve_clear_report_stats(priv); 1910 1911 /* Make sure that all traffic is finished processing. */ 1912 synchronize_net(); 1913 } 1914 1915 static void gve_turnup(struct gve_priv *priv) 1916 { 1917 int idx; 1918 1919 /* Start the tx queues */ 1920 netif_tx_start_all_queues(priv->dev); 1921 1922 /* Enable napi and unmask interrupts for all queues */ 1923 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1924 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1925 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1926 1927 if (!gve_tx_was_added_to_block(priv, idx)) 1928 continue; 1929 1930 napi_enable(&block->napi); 1931 1932 if (idx < priv->tx_cfg.num_queues) 1933 netif_queue_set_napi(priv->dev, idx, 1934 NETDEV_QUEUE_TYPE_TX, 1935 &block->napi); 1936 1937 if (gve_is_gqi(priv)) { 1938 iowrite32be(0, gve_irq_doorbell(priv, block)); 1939 } else { 1940 gve_set_itr_coalesce_usecs_dqo(priv, block, 1941 priv->tx_coalesce_usecs); 1942 } 1943 1944 /* Any descs written by the NIC before this barrier will be 1945 * handled by the one-off napi schedule below. Whereas any 1946 * descs after the barrier will generate interrupts. 1947 */ 1948 mb(); 1949 napi_schedule(&block->napi); 1950 } 1951 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1952 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1953 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1954 1955 if (!gve_rx_was_added_to_block(priv, idx)) 1956 continue; 1957 1958 napi_enable(&block->napi); 1959 netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, 1960 &block->napi); 1961 1962 if (gve_is_gqi(priv)) { 1963 iowrite32be(0, gve_irq_doorbell(priv, block)); 1964 } else { 1965 gve_set_itr_coalesce_usecs_dqo(priv, block, 1966 priv->rx_coalesce_usecs); 1967 } 1968 1969 /* Any descs written by the NIC before this barrier will be 1970 * handled by the one-off napi schedule below. Whereas any 1971 * descs after the barrier will generate interrupts. 
1972 */ 1973 mb(); 1974 napi_schedule(&block->napi); 1975 } 1976 1977 if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv)) 1978 xdp_features_set_redirect_target(priv->dev, false); 1979 1980 gve_set_napi_enabled(priv); 1981 } 1982 1983 static void gve_turnup_and_check_status(struct gve_priv *priv) 1984 { 1985 u32 status; 1986 1987 gve_turnup(priv); 1988 status = ioread32be(&priv->reg_bar0->device_status); 1989 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1990 } 1991 1992 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1993 { 1994 struct gve_notify_block *block; 1995 struct gve_tx_ring *tx = NULL; 1996 struct gve_priv *priv; 1997 u32 last_nic_done; 1998 u32 current_time; 1999 u32 ntfy_idx; 2000 2001 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 2002 priv = netdev_priv(dev); 2003 if (txqueue > priv->tx_cfg.num_queues) 2004 goto reset; 2005 2006 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 2007 if (ntfy_idx >= priv->num_ntfy_blks) 2008 goto reset; 2009 2010 block = &priv->ntfy_blocks[ntfy_idx]; 2011 tx = block->tx; 2012 2013 current_time = jiffies_to_msecs(jiffies); 2014 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2015 goto reset; 2016 2017 /* Check to see if there are missed completions, which will allow us to 2018 * kick the queue. 2019 */ 2020 last_nic_done = gve_tx_load_event_counter(priv, tx); 2021 if (last_nic_done - tx->done) { 2022 netdev_info(dev, "Kicking queue %d", txqueue); 2023 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 2024 napi_schedule(&block->napi); 2025 tx->last_kick_msec = current_time; 2026 goto out; 2027 } // Else reset. 2028 2029 reset: 2030 gve_schedule_reset(priv); 2031 2032 out: 2033 if (tx) 2034 tx->queue_timeout++; 2035 priv->tx_timeo_cnt++; 2036 } 2037 2038 u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) 2039 { 2040 if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) 2041 return GVE_MAX_RX_BUFFER_SIZE; 2042 else 2043 return GVE_DEFAULT_RX_BUFFER_SIZE; 2044 } 2045 2046 /* header-split is not supported on non-DQO_RDA yet even if device advertises it */ 2047 bool gve_header_split_supported(const struct gve_priv *priv) 2048 { 2049 return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; 2050 } 2051 2052 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) 2053 { 2054 struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; 2055 struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; 2056 bool enable_hdr_split; 2057 int err = 0; 2058 2059 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) 2060 return 0; 2061 2062 if (!gve_header_split_supported(priv)) { 2063 dev_err(&priv->pdev->dev, "Header-split not supported\n"); 2064 return -EOPNOTSUPP; 2065 } 2066 2067 if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 2068 enable_hdr_split = true; 2069 else 2070 enable_hdr_split = false; 2071 2072 if (enable_hdr_split == priv->header_split_enabled) 2073 return 0; 2074 2075 gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2076 2077 rx_alloc_cfg.enable_header_split = enable_hdr_split; 2078 rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); 2079 2080 if (netif_running(priv->dev)) 2081 err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); 2082 return err; 2083 } 2084 2085 static int gve_set_features(struct net_device *netdev, 2086 netdev_features_t features) 2087 { 2088 const netdev_features_t orig_features = netdev->features; 2089 struct gve_tx_alloc_rings_cfg 
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	bool enable_hdr_split;
	int err = 0;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	rx_alloc_cfg.enable_header_split = enable_hdr_split;
	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);

	if (netif_running(priv->dev))
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	return err;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit = gve_start_xmit,
	.ndo_features_check = gve_features_check,
	.ndo_open = gve_open,
	.ndo_stop = gve_close,
	.ndo_get_stats64 = gve_get_stats,
	.ndo_tx_timeout = gve_tx_timeout,
	.ndo_set_features = gve_set_features,
	.ndo_bpf = gve_xdp,
	.ndo_xdp_xmit = gve_xdp_xmit,
	.ndo_xsk_wakeup = gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

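/* Fill in the DMA'd stats report shared with the device: bump the written
 * counter, then emit one record per TX and RX queue.
 */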
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
	xdp_features_t xdp_features;

	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
		xdp_features = NETDEV_XDP_ACT_BASIC;
		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
	} else {
		xdp_features = 0;
	}

	xdp_set_features_flag(priv->dev, xdp_features);
}

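/* Initial (and post-reset) device setup: allocate the admin queue, verify
 * driver/device compatibility, and, unless skip_describe_device, query the
 * device description, size the MSI-X notification blocks and queue counts,
 * then set up device resources.
 */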
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	err = gve_verify_driver_compatibility(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not verify driver compatibility: err=%d\n", err);
		goto err;
	}

	priv->num_registered_pages = 0;

	if (skip_describe_device)
		goto setup_device;

	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_running(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

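/* Report the driver version to the device by streaming the version prefix
 * and version string into the driver version register one byte at a time,
 * terminated with a newline.
 */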
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

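/* Per-queue restart support for the netdev queue management API: a single RX
 * queue is stopped or (re)started by quiescing all traffic, destroying or
 * creating just that queue on the device, and swapping its ring memory in or
 * out, without going through a full device reset.
 */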
static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	/* Destroying queue 0 while other queues exist is not supported in DQO */
	if (!gve_is_gqi(priv) && idx == 0)
		return -ERANGE;

	/* Single-queue destruction requires quiescence on all queues */
	gve_turndown(priv);

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_destroy_single_rx_queue(priv, idx);
	if (err)
		return err;

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			return err;
	}

	gve_rx_stop_ring(priv, idx);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	*gve_per_q_mem = priv->rx[idx];
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return 0;
}

static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
	else
		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
}

static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
				  int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_alloc_rings_cfg cfg = {0};
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	gve_rx_get_curr_alloc_cfg(priv, &cfg);

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
	else
		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);

	return err;
}

static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *gve_per_q_mem;
	int err;

	if (!priv->rx)
		return -EAGAIN;

	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *gve_per_q_mem;

	/* Single-queue creation requires quiescence on all queues */
	gve_turndown(priv);

	gve_rx_start_ring(priv, idx);

	if (gve_is_qpl(priv)) {
		/* This failure will trigger a reset - no need to clean up */
		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
		if (err)
			goto abort;
	}

	/* This failure will trigger a reset - no need to clean up */
	err = gve_adminq_create_single_rx_queue(priv, idx);
	if (err)
		goto abort;

	if (gve_is_gqi(priv))
		gve_rx_write_doorbell(priv, &priv->rx[idx]);
	else
		gve_rx_post_buffers_dqo(&priv->rx[idx]);

	/* Turn the unstopped queues back up */
	gve_turnup_and_check_status(priv);
	return 0;

abort:
	gve_rx_stop_ring(priv, idx);

	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
	return err;
}

static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct gve_rx_ring),
	.ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
	.ndo_queue_mem_free = gve_rx_queue_mem_free,
	.ndo_queue_start = gve_rx_queue_start,
	.ndo_queue_stop = gve_rx_queue_stop,
};

static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_rx_ring *rx = &priv->rx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rx->statss);
		rx_stats->packets = rx->rpackets;
		rx_stats->bytes = rx->rbytes;
		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
				       rx->rx_buf_alloc_fail;
	} while (u64_stats_fetch_retry(&rx->statss, start));
}

static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *tx_stats)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx = &priv->tx[idx];
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&tx->statss);
		tx_stats->packets = tx->pkt_done;
		tx_stats->bytes = tx->bytes_done;
	} while (u64_stats_fetch_retry(&tx->statss, start));
}

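/* All counters this driver exposes are tracked per queue, so the base stats
 * the core adds the per-queue values onto are simply zero.
 */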
static void gve_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	rx->packets = 0;
	rx->bytes = 0;
	rx->alloc_fail = 0;

	tx->packets = 0;
	tx->bytes = 0;
}

static const struct netdev_stat_ops gve_stat_ops = {
	.get_queue_stats_rx = gve_get_rx_queue_stats,
	.get_queue_stats_tx = gve_get_tx_queue_stats,
	.get_base_stats = gve_get_base_stats,
};

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
	dev->stat_ops = &gve_stat_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
2690 */ 2691 dev->hw_features = NETIF_F_HIGHDMA; 2692 dev->hw_features |= NETIF_F_SG; 2693 dev->hw_features |= NETIF_F_HW_CSUM; 2694 dev->hw_features |= NETIF_F_TSO; 2695 dev->hw_features |= NETIF_F_TSO6; 2696 dev->hw_features |= NETIF_F_TSO_ECN; 2697 dev->hw_features |= NETIF_F_RXCSUM; 2698 dev->hw_features |= NETIF_F_RXHASH; 2699 dev->features = dev->hw_features; 2700 dev->watchdog_timeo = 5 * HZ; 2701 dev->min_mtu = ETH_MIN_MTU; 2702 netif_carrier_off(dev); 2703 2704 priv = netdev_priv(dev); 2705 priv->dev = dev; 2706 priv->pdev = pdev; 2707 priv->msg_enable = DEFAULT_MSG_LEVEL; 2708 priv->reg_bar0 = reg_bar; 2709 priv->db_bar2 = db_bar; 2710 priv->service_task_flags = 0x0; 2711 priv->state_flags = 0x0; 2712 priv->ethtool_flags = 0x0; 2713 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 2714 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2715 2716 gve_set_probe_in_progress(priv); 2717 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2718 if (!priv->gve_wq) { 2719 dev_err(&pdev->dev, "Could not allocate workqueue"); 2720 err = -ENOMEM; 2721 goto abort_with_netdev; 2722 } 2723 INIT_WORK(&priv->service_task, gve_service_task); 2724 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2725 priv->tx_cfg.max_queues = max_tx_queues; 2726 priv->rx_cfg.max_queues = max_rx_queues; 2727 2728 err = gve_init_priv(priv, false); 2729 if (err) 2730 goto abort_with_wq; 2731 2732 err = register_netdev(dev); 2733 if (err) 2734 goto abort_with_gve_init; 2735 2736 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2737 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2738 gve_clear_probe_in_progress(priv); 2739 queue_work(priv->gve_wq, &priv->service_task); 2740 return 0; 2741 2742 abort_with_gve_init: 2743 gve_teardown_priv_resources(priv); 2744 2745 abort_with_wq: 2746 destroy_workqueue(priv->gve_wq); 2747 2748 abort_with_netdev: 2749 free_netdev(dev); 2750 2751 abort_with_db_bar: 2752 pci_iounmap(pdev, db_bar); 2753 2754 abort_with_reg_bar: 2755 pci_iounmap(pdev, reg_bar); 2756 2757 abort_with_pci_region: 2758 pci_release_regions(pdev); 2759 2760 abort_with_enabled: 2761 pci_disable_device(pdev); 2762 return err; 2763 } 2764 2765 static void gve_remove(struct pci_dev *pdev) 2766 { 2767 struct net_device *netdev = pci_get_drvdata(pdev); 2768 struct gve_priv *priv = netdev_priv(netdev); 2769 __be32 __iomem *db_bar = priv->db_bar2; 2770 void __iomem *reg_bar = priv->reg_bar0; 2771 2772 unregister_netdev(netdev); 2773 gve_teardown_priv_resources(priv); 2774 destroy_workqueue(priv->gve_wq); 2775 free_netdev(netdev); 2776 pci_iounmap(pdev, db_bar); 2777 pci_iounmap(pdev, reg_bar); 2778 pci_release_regions(pdev); 2779 pci_disable_device(pdev); 2780 } 2781 2782 static void gve_shutdown(struct pci_dev *pdev) 2783 { 2784 struct net_device *netdev = pci_get_drvdata(pdev); 2785 struct gve_priv *priv = netdev_priv(netdev); 2786 bool was_up = netif_running(priv->dev); 2787 2788 rtnl_lock(); 2789 if (was_up && gve_close(priv->dev)) { 2790 /* If the dev was up, attempt to close, if close fails, reset */ 2791 gve_reset_and_teardown(priv, was_up); 2792 } else { 2793 /* If the dev wasn't up or close worked, finish tearing down */ 2794 gve_teardown_priv_resources(priv); 2795 } 2796 rtnl_unlock(); 2797 } 2798 2799 #ifdef CONFIG_PM 2800 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2801 { 2802 struct net_device *netdev = pci_get_drvdata(pdev); 2803 struct gve_priv *priv = netdev_priv(netdev); 2804 bool was_up = netif_running(priv->dev); 2805 2806 
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_running(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);