1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2024 Google LLC 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/irq.h> 13 #include <linux/module.h> 14 #include <linux/pci.h> 15 #include <linux/sched.h> 16 #include <linux/timer.h> 17 #include <linux/workqueue.h> 18 #include <linux/utsname.h> 19 #include <linux/version.h> 20 #include <net/netdev_queues.h> 21 #include <net/sch_generic.h> 22 #include <net/xdp_sock_drv.h> 23 #include "gve.h" 24 #include "gve_dqo.h" 25 #include "gve_adminq.h" 26 #include "gve_register.h" 27 #include "gve_utils.h" 28 29 #define GVE_DEFAULT_RX_COPYBREAK (256) 30 31 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 32 #define GVE_VERSION "1.0.0" 33 #define GVE_VERSION_PREFIX "GVE-" 34 35 // Minimum amount of time between queue kicks in msec (10 seconds) 36 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 37 38 char gve_driver_name[] = "gve"; 39 const char gve_version_str[] = GVE_VERSION; 40 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 41 42 static int gve_verify_driver_compatibility(struct gve_priv *priv) 43 { 44 int err; 45 struct gve_driver_info *driver_info; 46 dma_addr_t driver_info_bus; 47 48 driver_info = dma_alloc_coherent(&priv->pdev->dev, 49 sizeof(struct gve_driver_info), 50 &driver_info_bus, GFP_KERNEL); 51 if (!driver_info) 52 return -ENOMEM; 53 54 *driver_info = (struct gve_driver_info) { 55 .os_type = 1, /* Linux */ 56 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 57 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 58 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 59 .driver_capability_flags = { 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 61 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 62 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 63 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 64 }, 65 }; 66 
strscpy(driver_info->os_version_str1, utsname()->release, 67 sizeof(driver_info->os_version_str1)); 68 strscpy(driver_info->os_version_str2, utsname()->version, 69 sizeof(driver_info->os_version_str2)); 70 71 err = gve_adminq_verify_driver_compatibility(priv, 72 sizeof(struct gve_driver_info), 73 driver_info_bus); 74 75 /* It's ok if the device doesn't support this */ 76 if (err == -EOPNOTSUPP) 77 err = 0; 78 79 dma_free_coherent(&priv->pdev->dev, 80 sizeof(struct gve_driver_info), 81 driver_info, driver_info_bus); 82 return err; 83 } 84 85 static netdev_features_t gve_features_check(struct sk_buff *skb, 86 struct net_device *dev, 87 netdev_features_t features) 88 { 89 struct gve_priv *priv = netdev_priv(dev); 90 91 if (!gve_is_gqi(priv)) 92 return gve_features_check_dqo(skb, dev, features); 93 94 return features; 95 } 96 97 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 98 { 99 struct gve_priv *priv = netdev_priv(dev); 100 101 if (gve_is_gqi(priv)) 102 return gve_tx(skb, dev); 103 else 104 return gve_tx_dqo(skb, dev); 105 } 106 107 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 108 { 109 struct gve_priv *priv = netdev_priv(dev); 110 unsigned int start; 111 u64 packets, bytes; 112 int num_tx_queues; 113 int ring; 114 115 num_tx_queues = gve_num_tx_queues(priv); 116 if (priv->rx) { 117 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 118 do { 119 start = 120 u64_stats_fetch_begin(&priv->rx[ring].statss); 121 packets = priv->rx[ring].rpackets; 122 bytes = priv->rx[ring].rbytes; 123 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 124 start)); 125 s->rx_packets += packets; 126 s->rx_bytes += bytes; 127 } 128 } 129 if (priv->tx) { 130 for (ring = 0; ring < num_tx_queues; ring++) { 131 do { 132 start = 133 u64_stats_fetch_begin(&priv->tx[ring].statss); 134 packets = priv->tx[ring].pkt_done; 135 bytes = priv->tx[ring].bytes_done; 136 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 137 
start)); 138 s->tx_packets += packets; 139 s->tx_bytes += bytes; 140 } 141 } 142 } 143 144 static int gve_alloc_flow_rule_caches(struct gve_priv *priv) 145 { 146 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 147 int err = 0; 148 149 if (!priv->max_flow_rules) 150 return 0; 151 152 flow_rules_cache->rules_cache = 153 kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), 154 GFP_KERNEL); 155 if (!flow_rules_cache->rules_cache) { 156 dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); 157 return -ENOMEM; 158 } 159 160 flow_rules_cache->rule_ids_cache = 161 kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), 162 GFP_KERNEL); 163 if (!flow_rules_cache->rule_ids_cache) { 164 dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); 165 err = -ENOMEM; 166 goto free_rules_cache; 167 } 168 169 return 0; 170 171 free_rules_cache: 172 kvfree(flow_rules_cache->rules_cache); 173 flow_rules_cache->rules_cache = NULL; 174 return err; 175 } 176 177 static void gve_free_flow_rule_caches(struct gve_priv *priv) 178 { 179 struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; 180 181 kvfree(flow_rules_cache->rule_ids_cache); 182 flow_rules_cache->rule_ids_cache = NULL; 183 kvfree(flow_rules_cache->rules_cache); 184 flow_rules_cache->rules_cache = NULL; 185 } 186 187 static int gve_alloc_counter_array(struct gve_priv *priv) 188 { 189 priv->counter_array = 190 dma_alloc_coherent(&priv->pdev->dev, 191 priv->num_event_counters * 192 sizeof(*priv->counter_array), 193 &priv->counter_array_bus, GFP_KERNEL); 194 if (!priv->counter_array) 195 return -ENOMEM; 196 197 return 0; 198 } 199 200 static void gve_free_counter_array(struct gve_priv *priv) 201 { 202 if (!priv->counter_array) 203 return; 204 205 dma_free_coherent(&priv->pdev->dev, 206 priv->num_event_counters * 207 sizeof(*priv->counter_array), 208 priv->counter_array, priv->counter_array_bus); 209 priv->counter_array = NULL; 
210 } 211 212 /* NIC requests to report stats */ 213 static void gve_stats_report_task(struct work_struct *work) 214 { 215 struct gve_priv *priv = container_of(work, struct gve_priv, 216 stats_report_task); 217 if (gve_get_do_report_stats(priv)) { 218 gve_handle_report_stats(priv); 219 gve_clear_do_report_stats(priv); 220 } 221 } 222 223 static void gve_stats_report_schedule(struct gve_priv *priv) 224 { 225 if (!gve_get_probe_in_progress(priv) && 226 !gve_get_reset_in_progress(priv)) { 227 gve_set_do_report_stats(priv); 228 queue_work(priv->gve_wq, &priv->stats_report_task); 229 } 230 } 231 232 static void gve_stats_report_timer(struct timer_list *t) 233 { 234 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 235 236 mod_timer(&priv->stats_report_timer, 237 round_jiffies(jiffies + 238 msecs_to_jiffies(priv->stats_report_timer_period))); 239 gve_stats_report_schedule(priv); 240 } 241 242 static int gve_alloc_stats_report(struct gve_priv *priv) 243 { 244 int tx_stats_num, rx_stats_num; 245 246 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 247 gve_num_tx_queues(priv); 248 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 249 priv->rx_cfg.num_queues; 250 priv->stats_report_len = struct_size(priv->stats_report, stats, 251 size_add(tx_stats_num, rx_stats_num)); 252 priv->stats_report = 253 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 254 &priv->stats_report_bus, GFP_KERNEL); 255 if (!priv->stats_report) 256 return -ENOMEM; 257 /* Set up timer for the report-stats task */ 258 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 259 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 260 return 0; 261 } 262 263 static void gve_free_stats_report(struct gve_priv *priv) 264 { 265 if (!priv->stats_report) 266 return; 267 268 del_timer_sync(&priv->stats_report_timer); 269 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 270 priv->stats_report, priv->stats_report_bus); 
271 priv->stats_report = NULL; 272 } 273 274 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 275 { 276 struct gve_priv *priv = arg; 277 278 queue_work(priv->gve_wq, &priv->service_task); 279 return IRQ_HANDLED; 280 } 281 282 static irqreturn_t gve_intr(int irq, void *arg) 283 { 284 struct gve_notify_block *block = arg; 285 struct gve_priv *priv = block->priv; 286 287 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 288 napi_schedule_irqoff(&block->napi); 289 return IRQ_HANDLED; 290 } 291 292 static irqreturn_t gve_intr_dqo(int irq, void *arg) 293 { 294 struct gve_notify_block *block = arg; 295 296 /* Interrupts are automatically masked */ 297 napi_schedule_irqoff(&block->napi); 298 return IRQ_HANDLED; 299 } 300 301 static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) 302 { 303 int cpu_curr = smp_processor_id(); 304 const struct cpumask *aff_mask; 305 306 aff_mask = irq_get_effective_affinity_mask(irq); 307 if (unlikely(!aff_mask)) 308 return 1; 309 310 return cpumask_test_cpu(cpu_curr, aff_mask); 311 } 312 313 int gve_napi_poll(struct napi_struct *napi, int budget) 314 { 315 struct gve_notify_block *block; 316 __be32 __iomem *irq_doorbell; 317 bool reschedule = false; 318 struct gve_priv *priv; 319 int work_done = 0; 320 321 block = container_of(napi, struct gve_notify_block, napi); 322 priv = block->priv; 323 324 if (block->tx) { 325 if (block->tx->q_num < priv->tx_cfg.num_queues) 326 reschedule |= gve_tx_poll(block, budget); 327 else if (budget) 328 reschedule |= gve_xdp_poll(block, budget); 329 } 330 331 if (!budget) 332 return 0; 333 334 if (block->rx) { 335 work_done = gve_rx_poll(block, budget); 336 reschedule |= work_done == budget; 337 } 338 339 if (reschedule) 340 return budget; 341 342 /* Complete processing - don't unmask irq if busy polling is enabled */ 343 if (likely(napi_complete_done(napi, work_done))) { 344 irq_doorbell = gve_irq_doorbell(priv, block); 345 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 346 347 
/* Ensure IRQ ACK is visible before we check pending work. 348 * If queue had issued updates, it would be truly visible. 349 */ 350 mb(); 351 352 if (block->tx) 353 reschedule |= gve_tx_clean_pending(priv, block->tx); 354 if (block->rx) 355 reschedule |= gve_rx_work_pending(block->rx); 356 357 if (reschedule && napi_schedule(napi)) 358 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 359 } 360 return work_done; 361 } 362 363 int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 364 { 365 struct gve_notify_block *block = 366 container_of(napi, struct gve_notify_block, napi); 367 struct gve_priv *priv = block->priv; 368 bool reschedule = false; 369 int work_done = 0; 370 371 if (block->tx) 372 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 373 374 if (!budget) 375 return 0; 376 377 if (block->rx) { 378 work_done = gve_rx_poll_dqo(block, budget); 379 reschedule |= work_done == budget; 380 } 381 382 if (reschedule) { 383 /* Reschedule by returning budget only if already on the correct 384 * cpu. 385 */ 386 if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) 387 return budget; 388 389 /* If not on the cpu with which this queue's irq has affinity 390 * with, we avoid rescheduling napi and arm the irq instead so 391 * that napi gets rescheduled back eventually onto the right 392 * cpu. 393 */ 394 if (work_done == budget) 395 work_done--; 396 } 397 398 if (likely(napi_complete_done(napi, work_done))) { 399 /* Enable interrupts again. 400 * 401 * We don't need to repoll afterwards because HW supports the 402 * PCI MSI-X PBA feature. 403 * 404 * Another interrupt would be triggered if a new event came in 405 * since the last one. 
406 */ 407 gve_write_irq_doorbell_dqo(priv, block, 408 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 409 } 410 411 return work_done; 412 } 413 414 static int gve_alloc_notify_blocks(struct gve_priv *priv) 415 { 416 int num_vecs_requested = priv->num_ntfy_blks + 1; 417 unsigned int active_cpus; 418 int vecs_enabled; 419 int i, j; 420 int err; 421 422 priv->msix_vectors = kvcalloc(num_vecs_requested, 423 sizeof(*priv->msix_vectors), GFP_KERNEL); 424 if (!priv->msix_vectors) 425 return -ENOMEM; 426 for (i = 0; i < num_vecs_requested; i++) 427 priv->msix_vectors[i].entry = i; 428 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 429 GVE_MIN_MSIX, num_vecs_requested); 430 if (vecs_enabled < 0) { 431 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 432 GVE_MIN_MSIX, vecs_enabled); 433 err = vecs_enabled; 434 goto abort_with_msix_vectors; 435 } 436 if (vecs_enabled != num_vecs_requested) { 437 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 438 int vecs_per_type = new_num_ntfy_blks / 2; 439 int vecs_left = new_num_ntfy_blks % 2; 440 441 priv->num_ntfy_blks = new_num_ntfy_blks; 442 priv->mgmt_msix_idx = priv->num_ntfy_blks; 443 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 444 vecs_per_type); 445 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 446 vecs_per_type + vecs_left); 447 dev_err(&priv->pdev->dev, 448 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 449 vecs_enabled, priv->tx_cfg.max_queues, 450 priv->rx_cfg.max_queues); 451 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 452 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 453 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 454 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 455 } 456 /* Half the notification blocks go to TX and half to RX */ 457 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 458 459 /* Setup Management Vector - the last vector */ 
460 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 461 pci_name(priv->pdev)); 462 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 463 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 464 if (err) { 465 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 466 goto abort_with_msix_enabled; 467 } 468 priv->irq_db_indices = 469 dma_alloc_coherent(&priv->pdev->dev, 470 priv->num_ntfy_blks * 471 sizeof(*priv->irq_db_indices), 472 &priv->irq_db_indices_bus, GFP_KERNEL); 473 if (!priv->irq_db_indices) { 474 err = -ENOMEM; 475 goto abort_with_mgmt_vector; 476 } 477 478 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 479 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 480 if (!priv->ntfy_blocks) { 481 err = -ENOMEM; 482 goto abort_with_irq_db_indices; 483 } 484 485 /* Setup the other blocks - the first n-1 vectors */ 486 for (i = 0; i < priv->num_ntfy_blks; i++) { 487 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 488 int msix_idx = i; 489 490 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 491 i, pci_name(priv->pdev)); 492 block->priv = priv; 493 err = request_irq(priv->msix_vectors[msix_idx].vector, 494 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 495 0, block->name, block); 496 if (err) { 497 dev_err(&priv->pdev->dev, 498 "Failed to receive msix vector %d\n", i); 499 goto abort_with_some_ntfy_blocks; 500 } 501 block->irq = priv->msix_vectors[msix_idx].vector; 502 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 503 get_cpu_mask(i % active_cpus)); 504 block->irq_db_index = &priv->irq_db_indices[i].index; 505 } 506 return 0; 507 abort_with_some_ntfy_blocks: 508 for (j = 0; j < i; j++) { 509 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 510 int msix_idx = j; 511 512 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 513 NULL); 514 free_irq(priv->msix_vectors[msix_idx].vector, block); 515 block->irq = 0; 516 } 517 kvfree(priv->ntfy_blocks); 518 priv->ntfy_blocks = NULL; 519 abort_with_irq_db_indices: 520 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 521 sizeof(*priv->irq_db_indices), 522 priv->irq_db_indices, priv->irq_db_indices_bus); 523 priv->irq_db_indices = NULL; 524 abort_with_mgmt_vector: 525 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 526 abort_with_msix_enabled: 527 pci_disable_msix(priv->pdev); 528 abort_with_msix_vectors: 529 kvfree(priv->msix_vectors); 530 priv->msix_vectors = NULL; 531 return err; 532 } 533 534 static void gve_free_notify_blocks(struct gve_priv *priv) 535 { 536 int i; 537 538 if (!priv->msix_vectors) 539 return; 540 541 /* Free the irqs */ 542 for (i = 0; i < priv->num_ntfy_blks; i++) { 543 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 544 int msix_idx = i; 545 546 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 547 NULL); 548 free_irq(priv->msix_vectors[msix_idx].vector, block); 549 block->irq = 0; 550 } 551 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 552 kvfree(priv->ntfy_blocks); 553 priv->ntfy_blocks = NULL; 554 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 555 sizeof(*priv->irq_db_indices), 556 priv->irq_db_indices, 
priv->irq_db_indices_bus); 557 priv->irq_db_indices = NULL; 558 pci_disable_msix(priv->pdev); 559 kvfree(priv->msix_vectors); 560 priv->msix_vectors = NULL; 561 } 562 563 static int gve_setup_device_resources(struct gve_priv *priv) 564 { 565 int err; 566 567 err = gve_alloc_flow_rule_caches(priv); 568 if (err) 569 return err; 570 err = gve_alloc_counter_array(priv); 571 if (err) 572 goto abort_with_flow_rule_caches; 573 err = gve_alloc_notify_blocks(priv); 574 if (err) 575 goto abort_with_counter; 576 err = gve_alloc_stats_report(priv); 577 if (err) 578 goto abort_with_ntfy_blocks; 579 err = gve_adminq_configure_device_resources(priv, 580 priv->counter_array_bus, 581 priv->num_event_counters, 582 priv->irq_db_indices_bus, 583 priv->num_ntfy_blks); 584 if (unlikely(err)) { 585 dev_err(&priv->pdev->dev, 586 "could not setup device_resources: err=%d\n", err); 587 err = -ENXIO; 588 goto abort_with_stats_report; 589 } 590 591 if (!gve_is_gqi(priv)) { 592 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 593 GFP_KERNEL); 594 if (!priv->ptype_lut_dqo) { 595 err = -ENOMEM; 596 goto abort_with_stats_report; 597 } 598 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 599 if (err) { 600 dev_err(&priv->pdev->dev, 601 "Failed to get ptype map: err=%d\n", err); 602 goto abort_with_ptype_lut; 603 } 604 } 605 606 err = gve_adminq_report_stats(priv, priv->stats_report_len, 607 priv->stats_report_bus, 608 GVE_STATS_REPORT_TIMER_PERIOD); 609 if (err) 610 dev_err(&priv->pdev->dev, 611 "Failed to report stats: err=%d\n", err); 612 gve_set_device_resources_ok(priv); 613 return 0; 614 615 abort_with_ptype_lut: 616 kvfree(priv->ptype_lut_dqo); 617 priv->ptype_lut_dqo = NULL; 618 abort_with_stats_report: 619 gve_free_stats_report(priv); 620 abort_with_ntfy_blocks: 621 gve_free_notify_blocks(priv); 622 abort_with_counter: 623 gve_free_counter_array(priv); 624 abort_with_flow_rule_caches: 625 gve_free_flow_rule_caches(priv); 626 627 return err; 628 } 629 630 static 
void gve_trigger_reset(struct gve_priv *priv); 631 632 static void gve_teardown_device_resources(struct gve_priv *priv) 633 { 634 int err; 635 636 /* Tell device its resources are being freed */ 637 if (gve_get_device_resources_ok(priv)) { 638 err = gve_flow_rules_reset(priv); 639 if (err) { 640 dev_err(&priv->pdev->dev, 641 "Failed to reset flow rules: err=%d\n", err); 642 gve_trigger_reset(priv); 643 } 644 /* detach the stats report */ 645 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 646 if (err) { 647 dev_err(&priv->pdev->dev, 648 "Failed to detach stats report: err=%d\n", err); 649 gve_trigger_reset(priv); 650 } 651 err = gve_adminq_deconfigure_device_resources(priv); 652 if (err) { 653 dev_err(&priv->pdev->dev, 654 "Could not deconfigure device resources: err=%d\n", 655 err); 656 gve_trigger_reset(priv); 657 } 658 } 659 660 kvfree(priv->ptype_lut_dqo); 661 priv->ptype_lut_dqo = NULL; 662 663 gve_free_flow_rule_caches(priv); 664 gve_free_counter_array(priv); 665 gve_free_notify_blocks(priv); 666 gve_free_stats_report(priv); 667 gve_clear_device_resources_ok(priv); 668 } 669 670 static int gve_unregister_qpl(struct gve_priv *priv, 671 struct gve_queue_page_list *qpl) 672 { 673 int err; 674 675 if (!qpl) 676 return 0; 677 678 err = gve_adminq_unregister_page_list(priv, qpl->id); 679 if (err) { 680 netif_err(priv, drv, priv->dev, 681 "Failed to unregister queue page list %d\n", 682 qpl->id); 683 return err; 684 } 685 686 priv->num_registered_pages -= qpl->num_entries; 687 return 0; 688 } 689 690 static int gve_register_qpl(struct gve_priv *priv, 691 struct gve_queue_page_list *qpl) 692 { 693 int pages; 694 int err; 695 696 if (!qpl) 697 return 0; 698 699 pages = qpl->num_entries; 700 701 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 702 netif_err(priv, drv, priv->dev, 703 "Reached max number of registered pages %llu > %llu\n", 704 pages + priv->num_registered_pages, 705 priv->max_registered_pages); 706 return 
-EINVAL; 707 } 708 709 err = gve_adminq_register_page_list(priv, qpl); 710 if (err) { 711 netif_err(priv, drv, priv->dev, 712 "failed to register queue page list %d\n", 713 qpl->id); 714 return err; 715 } 716 717 priv->num_registered_pages += pages; 718 return 0; 719 } 720 721 static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) 722 { 723 struct gve_tx_ring *tx = &priv->tx[idx]; 724 725 if (gve_is_gqi(priv)) 726 return tx->tx_fifo.qpl; 727 else 728 return tx->dqo.qpl; 729 } 730 731 static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) 732 { 733 struct gve_rx_ring *rx = &priv->rx[idx]; 734 735 if (gve_is_gqi(priv)) 736 return rx->data.qpl; 737 else 738 return rx->dqo.qpl; 739 } 740 741 static int gve_register_xdp_qpls(struct gve_priv *priv) 742 { 743 int start_id; 744 int err; 745 int i; 746 747 start_id = gve_xdp_tx_start_queue_id(priv); 748 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 749 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 750 /* This failure will trigger a reset - no need to clean up */ 751 if (err) 752 return err; 753 } 754 return 0; 755 } 756 757 static int gve_register_qpls(struct gve_priv *priv) 758 { 759 int num_tx_qpls, num_rx_qpls; 760 int err; 761 int i; 762 763 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 764 gve_is_qpl(priv)); 765 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 766 767 for (i = 0; i < num_tx_qpls; i++) { 768 err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); 769 if (err) 770 return err; 771 } 772 773 for (i = 0; i < num_rx_qpls; i++) { 774 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); 775 if (err) 776 return err; 777 } 778 779 return 0; 780 } 781 782 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 783 { 784 int start_id; 785 int err; 786 int i; 787 788 start_id = gve_xdp_tx_start_queue_id(priv); 789 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 790 err = 
gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 791 /* This failure will trigger a reset - no need to clean */ 792 if (err) 793 return err; 794 } 795 return 0; 796 } 797 798 static int gve_unregister_qpls(struct gve_priv *priv) 799 { 800 int num_tx_qpls, num_rx_qpls; 801 int err; 802 int i; 803 804 num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), 805 gve_is_qpl(priv)); 806 num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); 807 808 for (i = 0; i < num_tx_qpls; i++) { 809 err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); 810 /* This failure will trigger a reset - no need to clean */ 811 if (err) 812 return err; 813 } 814 815 for (i = 0; i < num_rx_qpls; i++) { 816 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); 817 /* This failure will trigger a reset - no need to clean */ 818 if (err) 819 return err; 820 } 821 return 0; 822 } 823 824 static int gve_create_xdp_rings(struct gve_priv *priv) 825 { 826 int err; 827 828 err = gve_adminq_create_tx_queues(priv, 829 gve_xdp_tx_start_queue_id(priv), 830 priv->num_xdp_queues); 831 if (err) { 832 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 833 priv->num_xdp_queues); 834 /* This failure will trigger a reset - no need to clean 835 * up 836 */ 837 return err; 838 } 839 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 840 priv->num_xdp_queues); 841 842 return 0; 843 } 844 845 static int gve_create_rings(struct gve_priv *priv) 846 { 847 int num_tx_queues = gve_num_tx_queues(priv); 848 int err; 849 int i; 850 851 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 852 if (err) { 853 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 854 num_tx_queues); 855 /* This failure will trigger a reset - no need to clean 856 * up 857 */ 858 return err; 859 } 860 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 861 num_tx_queues); 862 863 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 864 if (err) { 865 
netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 866 priv->rx_cfg.num_queues); 867 /* This failure will trigger a reset - no need to clean 868 * up 869 */ 870 return err; 871 } 872 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 873 priv->rx_cfg.num_queues); 874 875 if (gve_is_gqi(priv)) { 876 /* Rx data ring has been prefilled with packet buffers at queue 877 * allocation time. 878 * 879 * Write the doorbell to provide descriptor slots and packet 880 * buffers to the NIC. 881 */ 882 for (i = 0; i < priv->rx_cfg.num_queues; i++) 883 gve_rx_write_doorbell(priv, &priv->rx[i]); 884 } else { 885 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 886 /* Post buffers and ring doorbell. */ 887 gve_rx_post_buffers_dqo(&priv->rx[i]); 888 } 889 } 890 891 return 0; 892 } 893 894 static void init_xdp_sync_stats(struct gve_priv *priv) 895 { 896 int start_id = gve_xdp_tx_start_queue_id(priv); 897 int i; 898 899 /* Init stats */ 900 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 901 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 902 903 u64_stats_init(&priv->tx[i].statss); 904 priv->tx[i].ntfy_id = ntfy_idx; 905 } 906 } 907 908 static void gve_init_sync_stats(struct gve_priv *priv) 909 { 910 int i; 911 912 for (i = 0; i < priv->tx_cfg.num_queues; i++) 913 u64_stats_init(&priv->tx[i].statss); 914 915 /* Init stats for XDP TX queues */ 916 init_xdp_sync_stats(priv); 917 918 for (i = 0; i < priv->rx_cfg.num_queues; i++) 919 u64_stats_init(&priv->rx[i].statss); 920 } 921 922 static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, 923 struct gve_tx_alloc_rings_cfg *cfg) 924 { 925 cfg->qcfg = &priv->tx_cfg; 926 cfg->raw_addressing = !gve_is_qpl(priv); 927 cfg->ring_size = priv->tx_desc_cnt; 928 cfg->start_idx = 0; 929 cfg->num_rings = gve_num_tx_queues(priv); 930 cfg->tx = priv->tx; 931 } 932 933 static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) 934 { 935 int i; 936 937 if (!priv->tx) 938 return; 939 940 for (i 
/* Allocate tx ring memory followed by rx ring memory, using the GQI or DQO
 * allocator depending on gve_is_gqi().
 *
 * If the rx allocation fails, the tx rings allocated here are freed again.
 *
 * Return: 0 on success, otherwise the error from the failing allocator.
 */
static int gve_queues_mem_alloc(struct gve_priv *priv,
				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int ret;

	ret = gve_is_gqi(priv) ? gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg)
			       : gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
	if (ret)
		return ret;

	ret = gve_is_gqi(priv) ? gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg)
			       : gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
	if (!ret)
		return 0;

	/* rx failed: give the tx rings back */
	if (gve_is_gqi(priv))
		gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
	else
		gve_tx_free_rings_dqo(priv, tx_alloc_cfg);

	return ret;
}
priv->dev, 1024 "failed to destroy XDP queues\n"); 1025 /* This failure will trigger a reset - no need to clean up */ 1026 return err; 1027 } 1028 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 1029 1030 return 0; 1031 } 1032 1033 static int gve_destroy_rings(struct gve_priv *priv) 1034 { 1035 int num_tx_queues = gve_num_tx_queues(priv); 1036 int err; 1037 1038 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 1039 if (err) { 1040 netif_err(priv, drv, priv->dev, 1041 "failed to destroy tx queues\n"); 1042 /* This failure will trigger a reset - no need to clean up */ 1043 return err; 1044 } 1045 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 1046 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 1047 if (err) { 1048 netif_err(priv, drv, priv->dev, 1049 "failed to destroy rx queues\n"); 1050 /* This failure will trigger a reset - no need to clean up */ 1051 return err; 1052 } 1053 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 1054 return 0; 1055 } 1056 1057 static void gve_free_xdp_rings(struct gve_priv *priv) 1058 { 1059 struct gve_tx_alloc_rings_cfg cfg = {0}; 1060 1061 gve_tx_get_curr_alloc_cfg(priv, &cfg); 1062 cfg.start_idx = gve_xdp_tx_start_queue_id(priv); 1063 cfg.num_rings = priv->num_xdp_queues; 1064 1065 if (priv->tx) { 1066 gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); 1067 gve_tx_free_rings_gqi(priv, &cfg); 1068 } 1069 } 1070 1071 static void gve_queues_mem_free(struct gve_priv *priv, 1072 struct gve_tx_alloc_rings_cfg *tx_cfg, 1073 struct gve_rx_alloc_rings_cfg *rx_cfg) 1074 { 1075 if (gve_is_gqi(priv)) { 1076 gve_tx_free_rings_gqi(priv, tx_cfg); 1077 gve_rx_free_rings_gqi(priv, rx_cfg); 1078 } else { 1079 gve_tx_free_rings_dqo(priv, tx_cfg); 1080 gve_rx_free_rings_dqo(priv, rx_cfg); 1081 } 1082 } 1083 1084 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 1085 struct page **page, dma_addr_t *dma, 1086 enum dma_data_direction dir, gfp_t gfp_flags) 1087 { 1088 *page = 
alloc_page(gfp_flags); 1089 if (!*page) { 1090 priv->page_alloc_fail++; 1091 return -ENOMEM; 1092 } 1093 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 1094 if (dma_mapping_error(dev, *dma)) { 1095 priv->dma_mapping_error++; 1096 put_page(*page); 1097 return -ENOMEM; 1098 } 1099 return 0; 1100 } 1101 1102 struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, 1103 u32 id, int pages) 1104 { 1105 struct gve_queue_page_list *qpl; 1106 int err; 1107 int i; 1108 1109 qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); 1110 if (!qpl) 1111 return NULL; 1112 1113 qpl->id = id; 1114 qpl->num_entries = 0; 1115 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1116 if (!qpl->pages) 1117 goto abort; 1118 1119 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1120 if (!qpl->page_buses) 1121 goto abort; 1122 1123 for (i = 0; i < pages; i++) { 1124 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1125 &qpl->page_buses[i], 1126 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1127 if (err) 1128 goto abort; 1129 qpl->num_entries++; 1130 } 1131 1132 return qpl; 1133 1134 abort: 1135 gve_free_queue_page_list(priv, qpl, id); 1136 return NULL; 1137 } 1138 1139 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1140 enum dma_data_direction dir) 1141 { 1142 if (!dma_mapping_error(dev, dma)) 1143 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1144 if (page) 1145 put_page(page); 1146 } 1147 1148 void gve_free_queue_page_list(struct gve_priv *priv, 1149 struct gve_queue_page_list *qpl, 1150 u32 id) 1151 { 1152 int i; 1153 1154 if (!qpl) 1155 return; 1156 if (!qpl->pages) 1157 goto free_qpl; 1158 if (!qpl->page_buses) 1159 goto free_pages; 1160 1161 for (i = 0; i < qpl->num_entries; i++) 1162 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1163 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1164 1165 kvfree(qpl->page_buses); 1166 qpl->page_buses = NULL; 1167 free_pages: 1168 kvfree(qpl->pages); 1169 qpl->pages = NULL; 
free_qpl:
	kvfree(qpl);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	/* Flag the request, then let the service task act on it. */
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

/* Forward declarations: these are used before their definitions below. */
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

/* Register XDP rxq info for every RX queue and pick up AF_XDP pools.
 *
 * Does nothing when no XDP queues are configured. For each RX queue the
 * regular xdp_rxq is registered with the shared-page memory model; if an
 * XSK pool is bound to that queue id, xsk_rxq is registered with the XSK
 * buffer-pool model as well. The XDP TX rings then get the pool of their
 * matching queue id.
 *
 * Returns 0 on success or the first registration error; on error the
 * registrations made for queues 0..i are undone.
 */
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	/* Walk back from the queue that failed; each info is checked before
	 * unregistering because the failing queue may be only partly set up.
	 */
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

/* Unregister the XDP rxq info of every RX queue and detach XSK pools.
 *
 * Does nothing when no XDP queues are configured. xsk_rxq is only
 * unregistered for rings that currently have a pool attached.
 */
static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->num_xdp_queues)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

/* Drain the page fragment cache of every configured RX ring. */
static void gve_drain_page_cache(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		page_frag_cache_drain(&priv->rx[i].page_cache);
}

/* Fill @cfg with the RX ring allocation parameters currently held in @priv.
 *
 * @cfg points into @priv (queue configs, ring array) rather than copying.
 */
static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
				      struct gve_rx_alloc_rings_cfg *cfg)
{
	cfg->qcfg = &priv->rx_cfg;
	cfg->qcfg_tx = &priv->tx_cfg;
	cfg->raw_addressing = !gve_is_qpl(priv);
	cfg->enable_header_split = priv->header_split_enabled;
	cfg->ring_size = priv->rx_desc_cnt;
	cfg->packet_buffer_size = gve_is_gqi(priv) ?
GVE_DEFAULT_RX_BUFFER_SIZE :
					  priv->data_buffer_size_dqo;
	cfg->rx = priv->rx;
}

/* Snapshot the current TX and RX ring allocation parameters of @priv into
 * @tx_alloc_cfg and @rx_alloc_cfg.
 */
void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}

/* Start RX ring @i using the GQI or DQO variant for this device. */
static void gve_rx_start_ring(struct gve_priv *priv, int i)
{
	if (gve_is_gqi(priv))
		gve_rx_start_ring_gqi(priv, i);
	else
		gve_rx_start_ring_dqo(priv, i);
}

/* Start RX rings 0..@num_rings-1. */
static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	for (i = 0; i < num_rings; i++)
		gve_rx_start_ring(priv, i);
}

/* Stop RX ring @i using the GQI or DQO variant for this device. */
static void gve_rx_stop_ring(struct gve_priv *priv, int i)
{
	if (gve_is_gqi(priv))
		gve_rx_stop_ring_gqi(priv, i);
	else
		gve_rx_stop_ring_dqo(priv, i);
}

/* Stop RX rings 0..@num_rings-1; a no-op when no RX rings are allocated. */
static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
{
	int i;

	if (!priv->rx)
		return;

	for (i = 0; i < num_rings; i++)
		gve_rx_stop_ring(priv, i);
}

/* Free the queue memory currently owned by @priv and clear the ring
 * pointers, so that priv->tx / priv->rx are NULL while no rings exist.
 */
static void gve_queues_mem_remove(struct gve_priv *priv)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	priv->tx = NULL;
	priv->rx = NULL;
}

/* The passed-in queue memory is stored into priv and the queues are made live.
 * No memory is allocated. Passed-in memory is freed on errors.
*/
static int gve_queues_start(struct gve_priv *priv,
			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	struct net_device *dev = priv->dev;
	int err;

	/* Record new resources into priv */
	priv->tx = tx_alloc_cfg->tx;
	priv->rx = rx_alloc_cfg->rx;

	/* Record new configs into priv */
	priv->tx_cfg = *tx_alloc_cfg->qcfg;
	priv->rx_cfg = *rx_alloc_cfg->qcfg;
	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;

	/* One XDP TX queue per RX queue while a program is installed */
	if (priv->xdp_prog)
		priv->num_xdp_queues = priv->rx_cfg.num_queues;
	else
		priv->num_xdp_queues = 0;

	gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
	gve_init_sync_stats(priv);

	/* Failures up to and including XDP info registration only need the
	 * rings stopped and freed; later failures go through a device reset.
	 */
	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto stop_and_free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto stop_and_free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto stop_and_free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
	priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;

	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

reset:
	/* Already inside a reset: just release the rings, do not nest. */
	if (gve_get_reset_in_progress(priv))
		goto stop_and_free_rings;
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the
return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
stop_and_free_rings:
	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
	gve_queues_mem_remove(priv);
	return err;
}

/* ndo_open handler: allocate queue memory for the current configuration and
 * make the queues live. Returns 0 or a negative errno.
 */
static int gve_open(struct net_device *dev)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	if (err)
		return err;

	/* No need to free on error: ownership of resources is lost after
	 * calling gve_queues_start.
	 */
	err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	if (err)
		return err;

	return 0;
}

/* Quiesce the queues and tear down their device-side state without freeing
 * the ring memory.
 *
 * Returns 0 on success. If destroying rings or unregistering QPLs fails, the
 * error is returned as-is when a reset is already in progress; otherwise a
 * reset is performed and the result of the recovery is returned.
 */
static int gve_queues_stop(struct gve_priv *priv)
{
	int err;

	netif_carrier_off(priv->dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);

	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);

	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
1473 */ 1474 if (gve_get_reset_in_progress(priv)) 1475 return err; 1476 /* Otherwise reset before returning */ 1477 gve_reset_and_teardown(priv, true); 1478 return gve_reset_recovery(priv, false); 1479 } 1480 1481 static int gve_close(struct net_device *dev) 1482 { 1483 struct gve_priv *priv = netdev_priv(dev); 1484 int err; 1485 1486 err = gve_queues_stop(priv); 1487 if (err) 1488 return err; 1489 1490 gve_queues_mem_remove(priv); 1491 return 0; 1492 } 1493 1494 static int gve_remove_xdp_queues(struct gve_priv *priv) 1495 { 1496 int err; 1497 1498 err = gve_destroy_xdp_rings(priv); 1499 if (err) 1500 return err; 1501 1502 err = gve_unregister_xdp_qpls(priv); 1503 if (err) 1504 return err; 1505 1506 gve_unreg_xdp_info(priv); 1507 gve_free_xdp_rings(priv); 1508 1509 priv->num_xdp_queues = 0; 1510 return 0; 1511 } 1512 1513 static int gve_add_xdp_queues(struct gve_priv *priv) 1514 { 1515 int err; 1516 1517 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1518 1519 err = gve_alloc_xdp_rings(priv); 1520 if (err) 1521 goto err; 1522 1523 err = gve_reg_xdp_info(priv, priv->dev); 1524 if (err) 1525 goto free_xdp_rings; 1526 1527 err = gve_register_xdp_qpls(priv); 1528 if (err) 1529 goto free_xdp_rings; 1530 1531 err = gve_create_xdp_rings(priv); 1532 if (err) 1533 goto free_xdp_rings; 1534 1535 return 0; 1536 1537 free_xdp_rings: 1538 gve_free_xdp_rings(priv); 1539 err: 1540 priv->num_xdp_queues = 0; 1541 return err; 1542 } 1543 1544 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1545 { 1546 if (!gve_get_napi_enabled(priv)) 1547 return; 1548 1549 if (link_status == netif_carrier_ok(priv->dev)) 1550 return; 1551 1552 if (link_status) { 1553 netdev_info(priv->dev, "Device link is up.\n"); 1554 netif_carrier_on(priv->dev); 1555 } else { 1556 netdev_info(priv->dev, "Device link is down.\n"); 1557 netif_carrier_off(priv->dev); 1558 } 1559 } 1560 1561 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1562 struct netlink_ext_ack 
*extack) 1563 { 1564 struct bpf_prog *old_prog; 1565 int err = 0; 1566 u32 status; 1567 1568 old_prog = READ_ONCE(priv->xdp_prog); 1569 if (!netif_running(priv->dev)) { 1570 WRITE_ONCE(priv->xdp_prog, prog); 1571 if (old_prog) 1572 bpf_prog_put(old_prog); 1573 return 0; 1574 } 1575 1576 gve_turndown(priv); 1577 if (!old_prog && prog) { 1578 // Allocate XDP TX queues if an XDP program is 1579 // being installed 1580 err = gve_add_xdp_queues(priv); 1581 if (err) 1582 goto out; 1583 } else if (old_prog && !prog) { 1584 // Remove XDP TX queues if an XDP program is 1585 // being uninstalled 1586 err = gve_remove_xdp_queues(priv); 1587 if (err) 1588 goto out; 1589 } 1590 WRITE_ONCE(priv->xdp_prog, prog); 1591 if (old_prog) 1592 bpf_prog_put(old_prog); 1593 1594 out: 1595 gve_turnup(priv); 1596 status = ioread32be(&priv->reg_bar0->device_status); 1597 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1598 return err; 1599 } 1600 1601 static int gve_xsk_pool_enable(struct net_device *dev, 1602 struct xsk_buff_pool *pool, 1603 u16 qid) 1604 { 1605 struct gve_priv *priv = netdev_priv(dev); 1606 struct napi_struct *napi; 1607 struct gve_rx_ring *rx; 1608 int tx_qid; 1609 int err; 1610 1611 if (qid >= priv->rx_cfg.num_queues) { 1612 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1613 return -EINVAL; 1614 } 1615 if (xsk_pool_get_rx_frame_size(pool) < 1616 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1617 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1618 return -EINVAL; 1619 } 1620 1621 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1622 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1623 if (err) 1624 return err; 1625 1626 /* If XDP prog is not installed, return */ 1627 if (!priv->xdp_prog) 1628 return 0; 1629 1630 rx = &priv->rx[qid]; 1631 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1632 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1633 if (err) 1634 goto err; 1635 1636 err = 
xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1637 MEM_TYPE_XSK_BUFF_POOL, NULL); 1638 if (err) 1639 goto err; 1640 1641 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1642 rx->xsk_pool = pool; 1643 1644 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1645 priv->tx[tx_qid].xsk_pool = pool; 1646 1647 return 0; 1648 err: 1649 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1650 xdp_rxq_info_unreg(&rx->xsk_rxq); 1651 1652 xsk_pool_dma_unmap(pool, 1653 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1654 return err; 1655 } 1656 1657 static int gve_xsk_pool_disable(struct net_device *dev, 1658 u16 qid) 1659 { 1660 struct gve_priv *priv = netdev_priv(dev); 1661 struct napi_struct *napi_rx; 1662 struct napi_struct *napi_tx; 1663 struct xsk_buff_pool *pool; 1664 int tx_qid; 1665 1666 pool = xsk_get_pool_from_qid(dev, qid); 1667 if (!pool) 1668 return -EINVAL; 1669 if (qid >= priv->rx_cfg.num_queues) 1670 return -EINVAL; 1671 1672 /* If XDP prog is not installed, unmap DMA and return */ 1673 if (!priv->xdp_prog) 1674 goto done; 1675 1676 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1677 if (!netif_running(dev)) { 1678 priv->rx[qid].xsk_pool = NULL; 1679 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1680 priv->tx[tx_qid].xsk_pool = NULL; 1681 goto done; 1682 } 1683 1684 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1685 napi_disable(napi_rx); /* make sure current rx poll is done */ 1686 1687 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1688 napi_disable(napi_tx); /* make sure current tx poll is done */ 1689 1690 priv->rx[qid].xsk_pool = NULL; 1691 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1692 priv->tx[tx_qid].xsk_pool = NULL; 1693 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1694 1695 napi_enable(napi_rx); 1696 if (gve_rx_work_pending(&priv->rx[qid])) 1697 napi_schedule(napi_rx); 1698 1699 napi_enable(napi_tx); 1700 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1701 napi_schedule(napi_tx); 1702 1703 done: 1704 xsk_pool_dma_unmap(pool, 1705 
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1706 return 0; 1707 } 1708 1709 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1710 { 1711 struct gve_priv *priv = netdev_priv(dev); 1712 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1713 1714 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1715 return -EINVAL; 1716 1717 if (flags & XDP_WAKEUP_TX) { 1718 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1719 struct napi_struct *napi = 1720 &priv->ntfy_blocks[tx->ntfy_id].napi; 1721 1722 if (!napi_if_scheduled_mark_missed(napi)) { 1723 /* Call local_bh_enable to trigger SoftIRQ processing */ 1724 local_bh_disable(); 1725 napi_schedule(napi); 1726 local_bh_enable(); 1727 } 1728 1729 tx->xdp_xsk_wakeup++; 1730 } 1731 1732 return 0; 1733 } 1734 1735 static int verify_xdp_configuration(struct net_device *dev) 1736 { 1737 struct gve_priv *priv = netdev_priv(dev); 1738 1739 if (dev->features & NETIF_F_LRO) { 1740 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1741 return -EOPNOTSUPP; 1742 } 1743 1744 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1745 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1746 priv->queue_format); 1747 return -EOPNOTSUPP; 1748 } 1749 1750 if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { 1751 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1752 dev->mtu); 1753 return -EOPNOTSUPP; 1754 } 1755 1756 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1757 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1758 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1759 priv->rx_cfg.num_queues, 1760 priv->tx_cfg.num_queues, 1761 priv->tx_cfg.max_queues); 1762 return -EINVAL; 1763 } 1764 return 0; 1765 } 1766 1767 static int gve_xdp(struct 
net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	/* The configuration check runs for every command, not just program
	 * installation.
	 */
	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		/* A NULL pool means "detach the pool from this queue". */
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

/* Reset all flow rules on the device. Returns 0 without contacting the
 * device when it supports no flow rules, otherwise the adminq result.
 */
int gve_flow_rules_reset(struct gve_priv *priv)
{
	if (!priv->max_flow_rules)
		return 0;

	return gve_adminq_reset_flow_rules(priv);
}

/* Switch a running device to the configuration in @tx_alloc_cfg and
 * @rx_alloc_cfg.
 *
 * The new queue memory is allocated first so that an allocation failure
 * leaves the running configuration untouched; only then is the device closed
 * and restarted with the new resources. Returns 0 or a negative errno. If
 * the restart fails, all queues are left disabled.
 */
int gve_adjust_config(struct gve_priv *priv,
		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
	int err;

	/* Allocate resources for the new configuration */
	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to alloc new queues");
		return err;
	}

	/* Teardown the device and free existing resources */
	err = gve_close(priv->dev);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to close old queues");
		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
		return err;
	}

	/* Bring the device back up again with the new resources. */
	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
		/* No need to free on error: ownership of resources is lost after
		 * calling gve_queues_start.
*/
		gve_turndown(priv);
		return err;
	}

	return 0;
}

/* Apply new RX/TX queue counts.
 *
 * If the interface is running, the device is reconfigured immediately via
 * gve_adjust_config(); otherwise the new configs are only stored in @priv for
 * the next open. Returns 0 or a negative errno.
 */
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	/* Relay the new config from ethtool */
	tx_alloc_cfg.qcfg = &new_tx_config;
	rx_alloc_cfg.qcfg_tx = &new_tx_config;
	rx_alloc_cfg.qcfg = &new_rx_config;
	tx_alloc_cfg.num_rings = new_tx_config.num_queues;

	if (netif_running(priv->dev)) {
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
		return err;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
}

/* Quiesce the datapath: drop the carrier, detach and disable NAPI on every
 * TX and RX queue that has a notification block, and stop the TX queues.
 * Returns early (after dropping the carrier) if NAPI is already disabled.
 */
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_tx_was_added_to_block(priv, idx))
			continue;

		/* Only indices below tx_cfg.num_queues are linked to a netdev
		 * TX queue; gve_num_tx_queues() may cover more rings.
		 */
		if (idx < priv->tx_cfg.num_queues)
			netif_queue_set_napi(priv->dev, idx,
					     NETDEV_QUEUE_TYPE_TX, NULL);

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_rx_was_added_to_block(priv, idx))
			continue;

		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
				     NULL);
		napi_disable(&block->napi);
	}
/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

/* Restart the datapath: start the TX queues, then for every TX and RX queue
 * that has a notification block enable NAPI, re-link it to the netdev queue,
 * unmask or program its interrupt and schedule one NAPI pass.
 */
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_tx_was_added_to_block(priv, idx))
			continue;

		napi_enable(&block->napi);

		/* Only indices below tx_cfg.num_queues are linked to a netdev
		 * TX queue; gve_num_tx_queues() may cover more rings.
		 */
		if (idx < priv->tx_cfg.num_queues)
			netif_queue_set_napi(priv->dev, idx,
					     NETDEV_QUEUE_TYPE_TX,
					     &block->napi);

		/* GQI writes 0 to the IRQ doorbell; DQO programs the
		 * interrupt coalescing interval instead.
		 */
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}

		/* Any descs written by the NIC before this barrier will be
		 * handled by the one-off napi schedule below. Whereas any
		 * descs after the barrier will generate interrupts.
		 */
		mb();
		napi_schedule(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		if (!gve_rx_was_added_to_block(priv, idx))
			continue;

		napi_enable(&block->napi);
		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
				     &block->napi);

		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}

		/* Any descs written by the NIC before this barrier will be
		 * handled by the one-off napi schedule below. Whereas any
		 * descs after the barrier will generate interrupts.
1961 */ 1962 mb(); 1963 napi_schedule(&block->napi); 1964 } 1965 1966 gve_set_napi_enabled(priv); 1967 } 1968 1969 static void gve_turnup_and_check_status(struct gve_priv *priv) 1970 { 1971 u32 status; 1972 1973 gve_turnup(priv); 1974 status = ioread32be(&priv->reg_bar0->device_status); 1975 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1976 } 1977 1978 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1979 { 1980 struct gve_notify_block *block; 1981 struct gve_tx_ring *tx = NULL; 1982 struct gve_priv *priv; 1983 u32 last_nic_done; 1984 u32 current_time; 1985 u32 ntfy_idx; 1986 1987 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1988 priv = netdev_priv(dev); 1989 if (txqueue > priv->tx_cfg.num_queues) 1990 goto reset; 1991 1992 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1993 if (ntfy_idx >= priv->num_ntfy_blks) 1994 goto reset; 1995 1996 block = &priv->ntfy_blocks[ntfy_idx]; 1997 tx = block->tx; 1998 1999 current_time = jiffies_to_msecs(jiffies); 2000 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 2001 goto reset; 2002 2003 /* Check to see if there are missed completions, which will allow us to 2004 * kick the queue. 2005 */ 2006 last_nic_done = gve_tx_load_event_counter(priv, tx); 2007 if (last_nic_done - tx->done) { 2008 netdev_info(dev, "Kicking queue %d", txqueue); 2009 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 2010 napi_schedule(&block->napi); 2011 tx->last_kick_msec = current_time; 2012 goto out; 2013 } // Else reset. 
reset:
	gve_schedule_reset(priv);

out:
	/* tx stays NULL when the queue id could not be mapped to a ring. */
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

/* Return the RX packet buffer size to use: GVE_MAX_RX_BUFFER_SIZE when
 * header split is requested and the device's max_rx_buffer_size allows it,
 * GVE_DEFAULT_RX_BUFFER_SIZE otherwise.
 */
u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
{
	if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
		return GVE_MAX_RX_BUFFER_SIZE;
	else
		return GVE_DEFAULT_RX_BUFFER_SIZE;
}

/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
bool gve_header_split_supported(const struct gve_priv *priv)
{
	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
}

/* Apply an ethtool tcp-data-split setting.
 *
 * ETHTOOL_TCP_DATA_SPLIT_UNKNOWN and a request matching the current state
 * are no-ops. Returns -EOPNOTSUPP if header split is unsupported, otherwise
 * 0 or the error from reconfiguring the running device.
 *
 * NOTE(review): when the interface is down nothing is stored in @priv here,
 * so the request appears to be dropped - confirm whether that is intended.
 */
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	bool enable_hdr_split;
	int err = 0;

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
		return 0;

	if (!gve_header_split_supported(priv)) {
		dev_err(&priv->pdev->dev, "Header-split not supported\n");
		return -EOPNOTSUPP;
	}

	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
		enable_hdr_split = true;
	else
		enable_hdr_split = false;

	if (enable_hdr_split == priv->header_split_enabled)
		return 0;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	rx_alloc_cfg.enable_header_split = enable_hdr_split;
	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);

	if (netif_running(priv->dev))
		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
	return err;
}

/* ndo_set_features handler: reconfigures the device when LRO is toggled and
 * resets flow rules when NTUPLE is turned off. On failure netdev->features
 * is restored to its value at entry.
 */
static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
	struct gve_priv *priv =
netdev_priv(netdev);
	int err;

	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		/* Flip the bit first so the reconfiguration sees the new
		 * LRO setting; it is reverted below on failure.
		 */
		netdev->features ^= NETIF_F_LRO;
		if (netif_running(netdev)) {
			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
			if (err)
				goto revert_features;
		}
	}
	/* NTUPLE being switched off: drop the flow rules on the device. */
	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
		err = gve_flow_rules_reset(priv);
		if (err)
			goto revert_features;
	}

	return 0;

revert_features:
	netdev->features = orig_features;
	return err;
}

/* net_device callbacks implemented by this driver. */
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_start_xmit,
	.ndo_features_check	=	gve_features_check,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout         =       gve_tx_timeout,
	.ndo_set_features	=	gve_set_features,
	.ndo_bpf		=	gve_xdp,
	.ndo_xdp_xmit		=	gve_xdp_xmit,
	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
};

/* Translate device status register bits into pending-work flags: a reset
 * request sets the do-reset flag, a stats request bumps the trigger counter
 * and sets the do-report-stats flag.
 */
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

/* Perform a pending reset, if any, under the rtnl lock. */
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
*/
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

/* Fill the stats report buffer with per-queue driver statistics.
 *
 * Does nothing unless stats reporting is enabled. Each call increments the
 * report's written_count and rewrites the entries from index 0: a fixed set
 * of entries per TX queue first, then the entries per RX queue. All values
 * are stored big-endian.
 */
void gve_handle_report_stats(struct gve_priv *priv)
{
	struct stats *stats = priv->stats_report->stats;
	int idx, stats_idx = 0;
	unsigned int start = 0;
	u64 tx_bytes;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
			u32 last_completion = 0;
			u32 tx_frames = 0;

			/* DQO doesn't currently support these metrics. */
			if (gve_is_gqi(priv)) {
				last_completion = priv->tx[idx].done;
				tx_frames = priv->tx[idx].req;
			}

			/* Retry until a consistent snapshot of bytes_done
			 * has been read.
			 */
			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(tx_frames),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(last_completion),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name =
cpu_to_be32(TX_TIMEOUT_CNT), 2198 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 2199 .queue_id = cpu_to_be32(idx), 2200 }; 2201 } 2202 } 2203 /* rx stats */ 2204 if (priv->rx) { 2205 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 2206 stats[stats_idx++] = (struct stats) { 2207 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 2208 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 2209 .queue_id = cpu_to_be32(idx), 2210 }; 2211 stats[stats_idx++] = (struct stats) { 2212 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 2213 .value = cpu_to_be64(priv->rx[0].fill_cnt), 2214 .queue_id = cpu_to_be32(idx), 2215 }; 2216 } 2217 } 2218 } 2219 2220 /* Handle NIC status register changes, reset requests and report stats */ 2221 static void gve_service_task(struct work_struct *work) 2222 { 2223 struct gve_priv *priv = container_of(work, struct gve_priv, 2224 service_task); 2225 u32 status = ioread32be(&priv->reg_bar0->device_status); 2226 2227 gve_handle_status(priv, status); 2228 2229 gve_handle_reset(priv); 2230 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2231 } 2232 2233 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2234 { 2235 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2236 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 2237 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2238 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2239 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2240 } else { 2241 priv->dev->xdp_features = 0; 2242 } 2243 } 2244 2245 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2246 { 2247 int num_ntfy; 2248 int err; 2249 2250 /* Set up the adminq */ 2251 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2252 if (err) { 2253 dev_err(&priv->pdev->dev, 2254 "Failed to alloc admin queue: err=%d\n", err); 2255 return err; 2256 } 2257 2258 err = gve_verify_driver_compatibility(priv); 2259 if (err) { 2260 dev_err(&priv->pdev->dev, 2261 "Could not verify driver 
compatibility: err=%d\n", err); 2262 goto err; 2263 } 2264 2265 priv->num_registered_pages = 0; 2266 2267 if (skip_describe_device) 2268 goto setup_device; 2269 2270 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2271 /* Get the initial information we need from the device */ 2272 err = gve_adminq_describe_device(priv); 2273 if (err) { 2274 dev_err(&priv->pdev->dev, 2275 "Could not get device information: err=%d\n", err); 2276 goto err; 2277 } 2278 priv->dev->mtu = priv->dev->max_mtu; 2279 num_ntfy = pci_msix_vec_count(priv->pdev); 2280 if (num_ntfy <= 0) { 2281 dev_err(&priv->pdev->dev, 2282 "could not count MSI-x vectors: err=%d\n", num_ntfy); 2283 err = num_ntfy; 2284 goto err; 2285 } else if (num_ntfy < GVE_MIN_MSIX) { 2286 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 2287 GVE_MIN_MSIX, num_ntfy); 2288 err = -EINVAL; 2289 goto err; 2290 } 2291 2292 /* Big TCP is only supported on DQ*/ 2293 if (!gve_is_gqi(priv)) 2294 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); 2295 2296 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 2297 /* gvnic has one Notification Block per MSI-x vector, except for the 2298 * management vector 2299 */ 2300 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 2301 priv->mgmt_msix_idx = priv->num_ntfy_blks; 2302 2303 priv->tx_cfg.max_queues = 2304 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 2305 priv->rx_cfg.max_queues = 2306 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 2307 2308 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 2309 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 2310 if (priv->default_num_queues > 0) { 2311 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 2312 priv->tx_cfg.num_queues); 2313 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 2314 priv->rx_cfg.num_queues); 2315 } 2316 2317 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", 2318 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 2319 
dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", 2320 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 2321 2322 if (!gve_is_gqi(priv)) { 2323 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; 2324 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; 2325 } 2326 2327 setup_device: 2328 gve_set_netdev_xdp_features(priv); 2329 err = gve_setup_device_resources(priv); 2330 if (!err) 2331 return 0; 2332 err: 2333 gve_adminq_free(&priv->pdev->dev, priv); 2334 return err; 2335 } 2336 2337 static void gve_teardown_priv_resources(struct gve_priv *priv) 2338 { 2339 gve_teardown_device_resources(priv); 2340 gve_adminq_free(&priv->pdev->dev, priv); 2341 } 2342 2343 static void gve_trigger_reset(struct gve_priv *priv) 2344 { 2345 /* Reset the device by releasing the AQ */ 2346 gve_adminq_release(priv); 2347 } 2348 2349 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 2350 { 2351 gve_trigger_reset(priv); 2352 /* With the reset having already happened, close cannot fail */ 2353 if (was_up) 2354 gve_close(priv->dev); 2355 gve_teardown_priv_resources(priv); 2356 } 2357 2358 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 2359 { 2360 int err; 2361 2362 err = gve_init_priv(priv, true); 2363 if (err) 2364 goto err; 2365 if (was_up) { 2366 err = gve_open(priv->dev); 2367 if (err) 2368 goto err; 2369 } 2370 return 0; 2371 err: 2372 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 2373 gve_turndown(priv); 2374 return err; 2375 } 2376 2377 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 2378 { 2379 bool was_up = netif_running(priv->dev); 2380 int err; 2381 2382 dev_info(&priv->pdev->dev, "Performing reset\n"); 2383 gve_clear_do_reset(priv); 2384 gve_set_reset_in_progress(priv); 2385 /* If we aren't attempting to teardown normally, just go turndown and 2386 * reset right away. 
2387 */ 2388 if (!attempt_teardown) { 2389 gve_turndown(priv); 2390 gve_reset_and_teardown(priv, was_up); 2391 } else { 2392 /* Otherwise attempt to close normally */ 2393 if (was_up) { 2394 err = gve_close(priv->dev); 2395 /* If that fails reset as we did above */ 2396 if (err) 2397 gve_reset_and_teardown(priv, was_up); 2398 } 2399 /* Clean up any remaining resources */ 2400 gve_teardown_priv_resources(priv); 2401 } 2402 2403 /* Set it all back up */ 2404 err = gve_reset_recovery(priv, was_up); 2405 gve_clear_reset_in_progress(priv); 2406 priv->reset_cnt++; 2407 priv->interface_up_cnt = 0; 2408 priv->interface_down_cnt = 0; 2409 priv->stats_report_trigger_cnt = 0; 2410 return err; 2411 } 2412 2413 static void gve_write_version(u8 __iomem *driver_version_register) 2414 { 2415 const char *c = gve_version_prefix; 2416 2417 while (*c) { 2418 writeb(*c, driver_version_register); 2419 c++; 2420 } 2421 2422 c = gve_version_str; 2423 while (*c) { 2424 writeb(*c, driver_version_register); 2425 c++; 2426 } 2427 writeb('\n', driver_version_register); 2428 } 2429 2430 static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) 2431 { 2432 struct gve_priv *priv = netdev_priv(dev); 2433 struct gve_rx_ring *gve_per_q_mem; 2434 int err; 2435 2436 if (!priv->rx) 2437 return -EAGAIN; 2438 2439 /* Destroying queue 0 while other queues exist is not supported in DQO */ 2440 if (!gve_is_gqi(priv) && idx == 0) 2441 return -ERANGE; 2442 2443 /* Single-queue destruction requires quiescence on all queues */ 2444 gve_turndown(priv); 2445 2446 /* This failure will trigger a reset - no need to clean up */ 2447 err = gve_adminq_destroy_single_rx_queue(priv, idx); 2448 if (err) 2449 return err; 2450 2451 if (gve_is_qpl(priv)) { 2452 /* This failure will trigger a reset - no need to clean up */ 2453 err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); 2454 if (err) 2455 return err; 2456 } 2457 2458 gve_rx_stop_ring(priv, idx); 2459 2460 /* Turn the unstopped queues back 
up */ 2461 gve_turnup_and_check_status(priv); 2462 2463 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2464 *gve_per_q_mem = priv->rx[idx]; 2465 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2466 return 0; 2467 } 2468 2469 static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) 2470 { 2471 struct gve_priv *priv = netdev_priv(dev); 2472 struct gve_rx_alloc_rings_cfg cfg = {0}; 2473 struct gve_rx_ring *gve_per_q_mem; 2474 2475 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2476 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2477 2478 if (gve_is_gqi(priv)) 2479 gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); 2480 else 2481 gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); 2482 } 2483 2484 static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, 2485 int idx) 2486 { 2487 struct gve_priv *priv = netdev_priv(dev); 2488 struct gve_rx_alloc_rings_cfg cfg = {0}; 2489 struct gve_rx_ring *gve_per_q_mem; 2490 int err; 2491 2492 if (!priv->rx) 2493 return -EAGAIN; 2494 2495 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2496 gve_rx_get_curr_alloc_cfg(priv, &cfg); 2497 2498 if (gve_is_gqi(priv)) 2499 err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); 2500 else 2501 err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); 2502 2503 return err; 2504 } 2505 2506 static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) 2507 { 2508 struct gve_priv *priv = netdev_priv(dev); 2509 struct gve_rx_ring *gve_per_q_mem; 2510 int err; 2511 2512 if (!priv->rx) 2513 return -EAGAIN; 2514 2515 gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; 2516 priv->rx[idx] = *gve_per_q_mem; 2517 2518 /* Single-queue creation requires quiescence on all queues */ 2519 gve_turndown(priv); 2520 2521 gve_rx_start_ring(priv, idx); 2522 2523 if (gve_is_qpl(priv)) { 2524 /* This failure will trigger a reset - no need to clean up */ 2525 err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); 2526 if (err) 2527 goto abort; 2528 } 2529 2530 /* 
This failure will trigger a reset - no need to clean up */ 2531 err = gve_adminq_create_single_rx_queue(priv, idx); 2532 if (err) 2533 goto abort; 2534 2535 if (gve_is_gqi(priv)) 2536 gve_rx_write_doorbell(priv, &priv->rx[idx]); 2537 else 2538 gve_rx_post_buffers_dqo(&priv->rx[idx]); 2539 2540 /* Turn the unstopped queues back up */ 2541 gve_turnup_and_check_status(priv); 2542 return 0; 2543 2544 abort: 2545 gve_rx_stop_ring(priv, idx); 2546 2547 /* All failures in this func result in a reset, by clearing the struct 2548 * at idx, we prevent a double free when that reset runs. The reset, 2549 * which needs the rtnl lock, will not run till this func returns and 2550 * its caller gives up the lock. 2551 */ 2552 memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); 2553 return err; 2554 } 2555 2556 static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { 2557 .ndo_queue_mem_size = sizeof(struct gve_rx_ring), 2558 .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, 2559 .ndo_queue_mem_free = gve_rx_queue_mem_free, 2560 .ndo_queue_start = gve_rx_queue_start, 2561 .ndo_queue_stop = gve_rx_queue_stop, 2562 }; 2563 2564 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2565 { 2566 int max_tx_queues, max_rx_queues; 2567 struct net_device *dev; 2568 __be32 __iomem *db_bar; 2569 struct gve_registers __iomem *reg_bar; 2570 struct gve_priv *priv; 2571 int err; 2572 2573 err = pci_enable_device(pdev); 2574 if (err) 2575 return err; 2576 2577 err = pci_request_regions(pdev, gve_driver_name); 2578 if (err) 2579 goto abort_with_enabled; 2580 2581 pci_set_master(pdev); 2582 2583 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 2584 if (err) { 2585 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 2586 goto abort_with_pci_region; 2587 } 2588 2589 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 2590 if (!reg_bar) { 2591 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 2592 err = -ENOMEM; 2593 goto abort_with_pci_region; 2594 } 2595 
2596 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 2597 if (!db_bar) { 2598 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 2599 err = -ENOMEM; 2600 goto abort_with_reg_bar; 2601 } 2602 2603 gve_write_version(®_bar->driver_version); 2604 /* Get max queues to alloc etherdev */ 2605 max_tx_queues = ioread32be(®_bar->max_tx_queues); 2606 max_rx_queues = ioread32be(®_bar->max_rx_queues); 2607 /* Alloc and setup the netdev and priv */ 2608 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 2609 if (!dev) { 2610 dev_err(&pdev->dev, "could not allocate netdev\n"); 2611 err = -ENOMEM; 2612 goto abort_with_db_bar; 2613 } 2614 SET_NETDEV_DEV(dev, &pdev->dev); 2615 pci_set_drvdata(pdev, dev); 2616 dev->ethtool_ops = &gve_ethtool_ops; 2617 dev->netdev_ops = &gve_netdev_ops; 2618 dev->queue_mgmt_ops = &gve_queue_mgmt_ops; 2619 2620 /* Set default and supported features. 2621 * 2622 * Features might be set in other locations as well (such as 2623 * `gve_adminq_describe_device`). 
2624 */ 2625 dev->hw_features = NETIF_F_HIGHDMA; 2626 dev->hw_features |= NETIF_F_SG; 2627 dev->hw_features |= NETIF_F_HW_CSUM; 2628 dev->hw_features |= NETIF_F_TSO; 2629 dev->hw_features |= NETIF_F_TSO6; 2630 dev->hw_features |= NETIF_F_TSO_ECN; 2631 dev->hw_features |= NETIF_F_RXCSUM; 2632 dev->hw_features |= NETIF_F_RXHASH; 2633 dev->features = dev->hw_features; 2634 dev->watchdog_timeo = 5 * HZ; 2635 dev->min_mtu = ETH_MIN_MTU; 2636 netif_carrier_off(dev); 2637 2638 priv = netdev_priv(dev); 2639 priv->dev = dev; 2640 priv->pdev = pdev; 2641 priv->msg_enable = DEFAULT_MSG_LEVEL; 2642 priv->reg_bar0 = reg_bar; 2643 priv->db_bar2 = db_bar; 2644 priv->service_task_flags = 0x0; 2645 priv->state_flags = 0x0; 2646 priv->ethtool_flags = 0x0; 2647 priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 2648 priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 2649 2650 gve_set_probe_in_progress(priv); 2651 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 2652 if (!priv->gve_wq) { 2653 dev_err(&pdev->dev, "Could not allocate workqueue"); 2654 err = -ENOMEM; 2655 goto abort_with_netdev; 2656 } 2657 INIT_WORK(&priv->service_task, gve_service_task); 2658 INIT_WORK(&priv->stats_report_task, gve_stats_report_task); 2659 priv->tx_cfg.max_queues = max_tx_queues; 2660 priv->rx_cfg.max_queues = max_rx_queues; 2661 2662 err = gve_init_priv(priv, false); 2663 if (err) 2664 goto abort_with_wq; 2665 2666 err = register_netdev(dev); 2667 if (err) 2668 goto abort_with_gve_init; 2669 2670 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 2671 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); 2672 gve_clear_probe_in_progress(priv); 2673 queue_work(priv->gve_wq, &priv->service_task); 2674 return 0; 2675 2676 abort_with_gve_init: 2677 gve_teardown_priv_resources(priv); 2678 2679 abort_with_wq: 2680 destroy_workqueue(priv->gve_wq); 2681 2682 abort_with_netdev: 2683 free_netdev(dev); 2684 2685 abort_with_db_bar: 2686 pci_iounmap(pdev, db_bar); 2687 2688 
abort_with_reg_bar: 2689 pci_iounmap(pdev, reg_bar); 2690 2691 abort_with_pci_region: 2692 pci_release_regions(pdev); 2693 2694 abort_with_enabled: 2695 pci_disable_device(pdev); 2696 return err; 2697 } 2698 2699 static void gve_remove(struct pci_dev *pdev) 2700 { 2701 struct net_device *netdev = pci_get_drvdata(pdev); 2702 struct gve_priv *priv = netdev_priv(netdev); 2703 __be32 __iomem *db_bar = priv->db_bar2; 2704 void __iomem *reg_bar = priv->reg_bar0; 2705 2706 unregister_netdev(netdev); 2707 gve_teardown_priv_resources(priv); 2708 destroy_workqueue(priv->gve_wq); 2709 free_netdev(netdev); 2710 pci_iounmap(pdev, db_bar); 2711 pci_iounmap(pdev, reg_bar); 2712 pci_release_regions(pdev); 2713 pci_disable_device(pdev); 2714 } 2715 2716 static void gve_shutdown(struct pci_dev *pdev) 2717 { 2718 struct net_device *netdev = pci_get_drvdata(pdev); 2719 struct gve_priv *priv = netdev_priv(netdev); 2720 bool was_up = netif_running(priv->dev); 2721 2722 rtnl_lock(); 2723 if (was_up && gve_close(priv->dev)) { 2724 /* If the dev was up, attempt to close, if close fails, reset */ 2725 gve_reset_and_teardown(priv, was_up); 2726 } else { 2727 /* If the dev wasn't up or close worked, finish tearing down */ 2728 gve_teardown_priv_resources(priv); 2729 } 2730 rtnl_unlock(); 2731 } 2732 2733 #ifdef CONFIG_PM 2734 static int gve_suspend(struct pci_dev *pdev, pm_message_t state) 2735 { 2736 struct net_device *netdev = pci_get_drvdata(pdev); 2737 struct gve_priv *priv = netdev_priv(netdev); 2738 bool was_up = netif_running(priv->dev); 2739 2740 priv->suspend_cnt++; 2741 rtnl_lock(); 2742 if (was_up && gve_close(priv->dev)) { 2743 /* If the dev was up, attempt to close, if close fails, reset */ 2744 gve_reset_and_teardown(priv, was_up); 2745 } else { 2746 /* If the dev wasn't up or close worked, finish tearing down */ 2747 gve_teardown_priv_resources(priv); 2748 } 2749 priv->up_before_suspend = was_up; 2750 rtnl_unlock(); 2751 return 0; 2752 } 2753 2754 static int gve_resume(struct 
pci_dev *pdev) 2755 { 2756 struct net_device *netdev = pci_get_drvdata(pdev); 2757 struct gve_priv *priv = netdev_priv(netdev); 2758 int err; 2759 2760 priv->resume_cnt++; 2761 rtnl_lock(); 2762 err = gve_reset_recovery(priv, priv->up_before_suspend); 2763 rtnl_unlock(); 2764 return err; 2765 } 2766 #endif /* CONFIG_PM */ 2767 2768 static const struct pci_device_id gve_id_table[] = { 2769 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 2770 { } 2771 }; 2772 2773 static struct pci_driver gve_driver = { 2774 .name = gve_driver_name, 2775 .id_table = gve_id_table, 2776 .probe = gve_probe, 2777 .remove = gve_remove, 2778 .shutdown = gve_shutdown, 2779 #ifdef CONFIG_PM 2780 .suspend = gve_suspend, 2781 .resume = gve_resume, 2782 #endif 2783 }; 2784 2785 module_pci_driver(gve_driver); 2786 2787 MODULE_DEVICE_TABLE(pci, gve_id_table); 2788 MODULE_AUTHOR("Google, Inc."); 2789 MODULE_DESCRIPTION("Google Virtual NIC Driver"); 2790 MODULE_LICENSE("Dual MIT/GPL"); 2791 MODULE_VERSION(GVE_VERSION); 2792