1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2021 Google, Inc. 5 */ 6 7 #include <linux/bpf.h> 8 #include <linux/cpumask.h> 9 #include <linux/etherdevice.h> 10 #include <linux/filter.h> 11 #include <linux/interrupt.h> 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/sched.h> 15 #include <linux/timer.h> 16 #include <linux/workqueue.h> 17 #include <linux/utsname.h> 18 #include <linux/version.h> 19 #include <net/sch_generic.h> 20 #include <net/xdp_sock_drv.h> 21 #include "gve.h" 22 #include "gve_dqo.h" 23 #include "gve_adminq.h" 24 #include "gve_register.h" 25 26 #define GVE_DEFAULT_RX_COPYBREAK (256) 27 28 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 29 #define GVE_VERSION "1.0.0" 30 #define GVE_VERSION_PREFIX "GVE-" 31 32 // Minimum amount of time between queue kicks in msec (10 seconds) 33 #define MIN_TX_TIMEOUT_GAP (1000 * 10) 34 35 char gve_driver_name[] = "gve"; 36 const char gve_version_str[] = GVE_VERSION; 37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 38 39 static int gve_verify_driver_compatibility(struct gve_priv *priv) 40 { 41 int err; 42 struct gve_driver_info *driver_info; 43 dma_addr_t driver_info_bus; 44 45 driver_info = dma_alloc_coherent(&priv->pdev->dev, 46 sizeof(struct gve_driver_info), 47 &driver_info_bus, GFP_KERNEL); 48 if (!driver_info) 49 return -ENOMEM; 50 51 *driver_info = (struct gve_driver_info) { 52 .os_type = 1, /* Linux */ 53 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), 54 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), 55 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), 56 .driver_capability_flags = { 57 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), 58 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), 59 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), 60 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), 61 }, 62 }; 63 strscpy(driver_info->os_version_str1, utsname()->release, 64 sizeof(driver_info->os_version_str1)); 65 strscpy(driver_info->os_version_str2, utsname()->version, 66 sizeof(driver_info->os_version_str2)); 67 68 err = gve_adminq_verify_driver_compatibility(priv, 69 sizeof(struct gve_driver_info), 70 driver_info_bus); 71 72 /* It's ok if the device doesn't support this */ 73 if (err == -EOPNOTSUPP) 74 err = 0; 75 76 dma_free_coherent(&priv->pdev->dev, 77 sizeof(struct gve_driver_info), 78 driver_info, driver_info_bus); 79 return err; 80 } 81 82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) 83 { 84 struct gve_priv *priv = netdev_priv(dev); 85 86 if (gve_is_gqi(priv)) 87 return gve_tx(skb, dev); 88 else 89 return gve_tx_dqo(skb, dev); 90 } 91 92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 93 { 94 struct gve_priv *priv = netdev_priv(dev); 95 unsigned int start; 96 u64 packets, bytes; 97 int num_tx_queues; 98 int ring; 99 100 num_tx_queues = gve_num_tx_queues(priv); 101 if (priv->rx) { 102 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 103 do { 104 start = 105 u64_stats_fetch_begin(&priv->rx[ring].statss); 106 packets = priv->rx[ring].rpackets; 107 bytes = priv->rx[ring].rbytes; 108 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 109 start)); 110 s->rx_packets += packets; 111 s->rx_bytes += bytes; 112 } 113 } 114 if (priv->tx) { 115 for (ring = 0; ring < num_tx_queues; ring++) { 116 do { 117 start = 118 u64_stats_fetch_begin(&priv->tx[ring].statss); 119 packets = priv->tx[ring].pkt_done; 120 bytes = priv->tx[ring].bytes_done; 121 } while 
(u64_stats_fetch_retry(&priv->tx[ring].statss, 122 start)); 123 s->tx_packets += packets; 124 s->tx_bytes += bytes; 125 } 126 } 127 } 128 129 static int gve_alloc_counter_array(struct gve_priv *priv) 130 { 131 priv->counter_array = 132 dma_alloc_coherent(&priv->pdev->dev, 133 priv->num_event_counters * 134 sizeof(*priv->counter_array), 135 &priv->counter_array_bus, GFP_KERNEL); 136 if (!priv->counter_array) 137 return -ENOMEM; 138 139 return 0; 140 } 141 142 static void gve_free_counter_array(struct gve_priv *priv) 143 { 144 if (!priv->counter_array) 145 return; 146 147 dma_free_coherent(&priv->pdev->dev, 148 priv->num_event_counters * 149 sizeof(*priv->counter_array), 150 priv->counter_array, priv->counter_array_bus); 151 priv->counter_array = NULL; 152 } 153 154 /* NIC requests to report stats */ 155 static void gve_stats_report_task(struct work_struct *work) 156 { 157 struct gve_priv *priv = container_of(work, struct gve_priv, 158 stats_report_task); 159 if (gve_get_do_report_stats(priv)) { 160 gve_handle_report_stats(priv); 161 gve_clear_do_report_stats(priv); 162 } 163 } 164 165 static void gve_stats_report_schedule(struct gve_priv *priv) 166 { 167 if (!gve_get_probe_in_progress(priv) && 168 !gve_get_reset_in_progress(priv)) { 169 gve_set_do_report_stats(priv); 170 queue_work(priv->gve_wq, &priv->stats_report_task); 171 } 172 } 173 174 static void gve_stats_report_timer(struct timer_list *t) 175 { 176 struct gve_priv *priv = from_timer(priv, t, stats_report_timer); 177 178 mod_timer(&priv->stats_report_timer, 179 round_jiffies(jiffies + 180 msecs_to_jiffies(priv->stats_report_timer_period))); 181 gve_stats_report_schedule(priv); 182 } 183 184 static int gve_alloc_stats_report(struct gve_priv *priv) 185 { 186 int tx_stats_num, rx_stats_num; 187 188 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 189 gve_num_tx_queues(priv); 190 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 191 priv->rx_cfg.num_queues; 192 priv->stats_report_len = struct_size(priv->stats_report, stats, 193 size_add(tx_stats_num, rx_stats_num)); 194 priv->stats_report = 195 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, 196 &priv->stats_report_bus, GFP_KERNEL); 197 if (!priv->stats_report) 198 return -ENOMEM; 199 /* Set up timer for the report-stats task */ 200 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0); 201 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD; 202 return 0; 203 } 204 205 static void gve_free_stats_report(struct gve_priv *priv) 206 { 207 if (!priv->stats_report) 208 return; 209 210 del_timer_sync(&priv->stats_report_timer); 211 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, 212 priv->stats_report, priv->stats_report_bus); 213 priv->stats_report = NULL; 214 } 215 216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 217 { 218 struct gve_priv *priv = arg; 219 220 queue_work(priv->gve_wq, &priv->service_task); 221 return IRQ_HANDLED; 222 } 223 224 static irqreturn_t gve_intr(int irq, void *arg) 225 { 226 struct gve_notify_block *block = arg; 227 struct gve_priv *priv = block->priv; 228 229 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 230 napi_schedule_irqoff(&block->napi); 231 return IRQ_HANDLED; 232 } 233 234 static irqreturn_t gve_intr_dqo(int irq, void *arg) 235 { 236 struct gve_notify_block *block = arg; 237 238 /* Interrupts are automatically masked */ 239 napi_schedule_irqoff(&block->napi); 240 return IRQ_HANDLED; 241 } 242 243 static int gve_napi_poll(struct napi_struct *napi, int 
budget) 244 { 245 struct gve_notify_block *block; 246 __be32 __iomem *irq_doorbell; 247 bool reschedule = false; 248 struct gve_priv *priv; 249 int work_done = 0; 250 251 block = container_of(napi, struct gve_notify_block, napi); 252 priv = block->priv; 253 254 if (block->tx) { 255 if (block->tx->q_num < priv->tx_cfg.num_queues) 256 reschedule |= gve_tx_poll(block, budget); 257 else 258 reschedule |= gve_xdp_poll(block, budget); 259 } 260 261 if (block->rx) { 262 work_done = gve_rx_poll(block, budget); 263 reschedule |= work_done == budget; 264 } 265 266 if (reschedule) 267 return budget; 268 269 /* Complete processing - don't unmask irq if busy polling is enabled */ 270 if (likely(napi_complete_done(napi, work_done))) { 271 irq_doorbell = gve_irq_doorbell(priv, block); 272 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 273 274 /* Ensure IRQ ACK is visible before we check pending work. 275 * If queue had issued updates, it would be truly visible. 276 */ 277 mb(); 278 279 if (block->tx) 280 reschedule |= gve_tx_clean_pending(priv, block->tx); 281 if (block->rx) 282 reschedule |= gve_rx_work_pending(block->rx); 283 284 if (reschedule && napi_schedule(napi)) 285 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 286 } 287 return work_done; 288 } 289 290 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) 291 { 292 struct gve_notify_block *block = 293 container_of(napi, struct gve_notify_block, napi); 294 struct gve_priv *priv = block->priv; 295 bool reschedule = false; 296 int work_done = 0; 297 298 if (block->tx) 299 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); 300 301 if (block->rx) { 302 work_done = gve_rx_poll_dqo(block, budget); 303 reschedule |= work_done == budget; 304 } 305 306 if (reschedule) 307 return budget; 308 309 if (likely(napi_complete_done(napi, work_done))) { 310 /* Enable interrupts again. 311 * 312 * We don't need to repoll afterwards because HW supports the 313 * PCI MSI-X PBA feature. 314 * 315 * Another interrupt would be triggered if a new event came in 316 * since the last one. 
317 */ 318 gve_write_irq_doorbell_dqo(priv, block, 319 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); 320 } 321 322 return work_done; 323 } 324 325 static int gve_alloc_notify_blocks(struct gve_priv *priv) 326 { 327 int num_vecs_requested = priv->num_ntfy_blks + 1; 328 unsigned int active_cpus; 329 int vecs_enabled; 330 int i, j; 331 int err; 332 333 priv->msix_vectors = kvcalloc(num_vecs_requested, 334 sizeof(*priv->msix_vectors), GFP_KERNEL); 335 if (!priv->msix_vectors) 336 return -ENOMEM; 337 for (i = 0; i < num_vecs_requested; i++) 338 priv->msix_vectors[i].entry = i; 339 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 340 GVE_MIN_MSIX, num_vecs_requested); 341 if (vecs_enabled < 0) { 342 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 343 GVE_MIN_MSIX, vecs_enabled); 344 err = vecs_enabled; 345 goto abort_with_msix_vectors; 346 } 347 if (vecs_enabled != num_vecs_requested) { 348 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 349 int vecs_per_type = new_num_ntfy_blks / 2; 350 int vecs_left = new_num_ntfy_blks % 2; 351 352 priv->num_ntfy_blks = new_num_ntfy_blks; 353 priv->mgmt_msix_idx = priv->num_ntfy_blks; 354 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 355 vecs_per_type); 356 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 357 vecs_per_type + vecs_left); 358 dev_err(&priv->pdev->dev, 359 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 360 vecs_enabled, priv->tx_cfg.max_queues, 361 priv->rx_cfg.max_queues); 362 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 363 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 364 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 365 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 366 } 367 /* Half the notification blocks go to TX and half to RX */ 368 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 369 370 /* Setup Management Vector - the last vector */ 371 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", 372 pci_name(priv->pdev)); 373 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 374 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 375 if (err) { 376 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 377 goto abort_with_msix_enabled; 378 } 379 priv->irq_db_indices = 380 dma_alloc_coherent(&priv->pdev->dev, 381 priv->num_ntfy_blks * 382 sizeof(*priv->irq_db_indices), 383 &priv->irq_db_indices_bus, GFP_KERNEL); 384 if (!priv->irq_db_indices) { 385 err = -ENOMEM; 386 goto abort_with_mgmt_vector; 387 } 388 389 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * 390 sizeof(*priv->ntfy_blocks), GFP_KERNEL); 391 if (!priv->ntfy_blocks) { 392 err = -ENOMEM; 393 goto abort_with_irq_db_indices; 394 } 395 396 /* Setup the other blocks - the first n-1 vectors */ 397 for (i = 0; i < priv->num_ntfy_blks; i++) { 398 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 399 int msix_idx = i; 400 401 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", 402 i, pci_name(priv->pdev)); 403 block->priv = priv; 404 err = request_irq(priv->msix_vectors[msix_idx].vector, 405 gve_is_gqi(priv) ? 
gve_intr : gve_intr_dqo, 406 0, block->name, block); 407 if (err) { 408 dev_err(&priv->pdev->dev, 409 "Failed to receive msix vector %d\n", i); 410 goto abort_with_some_ntfy_blocks; 411 } 412 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 413 get_cpu_mask(i % active_cpus)); 414 block->irq_db_index = &priv->irq_db_indices[i].index; 415 } 416 return 0; 417 abort_with_some_ntfy_blocks: 418 for (j = 0; j < i; j++) { 419 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 420 int msix_idx = j; 421 422 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 423 NULL); 424 free_irq(priv->msix_vectors[msix_idx].vector, block); 425 } 426 kvfree(priv->ntfy_blocks); 427 priv->ntfy_blocks = NULL; 428 abort_with_irq_db_indices: 429 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 430 sizeof(*priv->irq_db_indices), 431 priv->irq_db_indices, priv->irq_db_indices_bus); 432 priv->irq_db_indices = NULL; 433 abort_with_mgmt_vector: 434 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 435 abort_with_msix_enabled: 436 pci_disable_msix(priv->pdev); 437 abort_with_msix_vectors: 438 kvfree(priv->msix_vectors); 439 priv->msix_vectors = NULL; 440 return err; 441 } 442 443 static void gve_free_notify_blocks(struct gve_priv *priv) 444 { 445 int i; 446 447 if (!priv->msix_vectors) 448 return; 449 450 /* Free the irqs */ 451 for (i = 0; i < priv->num_ntfy_blks; i++) { 452 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 453 int msix_idx = i; 454 455 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 456 NULL); 457 free_irq(priv->msix_vectors[msix_idx].vector, block); 458 } 459 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 460 kvfree(priv->ntfy_blocks); 461 priv->ntfy_blocks = NULL; 462 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 463 sizeof(*priv->irq_db_indices), 464 priv->irq_db_indices, priv->irq_db_indices_bus); 465 priv->irq_db_indices = NULL; 466 pci_disable_msix(priv->pdev); 467 kvfree(priv->msix_vectors); 468 priv->msix_vectors = NULL; 469 } 470 471 static int gve_setup_device_resources(struct gve_priv *priv) 472 { 473 int err; 474 475 err = gve_alloc_counter_array(priv); 476 if (err) 477 return err; 478 err = gve_alloc_notify_blocks(priv); 479 if (err) 480 goto abort_with_counter; 481 err = gve_alloc_stats_report(priv); 482 if (err) 483 goto abort_with_ntfy_blocks; 484 err = gve_adminq_configure_device_resources(priv, 485 priv->counter_array_bus, 486 priv->num_event_counters, 487 priv->irq_db_indices_bus, 488 priv->num_ntfy_blks); 489 if (unlikely(err)) { 490 dev_err(&priv->pdev->dev, 491 "could not setup device_resources: err=%d\n", err); 492 err = -ENXIO; 493 goto abort_with_stats_report; 494 } 495 496 if (!gve_is_gqi(priv)) { 497 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), 498 GFP_KERNEL); 499 if (!priv->ptype_lut_dqo) { 500 err = -ENOMEM; 501 goto abort_with_stats_report; 502 } 503 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); 504 if (err) { 505 dev_err(&priv->pdev->dev, 506 "Failed to get ptype map: err=%d\n", err); 507 goto abort_with_ptype_lut; 508 } 509 } 510 511 err = gve_adminq_report_stats(priv, priv->stats_report_len, 512 priv->stats_report_bus, 513 GVE_STATS_REPORT_TIMER_PERIOD); 514 if (err) 515 dev_err(&priv->pdev->dev, 516 "Failed to report stats: err=%d\n", err); 517 gve_set_device_resources_ok(priv); 518 return 0; 519 520 abort_with_ptype_lut: 521 kvfree(priv->ptype_lut_dqo); 522 priv->ptype_lut_dqo = NULL; 523 abort_with_stats_report: 524 gve_free_stats_report(priv); 525 
abort_with_ntfy_blocks: 526 gve_free_notify_blocks(priv); 527 abort_with_counter: 528 gve_free_counter_array(priv); 529 530 return err; 531 } 532 533 static void gve_trigger_reset(struct gve_priv *priv); 534 535 static void gve_teardown_device_resources(struct gve_priv *priv) 536 { 537 int err; 538 539 /* Tell device its resources are being freed */ 540 if (gve_get_device_resources_ok(priv)) { 541 /* detach the stats report */ 542 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); 543 if (err) { 544 dev_err(&priv->pdev->dev, 545 "Failed to detach stats report: err=%d\n", err); 546 gve_trigger_reset(priv); 547 } 548 err = gve_adminq_deconfigure_device_resources(priv); 549 if (err) { 550 dev_err(&priv->pdev->dev, 551 "Could not deconfigure device resources: err=%d\n", 552 err); 553 gve_trigger_reset(priv); 554 } 555 } 556 557 kvfree(priv->ptype_lut_dqo); 558 priv->ptype_lut_dqo = NULL; 559 560 gve_free_counter_array(priv); 561 gve_free_notify_blocks(priv); 562 gve_free_stats_report(priv); 563 gve_clear_device_resources_ok(priv); 564 } 565 566 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, 567 int (*gve_poll)(struct napi_struct *, int)) 568 { 569 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 570 571 netif_napi_add(priv->dev, &block->napi, gve_poll); 572 } 573 574 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 575 { 576 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 577 578 netif_napi_del(&block->napi); 579 } 580 581 static int gve_register_xdp_qpls(struct gve_priv *priv) 582 { 583 int start_id; 584 int err; 585 int i; 586 587 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 588 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 589 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 590 if (err) { 591 netif_err(priv, drv, priv->dev, 592 "failed to register queue page list %d\n", 593 priv->qpls[i].id); 594 /* This failure will trigger a reset - no need to clean 595 * up 596 */ 597 return err; 598 } 599 } 600 return 0; 601 } 602 603 static int gve_register_qpls(struct gve_priv *priv) 604 { 605 int start_id; 606 int err; 607 int i; 608 609 start_id = gve_tx_start_qpl_id(priv); 610 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 611 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 612 if (err) { 613 netif_err(priv, drv, priv->dev, 614 "failed to register queue page list %d\n", 615 priv->qpls[i].id); 616 /* This failure will trigger a reset - no need to clean 617 * up 618 */ 619 return err; 620 } 621 } 622 623 start_id = gve_rx_start_qpl_id(priv); 624 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 625 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 626 if (err) { 627 netif_err(priv, drv, priv->dev, 628 "failed to register queue page list %d\n", 629 priv->qpls[i].id); 630 /* This failure will trigger a reset - no need to clean 631 * up 632 */ 633 return err; 634 } 635 } 636 return 0; 637 } 638 639 static int gve_unregister_xdp_qpls(struct gve_priv *priv) 640 { 641 int start_id; 642 int err; 643 int i; 644 645 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 646 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 647 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 648 /* This failure will trigger a reset - no need to clean up */ 649 if (err) { 650 netif_err(priv, drv, priv->dev, 651 "Failed to unregister queue page list %d\n", 652 priv->qpls[i].id); 653 return err; 654 } 655 } 
656 return 0; 657 } 658 659 static int gve_unregister_qpls(struct gve_priv *priv) 660 { 661 int start_id; 662 int err; 663 int i; 664 665 start_id = gve_tx_start_qpl_id(priv); 666 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 667 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 668 /* This failure will trigger a reset - no need to clean up */ 669 if (err) { 670 netif_err(priv, drv, priv->dev, 671 "Failed to unregister queue page list %d\n", 672 priv->qpls[i].id); 673 return err; 674 } 675 } 676 677 start_id = gve_rx_start_qpl_id(priv); 678 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 679 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 680 /* This failure will trigger a reset - no need to clean up */ 681 if (err) { 682 netif_err(priv, drv, priv->dev, 683 "Failed to unregister queue page list %d\n", 684 priv->qpls[i].id); 685 return err; 686 } 687 } 688 return 0; 689 } 690 691 static int gve_create_xdp_rings(struct gve_priv *priv) 692 { 693 int err; 694 695 err = gve_adminq_create_tx_queues(priv, 696 gve_xdp_tx_start_queue_id(priv), 697 priv->num_xdp_queues); 698 if (err) { 699 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", 700 priv->num_xdp_queues); 701 /* This failure will trigger a reset - no need to clean 702 * up 703 */ 704 return err; 705 } 706 netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", 707 priv->num_xdp_queues); 708 709 return 0; 710 } 711 712 static int gve_create_rings(struct gve_priv *priv) 713 { 714 int num_tx_queues = gve_num_tx_queues(priv); 715 int err; 716 int i; 717 718 err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); 719 if (err) { 720 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", 721 num_tx_queues); 722 /* This failure will trigger a reset - no need to clean 723 * up 724 */ 725 return err; 726 } 727 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", 728 num_tx_queues); 729 730 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); 731 if (err) { 732 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n", 733 priv->rx_cfg.num_queues); 734 /* This failure will trigger a reset - no need to clean 735 * up 736 */ 737 return err; 738 } 739 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", 740 priv->rx_cfg.num_queues); 741 742 if (gve_is_gqi(priv)) { 743 /* Rx data ring has been prefilled with packet buffers at queue 744 * allocation time. 745 * 746 * Write the doorbell to provide descriptor slots and packet 747 * buffers to the NIC. 748 */ 749 for (i = 0; i < priv->rx_cfg.num_queues; i++) 750 gve_rx_write_doorbell(priv, &priv->rx[i]); 751 } else { 752 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 753 /* Post buffers and ring doorbell. 
*/ 754 gve_rx_post_buffers_dqo(&priv->rx[i]); 755 } 756 } 757 758 return 0; 759 } 760 761 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, 762 int (*napi_poll)(struct napi_struct *napi, 763 int budget)) 764 { 765 int start_id = gve_xdp_tx_start_queue_id(priv); 766 int i; 767 768 /* Add xdp tx napi & init sync stats*/ 769 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 770 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 771 772 u64_stats_init(&priv->tx[i].statss); 773 priv->tx[i].ntfy_id = ntfy_idx; 774 gve_add_napi(priv, ntfy_idx, napi_poll); 775 } 776 } 777 778 static void add_napi_init_sync_stats(struct gve_priv *priv, 779 int (*napi_poll)(struct napi_struct *napi, 780 int budget)) 781 { 782 int i; 783 784 /* Add tx napi & init sync stats*/ 785 for (i = 0; i < gve_num_tx_queues(priv); i++) { 786 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 787 788 u64_stats_init(&priv->tx[i].statss); 789 priv->tx[i].ntfy_id = ntfy_idx; 790 gve_add_napi(priv, ntfy_idx, napi_poll); 791 } 792 /* Add rx napi & init sync stats*/ 793 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 794 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 795 796 u64_stats_init(&priv->rx[i].statss); 797 priv->rx[i].ntfy_id = ntfy_idx; 798 gve_add_napi(priv, ntfy_idx, napi_poll); 799 } 800 } 801 802 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) 803 { 804 if (gve_is_gqi(priv)) { 805 gve_tx_free_rings_gqi(priv, start_id, num_rings); 806 } else { 807 gve_tx_free_rings_dqo(priv); 808 } 809 } 810 811 static int gve_alloc_xdp_rings(struct gve_priv *priv) 812 { 813 int start_id; 814 int err = 0; 815 816 if (!priv->num_xdp_queues) 817 return 0; 818 819 start_id = gve_xdp_tx_start_queue_id(priv); 820 err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); 821 if (err) 822 return err; 823 add_napi_init_xdp_sync_stats(priv, gve_napi_poll); 824 825 return 0; 826 } 827 828 static int gve_alloc_rings(struct gve_priv *priv) 829 { 830 int err; 831 832 /* Setup tx rings */ 833 priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), 834 GFP_KERNEL); 835 if (!priv->tx) 836 return -ENOMEM; 837 838 if (gve_is_gqi(priv)) 839 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); 840 else 841 err = gve_tx_alloc_rings_dqo(priv); 842 if (err) 843 goto free_tx; 844 845 /* Setup rx rings */ 846 priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), 847 GFP_KERNEL); 848 if (!priv->rx) { 849 err = -ENOMEM; 850 goto free_tx_queue; 851 } 852 853 if (gve_is_gqi(priv)) 854 err = gve_rx_alloc_rings(priv); 855 else 856 err = gve_rx_alloc_rings_dqo(priv); 857 if (err) 858 goto free_rx; 859 860 if (gve_is_gqi(priv)) 861 add_napi_init_sync_stats(priv, gve_napi_poll); 862 else 863 add_napi_init_sync_stats(priv, gve_napi_poll_dqo); 864 865 return 0; 866 867 free_rx: 868 kvfree(priv->rx); 869 priv->rx = NULL; 870 free_tx_queue: 871 gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); 872 free_tx: 873 kvfree(priv->tx); 874 priv->tx = NULL; 875 return err; 876 } 877 878 static int gve_destroy_xdp_rings(struct gve_priv *priv) 879 { 880 int start_id; 881 int err; 882 883 start_id = gve_xdp_tx_start_queue_id(priv); 884 err = gve_adminq_destroy_tx_queues(priv, 885 start_id, 886 priv->num_xdp_queues); 887 if (err) { 888 netif_err(priv, drv, priv->dev, 889 "failed to destroy XDP queues\n"); 890 /* This failure will trigger a reset - no need to clean up */ 891 return err; 892 } 893 netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); 894 895 return 0; 896 } 897 898 static int 
gve_destroy_rings(struct gve_priv *priv) 899 { 900 int num_tx_queues = gve_num_tx_queues(priv); 901 int err; 902 903 err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); 904 if (err) { 905 netif_err(priv, drv, priv->dev, 906 "failed to destroy tx queues\n"); 907 /* This failure will trigger a reset - no need to clean up */ 908 return err; 909 } 910 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n"); 911 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues); 912 if (err) { 913 netif_err(priv, drv, priv->dev, 914 "failed to destroy rx queues\n"); 915 /* This failure will trigger a reset - no need to clean up */ 916 return err; 917 } 918 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n"); 919 return 0; 920 } 921 922 static void gve_rx_free_rings(struct gve_priv *priv) 923 { 924 if (gve_is_gqi(priv)) 925 gve_rx_free_rings_gqi(priv); 926 else 927 gve_rx_free_rings_dqo(priv); 928 } 929 930 static void gve_free_xdp_rings(struct gve_priv *priv) 931 { 932 int ntfy_idx, start_id; 933 int i; 934 935 start_id = gve_xdp_tx_start_queue_id(priv); 936 if (priv->tx) { 937 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { 938 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 939 gve_remove_napi(priv, ntfy_idx); 940 } 941 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); 942 } 943 } 944 945 static void gve_free_rings(struct gve_priv *priv) 946 { 947 int num_tx_queues = gve_num_tx_queues(priv); 948 int ntfy_idx; 949 int i; 950 951 if (priv->tx) { 952 for (i = 0; i < num_tx_queues; i++) { 953 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 954 gve_remove_napi(priv, ntfy_idx); 955 } 956 gve_tx_free_rings(priv, 0, num_tx_queues); 957 kvfree(priv->tx); 958 priv->tx = NULL; 959 } 960 if (priv->rx) { 961 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 962 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 963 gve_remove_napi(priv, ntfy_idx); 964 } 965 gve_rx_free_rings(priv); 966 kvfree(priv->rx); 967 priv->rx = NULL; 968 } 969 } 970 971 int gve_alloc_page(struct gve_priv *priv, struct device *dev, 972 struct page **page, dma_addr_t *dma, 973 enum dma_data_direction dir, gfp_t gfp_flags) 974 { 975 *page = alloc_page(gfp_flags); 976 if (!*page) { 977 priv->page_alloc_fail++; 978 return -ENOMEM; 979 } 980 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 981 if (dma_mapping_error(dev, *dma)) { 982 priv->dma_mapping_error++; 983 put_page(*page); 984 return -ENOMEM; 985 } 986 return 0; 987 } 988 989 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 990 int pages) 991 { 992 struct gve_queue_page_list *qpl = &priv->qpls[id]; 993 int err; 994 int i; 995 996 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 997 netif_err(priv, drv, priv->dev, 998 "Reached max number of registered pages %llu > %llu\n", 999 pages + priv->num_registered_pages, 1000 priv->max_registered_pages); 1001 return -EINVAL; 1002 } 1003 1004 qpl->id = id; 1005 qpl->num_entries = 0; 1006 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); 1007 /* caller handles clean up */ 1008 if (!qpl->pages) 1009 return -ENOMEM; 1010 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); 1011 /* caller handles clean up */ 1012 if (!qpl->page_buses) 1013 return -ENOMEM; 1014 1015 for (i = 0; i < pages; i++) { 1016 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], 1017 &qpl->page_buses[i], 1018 gve_qpl_dma_dir(priv, id), GFP_KERNEL); 1019 /* caller handles clean up */ 1020 if (err) 1021 return -ENOMEM; 1022 qpl->num_entries++; 1023 } 1024 priv->num_registered_pages += 
pages; 1025 1026 return 0; 1027 } 1028 1029 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 1030 enum dma_data_direction dir) 1031 { 1032 if (!dma_mapping_error(dev, dma)) 1033 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 1034 if (page) 1035 put_page(page); 1036 } 1037 1038 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) 1039 { 1040 struct gve_queue_page_list *qpl = &priv->qpls[id]; 1041 int i; 1042 1043 if (!qpl->pages) 1044 return; 1045 if (!qpl->page_buses) 1046 goto free_pages; 1047 1048 for (i = 0; i < qpl->num_entries; i++) 1049 gve_free_page(&priv->pdev->dev, qpl->pages[i], 1050 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 1051 1052 kvfree(qpl->page_buses); 1053 qpl->page_buses = NULL; 1054 free_pages: 1055 kvfree(qpl->pages); 1056 qpl->pages = NULL; 1057 priv->num_registered_pages -= qpl->num_entries; 1058 } 1059 1060 static int gve_alloc_xdp_qpls(struct gve_priv *priv) 1061 { 1062 int start_id; 1063 int i, j; 1064 int err; 1065 1066 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1067 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { 1068 err = gve_alloc_queue_page_list(priv, i, 1069 priv->tx_pages_per_qpl); 1070 if (err) 1071 goto free_qpls; 1072 } 1073 1074 return 0; 1075 1076 free_qpls: 1077 for (j = start_id; j <= i; j++) 1078 gve_free_queue_page_list(priv, j); 1079 return err; 1080 } 1081 1082 static int gve_alloc_qpls(struct gve_priv *priv) 1083 { 1084 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1085 int page_count; 1086 int start_id; 1087 int i, j; 1088 int err; 1089 1090 if (!gve_is_qpl(priv)) 1091 return 0; 1092 1093 priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); 1094 if (!priv->qpls) 1095 return -ENOMEM; 1096 1097 start_id = gve_tx_start_qpl_id(priv); 1098 page_count = priv->tx_pages_per_qpl; 1099 for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { 1100 err = gve_alloc_queue_page_list(priv, i, 1101 page_count); 1102 if (err) 1103 goto free_qpls; 1104 } 1105 1106 start_id = gve_rx_start_qpl_id(priv); 1107 1108 /* For GQI_QPL number of pages allocated have 1:1 relationship with 1109 * number of descriptors. For DQO, number of pages required are 1110 * more than descriptors (because of out of order completions). 1111 */ 1112 page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ? 
1113 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; 1114 for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { 1115 err = gve_alloc_queue_page_list(priv, i, 1116 page_count); 1117 if (err) 1118 goto free_qpls; 1119 } 1120 1121 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * 1122 sizeof(unsigned long) * BITS_PER_BYTE; 1123 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), 1124 sizeof(unsigned long), GFP_KERNEL); 1125 if (!priv->qpl_cfg.qpl_id_map) { 1126 err = -ENOMEM; 1127 goto free_qpls; 1128 } 1129 1130 return 0; 1131 1132 free_qpls: 1133 for (j = 0; j <= i; j++) 1134 gve_free_queue_page_list(priv, j); 1135 kvfree(priv->qpls); 1136 priv->qpls = NULL; 1137 return err; 1138 } 1139 1140 static void gve_free_xdp_qpls(struct gve_priv *priv) 1141 { 1142 int start_id; 1143 int i; 1144 1145 start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); 1146 for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) 1147 gve_free_queue_page_list(priv, i); 1148 } 1149 1150 static void gve_free_qpls(struct gve_priv *priv) 1151 { 1152 int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; 1153 int i; 1154 1155 if (!priv->qpls) 1156 return; 1157 1158 kvfree(priv->qpl_cfg.qpl_id_map); 1159 priv->qpl_cfg.qpl_id_map = NULL; 1160 1161 for (i = 0; i < max_queues; i++) 1162 gve_free_queue_page_list(priv, i); 1163 1164 kvfree(priv->qpls); 1165 priv->qpls = NULL; 1166 } 1167 1168 /* Use this to schedule a reset when the device is capable of continuing 1169 * to handle other requests in its current state. If it is not, do a reset 1170 * in thread instead. 1171 */ 1172 void gve_schedule_reset(struct gve_priv *priv) 1173 { 1174 gve_set_do_reset(priv); 1175 queue_work(priv->gve_wq, &priv->service_task); 1176 } 1177 1178 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 1179 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 1180 static void gve_turndown(struct gve_priv *priv); 1181 static void gve_turnup(struct gve_priv *priv); 1182 1183 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1184 { 1185 struct napi_struct *napi; 1186 struct gve_rx_ring *rx; 1187 int err = 0; 1188 int i, j; 1189 u32 tx_qid; 1190 1191 if (!priv->num_xdp_queues) 1192 return 0; 1193 1194 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1195 rx = &priv->rx[i]; 1196 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1197 1198 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, 1199 napi->napi_id); 1200 if (err) 1201 goto err; 1202 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1203 MEM_TYPE_PAGE_SHARED, NULL); 1204 if (err) 1205 goto err; 1206 rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1207 if (rx->xsk_pool) { 1208 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1209 napi->napi_id); 1210 if (err) 1211 goto err; 1212 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1213 MEM_TYPE_XSK_BUFF_POOL, NULL); 1214 if (err) 1215 goto err; 1216 xsk_pool_set_rxq_info(rx->xsk_pool, 1217 &rx->xsk_rxq); 1218 } 1219 } 1220 1221 for (i = 0; i < priv->num_xdp_queues; i++) { 1222 tx_qid = gve_xdp_tx_queue_id(priv, i); 1223 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1224 } 1225 return 0; 1226 1227 err: 1228 for (j = i; j >= 0; j--) { 1229 rx = &priv->rx[j]; 1230 if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1231 xdp_rxq_info_unreg(&rx->xdp_rxq); 1232 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1233 xdp_rxq_info_unreg(&rx->xsk_rxq); 1234 } 1235 return err; 1236 } 1237 1238 static void gve_unreg_xdp_info(struct gve_priv *priv) 1239 { 1240 int i, tx_qid; 1241 
1242 if (!priv->num_xdp_queues) 1243 return; 1244 1245 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1246 struct gve_rx_ring *rx = &priv->rx[i]; 1247 1248 xdp_rxq_info_unreg(&rx->xdp_rxq); 1249 if (rx->xsk_pool) { 1250 xdp_rxq_info_unreg(&rx->xsk_rxq); 1251 rx->xsk_pool = NULL; 1252 } 1253 } 1254 1255 for (i = 0; i < priv->num_xdp_queues; i++) { 1256 tx_qid = gve_xdp_tx_queue_id(priv, i); 1257 priv->tx[tx_qid].xsk_pool = NULL; 1258 } 1259 } 1260 1261 static void gve_drain_page_cache(struct gve_priv *priv) 1262 { 1263 struct page_frag_cache *nc; 1264 int i; 1265 1266 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1267 nc = &priv->rx[i].page_cache; 1268 if (nc->va) { 1269 __page_frag_cache_drain(virt_to_page(nc->va), 1270 nc->pagecnt_bias); 1271 nc->va = NULL; 1272 } 1273 } 1274 } 1275 1276 static int gve_open(struct net_device *dev) 1277 { 1278 struct gve_priv *priv = netdev_priv(dev); 1279 int err; 1280 1281 if (priv->xdp_prog) 1282 priv->num_xdp_queues = priv->rx_cfg.num_queues; 1283 else 1284 priv->num_xdp_queues = 0; 1285 1286 err = gve_alloc_qpls(priv); 1287 if (err) 1288 return err; 1289 1290 err = gve_alloc_rings(priv); 1291 if (err) 1292 goto free_qpls; 1293 1294 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 1295 if (err) 1296 goto free_rings; 1297 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 1298 if (err) 1299 goto free_rings; 1300 1301 err = gve_reg_xdp_info(priv, dev); 1302 if (err) 1303 goto free_rings; 1304 1305 err = gve_register_qpls(priv); 1306 if (err) 1307 goto reset; 1308 1309 if (!gve_is_gqi(priv)) { 1310 /* Hard code this for now. This may be tuned in the future for 1311 * performance. 1312 */ 1313 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1314 } 1315 err = gve_create_rings(priv); 1316 if (err) 1317 goto reset; 1318 1319 gve_set_device_rings_ok(priv); 1320 1321 if (gve_get_report_stats(priv)) 1322 mod_timer(&priv->stats_report_timer, 1323 round_jiffies(jiffies + 1324 msecs_to_jiffies(priv->stats_report_timer_period))); 1325 1326 gve_turnup(priv); 1327 queue_work(priv->gve_wq, &priv->service_task); 1328 priv->interface_up_cnt++; 1329 return 0; 1330 1331 free_rings: 1332 gve_free_rings(priv); 1333 free_qpls: 1334 gve_free_qpls(priv); 1335 return err; 1336 1337 reset: 1338 /* This must have been called from a reset due to the rtnl lock 1339 * so just return at this point. 1340 */ 1341 if (gve_get_reset_in_progress(priv)) 1342 return err; 1343 /* Otherwise reset before returning */ 1344 gve_reset_and_teardown(priv, true); 1345 /* if this fails there is nothing we can do so just ignore the return */ 1346 gve_reset_recovery(priv, false); 1347 /* return the original error */ 1348 return err; 1349 } 1350 1351 static int gve_close(struct net_device *dev) 1352 { 1353 struct gve_priv *priv = netdev_priv(dev); 1354 int err; 1355 1356 netif_carrier_off(dev); 1357 if (gve_get_device_rings_ok(priv)) { 1358 gve_turndown(priv); 1359 gve_drain_page_cache(priv); 1360 err = gve_destroy_rings(priv); 1361 if (err) 1362 goto err; 1363 err = gve_unregister_qpls(priv); 1364 if (err) 1365 goto err; 1366 gve_clear_device_rings_ok(priv); 1367 } 1368 del_timer_sync(&priv->stats_report_timer); 1369 1370 gve_unreg_xdp_info(priv); 1371 gve_free_rings(priv); 1372 gve_free_qpls(priv); 1373 priv->interface_down_cnt++; 1374 return 0; 1375 1376 err: 1377 /* This must have been called from a reset due to the rtnl lock 1378 * so just return at this point. 
1379 */ 1380 if (gve_get_reset_in_progress(priv)) 1381 return err; 1382 /* Otherwise reset before returning */ 1383 gve_reset_and_teardown(priv, true); 1384 return gve_reset_recovery(priv, false); 1385 } 1386 1387 static int gve_remove_xdp_queues(struct gve_priv *priv) 1388 { 1389 int err; 1390 1391 err = gve_destroy_xdp_rings(priv); 1392 if (err) 1393 return err; 1394 1395 err = gve_unregister_xdp_qpls(priv); 1396 if (err) 1397 return err; 1398 1399 gve_unreg_xdp_info(priv); 1400 gve_free_xdp_rings(priv); 1401 gve_free_xdp_qpls(priv); 1402 priv->num_xdp_queues = 0; 1403 return 0; 1404 } 1405 1406 static int gve_add_xdp_queues(struct gve_priv *priv) 1407 { 1408 int err; 1409 1410 priv->num_xdp_queues = priv->tx_cfg.num_queues; 1411 1412 err = gve_alloc_xdp_qpls(priv); 1413 if (err) 1414 goto err; 1415 1416 err = gve_alloc_xdp_rings(priv); 1417 if (err) 1418 goto free_xdp_qpls; 1419 1420 err = gve_reg_xdp_info(priv, priv->dev); 1421 if (err) 1422 goto free_xdp_rings; 1423 1424 err = gve_register_xdp_qpls(priv); 1425 if (err) 1426 goto free_xdp_rings; 1427 1428 err = gve_create_xdp_rings(priv); 1429 if (err) 1430 goto free_xdp_rings; 1431 1432 return 0; 1433 1434 free_xdp_rings: 1435 gve_free_xdp_rings(priv); 1436 free_xdp_qpls: 1437 gve_free_xdp_qpls(priv); 1438 err: 1439 priv->num_xdp_queues = 0; 1440 return err; 1441 } 1442 1443 static void gve_handle_link_status(struct gve_priv *priv, bool link_status) 1444 { 1445 if (!gve_get_napi_enabled(priv)) 1446 return; 1447 1448 if (link_status == netif_carrier_ok(priv->dev)) 1449 return; 1450 1451 if (link_status) { 1452 netdev_info(priv->dev, "Device link is up.\n"); 1453 netif_carrier_on(priv->dev); 1454 } else { 1455 netdev_info(priv->dev, "Device link is down.\n"); 1456 netif_carrier_off(priv->dev); 1457 } 1458 } 1459 1460 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, 1461 struct netlink_ext_ack *extack) 1462 { 1463 struct bpf_prog *old_prog; 1464 int err = 0; 1465 u32 status; 1466 1467 old_prog = READ_ONCE(priv->xdp_prog); 1468 if (!netif_carrier_ok(priv->dev)) { 1469 WRITE_ONCE(priv->xdp_prog, prog); 1470 if (old_prog) 1471 bpf_prog_put(old_prog); 1472 return 0; 1473 } 1474 1475 gve_turndown(priv); 1476 if (!old_prog && prog) { 1477 // Allocate XDP TX queues if an XDP program is 1478 // being installed 1479 err = gve_add_xdp_queues(priv); 1480 if (err) 1481 goto out; 1482 } else if (old_prog && !prog) { 1483 // Remove XDP TX queues if an XDP program is 1484 // being uninstalled 1485 err = gve_remove_xdp_queues(priv); 1486 if (err) 1487 goto out; 1488 } 1489 WRITE_ONCE(priv->xdp_prog, prog); 1490 if (old_prog) 1491 bpf_prog_put(old_prog); 1492 1493 out: 1494 gve_turnup(priv); 1495 status = ioread32be(&priv->reg_bar0->device_status); 1496 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 1497 return err; 1498 } 1499 1500 static int gve_xsk_pool_enable(struct net_device *dev, 1501 struct xsk_buff_pool *pool, 1502 u16 qid) 1503 { 1504 struct gve_priv *priv = netdev_priv(dev); 1505 struct napi_struct *napi; 1506 struct gve_rx_ring *rx; 1507 int tx_qid; 1508 int err; 1509 1510 if (qid >= priv->rx_cfg.num_queues) { 1511 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); 1512 return -EINVAL; 1513 } 1514 if (xsk_pool_get_rx_frame_size(pool) < 1515 priv->dev->max_mtu + sizeof(struct ethhdr)) { 1516 dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); 1517 return -EINVAL; 1518 } 1519 1520 err = xsk_pool_dma_map(pool, &priv->pdev->dev, 1521 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 
1522 if (err) 1523 return err; 1524 1525 /* If XDP prog is not installed, return */ 1526 if (!priv->xdp_prog) 1527 return 0; 1528 1529 rx = &priv->rx[qid]; 1530 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1531 err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1532 if (err) 1533 goto err; 1534 1535 err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1536 MEM_TYPE_XSK_BUFF_POOL, NULL); 1537 if (err) 1538 goto err; 1539 1540 xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1541 rx->xsk_pool = pool; 1542 1543 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1544 priv->tx[tx_qid].xsk_pool = pool; 1545 1546 return 0; 1547 err: 1548 if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1549 xdp_rxq_info_unreg(&rx->xsk_rxq); 1550 1551 xsk_pool_dma_unmap(pool, 1552 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1553 return err; 1554 } 1555 1556 static int gve_xsk_pool_disable(struct net_device *dev, 1557 u16 qid) 1558 { 1559 struct gve_priv *priv = netdev_priv(dev); 1560 struct napi_struct *napi_rx; 1561 struct napi_struct *napi_tx; 1562 struct xsk_buff_pool *pool; 1563 int tx_qid; 1564 1565 pool = xsk_get_pool_from_qid(dev, qid); 1566 if (!pool) 1567 return -EINVAL; 1568 if (qid >= priv->rx_cfg.num_queues) 1569 return -EINVAL; 1570 1571 /* If XDP prog is not installed, unmap DMA and return */ 1572 if (!priv->xdp_prog) 1573 goto done; 1574 1575 tx_qid = gve_xdp_tx_queue_id(priv, qid); 1576 if (!netif_running(dev)) { 1577 priv->rx[qid].xsk_pool = NULL; 1578 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1579 priv->tx[tx_qid].xsk_pool = NULL; 1580 goto done; 1581 } 1582 1583 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1584 napi_disable(napi_rx); /* make sure current rx poll is done */ 1585 1586 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1587 napi_disable(napi_tx); /* make sure current tx poll is done */ 1588 1589 priv->rx[qid].xsk_pool = NULL; 1590 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1591 priv->tx[tx_qid].xsk_pool = NULL; 1592 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1593 1594 napi_enable(napi_rx); 1595 if (gve_rx_work_pending(&priv->rx[qid])) 1596 napi_schedule(napi_rx); 1597 1598 napi_enable(napi_tx); 1599 if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1600 napi_schedule(napi_tx); 1601 1602 done: 1603 xsk_pool_dma_unmap(pool, 1604 DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1605 return 0; 1606 } 1607 1608 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 1609 { 1610 struct gve_priv *priv = netdev_priv(dev); 1611 int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); 1612 1613 if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) 1614 return -EINVAL; 1615 1616 if (flags & XDP_WAKEUP_TX) { 1617 struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; 1618 struct napi_struct *napi = 1619 &priv->ntfy_blocks[tx->ntfy_id].napi; 1620 1621 if (!napi_if_scheduled_mark_missed(napi)) { 1622 /* Call local_bh_enable to trigger SoftIRQ processing */ 1623 local_bh_disable(); 1624 napi_schedule(napi); 1625 local_bh_enable(); 1626 } 1627 1628 tx->xdp_xsk_wakeup++; 1629 } 1630 1631 return 0; 1632 } 1633 1634 static int verify_xdp_configuration(struct net_device *dev) 1635 { 1636 struct gve_priv *priv = netdev_priv(dev); 1637 1638 if (dev->features & NETIF_F_LRO) { 1639 netdev_warn(dev, "XDP is not supported when LRO is on.\n"); 1640 return -EOPNOTSUPP; 1641 } 1642 1643 if (priv->queue_format != GVE_GQI_QPL_FORMAT) { 1644 netdev_warn(dev, "XDP is not supported in mode %d.\n", 1645 priv->queue_format); 1646 return -EOPNOTSUPP; 1647 } 1648 
1649 if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { 1650 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1651 dev->mtu); 1652 return -EOPNOTSUPP; 1653 } 1654 1655 if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || 1656 (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { 1657 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", 1658 priv->rx_cfg.num_queues, 1659 priv->tx_cfg.num_queues, 1660 priv->tx_cfg.max_queues); 1661 return -EINVAL; 1662 } 1663 return 0; 1664 } 1665 1666 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1667 { 1668 struct gve_priv *priv = netdev_priv(dev); 1669 int err; 1670 1671 err = verify_xdp_configuration(dev); 1672 if (err) 1673 return err; 1674 switch (xdp->command) { 1675 case XDP_SETUP_PROG: 1676 return gve_set_xdp(priv, xdp->prog, xdp->extack); 1677 case XDP_SETUP_XSK_POOL: 1678 if (xdp->xsk.pool) 1679 return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); 1680 else 1681 return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); 1682 default: 1683 return -EINVAL; 1684 } 1685 } 1686 1687 int gve_adjust_queues(struct gve_priv *priv, 1688 struct gve_queue_config new_rx_config, 1689 struct gve_queue_config new_tx_config) 1690 { 1691 int err; 1692 1693 if (netif_carrier_ok(priv->dev)) { 1694 /* To make this process as simple as possible we teardown the 1695 * device, set the new configuration, and then bring the device 1696 * up again. 1697 */ 1698 err = gve_close(priv->dev); 1699 /* we have already tried to reset in close, 1700 * just fail at this point 1701 */ 1702 if (err) 1703 return err; 1704 priv->tx_cfg = new_tx_config; 1705 priv->rx_cfg = new_rx_config; 1706 1707 err = gve_open(priv->dev); 1708 if (err) 1709 goto err; 1710 1711 return 0; 1712 } 1713 /* Set the config for the next up. */ 1714 priv->tx_cfg = new_tx_config; 1715 priv->rx_cfg = new_rx_config; 1716 1717 return 0; 1718 err: 1719 netif_err(priv, drv, priv->dev, 1720 "Adjust queues failed! !!! 
DISABLING ALL QUEUES !!!\n"); 1721 gve_turndown(priv); 1722 return err; 1723 } 1724 1725 static void gve_turndown(struct gve_priv *priv) 1726 { 1727 int idx; 1728 1729 if (netif_carrier_ok(priv->dev)) 1730 netif_carrier_off(priv->dev); 1731 1732 if (!gve_get_napi_enabled(priv)) 1733 return; 1734 1735 /* Disable napi to prevent more work from coming in */ 1736 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1737 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1738 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1739 1740 napi_disable(&block->napi); 1741 } 1742 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1743 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1744 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1745 1746 napi_disable(&block->napi); 1747 } 1748 1749 /* Stop tx queues */ 1750 netif_tx_disable(priv->dev); 1751 1752 gve_clear_napi_enabled(priv); 1753 gve_clear_report_stats(priv); 1754 } 1755 1756 static void gve_turnup(struct gve_priv *priv) 1757 { 1758 int idx; 1759 1760 /* Start the tx queues */ 1761 netif_tx_start_all_queues(priv->dev); 1762 1763 /* Enable napi and unmask interrupts for all queues */ 1764 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1765 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 1766 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1767 1768 napi_enable(&block->napi); 1769 if (gve_is_gqi(priv)) { 1770 iowrite32be(0, gve_irq_doorbell(priv, block)); 1771 } else { 1772 gve_set_itr_coalesce_usecs_dqo(priv, block, 1773 priv->tx_coalesce_usecs); 1774 } 1775 } 1776 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1777 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 1778 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 1779 1780 napi_enable(&block->napi); 1781 if (gve_is_gqi(priv)) { 1782 iowrite32be(0, gve_irq_doorbell(priv, block)); 1783 } else { 1784 gve_set_itr_coalesce_usecs_dqo(priv, block, 1785 priv->rx_coalesce_usecs); 1786 } 1787 } 1788 1789 gve_set_napi_enabled(priv); 1790 } 1791 1792 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) 1793 { 1794 struct gve_notify_block *block; 1795 struct gve_tx_ring *tx = NULL; 1796 struct gve_priv *priv; 1797 u32 last_nic_done; 1798 u32 current_time; 1799 u32 ntfy_idx; 1800 1801 netdev_info(dev, "Timeout on tx queue, %d", txqueue); 1802 priv = netdev_priv(dev); 1803 if (txqueue > priv->tx_cfg.num_queues) 1804 goto reset; 1805 1806 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); 1807 if (ntfy_idx >= priv->num_ntfy_blks) 1808 goto reset; 1809 1810 block = &priv->ntfy_blocks[ntfy_idx]; 1811 tx = block->tx; 1812 1813 current_time = jiffies_to_msecs(jiffies); 1814 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) 1815 goto reset; 1816 1817 /* Check to see if there are missed completions, which will allow us to 1818 * kick the queue. 1819 */ 1820 last_nic_done = gve_tx_load_event_counter(priv, tx); 1821 if (last_nic_done - tx->done) { 1822 netdev_info(dev, "Kicking queue %d", txqueue); 1823 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 1824 napi_schedule(&block->napi); 1825 tx->last_kick_msec = current_time; 1826 goto out; 1827 } // Else reset. 
1828 1829 reset: 1830 gve_schedule_reset(priv); 1831 1832 out: 1833 if (tx) 1834 tx->queue_timeout++; 1835 priv->tx_timeo_cnt++; 1836 } 1837 1838 static int gve_set_features(struct net_device *netdev, 1839 netdev_features_t features) 1840 { 1841 const netdev_features_t orig_features = netdev->features; 1842 struct gve_priv *priv = netdev_priv(netdev); 1843 int err; 1844 1845 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { 1846 netdev->features ^= NETIF_F_LRO; 1847 if (netif_carrier_ok(netdev)) { 1848 /* To make this process as simple as possible we 1849 * teardown the device, set the new configuration, 1850 * and then bring the device up again. 1851 */ 1852 err = gve_close(netdev); 1853 /* We have already tried to reset in close, just fail 1854 * at this point. 1855 */ 1856 if (err) 1857 goto err; 1858 1859 err = gve_open(netdev); 1860 if (err) 1861 goto err; 1862 } 1863 } 1864 1865 return 0; 1866 err: 1867 /* Reverts the change on error. */ 1868 netdev->features = orig_features; 1869 netif_err(priv, drv, netdev, 1870 "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); 1871 return err; 1872 } 1873 1874 static const struct net_device_ops gve_netdev_ops = { 1875 .ndo_start_xmit = gve_start_xmit, 1876 .ndo_open = gve_open, 1877 .ndo_stop = gve_close, 1878 .ndo_get_stats64 = gve_get_stats, 1879 .ndo_tx_timeout = gve_tx_timeout, 1880 .ndo_set_features = gve_set_features, 1881 .ndo_bpf = gve_xdp, 1882 .ndo_xdp_xmit = gve_xdp_xmit, 1883 .ndo_xsk_wakeup = gve_xsk_wakeup, 1884 }; 1885 1886 static void gve_handle_status(struct gve_priv *priv, u32 status) 1887 { 1888 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 1889 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 1890 gve_set_do_reset(priv); 1891 } 1892 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) { 1893 priv->stats_report_trigger_cnt++; 1894 gve_set_do_report_stats(priv); 1895 } 1896 } 1897 1898 static void gve_handle_reset(struct gve_priv *priv) 1899 { 1900 /* A service task will be scheduled at the end of probe to catch any 1901 * resets that need to happen, and we don't want to reset until 1902 * probe is done. 1903 */ 1904 if (gve_get_probe_in_progress(priv)) 1905 return; 1906 1907 if (gve_get_do_reset(priv)) { 1908 rtnl_lock(); 1909 gve_reset(priv, false); 1910 rtnl_unlock(); 1911 } 1912 } 1913 1914 void gve_handle_report_stats(struct gve_priv *priv) 1915 { 1916 struct stats *stats = priv->stats_report->stats; 1917 int idx, stats_idx = 0; 1918 unsigned int start = 0; 1919 u64 tx_bytes; 1920 1921 if (!gve_get_report_stats(priv)) 1922 return; 1923 1924 be64_add_cpu(&priv->stats_report->written_count, 1); 1925 /* tx stats */ 1926 if (priv->tx) { 1927 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { 1928 u32 last_completion = 0; 1929 u32 tx_frames = 0; 1930 1931 /* DQO doesn't currently support these metrics. 
*/ 1932 if (gve_is_gqi(priv)) { 1933 last_completion = priv->tx[idx].done; 1934 tx_frames = priv->tx[idx].req; 1935 } 1936 1937 do { 1938 start = u64_stats_fetch_begin(&priv->tx[idx].statss); 1939 tx_bytes = priv->tx[idx].bytes_done; 1940 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); 1941 stats[stats_idx++] = (struct stats) { 1942 .stat_name = cpu_to_be32(TX_WAKE_CNT), 1943 .value = cpu_to_be64(priv->tx[idx].wake_queue), 1944 .queue_id = cpu_to_be32(idx), 1945 }; 1946 stats[stats_idx++] = (struct stats) { 1947 .stat_name = cpu_to_be32(TX_STOP_CNT), 1948 .value = cpu_to_be64(priv->tx[idx].stop_queue), 1949 .queue_id = cpu_to_be32(idx), 1950 }; 1951 stats[stats_idx++] = (struct stats) { 1952 .stat_name = cpu_to_be32(TX_FRAMES_SENT), 1953 .value = cpu_to_be64(tx_frames), 1954 .queue_id = cpu_to_be32(idx), 1955 }; 1956 stats[stats_idx++] = (struct stats) { 1957 .stat_name = cpu_to_be32(TX_BYTES_SENT), 1958 .value = cpu_to_be64(tx_bytes), 1959 .queue_id = cpu_to_be32(idx), 1960 }; 1961 stats[stats_idx++] = (struct stats) { 1962 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), 1963 .value = cpu_to_be64(last_completion), 1964 .queue_id = cpu_to_be32(idx), 1965 }; 1966 stats[stats_idx++] = (struct stats) { 1967 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), 1968 .value = cpu_to_be64(priv->tx[idx].queue_timeout), 1969 .queue_id = cpu_to_be32(idx), 1970 }; 1971 } 1972 } 1973 /* rx stats */ 1974 if (priv->rx) { 1975 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 1976 stats[stats_idx++] = (struct stats) { 1977 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE), 1978 .value = cpu_to_be64(priv->rx[idx].desc.seqno), 1979 .queue_id = cpu_to_be32(idx), 1980 }; 1981 stats[stats_idx++] = (struct stats) { 1982 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), 1983 .value = cpu_to_be64(priv->rx[0].fill_cnt), 1984 .queue_id = cpu_to_be32(idx), 1985 }; 1986 } 1987 } 1988 } 1989 1990 /* Handle NIC status register changes, reset requests and report stats */ 1991 static void gve_service_task(struct work_struct *work) 1992 { 1993 struct gve_priv *priv = container_of(work, struct gve_priv, 1994 service_task); 1995 u32 status = ioread32be(&priv->reg_bar0->device_status); 1996 1997 gve_handle_status(priv, status); 1998 1999 gve_handle_reset(priv); 2000 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); 2001 } 2002 2003 static void gve_set_netdev_xdp_features(struct gve_priv *priv) 2004 { 2005 if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 2006 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; 2007 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2008 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; 2009 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2010 } else { 2011 priv->dev->xdp_features = 0; 2012 } 2013 } 2014 2015 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 2016 { 2017 int num_ntfy; 2018 int err; 2019 2020 /* Set up the adminq */ 2021 err = gve_adminq_alloc(&priv->pdev->dev, priv); 2022 if (err) { 2023 dev_err(&priv->pdev->dev, 2024 "Failed to alloc admin queue: err=%d\n", err); 2025 return err; 2026 } 2027 2028 err = gve_verify_driver_compatibility(priv); 2029 if (err) { 2030 dev_err(&priv->pdev->dev, 2031 "Could not verify driver compatibility: err=%d\n", err); 2032 goto err; 2033 } 2034 2035 if (skip_describe_device) 2036 goto setup_device; 2037 2038 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; 2039 /* Get the initial information we need from the device */ 2040 err = gve_adminq_describe_device(priv); 2041 if (err) 
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	/* Big TCP is only supported on DQ */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

	if (!gve_is_gqi(priv)) {
		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
	}

setup_device:
	gve_set_netdev_xdp_features(priv);
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}

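/* Rebuild device state after a reset: re-run gve_init_priv() and, if the
 * interface was up beforehand, reopen it. On failure all queues are turned
 * down.
 */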
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}

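/* Report the driver version to the device by streaming the version prefix
 * and version string, one byte at a time, into the driver version register,
 * terminated by a newline.
 */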
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	err = pci_request_regions(pdev, gve_driver_name);
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		err = -ENOMEM;
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;

	/* Set default and supported features.
	 *
	 * Features might be set in other locations as well (such as
	 * `gve_adminq_describe_device`).
	 */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_gve_init;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_gve_init:
	gve_teardown_priv_resources(priv);

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return err;
}

static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void gve_shutdown(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	rtnl_unlock();
}

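/* Power-management hooks: suspend closes the interface (resetting it if the
 * close fails) and tears down resources; resume rebuilds the device via the
 * reset-recovery path.
 */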
#ifdef CONFIG_PM
static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	bool was_up = netif_carrier_ok(priv->dev);

	priv->suspend_cnt++;
	rtnl_lock();
	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close, if close fails, reset */
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* If the dev wasn't up or close worked, finish tearing down */
		gve_teardown_priv_resources(priv);
	}
	priv->up_before_suspend = was_up;
	rtnl_unlock();
	return 0;
}

static int gve_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	priv->resume_cnt++;
	rtnl_lock();
	err = gve_reset_recovery(priv, priv->up_before_suspend);
	rtnl_unlock();
	return err;
}
#endif /* CONFIG_PM */

static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gve_driver = {
	.name = gve_driver_name,
	.id_table = gve_id_table,
	.probe = gve_probe,
	.remove = gve_remove,
	.shutdown = gve_shutdown,
#ifdef CONFIG_PM
	.suspend = gve_suspend,
	.resume = gve_resume,
#endif
};

module_pci_driver(gve_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);