1 // SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 /* Google virtual Ethernet (gve) driver 3 * 4 * Copyright (C) 2015-2019 Google, Inc. 5 */ 6 7 #include <linux/cpumask.h> 8 #include <linux/etherdevice.h> 9 #include <linux/interrupt.h> 10 #include <linux/module.h> 11 #include <linux/pci.h> 12 #include <linux/sched.h> 13 #include <linux/timer.h> 14 #include <linux/workqueue.h> 15 #include <net/sch_generic.h> 16 #include "gve.h" 17 #include "gve_adminq.h" 18 #include "gve_register.h" 19 20 #define GVE_DEFAULT_RX_COPYBREAK (256) 21 22 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 23 #define GVE_VERSION "1.0.0" 24 #define GVE_VERSION_PREFIX "GVE-" 25 26 const char gve_version_str[] = GVE_VERSION; 27 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 28 29 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 30 { 31 struct gve_priv *priv = netdev_priv(dev); 32 unsigned int start; 33 int ring; 34 35 if (priv->rx) { 36 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 37 do { 38 start = 39 u64_stats_fetch_begin(&priv->rx[ring].statss); 40 s->rx_packets += priv->rx[ring].rpackets; 41 s->rx_bytes += priv->rx[ring].rbytes; 42 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 43 start)); 44 } 45 } 46 if (priv->tx) { 47 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { 48 do { 49 start = 50 u64_stats_fetch_begin(&priv->tx[ring].statss); 51 s->tx_packets += priv->tx[ring].pkt_done; 52 s->tx_bytes += priv->tx[ring].bytes_done; 53 } while (u64_stats_fetch_retry(&priv->tx[ring].statss, 54 start)); 55 } 56 } 57 } 58 59 static int gve_alloc_counter_array(struct gve_priv *priv) 60 { 61 priv->counter_array = 62 dma_alloc_coherent(&priv->pdev->dev, 63 priv->num_event_counters * 64 sizeof(*priv->counter_array), 65 &priv->counter_array_bus, GFP_KERNEL); 66 if (!priv->counter_array) 67 return -ENOMEM; 68 69 return 0; 70 } 71 72 static void gve_free_counter_array(struct gve_priv *priv) 73 { 74 dma_free_coherent(&priv->pdev->dev, 75 priv->num_event_counters * 76 sizeof(*priv->counter_array), 77 priv->counter_array, priv->counter_array_bus); 78 priv->counter_array = NULL; 79 } 80 81 static irqreturn_t gve_mgmnt_intr(int irq, void *arg) 82 { 83 struct gve_priv *priv = arg; 84 85 queue_work(priv->gve_wq, &priv->service_task); 86 return IRQ_HANDLED; 87 } 88 89 static irqreturn_t gve_intr(int irq, void *arg) 90 { 91 struct gve_notify_block *block = arg; 92 struct gve_priv *priv = block->priv; 93 94 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 95 napi_schedule_irqoff(&block->napi); 96 return IRQ_HANDLED; 97 } 98 99 static int gve_napi_poll(struct napi_struct *napi, int budget) 100 { 101 struct gve_notify_block *block; 102 __be32 __iomem *irq_doorbell; 103 bool reschedule = false; 104 struct gve_priv *priv; 105 106 block = container_of(napi, struct gve_notify_block, napi); 107 priv = block->priv; 108 109 if (block->tx) 110 reschedule |= gve_tx_poll(block, budget); 111 if (block->rx) 112 reschedule |= gve_rx_poll(block, budget); 113 114 if (reschedule) 115 return budget; 116 117 napi_complete(napi); 118 irq_doorbell = gve_irq_doorbell(priv, block); 119 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 120 121 /* Double check we have no extra work. 122 * Ensure unmask synchronizes with checking for work. 123 */ 124 dma_rmb(); 125 if (block->tx) 126 reschedule |= gve_tx_poll(block, -1); 127 if (block->rx) 128 reschedule |= gve_rx_poll(block, -1); 129 if (reschedule && napi_reschedule(napi)) 130 iowrite32be(GVE_IRQ_MASK, irq_doorbell); 131 132 return 0; 133 } 134 135 static int gve_alloc_notify_blocks(struct gve_priv *priv) 136 { 137 int num_vecs_requested = priv->num_ntfy_blks + 1; 138 char *name = priv->dev->name; 139 unsigned int active_cpus; 140 int vecs_enabled; 141 int i, j; 142 int err; 143 144 priv->msix_vectors = kvzalloc(num_vecs_requested * 145 sizeof(*priv->msix_vectors), GFP_KERNEL); 146 if (!priv->msix_vectors) 147 return -ENOMEM; 148 for (i = 0; i < num_vecs_requested; i++) 149 priv->msix_vectors[i].entry = i; 150 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, 151 GVE_MIN_MSIX, num_vecs_requested); 152 if (vecs_enabled < 0) { 153 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", 154 GVE_MIN_MSIX, vecs_enabled); 155 err = vecs_enabled; 156 goto abort_with_msix_vectors; 157 } 158 if (vecs_enabled != num_vecs_requested) { 159 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 160 int vecs_per_type = new_num_ntfy_blks / 2; 161 int vecs_left = new_num_ntfy_blks % 2; 162 163 priv->num_ntfy_blks = new_num_ntfy_blks; 164 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 165 vecs_per_type); 166 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 167 vecs_per_type + vecs_left); 168 dev_err(&priv->pdev->dev, 169 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 170 vecs_enabled, priv->tx_cfg.max_queues, 171 priv->rx_cfg.max_queues); 172 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 173 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 174 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 175 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 176 } 177 /* Half the notification blocks go to TX and half to RX */ 178 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); 179 180 /* Setup Management Vector - the last vector */ 181 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt", 182 name); 183 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, 184 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); 185 if (err) { 186 dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); 187 goto abort_with_msix_enabled; 188 } 189 priv->ntfy_blocks = 190 dma_alloc_coherent(&priv->pdev->dev, 191 priv->num_ntfy_blks * 192 sizeof(*priv->ntfy_blocks), 193 &priv->ntfy_block_bus, GFP_KERNEL); 194 if (!priv->ntfy_blocks) { 195 err = -ENOMEM; 196 goto abort_with_mgmt_vector; 197 } 198 /* Setup the other blocks - the first n-1 vectors */ 199 for (i = 0; i < priv->num_ntfy_blks; i++) { 200 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 201 int msix_idx = i; 202 203 snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d", 204 name, i); 205 block->priv = priv; 206 err = request_irq(priv->msix_vectors[msix_idx].vector, 207 gve_intr, 0, block->name, block); 208 if (err) { 209 dev_err(&priv->pdev->dev, 210 "Failed to receive msix vector %d\n", i); 211 goto abort_with_some_ntfy_blocks; 212 } 213 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 214 get_cpu_mask(i % active_cpus)); 215 } 216 return 0; 217 abort_with_some_ntfy_blocks: 218 for (j = 0; j < i; j++) { 219 struct gve_notify_block *block = &priv->ntfy_blocks[j]; 220 int msix_idx = j; 221 222 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 223 NULL); 224 free_irq(priv->msix_vectors[msix_idx].vector, block); 225 } 226 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * 227 sizeof(*priv->ntfy_blocks), 228 priv->ntfy_blocks, priv->ntfy_block_bus); 229 priv->ntfy_blocks = NULL; 230 abort_with_mgmt_vector: 231 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 232 abort_with_msix_enabled: 233 pci_disable_msix(priv->pdev); 234 abort_with_msix_vectors: 235 kvfree(priv->msix_vectors); 236 priv->msix_vectors = NULL; 237 return err; 238 } 239 240 static void gve_free_notify_blocks(struct gve_priv *priv) 241 { 242 int i; 243 244 /* Free the irqs */ 245 for (i = 0; i < priv->num_ntfy_blks; i++) { 246 struct gve_notify_block *block = &priv->ntfy_blocks[i]; 247 int msix_idx = i; 248 249 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, 250 NULL); 251 free_irq(priv->msix_vectors[msix_idx].vector, block); 252 } 253 dma_free_coherent(&priv->pdev->dev, 254 priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), 255 priv->ntfy_blocks, priv->ntfy_block_bus); 256 priv->ntfy_blocks = NULL; 257 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); 258 pci_disable_msix(priv->pdev); 259 kvfree(priv->msix_vectors); 260 priv->msix_vectors = NULL; 261 } 262 263 static int gve_setup_device_resources(struct gve_priv *priv) 264 { 265 int err; 266 267 err = gve_alloc_counter_array(priv); 268 if (err) 269 return err; 270 err = gve_alloc_notify_blocks(priv); 271 if (err) 272 goto abort_with_counter; 273 err = gve_adminq_configure_device_resources(priv, 274 priv->counter_array_bus, 275 priv->num_event_counters, 276 priv->ntfy_block_bus, 277 priv->num_ntfy_blks); 278 if (unlikely(err)) { 279 dev_err(&priv->pdev->dev, 280 "could not setup device_resources: err=%d\n", err); 281 err = -ENXIO; 282 goto abort_with_ntfy_blocks; 283 } 284 gve_set_device_resources_ok(priv); 285 return 0; 286 abort_with_ntfy_blocks: 287 gve_free_notify_blocks(priv); 288 abort_with_counter: 289 gve_free_counter_array(priv); 290 return err; 291 } 292 293 static void gve_trigger_reset(struct gve_priv *priv); 294 295 static void gve_teardown_device_resources(struct gve_priv *priv) 296 { 297 int err; 298 299 /* Tell device its resources are being freed */ 300 if (gve_get_device_resources_ok(priv)) { 301 err = gve_adminq_deconfigure_device_resources(priv); 302 if (err) { 303 dev_err(&priv->pdev->dev, 304 "Could not deconfigure device resources: err=%d\n", 305 err); 306 gve_trigger_reset(priv); 307 } 308 } 309 gve_free_counter_array(priv); 310 gve_free_notify_blocks(priv); 311 gve_clear_device_resources_ok(priv); 312 } 313 314 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx) 315 { 316 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 317 318 netif_napi_add(priv->dev, &block->napi, gve_napi_poll, 319 NAPI_POLL_WEIGHT); 320 } 321 322 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 323 { 324 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 325 326 netif_napi_del(&block->napi); 327 } 328 329 static int gve_register_qpls(struct gve_priv *priv) 330 { 331 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 332 int err; 333 int i; 334 335 for (i = 0; i < num_qpls; i++) { 336 err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 337 if (err) { 338 netif_err(priv, drv, priv->dev, 339 "failed to register queue page list %d\n", 340 priv->qpls[i].id); 341 /* This failure will trigger a reset - no need to clean 342 * up 343 */ 344 return err; 345 } 346 } 347 return 0; 348 } 349 350 static int gve_unregister_qpls(struct gve_priv *priv) 351 { 352 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 353 int err; 354 int i; 355 356 for (i = 0; i < num_qpls; i++) { 357 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 358 /* This failure will trigger a reset - no need to clean up */ 359 if (err) { 360 netif_err(priv, drv, priv->dev, 361 "Failed to unregister queue page list %d\n", 362 priv->qpls[i].id); 363 return err; 364 } 365 } 366 return 0; 367 } 368 369 static int gve_create_rings(struct gve_priv *priv) 370 { 371 int err; 372 int i; 373 374 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 375 err = gve_adminq_create_tx_queue(priv, i); 376 if (err) { 377 netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n", 378 i); 379 /* This failure will trigger a reset - no need to clean 380 * up 381 */ 382 return err; 383 } 384 netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i); 385 } 386 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 387 err = gve_adminq_create_rx_queue(priv, i); 388 if (err) { 389 netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n", 390 i); 391 /* This failure will trigger a reset - no need to clean 392 * up 393 */ 394 return err; 395 } 396 /* Rx data ring has been prefilled with packet buffers at 397 * queue allocation time. 398 * Write the doorbell to provide descriptor slots and packet 399 * buffers to the NIC. 400 */ 401 gve_rx_write_doorbell(priv, &priv->rx[i]); 402 netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i); 403 } 404 405 return 0; 406 } 407 408 static int gve_alloc_rings(struct gve_priv *priv) 409 { 410 int ntfy_idx; 411 int err; 412 int i; 413 414 /* Setup tx rings */ 415 priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx), 416 GFP_KERNEL); 417 if (!priv->tx) 418 return -ENOMEM; 419 err = gve_tx_alloc_rings(priv); 420 if (err) 421 goto free_tx; 422 /* Setup rx rings */ 423 priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx), 424 GFP_KERNEL); 425 if (!priv->rx) { 426 err = -ENOMEM; 427 goto free_tx_queue; 428 } 429 err = gve_rx_alloc_rings(priv); 430 if (err) 431 goto free_rx; 432 /* Add tx napi & init sync stats*/ 433 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 434 u64_stats_init(&priv->tx[i].statss); 435 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 436 gve_add_napi(priv, ntfy_idx); 437 } 438 /* Add rx napi & init sync stats*/ 439 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 440 u64_stats_init(&priv->rx[i].statss); 441 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 442 gve_add_napi(priv, ntfy_idx); 443 } 444 445 return 0; 446 447 free_rx: 448 kvfree(priv->rx); 449 priv->rx = NULL; 450 free_tx_queue: 451 gve_tx_free_rings(priv); 452 free_tx: 453 kvfree(priv->tx); 454 priv->tx = NULL; 455 return err; 456 } 457 458 static int gve_destroy_rings(struct gve_priv *priv) 459 { 460 int err; 461 int i; 462 463 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 464 err = gve_adminq_destroy_tx_queue(priv, i); 465 if (err) { 466 netif_err(priv, drv, priv->dev, 467 "failed to destroy tx queue %d\n", 468 i); 469 /* This failure will trigger a reset - no need to clean 470 * up 471 */ 472 return err; 473 } 474 netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i); 475 } 476 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 477 err = gve_adminq_destroy_rx_queue(priv, i); 478 if (err) { 479 netif_err(priv, drv, priv->dev, 480 "failed to destroy rx queue %d\n", 481 i); 482 /* This failure will trigger a reset - no need to clean 483 * up 484 */ 485 return err; 486 } 487 netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i); 488 } 489 return 0; 490 } 491 492 static void gve_free_rings(struct gve_priv *priv) 493 { 494 int ntfy_idx; 495 int i; 496 497 if (priv->tx) { 498 for (i = 0; i < priv->tx_cfg.num_queues; i++) { 499 ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 500 gve_remove_napi(priv, ntfy_idx); 501 } 502 gve_tx_free_rings(priv); 503 kvfree(priv->tx); 504 priv->tx = NULL; 505 } 506 if (priv->rx) { 507 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 508 ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 509 gve_remove_napi(priv, ntfy_idx); 510 } 511 gve_rx_free_rings(priv); 512 kvfree(priv->rx); 513 priv->rx = NULL; 514 } 515 } 516 517 int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, 518 enum dma_data_direction dir) 519 { 520 *page = alloc_page(GFP_KERNEL); 521 if (!*page) 522 return -ENOMEM; 523 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 524 if (dma_mapping_error(dev, *dma)) { 525 put_page(*page); 526 return -ENOMEM; 527 } 528 return 0; 529 } 530 531 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 532 int pages) 533 { 534 struct gve_queue_page_list *qpl = &priv->qpls[id]; 535 int err; 536 int i; 537 538 if (pages + priv->num_registered_pages > priv->max_registered_pages) { 539 netif_err(priv, drv, priv->dev, 540 "Reached max number of registered pages %llu > %llu\n", 541 pages + priv->num_registered_pages, 542 priv->max_registered_pages); 543 return -EINVAL; 544 } 545 546 qpl->id = id; 547 qpl->num_entries = 0; 548 qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL); 549 /* caller handles clean up */ 550 if (!qpl->pages) 551 return -ENOMEM; 552 qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses), 553 GFP_KERNEL); 554 /* caller handles clean up */ 555 if (!qpl->page_buses) 556 return -ENOMEM; 557 558 for (i = 0; i < pages; i++) { 559 err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i], 560 &qpl->page_buses[i], 561 gve_qpl_dma_dir(priv, id)); 562 /* caller handles clean up */ 563 if (err) 564 return -ENOMEM; 565 qpl->num_entries++; 566 } 567 priv->num_registered_pages += pages; 568 569 return 0; 570 } 571 572 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 573 enum dma_data_direction dir) 574 { 575 if (!dma_mapping_error(dev, dma)) 576 dma_unmap_page(dev, dma, PAGE_SIZE, dir); 577 if (page) 578 put_page(page); 579 } 580 581 static void gve_free_queue_page_list(struct gve_priv *priv, 582 int id) 583 { 584 struct gve_queue_page_list *qpl = &priv->qpls[id]; 585 int i; 586 587 if (!qpl->pages) 588 return; 589 if (!qpl->page_buses) 590 goto free_pages; 591 592 for (i = 0; i < qpl->num_entries; i++) 593 gve_free_page(&priv->pdev->dev, qpl->pages[i], 594 qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 595 596 kvfree(qpl->page_buses); 597 free_pages: 598 kvfree(qpl->pages); 599 priv->num_registered_pages -= qpl->num_entries; 600 } 601 602 static int gve_alloc_qpls(struct gve_priv *priv) 603 { 604 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 605 int i, j; 606 int err; 607 608 priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL); 609 if (!priv->qpls) 610 return -ENOMEM; 611 612 for (i = 0; i < gve_num_tx_qpls(priv); i++) { 613 err = gve_alloc_queue_page_list(priv, i, 614 priv->tx_pages_per_qpl); 615 if (err) 616 goto free_qpls; 617 } 618 for (; i < num_qpls; i++) { 619 err = gve_alloc_queue_page_list(priv, i, 620 priv->rx_pages_per_qpl); 621 if (err) 622 goto free_qpls; 623 } 624 625 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) * 626 sizeof(unsigned long) * BITS_PER_BYTE; 627 priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) * 628 sizeof(unsigned long), GFP_KERNEL); 629 if (!priv->qpl_cfg.qpl_id_map) { 630 err = -ENOMEM; 631 goto free_qpls; 632 } 633 634 return 0; 635 636 free_qpls: 637 for (j = 0; j <= i; j++) 638 gve_free_queue_page_list(priv, j); 639 kvfree(priv->qpls); 640 return err; 641 } 642 643 static void gve_free_qpls(struct gve_priv *priv) 644 { 645 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 646 int i; 647 648 kvfree(priv->qpl_cfg.qpl_id_map); 649 650 for (i = 0; i < num_qpls; i++) 651 gve_free_queue_page_list(priv, i); 652 653 kvfree(priv->qpls); 654 } 655 656 /* Use this to schedule a reset when the device is capable of continuing 657 * to handle other requests in its current state. If it is not, do a reset 658 * in thread instead. 659 */ 660 void gve_schedule_reset(struct gve_priv *priv) 661 { 662 gve_set_do_reset(priv); 663 queue_work(priv->gve_wq, &priv->service_task); 664 } 665 666 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); 667 static int gve_reset_recovery(struct gve_priv *priv, bool was_up); 668 static void gve_turndown(struct gve_priv *priv); 669 static void gve_turnup(struct gve_priv *priv); 670 671 static int gve_open(struct net_device *dev) 672 { 673 struct gve_priv *priv = netdev_priv(dev); 674 int err; 675 676 err = gve_alloc_qpls(priv); 677 if (err) 678 return err; 679 err = gve_alloc_rings(priv); 680 if (err) 681 goto free_qpls; 682 683 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 684 if (err) 685 goto free_rings; 686 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 687 if (err) 688 goto free_rings; 689 690 err = gve_register_qpls(priv); 691 if (err) 692 goto reset; 693 err = gve_create_rings(priv); 694 if (err) 695 goto reset; 696 gve_set_device_rings_ok(priv); 697 698 gve_turnup(priv); 699 netif_carrier_on(dev); 700 return 0; 701 702 free_rings: 703 gve_free_rings(priv); 704 free_qpls: 705 gve_free_qpls(priv); 706 return err; 707 708 reset: 709 /* This must have been called from a reset due to the rtnl lock 710 * so just return at this point. 711 */ 712 if (gve_get_reset_in_progress(priv)) 713 return err; 714 /* Otherwise reset before returning */ 715 gve_reset_and_teardown(priv, true); 716 /* if this fails there is nothing we can do so just ignore the return */ 717 gve_reset_recovery(priv, false); 718 /* return the original error */ 719 return err; 720 } 721 722 static int gve_close(struct net_device *dev) 723 { 724 struct gve_priv *priv = netdev_priv(dev); 725 int err; 726 727 netif_carrier_off(dev); 728 if (gve_get_device_rings_ok(priv)) { 729 gve_turndown(priv); 730 err = gve_destroy_rings(priv); 731 if (err) 732 goto err; 733 err = gve_unregister_qpls(priv); 734 if (err) 735 goto err; 736 gve_clear_device_rings_ok(priv); 737 } 738 739 gve_free_rings(priv); 740 gve_free_qpls(priv); 741 return 0; 742 743 err: 744 /* This must have been called from a reset due to the rtnl lock 745 * so just return at this point. 746 */ 747 if (gve_get_reset_in_progress(priv)) 748 return err; 749 /* Otherwise reset before returning */ 750 gve_reset_and_teardown(priv, true); 751 return gve_reset_recovery(priv, false); 752 } 753 754 int gve_adjust_queues(struct gve_priv *priv, 755 struct gve_queue_config new_rx_config, 756 struct gve_queue_config new_tx_config) 757 { 758 int err; 759 760 if (netif_carrier_ok(priv->dev)) { 761 /* To make this process as simple as possible we teardown the 762 * device, set the new configuration, and then bring the device 763 * up again. 764 */ 765 err = gve_close(priv->dev); 766 /* we have already tried to reset in close, 767 * just fail at this point 768 */ 769 if (err) 770 return err; 771 priv->tx_cfg = new_tx_config; 772 priv->rx_cfg = new_rx_config; 773 774 err = gve_open(priv->dev); 775 if (err) 776 goto err; 777 778 return 0; 779 } 780 /* Set the config for the next up. */ 781 priv->tx_cfg = new_tx_config; 782 priv->rx_cfg = new_rx_config; 783 784 return 0; 785 err: 786 netif_err(priv, drv, priv->dev, 787 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); 788 gve_turndown(priv); 789 return err; 790 } 791 792 static void gve_turndown(struct gve_priv *priv) 793 { 794 int idx; 795 796 if (netif_carrier_ok(priv->dev)) 797 netif_carrier_off(priv->dev); 798 799 if (!gve_get_napi_enabled(priv)) 800 return; 801 802 /* Disable napi to prevent more work from coming in */ 803 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 804 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 805 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 806 807 napi_disable(&block->napi); 808 } 809 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 810 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 811 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 812 813 napi_disable(&block->napi); 814 } 815 816 /* Stop tx queues */ 817 netif_tx_disable(priv->dev); 818 819 gve_clear_napi_enabled(priv); 820 } 821 822 static void gve_turnup(struct gve_priv *priv) 823 { 824 int idx; 825 826 /* Start the tx queues */ 827 netif_tx_start_all_queues(priv->dev); 828 829 /* Enable napi and unmask interrupts for all queues */ 830 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 831 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 832 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 833 834 napi_enable(&block->napi); 835 iowrite32be(0, gve_irq_doorbell(priv, block)); 836 } 837 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 838 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 839 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 840 841 napi_enable(&block->napi); 842 iowrite32be(0, gve_irq_doorbell(priv, block)); 843 } 844 845 gve_set_napi_enabled(priv); 846 } 847 848 static void gve_tx_timeout(struct net_device *dev) 849 { 850 struct gve_priv *priv = netdev_priv(dev); 851 852 gve_schedule_reset(priv); 853 priv->tx_timeo_cnt++; 854 } 855 856 static const struct net_device_ops gve_netdev_ops = { 857 .ndo_start_xmit = gve_tx, 858 .ndo_open = gve_open, 859 .ndo_stop = gve_close, 860 .ndo_get_stats64 = gve_get_stats, 861 .ndo_tx_timeout = gve_tx_timeout, 862 }; 863 864 static void gve_handle_status(struct gve_priv *priv, u32 status) 865 { 866 if (GVE_DEVICE_STATUS_RESET_MASK & status) { 867 dev_info(&priv->pdev->dev, "Device requested reset.\n"); 868 gve_set_do_reset(priv); 869 } 870 } 871 872 static void gve_handle_reset(struct gve_priv *priv) 873 { 874 /* A service task will be scheduled at the end of probe to catch any 875 * resets that need to happen, and we don't want to reset until 876 * probe is done. 877 */ 878 if (gve_get_probe_in_progress(priv)) 879 return; 880 881 if (gve_get_do_reset(priv)) { 882 rtnl_lock(); 883 gve_reset(priv, false); 884 rtnl_unlock(); 885 } 886 } 887 888 /* Handle NIC status register changes and reset requests */ 889 static void gve_service_task(struct work_struct *work) 890 { 891 struct gve_priv *priv = container_of(work, struct gve_priv, 892 service_task); 893 894 gve_handle_status(priv, 895 ioread32be(&priv->reg_bar0->device_status)); 896 897 gve_handle_reset(priv); 898 } 899 900 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 901 { 902 int num_ntfy; 903 int err; 904 905 /* Set up the adminq */ 906 err = gve_adminq_alloc(&priv->pdev->dev, priv); 907 if (err) { 908 dev_err(&priv->pdev->dev, 909 "Failed to alloc admin queue: err=%d\n", err); 910 return err; 911 } 912 913 if (skip_describe_device) 914 goto setup_device; 915 916 /* Get the initial information we need from the device */ 917 err = gve_adminq_describe_device(priv); 918 if (err) { 919 dev_err(&priv->pdev->dev, 920 "Could not get device information: err=%d\n", err); 921 goto err; 922 } 923 if (priv->dev->max_mtu > PAGE_SIZE) { 924 priv->dev->max_mtu = PAGE_SIZE; 925 err = gve_adminq_set_mtu(priv, priv->dev->mtu); 926 if (err) { 927 netif_err(priv, drv, priv->dev, "Could not set mtu"); 928 goto err; 929 } 930 } 931 priv->dev->mtu = priv->dev->max_mtu; 932 num_ntfy = pci_msix_vec_count(priv->pdev); 933 if (num_ntfy <= 0) { 934 dev_err(&priv->pdev->dev, 935 "could not count MSI-x vectors: err=%d\n", num_ntfy); 936 err = num_ntfy; 937 goto err; 938 } else if (num_ntfy < GVE_MIN_MSIX) { 939 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n", 940 GVE_MIN_MSIX, num_ntfy); 941 err = -EINVAL; 942 goto err; 943 } 944 945 priv->num_registered_pages = 0; 946 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 947 /* gvnic has one Notification Block per MSI-x vector, except for the 948 * management vector 949 */ 950 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 951 priv->mgmt_msix_idx = priv->num_ntfy_blks; 952 953 priv->tx_cfg.max_queues = 954 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 955 priv->rx_cfg.max_queues = 956 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 957 958 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 959 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 960 if (priv->default_num_queues > 0) { 961 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 962 priv->tx_cfg.num_queues); 963 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 964 priv->rx_cfg.num_queues); 965 } 966 967 netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n", 968 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 969 netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n", 970 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 971 972 setup_device: 973 err = gve_setup_device_resources(priv); 974 if (!err) 975 return 0; 976 err: 977 gve_adminq_free(&priv->pdev->dev, priv); 978 return err; 979 } 980 981 static void gve_teardown_priv_resources(struct gve_priv *priv) 982 { 983 gve_teardown_device_resources(priv); 984 gve_adminq_free(&priv->pdev->dev, priv); 985 } 986 987 static void gve_trigger_reset(struct gve_priv *priv) 988 { 989 /* Reset the device by releasing the AQ */ 990 gve_adminq_release(priv); 991 } 992 993 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up) 994 { 995 gve_trigger_reset(priv); 996 /* With the reset having already happened, close cannot fail */ 997 if (was_up) 998 gve_close(priv->dev); 999 gve_teardown_priv_resources(priv); 1000 } 1001 1002 static int gve_reset_recovery(struct gve_priv *priv, bool was_up) 1003 { 1004 int err; 1005 1006 err = gve_init_priv(priv, true); 1007 if (err) 1008 goto err; 1009 if (was_up) { 1010 err = gve_open(priv->dev); 1011 if (err) 1012 goto err; 1013 } 1014 return 0; 1015 err: 1016 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n"); 1017 gve_turndown(priv); 1018 return err; 1019 } 1020 1021 int gve_reset(struct gve_priv *priv, bool attempt_teardown) 1022 { 1023 bool was_up = netif_carrier_ok(priv->dev); 1024 int err; 1025 1026 dev_info(&priv->pdev->dev, "Performing reset\n"); 1027 gve_clear_do_reset(priv); 1028 gve_set_reset_in_progress(priv); 1029 /* If we aren't attempting to teardown normally, just go turndown and 1030 * reset right away. 1031 */ 1032 if (!attempt_teardown) { 1033 gve_turndown(priv); 1034 gve_reset_and_teardown(priv, was_up); 1035 } else { 1036 /* Otherwise attempt to close normally */ 1037 if (was_up) { 1038 err = gve_close(priv->dev); 1039 /* If that fails reset as we did above */ 1040 if (err) 1041 gve_reset_and_teardown(priv, was_up); 1042 } 1043 /* Clean up any remaining resources */ 1044 gve_teardown_priv_resources(priv); 1045 } 1046 1047 /* Set it all back up */ 1048 err = gve_reset_recovery(priv, was_up); 1049 gve_clear_reset_in_progress(priv); 1050 return err; 1051 } 1052 1053 static void gve_write_version(u8 __iomem *driver_version_register) 1054 { 1055 const char *c = gve_version_prefix; 1056 1057 while (*c) { 1058 writeb(*c, driver_version_register); 1059 c++; 1060 } 1061 1062 c = gve_version_str; 1063 while (*c) { 1064 writeb(*c, driver_version_register); 1065 c++; 1066 } 1067 writeb('\n', driver_version_register); 1068 } 1069 1070 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1071 { 1072 int max_tx_queues, max_rx_queues; 1073 struct net_device *dev; 1074 __be32 __iomem *db_bar; 1075 struct gve_registers __iomem *reg_bar; 1076 struct gve_priv *priv; 1077 int err; 1078 1079 err = pci_enable_device(pdev); 1080 if (err) 1081 return -ENXIO; 1082 1083 err = pci_request_regions(pdev, "gvnic-cfg"); 1084 if (err) 1085 goto abort_with_enabled; 1086 1087 pci_set_master(pdev); 1088 1089 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 1090 if (err) { 1091 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); 1092 goto abort_with_pci_region; 1093 } 1094 1095 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 1096 if (err) { 1097 dev_err(&pdev->dev, 1098 "Failed to set consistent dma mask: err=%d\n", err); 1099 goto abort_with_pci_region; 1100 } 1101 1102 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 1103 if (!reg_bar) { 1104 dev_err(&pdev->dev, "Failed to map pci bar!\n"); 1105 err = -ENOMEM; 1106 goto abort_with_pci_region; 1107 } 1108 1109 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0); 1110 if (!db_bar) { 1111 dev_err(&pdev->dev, "Failed to map doorbell bar!\n"); 1112 err = -ENOMEM; 1113 goto abort_with_reg_bar; 1114 } 1115 1116 gve_write_version(®_bar->driver_version); 1117 /* Get max queues to alloc etherdev */ 1118 max_rx_queues = ioread32be(®_bar->max_tx_queues); 1119 max_tx_queues = ioread32be(®_bar->max_rx_queues); 1120 /* Alloc and setup the netdev and priv */ 1121 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); 1122 if (!dev) { 1123 dev_err(&pdev->dev, "could not allocate netdev\n"); 1124 goto abort_with_db_bar; 1125 } 1126 SET_NETDEV_DEV(dev, &pdev->dev); 1127 pci_set_drvdata(pdev, dev); 1128 dev->ethtool_ops = &gve_ethtool_ops; 1129 dev->netdev_ops = &gve_netdev_ops; 1130 /* advertise features */ 1131 dev->hw_features = NETIF_F_HIGHDMA; 1132 dev->hw_features |= NETIF_F_SG; 1133 dev->hw_features |= NETIF_F_HW_CSUM; 1134 dev->hw_features |= NETIF_F_TSO; 1135 dev->hw_features |= NETIF_F_TSO6; 1136 dev->hw_features |= NETIF_F_TSO_ECN; 1137 dev->hw_features |= NETIF_F_RXCSUM; 1138 dev->hw_features |= NETIF_F_RXHASH; 1139 dev->features = dev->hw_features; 1140 dev->watchdog_timeo = 5 * HZ; 1141 dev->min_mtu = ETH_MIN_MTU; 1142 netif_carrier_off(dev); 1143 1144 priv = netdev_priv(dev); 1145 priv->dev = dev; 1146 priv->pdev = pdev; 1147 priv->msg_enable = DEFAULT_MSG_LEVEL; 1148 priv->reg_bar0 = reg_bar; 1149 priv->db_bar2 = db_bar; 1150 priv->service_task_flags = 0x0; 1151 priv->state_flags = 0x0; 1152 1153 gve_set_probe_in_progress(priv); 1154 priv->gve_wq = alloc_ordered_workqueue("gve", 0); 1155 if (!priv->gve_wq) { 1156 dev_err(&pdev->dev, "Could not allocate workqueue"); 1157 err = -ENOMEM; 1158 goto abort_with_netdev; 1159 } 1160 INIT_WORK(&priv->service_task, gve_service_task); 1161 priv->tx_cfg.max_queues = max_tx_queues; 1162 priv->rx_cfg.max_queues = max_rx_queues; 1163 1164 err = gve_init_priv(priv, false); 1165 if (err) 1166 goto abort_with_wq; 1167 1168 err = register_netdev(dev); 1169 if (err) 1170 goto abort_with_wq; 1171 1172 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); 1173 gve_clear_probe_in_progress(priv); 1174 queue_work(priv->gve_wq, &priv->service_task); 1175 return 0; 1176 1177 abort_with_wq: 1178 destroy_workqueue(priv->gve_wq); 1179 1180 abort_with_netdev: 1181 free_netdev(dev); 1182 1183 abort_with_db_bar: 1184 pci_iounmap(pdev, db_bar); 1185 1186 abort_with_reg_bar: 1187 pci_iounmap(pdev, reg_bar); 1188 1189 abort_with_pci_region: 1190 pci_release_regions(pdev); 1191 1192 abort_with_enabled: 1193 pci_disable_device(pdev); 1194 return -ENXIO; 1195 } 1196 1197 static void gve_remove(struct pci_dev *pdev) 1198 { 1199 struct net_device *netdev = pci_get_drvdata(pdev); 1200 struct gve_priv *priv = netdev_priv(netdev); 1201 __be32 __iomem *db_bar = priv->db_bar2; 1202 void __iomem *reg_bar = priv->reg_bar0; 1203 1204 unregister_netdev(netdev); 1205 gve_teardown_priv_resources(priv); 1206 destroy_workqueue(priv->gve_wq); 1207 free_netdev(netdev); 1208 pci_iounmap(pdev, db_bar); 1209 pci_iounmap(pdev, reg_bar); 1210 pci_release_regions(pdev); 1211 pci_disable_device(pdev); 1212 } 1213 1214 static const struct pci_device_id gve_id_table[] = { 1215 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, 1216 { } 1217 }; 1218 1219 static struct pci_driver gvnic_driver = { 1220 .name = "gvnic", 1221 .id_table = gve_id_table, 1222 .probe = gve_probe, 1223 .remove = gve_remove, 1224 }; 1225 1226 module_pci_driver(gvnic_driver); 1227 1228 MODULE_DEVICE_TABLE(pci, gve_id_table); 1229 MODULE_AUTHOR("Google, Inc."); 1230 MODULE_DESCRIPTION("gVNIC Driver"); 1231 MODULE_LICENSE("Dual MIT/GPL"); 1232 MODULE_VERSION(GVE_VERSION); 1233