/*
 * Copyright (C) 2016-2018 Vincenzo Maffione
 * Copyright (C) 2015 Stefano Garzarella
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * common headers
 */
#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>

#define usleep_range(_1, _2) \
	pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)

#elif defined(linux)
#include <bsd_glue.h>
#include <linux/file.h>
#include <linux/eventfd.h>
#endif

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>

/* Support for eventfd-based notifications. */
#if defined(linux)
#define SYNC_KLOOP_POLL
#endif

/* Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
static inline void
sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
			uint32_t hwtail)
{
	/* Issue a first store-store barrier to make sure that writes to the
	 * netmap ring are not reordered after the updates of ktoa->hwcur and
	 * ktoa->hwtail. */
	nm_stst_barrier();

	/*
	 * The same scheme used in nm_sync_kloop_appl_write() applies here.
	 * We allow the application to read a value of hwcur more recent than the value
	 * of hwtail, since this would anyway result in a consistent view of the
	 * ring state (and hwcur can never wrap around hwtail, since hwcur must be
	 * behind head).
	 *
	 * The following memory barrier scheme is used to make this happen:
	 *
	 *          Kernel                 Application
	 *
	 *          STORE(hwcur)           LOAD(hwtail)
	 *          wmb() <------------->  rmb()
	 *          STORE(hwtail)          LOAD(hwcur)
	 */
	CSB_WRITE(ptr, hwcur, hwcur);
	nm_stst_barrier();
	CSB_WRITE(ptr, hwtail, hwtail);
}
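
/*
 * For reference, the application-side counterpart (nm_sync_kloop_appl_read()
 * in the public netmap headers, or ptnetmap_guest_read_kring_csb() in the
 * guest drivers) performs the mirror sequence. The snippet below is an
 * illustrative sketch only, not code from this file:
 *
 *	hwtail = ktoa->hwtail;	// LOAD(hwtail)
 *	rmb();			// load-load barrier, pairs with the
 *				// store-store barrier used above
 *	hwcur  = ktoa->hwcur;	// LOAD(hwcur)
 *
 * Loading hwtail before hwcur guarantees that the application never observes
 * a hwtail more recent than the hwcur it is paired with, which is the
 * invariant the comment above relies on.
 */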

/* Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
static inline void
sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
		       struct netmap_ring *shadow_ring,
		       uint32_t num_slots)
{
	/*
	 * We place a memory barrier to make sure that the update of head never
	 * overtakes the update of cur.
	 * (see explanation in sync_kloop_kernel_write).
	 */
	CSB_READ(ptr, head, shadow_ring->head);
	nm_ldld_barrier();
	CSB_READ(ptr, cur, shadow_ring->cur);
	CSB_READ(ptr, sync_flags, shadow_ring->flags);

	/* Make sure that loads from atok->head and atok->cur are not delayed
	 * after the loads from the netmap ring. */
	nm_ldld_barrier();
}

/* Enable or disable application --> kernel kicks. */
static inline void
csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
{
	CSB_WRITE(csb_ktoa, kern_need_kick, val);
}

#ifdef SYNC_KLOOP_POLL
/* Are application interrupts enabled or disabled? */
static inline uint32_t
csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
{
	uint32_t v;

	CSB_READ(csb_atok, appl_need_kick, v);

	return v;
}
#endif /* SYNC_KLOOP_POLL */

static inline void
sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
{
	nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
		 "rcur %d, rtail %d, hwtail %d",
		 title, kring->name, kring->nr_hwcur, kring->rhead,
		 kring->rcur, kring->rtail, kring->nr_hwtail);
}

/* Arguments for netmap_sync_kloop_tx_ring() and
 * netmap_sync_kloop_rx_ring().
 */
struct sync_kloop_ring_args {
	struct netmap_kring *kring;
	struct nm_csb_atok *csb_atok;
	struct nm_csb_ktoa *csb_ktoa;
#ifdef SYNC_KLOOP_POLL
	struct eventfd_ctx *irq_ctx;
#endif /* SYNC_KLOOP_POLL */
	/* Are we busy waiting rather than using a schedule() loop? */
	bool busy_wait;
	/* Are we processing in the context of VM exit? */
	bool direct;
};
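
/*
 * Each ring bound to the file descriptor has its own nm_csb_atok/nm_csb_ktoa
 * pair. As used by netmap_sync_kloop() below, the pairs are laid out as two
 * parallel arrays starting at np_csb_atok_base and np_csb_ktoa_base, with the
 * TX rings first and the RX rings immediately after, so that ring i uses
 * csb_atok_base[i] and csb_ktoa_base[i].
 */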

static void
netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
#ifdef SYNC_KLOOP_POLL
	bool more_txspace = false;
#endif /* SYNC_KLOOP_POLL */
	uint32_t num_slots;
	int batch;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable application --> kernel notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		batch = shadow_ring.head - kring->nr_hwcur;
		if (batch < 0)
			batch += num_slots;

#ifdef PTN_TX_BATCH_LIM
		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
			/* If the application moves ahead too fast, let's cut the move
			 * so that we don't exceed our batch limit. */
			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);

			if (head_lim >= num_slots)
				head_lim -= num_slots;
			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
				 head_lim);
			shadow_ring.head = head_lim;
			batch = PTN_TX_BATCH_LIM(num_slots);
		}
#endif /* PTN_TX_BATCH_LIM */

		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
			shadow_ring.flags |= NAF_FORCE_RECLAIM;
		}

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("pre txsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Re-enable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("txsync() failed");
			break;
		}

		/*
		 * Finalize.
		 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
		 * do the nm_sync_finalize.
		 */
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
					kring->nr_hwtail);
		if (kring->rtail != kring->nr_hwtail) {
			/* Some more room available in the parent adapter. */
			kring->rtail = kring->nr_hwtail;
#ifdef SYNC_KLOOP_POLL
			more_txspace = true;
#endif /* SYNC_KLOOP_POLL */
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("post txsync", kring);
		}

		/* Interrupt the application if needed. */
#ifdef SYNC_KLOOP_POLL
		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			more_txspace = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (shadow_ring.head == kring->rhead) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more packets to transmit. We enable notifications and
			 * go to sleep, waiting for a kick from the application when
			 * new slots are ready for transmission.
			 */
			/* Re-enable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier. */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (shadow_ring.head != kring->rhead) {
				/* We won the race condition, there are more packets to
				 * transmit. Disable notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		if (nm_kr_txempty(kring)) {
			/* No more available TX slots. We stop and wait for a
			 * notification from the backend (netmap_tx_irq). */
			nm_prdis(1, "TX ring");
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}
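
/*
 * For completeness, this is roughly what the application (or the guest
 * driver) does on its side to feed the TX loop above. The snippet is an
 * illustrative sketch only, built on the public nm_sync_kloop_appl_write()
 * helper and on the eventfd passed as "ioeventfd" in the eventfds option
 * handled by netmap_sync_kloop() below:
 *
 *	// publish the new slots: STORE(cur), barrier, STORE(head)
 *	nm_sync_kloop_appl_write(atok, ring->cur, ring->head);
 *	// kick the kloop only if it asked for kicks
 *	if (ktoa->kern_need_kick) {
 *		uint64_t v = 1;
 *		write(ioeventfd, &v, sizeof(v));	// wakes up the kloop
 *	}
 *
 * The kick can be skipped when kern_need_kick is zero, because in that case
 * the kloop is already running and will pick up the new head on its next
 * CSB read.
 */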

/* Maximum number of consecutive RX cycles without receiving any packet. */
#define SYNC_LOOP_RX_DRY_CYCLES_MAX	2

static inline int
sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
{
	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
				kring->nkr_num_slots - 1));
}

static void
netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
	int dry_cycles = 0;
#ifdef SYNC_KLOOP_POLL
	bool some_recvd = false;
#endif /* SYNC_KLOOP_POLL */
	uint32_t num_slots;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		uint32_t hwtail;

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("pre rxsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Re-enable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("rxsync() failed");
			break;
		}

		/*
		 * Finalize.
		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
		 */
		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
		if (kring->rtail != hwtail) {
			kring->rtail = hwtail;
#ifdef SYNC_KLOOP_POLL
			some_recvd = true;
#endif /* SYNC_KLOOP_POLL */
			dry_cycles = 0;
		} else {
			dry_cycles++;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("post rxsync", kring);
		}

#ifdef SYNC_KLOOP_POLL
		/* Interrupt the application if needed. */
		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			some_recvd = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more slots available for reception. We enable notifications
			 * and go to sleep, waiting for a kick from the application when
			 * new receive slots are available.
			 */
			/* Re-enable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier. */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
				/* We won the race condition, more slots are available. Disable
				 * notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		if (unlikely(hwtail == kring->rhead ||
			     dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
			/* No more packets to be read from the backend. We stop and
			 * wait for a notification from the backend (netmap_rx_irq). */
			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
				 hwtail, kring->rhead, dry_cycles);
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	/* Interrupt the application if needed. */
	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}

#ifdef SYNC_KLOOP_POLL
struct sync_kloop_poll_ctx;
struct sync_kloop_poll_entry {
	/* Support for receiving notifications from
	 * a netmap ring or from the application. */
	struct file *filp;
	wait_queue_t wait;
	wait_queue_head_t *wqh;

	/* Support for sending notifications to the application. */
	struct eventfd_ctx *irq_ctx;
	struct file *irq_filp;

	/* Arguments for the ring processing function. Useful
	 * in case of custom wake-up function. */
	struct sync_kloop_ring_args *args;
	struct sync_kloop_poll_ctx *parent;
};

struct sync_kloop_poll_ctx {
	poll_table wait_table;
	unsigned int next_entry;
	int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
	unsigned int num_entries;
	unsigned int num_tx_rings;
	unsigned int num_rings;
	/* First num_tx_rings entries are for the TX kicks.
	 * Then the RX kicks entries follow. The last two
	 * entries are for the TX irq and the RX irq. */
	struct sync_kloop_poll_entry entries[0];
};

static void
sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
				 poll_table *pt)
{
	struct sync_kloop_poll_ctx *poll_ctx =
		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
						poll_ctx->next_entry;

	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
	entry->wqh = wqh;
	entry->filp = file;
	/* Use the default wake-up function, unless a custom one
	 * was specified in next_wake_fun. */
	if (poll_ctx->next_wake_fun == NULL) {
		init_waitqueue_entry(&entry->wait, current);
	} else {
		init_waitqueue_func_entry(&entry->wait,
					  poll_ctx->next_wake_fun);
	}
	add_wait_queue(wqh, &entry->wait);
}

static int
sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
			    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_tx_ring(entry->args);

	return 0;
}

static int
sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
			   int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = 0; i < poll_ctx->num_tx_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}

static int
sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
			    int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_rx_ring(entry->args);

	return 0;
}

static int
sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
			   int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}
#endif /* SYNC_KLOOP_POLL */
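
/*
 * Usage sketch (illustrative only, error handling and unrelated fields
 * omitted): an application starts the kloop from a dedicated thread, after
 * binding the port and registering a CSB with the NETMAP_REQ_OPT_CSB option.
 * Eventfds, if any, are passed through a NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS
 * option chained to the request; the NIOCCTRL ioctl then blocks inside
 * netmap_sync_kloop() until a NETMAP_REQ_SYNC_KLOOP_STOP request arrives from
 * another thread. "efds" below is a hypothetical, already filled-in
 * struct nmreq_opt_sync_kloop_eventfds:
 *
 *	struct nmreq_sync_kloop_start req = { .sleep_us = 100 };
 *	struct nmreq_header hdr;
 *
 *	memset(&hdr, 0, sizeof(hdr));
 *	hdr.nr_version = NETMAP_API;
 *	hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
 *	hdr.nr_body    = (uintptr_t)&req;
 *	hdr.nr_options = (uintptr_t)&efds;	// optional eventfds/mode chain
 *	ioctl(fd, NIOCCTRL, &hdr);		// returns when the kloop stops
 */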

int
netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
	struct nmreq_sync_kloop_start *req =
		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
#ifdef SYNC_KLOOP_POLL
	struct sync_kloop_poll_ctx *poll_ctx = NULL;
#endif /* SYNC_KLOOP_POLL */
	int num_rx_rings, num_tx_rings, num_rings;
	struct sync_kloop_ring_args *args = NULL;
	uint32_t sleep_us = req->sleep_us;
	struct nm_csb_atok *csb_atok_base;
	struct nm_csb_ktoa *csb_ktoa_base;
	struct netmap_adapter *na;
	struct nmreq_option *opt;
	bool na_could_sleep = false;
	bool busy_wait = true;
	bool direct_tx = false;
	bool direct_rx = false;
	int err = 0;
	int i;

	if (sleep_us > 1000000) {
		/* We do not accept sleeping for more than a second. */
		return EINVAL;
	}

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	NMG_LOCK();
	/* Make sure the application is working in CSB mode. */
	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
		NMG_UNLOCK();
		nm_prerr("sync-kloop on %s requires "
			 "NETMAP_REQ_OPT_CSB option", na->name);
		return EINVAL;
	}

	csb_atok_base = priv->np_csb_atok_base;
	csb_ktoa_base = priv->np_csb_ktoa_base;

	/* Make sure that no kloop is currently running. */
	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
		err = EBUSY;
	}
	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
	NMG_UNLOCK();
	if (err) {
		return err;
	}

	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
	num_rings = num_tx_rings + num_rx_rings;

	args = nm_os_malloc(num_rings * sizeof(args[0]));
	if (!args) {
		err = ENOMEM;
		goto out;
	}

	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
	 * and netmap_sync_kloop_rx_ring(). */
	for (i = 0; i < num_tx_rings; i++) {
		struct sync_kloop_ring_args *a = args + i;

		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
		a->csb_atok = csb_atok_base + i;
		a->csb_ktoa = csb_ktoa_base + i;
		a->busy_wait = busy_wait;
		a->direct = direct_tx;
	}
	for (i = 0; i < num_rx_rings; i++) {
		struct sync_kloop_ring_args *a = args + num_tx_rings + i;

		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
		a->csb_atok = csb_atok_base + num_tx_rings + i;
		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
		a->busy_wait = busy_wait;
		a->direct = direct_rx;
	}

	/* Validate notification options. */
	opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
	if (opt != NULL) {
		struct nmreq_opt_sync_kloop_mode *mode_opt =
			(struct nmreq_opt_sync_kloop_mode *)opt;

		direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
		direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
		if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
		    NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
			opt->nro_status = err = EINVAL;
			goto out;
		}
		opt->nro_status = 0;
	}
	opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
	if (opt != NULL) {
		if (opt->nro_size != sizeof(*eventfds_opt) +
			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
			/* Option size not consistent with the number of
			 * entries. */
			opt->nro_status = err = EINVAL;
			goto out;
		}
#ifdef SYNC_KLOOP_POLL
		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
		opt->nro_status = 0;

		/* Check if some ioeventfd entry is not defined, and force sleep
		 * synchronization in that case. */
		busy_wait = false;
		for (i = 0; i < num_rings; i++) {
			if (eventfds_opt->eventfds[i].ioeventfd < 0) {
				busy_wait = true;
				break;
			}
		}

		if (busy_wait && (direct_tx || direct_rx)) {
			/* For direct processing we need all the
			 * ioeventfds to be valid. */
			opt->nro_status = err = EINVAL;
			goto out;
		}

		/* We need 2 poll entries for the TX and RX notifications coming
		 * from the netmap adapter, plus one entry per ring for the
		 * notifications coming from the application. */
		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
				(num_rings + 2) * sizeof(poll_ctx->entries[0]));
		init_poll_funcptr(&poll_ctx->wait_table,
				  sync_kloop_poll_table_queue_proc);
		poll_ctx->num_entries = 2 + num_rings;
		poll_ctx->num_tx_rings = num_tx_rings;
		poll_ctx->num_rings = num_rings;
		poll_ctx->next_entry = 0;
		poll_ctx->next_wake_fun = NULL;

		if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
			/* In direct mode, VALE txsync is called from
			 * wake-up context, where it is not possible
			 * to sleep.
			 */
			na->na_flags &= ~NAF_BDG_MAYSLEEP;
			na_could_sleep = true;
		}

		for (i = 0; i < num_rings + 2; i++) {
			poll_ctx->entries[i].args = args + i;
			poll_ctx->entries[i].parent = poll_ctx;
		}

		/* Poll for notifications coming from the applications through
		 * eventfds. */
		for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
			struct eventfd_ctx *irq = NULL;
			struct file *filp = NULL;
			unsigned long mask;
			bool tx_ring = (i < num_tx_rings);

			if (eventfds_opt->eventfds[i].irqfd >= 0) {
				filp = eventfd_fget(
						eventfds_opt->eventfds[i].irqfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				irq = eventfd_ctx_fileget(filp);
				if (IS_ERR(irq)) {
					err = PTR_ERR(irq);
					goto out;
				}
			}
			poll_ctx->entries[i].irq_filp = filp;
			poll_ctx->entries[i].irq_ctx = irq;
			poll_ctx->entries[i].args->busy_wait = busy_wait;
			/* Don't let netmap_sync_kloop_*x_ring() use
			 * IRQs in direct mode. */
			poll_ctx->entries[i].args->irq_ctx =
				((tx_ring && direct_tx) ||
				 (!tx_ring && direct_rx)) ? NULL :
				poll_ctx->entries[i].irq_ctx;
			poll_ctx->entries[i].args->direct =
				(tx_ring ? direct_tx : direct_rx);

			if (!busy_wait) {
				filp = eventfd_fget(
						eventfds_opt->eventfds[i].ioeventfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				if (tx_ring && direct_tx) {
					/* Override the wake-up function
					 * so that it can directly call
					 * netmap_sync_kloop_tx_ring().
					 */
					poll_ctx->next_wake_fun =
						sync_kloop_tx_kick_wake_fun;
				} else if (!tx_ring && direct_rx) {
					/* Same for direct RX. */
					poll_ctx->next_wake_fun =
						sync_kloop_rx_kick_wake_fun;
				} else {
					poll_ctx->next_wake_fun = NULL;
				}
				mask = filp->f_op->poll(filp,
						&poll_ctx->wait_table);
				if (mask & POLLERR) {
					err = EINVAL;
					goto out;
				}
			}
		}

		/* Poll for notifications coming from the netmap rings bound to
		 * this file descriptor. */
		if (!busy_wait) {
			NMG_LOCK();
			/* In direct mode, override the wake-up function so
			 * that it can forward the netmap_tx_irq() to the
			 * guest. */
			poll_ctx->next_wake_fun = direct_tx ?
				sync_kloop_tx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_TX],
				  &poll_ctx->wait_table);
			poll_ctx->next_entry++;

			poll_ctx->next_wake_fun = direct_rx ?
				sync_kloop_rx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_RX],
				  &poll_ctx->wait_table);
			poll_ctx->next_entry++;
			NMG_UNLOCK();
		}
#else  /* SYNC_KLOOP_POLL */
		opt->nro_status = EOPNOTSUPP;
		goto out;
#endif /* SYNC_KLOOP_POLL */
	}

	nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
		 "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
		 na_could_sleep);

	/* Main loop. */
	for (;;) {
		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
			break;
		}

#ifdef SYNC_KLOOP_POLL
		if (!busy_wait) {
			/* It is important to set the task state as
			 * interruptible before processing any TX/RX ring,
			 * so that if a notification on ring Y comes after
			 * we have processed ring Y, but before we call
			 * schedule(), we don't miss it. This is true because
			 * the wake-up function will change the task state,
			 * and therefore the schedule_timeout() call below
			 * will observe the change.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
		}
#endif /* SYNC_KLOOP_POLL */

		/* Process all the TX rings bound to this file descriptor. */
		for (i = 0; !direct_tx && i < num_tx_rings; i++) {
			struct sync_kloop_ring_args *a = args + i;
			netmap_sync_kloop_tx_ring(a);
		}

		/* Process all the RX rings bound to this file descriptor. */
		for (i = 0; !direct_rx && i < num_rx_rings; i++) {
			struct sync_kloop_ring_args *a = args + num_tx_rings + i;
			netmap_sync_kloop_rx_ring(a);
		}

		if (busy_wait) {
			/* Default synchronization method: sleep for a while. */
			usleep_range(sleep_us, sleep_us);
		}
#ifdef SYNC_KLOOP_POLL
		else {
			/* Yield to the scheduler waiting for a notification
			 * to come either from netmap or the application. */
			schedule_timeout(msecs_to_jiffies(3000));
		}
#endif /* SYNC_KLOOP_POLL */
	}
out:
#ifdef SYNC_KLOOP_POLL
	if (poll_ctx) {
		/* Stop polling from netmap and the eventfds, and deallocate
		 * the poll context. */
		if (!busy_wait) {
			__set_current_state(TASK_RUNNING);
		}
		for (i = 0; i < poll_ctx->next_entry; i++) {
			struct sync_kloop_poll_entry *entry =
				poll_ctx->entries + i;

			if (entry->wqh)
				remove_wait_queue(entry->wqh, &entry->wait);
			/* We took a reference to the eventfd files, so drop
			 * it here; don't do that for the netmap file
			 * descriptor, since no reference was taken on it. */
			if (entry->filp && entry->filp != priv->np_filp)
				fput(entry->filp);
			if (entry->irq_ctx)
				eventfd_ctx_put(entry->irq_ctx);
			if (entry->irq_filp)
				fput(entry->irq_filp);
		}
		nm_os_free(poll_ctx);
		poll_ctx = NULL;
	}
#endif /* SYNC_KLOOP_POLL */

	if (args) {
		nm_os_free(args);
		args = NULL;
	}

	/* Reset the kloop state. */
	NMG_LOCK();
	priv->np_kloop_state = 0;
	if (na_could_sleep) {
		na->na_flags |= NAF_BDG_MAYSLEEP;
	}
	NMG_UNLOCK();

	return err;
}

int
netmap_sync_kloop_stop(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na;
	bool running = true;
	int err = 0;

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	/* Set the kloop stopping flag. */
	NMG_LOCK();
	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
	NMG_UNLOCK();

	/* Send a notification to the kloop, in case it is blocked in
	 * schedule_timeout(). We can use either RX or TX, because the
	 * kloop is waiting on both. */
	nm_os_selwakeup(priv->np_si[NR_RX]);

	/* Wait for the kloop to actually terminate. */
	while (running) {
		usleep_range(1000, 1500);
		NMG_LOCK();
		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
				& NM_SYNC_KLOOP_RUNNING);
		NMG_UNLOCK();
	}

	return err;
}
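
/*
 * netmap_sync_kloop_stop() is what the NETMAP_REQ_SYNC_KLOOP_STOP request
 * resolves to. A minimal usage sketch (illustrative only): from a thread
 * other than the one blocked in the start ioctl,
 *
 *	struct nmreq_header hdr;
 *
 *	memset(&hdr, 0, sizeof(hdr));
 *	hdr.nr_version = NETMAP_API;
 *	hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_STOP;
 *	ioctl(fd, NIOCCTRL, &hdr);	// the start ioctl then returns
 */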

#ifdef WITH_PTNETMAP
/*
 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
 * These routines are reused across the different operating systems supported
 * by netmap.
 */

/*
 * Reconcile host and guest views of the transmit ring.
 *
 * Guest user wants to transmit packets up to the one before ring->head,
 * and guest kernel knows tx_ring->hwcur is the first packet unsent
 * by the host kernel.
 *
 * We push out as many packets as possible, and possibly
 * reclaim buffers from previously completed transmissions.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no space in the ring).
 */
bool
netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
		       struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: tell the host to process the new packets,
	 * updating the CSB.
	 */
	kring->nr_hwcur = ktoa->hwcur;
	nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);

	/* Ask for a kick from the guest to the host if needed. */
	if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
			(flags & NAF_FORCE_RECLAIM)) {
		atok->sync_flags = flags;
		notify = true;
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
	}

	/*
	 * No more room in the ring for new transmissions. The user thread will
	 * go to sleep and we need to be notified by the host when more free
	 * space is available.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Re-enable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier. */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
		/* If there is new free space, disable notifications */
		if (unlikely(!nm_kr_wouldblock(kring))) {
			atok->appl_need_kick = 0;
		}
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		 kring->name, atok->head, atok->cur, ktoa->hwtail,
		 kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}
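
/*
 * The return value tells the caller whether the host must be kicked. A ptnet
 * driver txsync callback is typically a thin wrapper around this helper; a
 * minimal sketch follows (illustrative only: the kick primitive and the way
 * the per-ring CSB pointers are obtained depend on the actual driver):
 *
 *	static int
 *	ptnet_nm_txsync_sketch(struct netmap_kring *kring, int flags)
 *	{
 *		// csb_atok/csb_ktoa: per-ring CSB entries owned by the driver
 *		bool notify = netmap_pt_guest_txsync(csb_atok, csb_ktoa,
 *						     kring, flags);
 *
 *		if (notify) {
 *			// write to the device doorbell/ioeventfd of this ring
 *			ptnet_kick_sketch(kring->ring_id);
 *		}
 *		return 0;
 *	}
 */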

/*
 * Reconcile host and guest view of the receive ring.
 *
 * Update hwcur/hwtail from host (reading from CSB).
 *
 * If guest user has released buffers up to the one before ring->head, we
 * also give them to the host.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no more completed slots in the ring).
 */
bool
netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
		       struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: import newly received packets, by updating the kring
	 * hwtail to the hwtail known from the host (read from the CSB).
	 * This also updates the kring hwcur.
	 */
	nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
	kring->nr_kflags &= ~NKR_PENDINTR;

	/*
	 * Second part: tell the host about the slots that guest user has
	 * released, by updating cur and head in the CSB.
	 */
	if (kring->rhead != kring->nr_hwcur) {
		nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
	}

	/*
	 * No more completed RX slots. The user thread will go to sleep and
	 * we need to be notified by the host when more RX slots have been
	 * completed.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Re-enable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier. */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
					&kring->nr_hwcur);
		/* If there are new slots, disable notifications. */
		if (!nm_kr_wouldblock(kring)) {
			atok->appl_need_kick = 0;
		}
	}

	/* Ask for a kick from the guest to the host if needed. */
	if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
		atok->sync_flags = flags;
		notify = true;
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		 kring->name, atok->head, atok->cur, ktoa->hwtail,
		 kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}

/*
 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
 */
int
ptnet_nm_krings_create(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;
	int ret;

	if (ptna->backend_users) {
		return 0;
	}

	/* Create krings on the public netmap adapter. */
	ret = netmap_hw_krings_create(na_nm);
	if (ret) {
		return ret;
	}

	/* Copy krings into the netmap adapter private to the driver. */
	na_dr->tx_rings = na_nm->tx_rings;
	na_dr->rx_rings = na_nm->rx_rings;

	return 0;
}

void
ptnet_nm_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;

	if (ptna->backend_users) {
		return;
	}

	na_dr->tx_rings = NULL;
	na_dr->rx_rings = NULL;

	netmap_hw_krings_delete(na_nm);
}

void
ptnet_nm_dtor(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na;

	netmap_mem_put(ptna->dr.up.nm_mem);
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
}

int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		       unsigned int nifp_offset, unsigned int memid)
{
	struct netmap_pt_guest_adapter *ptna;
	struct ifnet *ifp = arg ? arg->ifp : NULL;
	int error;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
	if (error)
		return error;

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
	 * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_users = 0;

	return 0;
}

#endif /* WITH_PTNETMAP */