/*
 * Copyright (C) 2016-2018 Vincenzo Maffione
 * Copyright (C) 2015 Stefano Garzarella
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * common headers
 */
#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>

#define usleep_range(_1, _2) \
        pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)

#elif defined(linux)
#include <bsd_glue.h>
#include <linux/file.h>
#include <linux/eventfd.h>
#endif

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>

/* Support for eventfd-based notifications. */
#if defined(linux)
#define SYNC_KLOOP_POLL
#endif

/* Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
static inline void
sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
                        uint32_t hwtail)
{
        /* Issue a first store-store barrier to make sure writes to the
         * netmap ring do not overcome updates on ktoa->hwcur and ktoa->hwtail. */
        nm_stst_barrier();

        /*
         * The same scheme used in nm_sync_kloop_appl_write() applies here.
         * We allow the application to read a value of hwcur more recent than
         * the value of hwtail, since this would anyway result in a consistent
         * view of the ring state (and hwcur can never wraparound hwtail, since
         * hwcur must be behind head).
         *
         * The following memory barrier scheme is used to make this happen:
         *
         *          Kernel                 Application
         *
         *          STORE(hwcur)           LOAD(hwtail)
         *          wmb() <------------->  rmb()
         *          STORE(hwtail)          LOAD(hwcur)
         */
        CSB_WRITE(ptr, hwcur, hwcur);
        nm_stst_barrier();
        CSB_WRITE(ptr, hwtail, hwtail);
}
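
/*
 * For reference, the application-side counterpart of the routine above is
 * nm_sync_kloop_appl_read() (declared in net/netmap.h and used by
 * netmap_pt_guest_txsync()/rxsync() below). It performs the mirror-image
 * loads, roughly sketched as (illustrative only, not the authoritative
 * definition):
 *
 *     hwtail = ktoa->hwtail;
 *     nm_ldld_barrier();
 *     hwcur = ktoa->hwcur;
 *
 * so that the application can never observe a value of hwtail more recent
 * than the value of hwcur it reads afterwards.
 */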

/* Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
static inline void
sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
                       struct netmap_ring *shadow_ring,
                       uint32_t num_slots)
{
        /*
         * We place a memory barrier to make sure that the update of head never
         * overtakes the update of cur.
         * (see explanation in sync_kloop_kernel_write).
         */
        CSB_READ(ptr, head, shadow_ring->head);
        nm_ldld_barrier();
        CSB_READ(ptr, cur, shadow_ring->cur);
        CSB_READ(ptr, sync_flags, shadow_ring->flags);

        /* Make sure that loads from atok->head and atok->cur are not delayed
         * after the loads from the netmap ring. */
        nm_ldld_barrier();
}

/* Enable or disable application --> kernel kicks. */
static inline void
csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
{
        CSB_WRITE(csb_ktoa, kern_need_kick, val);
}

#ifdef SYNC_KLOOP_POLL
/* Are application interrupts enabled or disabled? */
static inline uint32_t
csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
{
        uint32_t v;

        CSB_READ(csb_atok, appl_need_kick, v);

        return v;
}
#endif /* SYNC_KLOOP_POLL */

static inline void
sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
{
        nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
                 "rcur %d, rtail %d, hwtail %d",
                 title, kring->name, kring->nr_hwcur, kring->rhead,
                 kring->rcur, kring->rtail, kring->nr_hwtail);
}

/* Arguments for netmap_sync_kloop_tx_ring() and
 * netmap_sync_kloop_rx_ring().
 */
struct sync_kloop_ring_args {
        struct netmap_kring *kring;
        struct nm_csb_atok *csb_atok;
        struct nm_csb_ktoa *csb_ktoa;
#ifdef SYNC_KLOOP_POLL
        struct eventfd_ctx *irq_ctx;
#endif /* SYNC_KLOOP_POLL */
        /* Are we busy waiting rather than using a schedule() loop? */
        bool busy_wait;
        /* Are we processing in the context of a VM exit? */
        bool direct;
};
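
/*
 * Both ring processing routines below follow the same structure: disable
 * application kicks (unless running in direct mode), copy the application
 * pointers (head, cur, flags) from the CSB into a shadow ring, run the
 * prologue and kring->nm_sync(), publish the updated hwcur/hwtail back to the
 * CSB, optionally signal the application eventfd, and finally re-read the CSB
 * to decide whether to iterate again or re-enable kicks and stop (using a
 * store-load barrier to close the race with a concurrent application update).
 */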

static void
netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
{
        struct netmap_kring *kring = a->kring;
        struct nm_csb_atok *csb_atok = a->csb_atok;
        struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
        struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
        bool more_txspace = false;
        uint32_t num_slots;
        int batch;

        if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
                return;
        }

        num_slots = kring->nkr_num_slots;

        /* Disable application --> kernel notifications. */
        if (!a->direct) {
                csb_ktoa_kick_enable(csb_ktoa, 0);
        }
        /* Copy the application kring pointers from the CSB. */
        sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

        for (;;) {
                batch = shadow_ring.head - kring->nr_hwcur;
                if (batch < 0)
                        batch += num_slots;

#ifdef PTN_TX_BATCH_LIM
                if (batch > PTN_TX_BATCH_LIM(num_slots)) {
                        /* If the application moves ahead too fast, let's cut
                         * the move so that we don't exceed our batch limit. */
                        uint32_t head_lim = kring->nr_hwcur +
                                            PTN_TX_BATCH_LIM(num_slots);

                        if (head_lim >= num_slots)
                                head_lim -= num_slots;
                        nm_prdis(1, "batch: %d head: %d head_lim: %d", batch,
                                 shadow_ring.head, head_lim);
                        shadow_ring.head = head_lim;
                        batch = PTN_TX_BATCH_LIM(num_slots);
                }
#endif /* PTN_TX_BATCH_LIM */

                if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
                        shadow_ring.flags |= NAF_FORCE_RECLAIM;
                }

                /* Netmap prologue */
                shadow_ring.tail = kring->rtail;
                if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
                        /* Reinit ring and enable notifications. */
                        netmap_ring_reinit(kring);
                        if (!a->busy_wait) {
                                csb_ktoa_kick_enable(csb_ktoa, 1);
                        }
                        break;
                }

                if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
                        sync_kloop_kring_dump("pre txsync", kring);
                }

                if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
                        if (!a->busy_wait) {
                                /* Re-enable notifications. */
                                csb_ktoa_kick_enable(csb_ktoa, 1);
                        }
                        nm_prerr("txsync() failed");
                        break;
                }

                /*
                 * Finalize.
                 * Copy kernel hwcur and hwtail into the CSB for the application
                 * sync(), and do the nm_sync_finalize.
                 */
                sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
                                        kring->nr_hwtail);
                if (kring->rtail != kring->nr_hwtail) {
                        /* Some more room available in the parent adapter. */
                        kring->rtail = kring->nr_hwtail;
                        more_txspace = true;
                }

                if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
                        sync_kloop_kring_dump("post txsync", kring);
                }

                /* Interrupt the application if needed. */
#ifdef SYNC_KLOOP_POLL
                if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
                        /* We could disable kernel --> application kicks here,
                         * to avoid spurious interrupts. */
                        eventfd_signal(a->irq_ctx, 1);
                        more_txspace = false;
                }
#endif /* SYNC_KLOOP_POLL */

                /* Read CSB to see if there is more work to do. */
                sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
                if (shadow_ring.head == kring->rhead) {
                        if (a->busy_wait) {
                                break;
                        }
                        /*
                         * No more packets to transmit. We enable notifications
                         * and go to sleep, waiting for a kick from the
                         * application when new slots are ready for
                         * transmission.
                         */
                        /* Re-enable notifications. */
                        csb_ktoa_kick_enable(csb_ktoa, 1);
                        /* Double check, with store-load memory barrier. */
                        nm_stld_barrier();
                        sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
                        if (shadow_ring.head != kring->rhead) {
                                /* We won the race condition, there are more
                                 * packets to transmit. Disable notifications
                                 * and do another cycle. */
                                csb_ktoa_kick_enable(csb_ktoa, 0);
                                continue;
                        }
                        break;
                }

                if (nm_kr_txempty(kring)) {
                        /* No more available TX slots. Stop processing and wait
                         * for a notification from the backend (netmap_tx_irq). */
                        nm_prdis(1, "TX ring");
                        break;
                }
        }

        nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
        if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
                eventfd_signal(a->irq_ctx, 1);
        }
#endif /* SYNC_KLOOP_POLL */
}

/* RX cycles without receiving any packets. */
#define SYNC_LOOP_RX_DRY_CYCLES_MAX     2

static inline int
sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
{
        return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
                                kring->nkr_num_slots - 1));
}
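
/*
 * Worked example for the check above (nm_prev() is defined in netmap_kern.h;
 * here it is only assumed to return the previous slot index with wraparound):
 * with a 1024-slot ring and g_head == 10, the kernel has no free RX slots left
 * once nr_hwtail reaches 9; with g_head == 0 the limit slot is 1023. In other
 * words, the kloop stops receiving when hwtail is about to catch up with the
 * application's head.
 */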

static void
netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
{
        struct netmap_kring *kring = a->kring;
        struct nm_csb_atok *csb_atok = a->csb_atok;
        struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
        struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
        int dry_cycles = 0;
        bool some_recvd = false;
        uint32_t num_slots;

        if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
                return;
        }

        num_slots = kring->nkr_num_slots;

        /* Disable notifications. */
        if (!a->direct) {
                csb_ktoa_kick_enable(csb_ktoa, 0);
        }
        /* Copy the application kring pointers from the CSB. */
        sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

        for (;;) {
                uint32_t hwtail;

                /* Netmap prologue */
                shadow_ring.tail = kring->rtail;
                if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
                        /* Reinit ring and enable notifications. */
                        netmap_ring_reinit(kring);
                        if (!a->busy_wait) {
                                csb_ktoa_kick_enable(csb_ktoa, 1);
                        }
                        break;
                }

                if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
                        sync_kloop_kring_dump("pre rxsync", kring);
                }

                if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
                        if (!a->busy_wait) {
                                /* Re-enable notifications. */
                                csb_ktoa_kick_enable(csb_ktoa, 1);
                        }
                        nm_prerr("rxsync() failed");
                        break;
                }

                /*
                 * Finalize.
                 * Copy kernel hwcur and hwtail into the CSB for the application
                 * sync().
                 */
                hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
                sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
                if (kring->rtail != hwtail) {
                        kring->rtail = hwtail;
                        some_recvd = true;
                        dry_cycles = 0;
                } else {
                        dry_cycles++;
                }

                if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
                        sync_kloop_kring_dump("post rxsync", kring);
                }

#ifdef SYNC_KLOOP_POLL
                /* Interrupt the application if needed. */
                if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
                        /* We could disable kernel --> application kicks here,
                         * to avoid spurious interrupts. */
                        eventfd_signal(a->irq_ctx, 1);
                        some_recvd = false;
                }
#endif /* SYNC_KLOOP_POLL */

                /* Read CSB to see if there is more work to do. */
                sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
                if (sync_kloop_norxslots(kring, shadow_ring.head)) {
                        if (a->busy_wait) {
                                break;
                        }
                        /*
                         * No more slots available for reception. We enable
                         * notifications and go to sleep, waiting for a kick
                         * from the application when new receive slots are
                         * available.
                         */
                        /* Re-enable notifications. */
                        csb_ktoa_kick_enable(csb_ktoa, 1);
                        /* Double check, with store-load memory barrier. */
                        nm_stld_barrier();
                        sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
                        if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
                                /* We won the race condition, more slots are
                                 * available. Disable notifications and do
                                 * another cycle. */
                                csb_ktoa_kick_enable(csb_ktoa, 0);
                                continue;
                        }
                        break;
                }

                hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
                if (unlikely(hwtail == kring->rhead ||
                             dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
                        /* No more packets to be read from the backend. We stop
                         * and wait for a notification from the backend
                         * (netmap_rx_irq). */
                        nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
                                 hwtail, kring->rhead, dry_cycles);
                        break;
                }
        }

        nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
        /* Interrupt the application if needed. */
        if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
                eventfd_signal(a->irq_ctx, 1);
        }
#endif /* SYNC_KLOOP_POLL */
}

#ifdef SYNC_KLOOP_POLL
struct sync_kloop_poll_ctx;
struct sync_kloop_poll_entry {
        /* Support for receiving notifications from
         * a netmap ring or from the application. */
        struct file *filp;
        wait_queue_t wait;
        wait_queue_head_t *wqh;

        /* Support for sending notifications to the application. */
        struct eventfd_ctx *irq_ctx;
        struct file *irq_filp;

        /* Arguments for the ring processing function. Useful
         * in case of a custom wake-up function. */
        struct sync_kloop_ring_args *args;
        struct sync_kloop_poll_ctx *parent;
};

struct sync_kloop_poll_ctx {
        poll_table wait_table;
        unsigned int next_entry;
        int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
        unsigned int num_entries;
        unsigned int num_tx_rings;
        unsigned int num_rings;
        /* The first num_tx_rings entries are for the TX kicks.
         * Then the RX kick entries follow. The last two
         * entries are for the TX irq and the RX irq. */
        struct sync_kloop_poll_entry entries[0];
};
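
/*
 * Layout example: with 2 TX rings and 3 RX rings, num_entries is 7:
 * entries[0..1] poll the TX ioeventfd kicks, entries[2..4] the RX ioeventfd
 * kicks, while entries[5] and [6] are hooked to the adapter TX and RX wait
 * queues, so that backend interrupts wake up the kloop (or, in direct mode,
 * are forwarded to the application through the irqfds).
 */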

static void
sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
                                 poll_table *pt)
{
        struct sync_kloop_poll_ctx *poll_ctx =
                container_of(pt, struct sync_kloop_poll_ctx, wait_table);
        struct sync_kloop_poll_entry *entry = poll_ctx->entries +
                poll_ctx->next_entry;

        BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
        entry->wqh = wqh;
        entry->filp = file;
        /* Use the default wake up function, unless a custom one was set. */
        if (poll_ctx->next_wake_fun == NULL) {
                init_waitqueue_entry(&entry->wait, current);
        } else {
                init_waitqueue_func_entry(&entry->wait,
                                          poll_ctx->next_wake_fun);
        }
        add_wait_queue(wqh, &entry->wait);
}

static int
sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
                            int wake_flags, void *key)
{
        struct sync_kloop_poll_entry *entry =
                container_of(wait, struct sync_kloop_poll_entry, wait);

        netmap_sync_kloop_tx_ring(entry->args);

        return 0;
}

static int
sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
                           int wake_flags, void *key)
{
        struct sync_kloop_poll_entry *entry =
                container_of(wait, struct sync_kloop_poll_entry, wait);
        struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
        int i;

        for (i = 0; i < poll_ctx->num_tx_rings; i++) {
                struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

                if (irq_ctx) {
                        eventfd_signal(irq_ctx, 1);
                }
        }

        return 0;
}

static int
sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
                            int wake_flags, void *key)
{
        struct sync_kloop_poll_entry *entry =
                container_of(wait, struct sync_kloop_poll_entry, wait);

        netmap_sync_kloop_rx_ring(entry->args);

        return 0;
}

static int
sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
                           int wake_flags, void *key)
{
        struct sync_kloop_poll_entry *entry =
                container_of(wait, struct sync_kloop_poll_entry, wait);
        struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
        int i;

        for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
                struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

                if (irq_ctx) {
                        eventfd_signal(irq_ctx, 1);
                }
        }

        return 0;
}
#endif /* SYNC_KLOOP_POLL */
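
/*
 * For reference, a rough userspace sketch of how the kloop below is driven
 * (error handling and the NETMAP_REQ_OPT_CSB/eventfd option setup done at
 * register time are omitted; the port name is only an example):
 *
 *     struct nmreq_sync_kloop_start req;
 *     struct nmreq_header hdr;
 *
 *     memset(&hdr, 0, sizeof(hdr));
 *     memset(&req, 0, sizeof(req));
 *     hdr.nr_version = NETMAP_API;
 *     hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
 *     hdr.nr_body = (uintptr_t)&req;
 *     strncpy(hdr.nr_name, "eth0", sizeof(hdr.nr_name) - 1);
 *     req.sleep_us = 100;  // only used when falling back to busy waiting
 *     ioctl(fd, NIOCCTRL, &hdr);  // blocks until the kloop is stopped
 *
 * A separate thread then issues an analogous NIOCCTRL with nr_reqtype set to
 * NETMAP_REQ_SYNC_KLOOP_STOP on the same file descriptor to make
 * netmap_sync_kloop() return.
 */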

int
netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
        struct nmreq_sync_kloop_start *req =
                (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
        struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
#ifdef SYNC_KLOOP_POLL
        struct sync_kloop_poll_ctx *poll_ctx = NULL;
#endif /* SYNC_KLOOP_POLL */
        int num_rx_rings, num_tx_rings, num_rings;
        struct sync_kloop_ring_args *args = NULL;
        uint32_t sleep_us = req->sleep_us;
        struct nm_csb_atok *csb_atok_base;
        struct nm_csb_ktoa *csb_ktoa_base;
        struct netmap_adapter *na;
        struct nmreq_option *opt;
        bool na_could_sleep = false;
        bool busy_wait = true;
        bool direct_tx = false;
        bool direct_rx = false;
        int err = 0;
        int i;

        if (sleep_us > 1000000) {
                /* We do not accept sleeping for more than a second. */
                return EINVAL;
        }

        if (priv->np_nifp == NULL) {
                return ENXIO;
        }
        mb(); /* make sure following reads are not from cache */

        na = priv->np_na;
        if (!nm_netmap_on(na)) {
                return ENXIO;
        }

        NMG_LOCK();
        /* Make sure the application is working in CSB mode. */
        if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
                NMG_UNLOCK();
                nm_prerr("sync-kloop on %s requires "
                         "NETMAP_REQ_OPT_CSB option", na->name);
                return EINVAL;
        }

        csb_atok_base = priv->np_csb_atok_base;
        csb_ktoa_base = priv->np_csb_ktoa_base;

        /* Make sure that no kloop is currently running. */
        if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
                err = EBUSY;
        }
        priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
        NMG_UNLOCK();
        if (err) {
                return err;
        }

        num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
        num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
        num_rings = num_tx_rings + num_rx_rings;

        args = nm_os_malloc(num_rings * sizeof(args[0]));
        if (!args) {
                err = ENOMEM;
                goto out;
        }

        /* Prepare the arguments for netmap_sync_kloop_tx_ring()
         * and netmap_sync_kloop_rx_ring(). */
        for (i = 0; i < num_tx_rings; i++) {
                struct sync_kloop_ring_args *a = args + i;

                a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
                a->csb_atok = csb_atok_base + i;
                a->csb_ktoa = csb_ktoa_base + i;
                a->busy_wait = busy_wait;
                a->direct = direct_tx;
        }
        for (i = 0; i < num_rx_rings; i++) {
                struct sync_kloop_ring_args *a = args + num_tx_rings + i;

                a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
                a->csb_atok = csb_atok_base + num_tx_rings + i;
                a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
                a->busy_wait = busy_wait;
                a->direct = direct_rx;
        }

        /* Validate notification options. */
        opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
                               NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
        if (opt != NULL) {
                struct nmreq_opt_sync_kloop_mode *mode_opt =
                        (struct nmreq_opt_sync_kloop_mode *)opt;

                direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
                direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
                if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
                                       NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
                        opt->nro_status = err = EINVAL;
                        goto out;
                }
                opt->nro_status = 0;
        }
        opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
                               NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
        if (opt != NULL) {
                err = nmreq_checkduplicate(opt);
                if (err) {
                        opt->nro_status = err;
                        goto out;
                }
                if (opt->nro_size != sizeof(*eventfds_opt) +
                    sizeof(eventfds_opt->eventfds[0]) * num_rings) {
                        /* Option size not consistent with the number of
                         * entries. */
                        opt->nro_status = err = EINVAL;
                        goto out;
                }
#ifdef SYNC_KLOOP_POLL
                eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
                opt->nro_status = 0;

                /* Check if some ioeventfd entry is not defined, and force
                 * sleep synchronization in that case. */
                busy_wait = false;
                for (i = 0; i < num_rings; i++) {
                        if (eventfds_opt->eventfds[i].ioeventfd < 0) {
                                busy_wait = true;
                                break;
                        }
                }

                if (busy_wait && (direct_tx || direct_rx)) {
                        /* For direct processing we need all the
                         * ioeventfds to be valid. */
                        opt->nro_status = err = EINVAL;
                        goto out;
                }
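
                /* The eventfds option carries one entry per ring (TX rings
                 * first, then RX rings, matching the CSB layout): ioeventfd
                 * is the descriptor the kloop polls for application -->
                 * kernel kicks, while irqfd is the one it signals for
                 * kernel --> application notifications. A negative
                 * descriptor means "not used". */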

                /* We need 2 poll entries for TX and RX notifications coming
                 * from the netmap adapter, plus one entry per ring for the
                 * notifications coming from the application. */
                poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
                                (num_rings + 2) * sizeof(poll_ctx->entries[0]));
                init_poll_funcptr(&poll_ctx->wait_table,
                                  sync_kloop_poll_table_queue_proc);
                poll_ctx->num_entries = 2 + num_rings;
                poll_ctx->num_tx_rings = num_tx_rings;
                poll_ctx->num_rings = num_rings;
                poll_ctx->next_entry = 0;
                poll_ctx->next_wake_fun = NULL;

                if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
                        /* In direct mode, VALE txsync is called from
                         * wake-up context, where it is not possible
                         * to sleep.
                         */
                        na->na_flags &= ~NAF_BDG_MAYSLEEP;
                        na_could_sleep = true;
                }

                for (i = 0; i < num_rings + 2; i++) {
                        poll_ctx->entries[i].args = args + i;
                        poll_ctx->entries[i].parent = poll_ctx;
                }

                /* Poll for notifications coming from the application through
                 * eventfds. */
                for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
                        struct eventfd_ctx *irq = NULL;
                        struct file *filp = NULL;
                        unsigned long mask;
                        bool tx_ring = (i < num_tx_rings);

                        if (eventfds_opt->eventfds[i].irqfd >= 0) {
                                filp = eventfd_fget(
                                                eventfds_opt->eventfds[i].irqfd);
                                if (IS_ERR(filp)) {
                                        err = PTR_ERR(filp);
                                        goto out;
                                }
                                irq = eventfd_ctx_fileget(filp);
                                if (IS_ERR(irq)) {
                                        err = PTR_ERR(irq);
                                        goto out;
                                }
                        }
                        poll_ctx->entries[i].irq_filp = filp;
                        poll_ctx->entries[i].irq_ctx = irq;
                        poll_ctx->entries[i].args->busy_wait = busy_wait;
                        /* Don't let netmap_sync_kloop_*x_ring() use
                         * IRQs in direct mode. */
                        poll_ctx->entries[i].args->irq_ctx =
                                ((tx_ring && direct_tx) ||
                                 (!tx_ring && direct_rx)) ? NULL :
                                poll_ctx->entries[i].irq_ctx;
                        poll_ctx->entries[i].args->direct =
                                (tx_ring ? direct_tx : direct_rx);

                        if (!busy_wait) {
                                filp = eventfd_fget(
                                                eventfds_opt->eventfds[i].ioeventfd);
                                if (IS_ERR(filp)) {
                                        err = PTR_ERR(filp);
                                        goto out;
                                }
                                if (tx_ring && direct_tx) {
                                        /* Override the wake up function
                                         * so that it can directly call
                                         * netmap_sync_kloop_tx_ring().
                                         */
                                        poll_ctx->next_wake_fun =
                                                sync_kloop_tx_kick_wake_fun;
                                } else if (!tx_ring && direct_rx) {
                                        /* Same for direct RX. */
                                        poll_ctx->next_wake_fun =
                                                sync_kloop_rx_kick_wake_fun;
                                } else {
                                        poll_ctx->next_wake_fun = NULL;
                                }
                                mask = filp->f_op->poll(filp,
                                                        &poll_ctx->wait_table);
                                if (mask & POLLERR) {
                                        err = EINVAL;
                                        goto out;
                                }
                        }
                }

                /* Poll for notifications coming from the netmap rings bound to
                 * this file descriptor. */
                if (!busy_wait) {
                        NMG_LOCK();
                        /* In direct mode, override the wake up function so
                         * that it can forward the netmap_tx_irq() to the
                         * guest. */
                        poll_ctx->next_wake_fun = direct_tx ?
                                sync_kloop_tx_irq_wake_fun : NULL;
                        poll_wait(priv->np_filp, priv->np_si[NR_TX],
                                  &poll_ctx->wait_table);
                        poll_ctx->next_entry++;

                        poll_ctx->next_wake_fun = direct_rx ?
                                sync_kloop_rx_irq_wake_fun : NULL;
                        poll_wait(priv->np_filp, priv->np_si[NR_RX],
                                  &poll_ctx->wait_table);
                        poll_ctx->next_entry++;
                        NMG_UNLOCK();
                }
#else  /* !SYNC_KLOOP_POLL */
                opt->nro_status = EOPNOTSUPP;
                goto out;
#endif /* SYNC_KLOOP_POLL */
        }

        nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
                 "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
                 na_could_sleep);
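
        /* Two synchronization strategies are possible at this point: if
         * busy_wait is set (no eventfds option, or at least one missing
         * ioeventfd), the loop below simply scans all rings and then sleeps
         * for sleep_us microseconds; otherwise it blocks in schedule_timeout()
         * and is woken up by the eventfd and netmap wait queues registered
         * above (with the direct variants doing the ring processing straight
         * from the wake-up callbacks). */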

        /* Main loop. */
        for (;;) {
                if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) &
                             NM_SYNC_KLOOP_STOPPING)) {
                        break;
                }

#ifdef SYNC_KLOOP_POLL
                if (!busy_wait) {
                        /* It is important to set the task state as
                         * interruptible before processing any TX/RX ring,
                         * so that if a notification on ring Y comes after
                         * we have processed ring Y, but before we call
                         * schedule(), we don't miss it. This is true because
                         * the wake up function will change the task state,
                         * and therefore the schedule_timeout() call below
                         * will observe the change.
                         */
                        set_current_state(TASK_INTERRUPTIBLE);
                }
#endif /* SYNC_KLOOP_POLL */

                /* Process all the TX rings bound to this file descriptor. */
                for (i = 0; !direct_tx && i < num_tx_rings; i++) {
                        struct sync_kloop_ring_args *a = args + i;
                        netmap_sync_kloop_tx_ring(a);
                }

                /* Process all the RX rings bound to this file descriptor. */
                for (i = 0; !direct_rx && i < num_rx_rings; i++) {
                        struct sync_kloop_ring_args *a = args + num_tx_rings + i;
                        netmap_sync_kloop_rx_ring(a);
                }

                if (busy_wait) {
                        /* Default synchronization method: sleep for a while. */
                        usleep_range(sleep_us, sleep_us);
                }
#ifdef SYNC_KLOOP_POLL
                else {
                        /* Yield to the scheduler waiting for a notification
                         * to come either from netmap or the application. */
                        schedule_timeout(msecs_to_jiffies(3000));
                }
#endif /* SYNC_KLOOP_POLL */
        }
out:
#ifdef SYNC_KLOOP_POLL
        if (poll_ctx) {
                /* Stop polling from netmap and the eventfds, and deallocate
                 * the poll context. */
                if (!busy_wait) {
                        __set_current_state(TASK_RUNNING);
                }
                for (i = 0; i < poll_ctx->next_entry; i++) {
                        struct sync_kloop_poll_entry *entry =
                                poll_ctx->entries + i;

                        if (entry->wqh)
                                remove_wait_queue(entry->wqh, &entry->wait);
                        /* Release the reference we took on the eventfd files,
                         * but not on the netmap file descriptor, since no
                         * reference was taken on that one. */
                        if (entry->filp && entry->filp != priv->np_filp)
                                fput(entry->filp);
                        if (entry->irq_ctx)
                                eventfd_ctx_put(entry->irq_ctx);
                        if (entry->irq_filp)
                                fput(entry->irq_filp);
                }
                nm_os_free(poll_ctx);
                poll_ctx = NULL;
        }
#endif /* SYNC_KLOOP_POLL */

        if (args) {
                nm_os_free(args);
                args = NULL;
        }

        /* Reset the kloop state. */
        NMG_LOCK();
        priv->np_kloop_state = 0;
        if (na_could_sleep) {
                na->na_flags |= NAF_BDG_MAYSLEEP;
        }
        NMG_UNLOCK();

        return err;
}

int
netmap_sync_kloop_stop(struct netmap_priv_d *priv)
{
        struct netmap_adapter *na;
        bool running = true;
        int err = 0;

        if (priv->np_nifp == NULL) {
                return ENXIO;
        }
        mb(); /* make sure following reads are not from cache */

        na = priv->np_na;
        if (!nm_netmap_on(na)) {
                return ENXIO;
        }

        /* Set the kloop stopping flag. */
        NMG_LOCK();
        priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
        NMG_UNLOCK();

        /* Send a notification to the kloop, in case it is blocked in
         * schedule_timeout(). We can use either RX or TX, because the
         * kloop is waiting on both. */
        nm_os_selwakeup(priv->np_si[NR_RX]);

        /* Wait for the kloop to actually terminate. */
        while (running) {
                usleep_range(1000, 1500);
                NMG_LOCK();
                running = (NM_ACCESS_ONCE(priv->np_kloop_state)
                           & NM_SYNC_KLOOP_RUNNING);
                NMG_UNLOCK();
        }

        return err;
}

#ifdef WITH_PTNETMAP
/*
 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
 * These routines are reused across the different operating systems supported
 * by netmap.
 */

/*
 * Reconcile host and guest views of the transmit ring.
 *
 * Guest user wants to transmit packets up to the one before ring->head,
 * and guest kernel knows tx_ring->hwcur is the first packet unsent
 * by the host kernel.
 *
 * We push out as many packets as possible, and possibly
 * reclaim buffers from previously completed transmissions.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no space in the ring).
 */
bool
netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
                       struct netmap_kring *kring, int flags)
{
        bool notify = false;

        /* Disable notifications */
        atok->appl_need_kick = 0;

        /*
         * First part: tell the host to process the new packets,
         * updating the CSB.
         */
        kring->nr_hwcur = ktoa->hwcur;
        nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);

        /* Ask for a kick from the guest to the host if needed. */
        if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
                        && NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
                        (flags & NAF_FORCE_RECLAIM)) {
                atok->sync_flags = flags;
                notify = true;
        }

        /*
         * Second part: reclaim buffers for completed transmissions.
         */
        if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
                nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
                                        &kring->nr_hwcur);
        }

        /*
         * No more room in the ring for new transmissions. The user thread will
         * go to sleep and we need to be notified by the host when more free
         * space is available.
         */
        if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
                /* Re-enable notifications. */
                atok->appl_need_kick = 1;
                /* Double check, with store-load memory barrier. */
                nm_stld_barrier();
                nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
                                        &kring->nr_hwcur);
                /* If there is new free space, disable notifications. */
                if (unlikely(!nm_kr_wouldblock(kring))) {
                        atok->appl_need_kick = 0;
                }
        }

        nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
                 kring->name, atok->head, atok->cur, ktoa->hwtail,
                 kring->rhead, kring->rcur, kring->nr_hwtail);

        return notify;
}

/*
 * Reconcile host and guest view of the receive ring.
 *
 * Update hwcur/hwtail from host (reading from CSB).
 *
 * If the guest user has released buffers up to the one before ring->head, we
 * also give them to the host.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no more completed slots in the ring).
 */
bool
netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
                       struct netmap_kring *kring, int flags)
{
        bool notify = false;

        /* Disable notifications */
        atok->appl_need_kick = 0;

        /*
         * First part: import newly received packets, by updating the kring
         * hwtail to the hwtail known from the host (read from the CSB).
         * This also updates the kring hwcur.
         */
        nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
        kring->nr_kflags &= ~NKR_PENDINTR;

        /*
         * Second part: tell the host about the slots that the guest user has
         * released, by updating cur and head in the CSB.
         */
        if (kring->rhead != kring->nr_hwcur) {
                nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
        }

        /*
         * No more completed RX slots. The user thread will go to sleep and
         * we need to be notified by the host when more RX slots have been
         * completed.
         */
        if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
                /* Re-enable notifications. */
                atok->appl_need_kick = 1;
                /* Double check, with store-load memory barrier. */
                nm_stld_barrier();
                nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
                                        &kring->nr_hwcur);
                /* If there are new slots, disable notifications. */
                if (!nm_kr_wouldblock(kring)) {
                        atok->appl_need_kick = 0;
                }
        }

        /* Ask for a kick from the guest to the host if needed. */
        if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
                        && NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
                atok->sync_flags = flags;
                notify = true;
        }

        nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
                 kring->name, atok->head, atok->cur, ktoa->hwtail,
                 kring->rhead, kring->rcur, kring->nr_hwtail);

        return notify;
}

/*
 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
 */
int
ptnet_nm_krings_create(struct netmap_adapter *na)
{
        struct netmap_pt_guest_adapter *ptna =
                (struct netmap_pt_guest_adapter *)na; /* Upcast. */
        struct netmap_adapter *na_nm = &ptna->hwup.up;
        struct netmap_adapter *na_dr = &ptna->dr.up;
        int ret;

        if (ptna->backend_users) {
                return 0;
        }

        /* Create krings on the public netmap adapter. */
        ret = netmap_hw_krings_create(na_nm);
        if (ret) {
                return ret;
        }

        /* Copy krings into the netmap adapter private to the driver. */
        na_dr->tx_rings = na_nm->tx_rings;
        na_dr->rx_rings = na_nm->rx_rings;

        return 0;
}

void
ptnet_nm_krings_delete(struct netmap_adapter *na)
{
        struct netmap_pt_guest_adapter *ptna =
                (struct netmap_pt_guest_adapter *)na; /* Upcast. */
        struct netmap_adapter *na_nm = &ptna->hwup.up;
        struct netmap_adapter *na_dr = &ptna->dr.up;

        if (ptna->backend_users) {
                return;
        }

        na_dr->tx_rings = NULL;
        na_dr->rx_rings = NULL;

        netmap_hw_krings_delete(na_nm);
}

void
ptnet_nm_dtor(struct netmap_adapter *na)
{
        struct netmap_pt_guest_adapter *ptna =
                (struct netmap_pt_guest_adapter *)na;

        netmap_mem_put(ptna->dr.up.nm_mem);
        memset(&ptna->dr, 0, sizeof(ptna->dr));
        netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
}
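
/*
 * Attach routine used by ptnet drivers: it creates the pass-through memory
 * allocator (bound to the host netmap memory identified by nifp_offset and
 * memid), attaches the hardware-like netmap adapter with room for a
 * netmap_pt_guest_adapter, and initializes the additional, driver-private
 * adapter (ptna->dr) that is never exposed to netmap applications.
 */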
int
netmap_pt_guest_attach(struct netmap_adapter *arg,
                       unsigned int nifp_offset, unsigned int memid)
{
        struct netmap_pt_guest_adapter *ptna;
        struct ifnet *ifp = arg ? arg->ifp : NULL;
        int error;

        /* get allocator */
        arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
        if (arg->nm_mem == NULL)
                return ENOMEM;
        arg->na_flags |= NAF_MEM_OWNER;
        error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
        if (error)
                return error;

        /* get the netmap_pt_guest_adapter */
        ptna = (struct netmap_pt_guest_adapter *) NA(ifp);

        /* Initialize a separate pass-through netmap adapter that is going to
         * be used by the ptnet driver only, and so never exposed to netmap
         * applications. We only need a subset of the available fields. */
        memset(&ptna->dr, 0, sizeof(ptna->dr));
        ptna->dr.up.ifp = ifp;
        ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
        ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

        ptna->backend_users = 0;

        return 0;
}

#endif /* WITH_PTNETMAP */