/*
 * Copyright (C) 2016-2018 Vincenzo Maffione
 * Copyright (C) 2015 Stefano Garzarella
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * common headers
 */
#if defined(__FreeBSD__)

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>

#define usleep_range(_1, _2) \
	pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)

#elif defined(linux)
#include <bsd_glue.h>
#include <linux/file.h>
#include <linux/eventfd.h>
#endif

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>

/* Support for eventfd-based notifications. */
#if defined(linux)
#define SYNC_KLOOP_POLL
#endif

/* Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
static inline void
sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
		uint32_t hwtail)
{
	/* Issue a first store-store barrier to make sure writes to the
	 * netmap ring do not overcome updates on ktoa->hwcur and ktoa->hwtail. */
	nm_stst_barrier();

	/*
	 * The same scheme used in nm_sync_kloop_appl_write() applies here.
	 * We allow the application to read a value of hwcur more recent than
	 * the value of hwtail, since this would anyway result in a consistent
	 * view of the ring state (and hwcur can never wraparound hwtail, since
	 * hwcur must be behind head).
	 *
	 * The following memory barrier scheme is used to make this happen:
	 *
	 *          Kernel                 Application
	 *
	 *          STORE(hwcur)           LOAD(hwtail)
	 *          wmb() <------------->  rmb()
	 *          STORE(hwtail)          LOAD(hwcur)
	 */
	CSB_WRITE(ptr, hwcur, hwcur);
	nm_stst_barrier();
	CSB_WRITE(ptr, hwtail, hwtail);
}

/* Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb().
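 * The application-side writer stores cur before head, with a store-store
 * barrier in between, while we load head before cur, with a load-load
 * barrier in between. As a result we can never observe a head more recent
 * than the matching cur, which is what the sync prologue relies on.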
 */
static inline void
sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
		struct netmap_ring *shadow_ring,
		uint32_t num_slots)
{
	/*
	 * We place a memory barrier to make sure that the update of head never
	 * overtakes the update of cur.
	 * (see explanation in sync_kloop_kernel_write).
	 */
	CSB_READ(ptr, head, shadow_ring->head);
	nm_ldld_barrier();
	CSB_READ(ptr, cur, shadow_ring->cur);
	CSB_READ(ptr, sync_flags, shadow_ring->flags);

	/* Make sure that loads from atok->head and atok->cur are not delayed
	 * after the loads from the netmap ring. */
	nm_ldld_barrier();
}

/* Enable or disable application --> kernel kicks. */
static inline void
csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
{
	CSB_WRITE(csb_ktoa, kern_need_kick, val);
}

#ifdef SYNC_KLOOP_POLL
/* Are application interrupts enabled or disabled? */
static inline uint32_t
csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
{
	uint32_t v;

	CSB_READ(csb_atok, appl_need_kick, v);

	return v;
}
#endif /* SYNC_KLOOP_POLL */

static inline void
sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
{
	nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
		"rcur %d, rtail %d, hwtail %d",
		title, kring->name, kring->nr_hwcur, kring->rhead,
		kring->rcur, kring->rtail, kring->nr_hwtail);
}

/* Arguments for netmap_sync_kloop_tx_ring() and
 * netmap_sync_kloop_rx_ring().
 */
struct sync_kloop_ring_args {
	struct netmap_kring *kring;
	struct nm_csb_atok *csb_atok;
	struct nm_csb_ktoa *csb_ktoa;
#ifdef SYNC_KLOOP_POLL
	struct eventfd_ctx *irq_ctx;
#endif /* SYNC_KLOOP_POLL */
	/* Are we busy waiting rather than using a schedule() loop? */
	bool busy_wait;
	/* Are we processing in the context of a VM exit? */
	bool direct;
};

static void
netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
#ifdef SYNC_KLOOP_POLL
	bool more_txspace = false;
#endif /* SYNC_KLOOP_POLL */
	uint32_t num_slots;
	int batch;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable application --> kernel notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB. */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		batch = shadow_ring.head - kring->nr_hwcur;
		if (batch < 0)
			batch += num_slots;

#ifdef PTN_TX_BATCH_LIM
		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
			/* If the application moves ahead too fast, let's cut the move so
			 * that we don't exceed our batch limit.
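			 * (PTN_TX_BATCH_LIM(n) is an optional compile-time limit on the
			 * number of slots handed to a single nm_sync() call below.)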
			 */
			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);

			if (head_lim >= num_slots)
				head_lim -= num_slots;
			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch,
				shadow_ring.head, head_lim);
			shadow_ring.head = head_lim;
			batch = PTN_TX_BATCH_LIM(num_slots);
		}
#endif /* PTN_TX_BATCH_LIM */

		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
			shadow_ring.flags |= NAF_FORCE_RECLAIM;
		}

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("pre txsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Re-enable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("txsync() failed");
			break;
		}

		/*
		 * Finalize
		 * Copy kernel hwcur and hwtail into the CSB for the application sync(),
		 * and do the nm_sync_finalize.
		 */
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
				kring->nr_hwtail);
		if (kring->rtail != kring->nr_hwtail) {
			/* Some more room available in the parent adapter. */
			kring->rtail = kring->nr_hwtail;
#ifdef SYNC_KLOOP_POLL
			more_txspace = true;
#endif /* SYNC_KLOOP_POLL */
		}

		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
			sync_kloop_kring_dump("post txsync", kring);
		}

		/* Interrupt the application if needed. */
#ifdef SYNC_KLOOP_POLL
		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			more_txspace = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (shadow_ring.head == kring->rhead) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more packets to transmit. We enable notifications and
			 * go to sleep, waiting for a kick from the application when
			 * new slots are ready for transmission.
			 */
			/* Re-enable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier. */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (shadow_ring.head != kring->rhead) {
				/* We won the race condition, there are more packets to
				 * transmit. Disable notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		if (nm_kr_txempty(kring)) {
			/* No more available TX slots. We stop and wait for a notification
			 * from the backend (netmap_tx_irq).
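			 * When not busy waiting, that notification arrives through the
			 * wait queue registered on priv->np_si[NR_TX] by netmap_sync_kloop().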
			 */
			nm_prdis(1, "TX ring");
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}

/* RX cycles without receiving any packets. */
#define SYNC_LOOP_RX_DRY_CYCLES_MAX	2

static inline int
sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
{
	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
				kring->nkr_num_slots - 1));
}

static void
netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
	int dry_cycles = 0;
#ifdef SYNC_KLOOP_POLL
	bool some_recvd = false;
#endif /* SYNC_KLOOP_POLL */
	uint32_t num_slots;

	if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
		return;
	}

	num_slots = kring->nkr_num_slots;

	/* Disable notifications. */
	if (!a->direct) {
		csb_ktoa_kick_enable(csb_ktoa, 0);
	}
	/* Copy the application kring pointers from the CSB. */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		uint32_t hwtail;

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			if (!a->busy_wait) {
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("pre rxsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			if (!a->busy_wait) {
				/* Re-enable notifications. */
				csb_ktoa_kick_enable(csb_ktoa, 1);
			}
			nm_prerr("rxsync() failed");
			break;
		}

		/*
		 * Finalize
		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
		 */
		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
		if (kring->rtail != hwtail) {
			kring->rtail = hwtail;
#ifdef SYNC_KLOOP_POLL
			some_recvd = true;
#endif /* SYNC_KLOOP_POLL */
			dry_cycles = 0;
		} else {
			dry_cycles++;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("post rxsync", kring);
		}

#ifdef SYNC_KLOOP_POLL
		/* Interrupt the application if needed. */
		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
			/* We could disable kernel --> application kicks here,
			 * to avoid spurious interrupts. */
			eventfd_signal(a->irq_ctx, 1);
			some_recvd = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
			if (a->busy_wait) {
				break;
			}
			/*
			 * No more slots available for reception. We enable notifications
			 * and go to sleep, waiting for a kick from the application when
			 * new receive slots are available.
			 */
			/* Re-enable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Double check, with store-load memory barrier.
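			 * Either the application sees kern_need_kick == 1 and kicks us,
			 * or the read below observes its latest update to head: in both
			 * cases no new work can be missed.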
			 */
			nm_stld_barrier();
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
				/* We won the race condition, more slots are available. Disable
				 * notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		if (unlikely(hwtail == kring->rhead ||
				dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
			/* No more packets to be read from the backend. We stop and
			 * wait for a notification from the backend (netmap_rx_irq). */
			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
				hwtail, kring->rhead, dry_cycles);
			break;
		}
	}

	nm_kr_put(kring);

#ifdef SYNC_KLOOP_POLL
	/* Interrupt the application if needed. */
	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}

#ifdef SYNC_KLOOP_POLL
struct sync_kloop_poll_ctx;
struct sync_kloop_poll_entry {
	/* Support for receiving notifications from
	 * a netmap ring or from the application. */
	struct file *filp;
	wait_queue_t wait;
	wait_queue_head_t *wqh;

	/* Support for sending notifications to the application. */
	struct eventfd_ctx *irq_ctx;
	struct file *irq_filp;

	/* Arguments for the ring processing function. Useful
	 * in case of a custom wake-up function. */
	struct sync_kloop_ring_args *args;
	struct sync_kloop_poll_ctx *parent;
};

struct sync_kloop_poll_ctx {
	poll_table wait_table;
	unsigned int next_entry;
	int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
	unsigned int num_entries;
	unsigned int num_tx_rings;
	unsigned int num_rings;
	/* The first num_tx_rings entries are for the TX kicks.
	 * Then the RX kicks entries follow. The last two
	 * entries are for the TX irq and the RX irq. */
	struct sync_kloop_poll_entry entries[0];
};

static void
sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
		poll_table *pt)
{
	struct sync_kloop_poll_ctx *poll_ctx =
		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
		poll_ctx->next_entry;

	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
	entry->wqh = wqh;
	entry->filp = file;
	/* Use the default wake-up function, unless the caller installed a
	 * custom one in poll_ctx->next_wake_fun. */
	if (poll_ctx->next_wake_fun == NULL) {
		init_waitqueue_entry(&entry->wait, current);
	} else {
		init_waitqueue_func_entry(&entry->wait,
			poll_ctx->next_wake_fun);
	}
	add_wait_queue(wqh, &entry->wait);
}

static int
sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
		int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_tx_ring(entry->args);

	return 0;
}

static int
sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
		int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = 0; i < poll_ctx->num_tx_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}

static int
sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
		int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);

	netmap_sync_kloop_rx_ring(entry->args);

	return 0;
}

static int
sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
		int wake_flags, void *key)
{
	struct sync_kloop_poll_entry *entry =
		container_of(wait, struct sync_kloop_poll_entry, wait);
	struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
	int i;

	for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
		struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;

		if (irq_ctx) {
			eventfd_signal(irq_ctx, 1);
		}
	}

	return 0;
}
#endif /* SYNC_KLOOP_POLL */

int
netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
	struct nmreq_sync_kloop_start *req =
		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
#ifdef SYNC_KLOOP_POLL
	struct sync_kloop_poll_ctx *poll_ctx = NULL;
#endif /* SYNC_KLOOP_POLL */
	int num_rx_rings, num_tx_rings, num_rings;
	struct sync_kloop_ring_args *args = NULL;
	uint32_t sleep_us = req->sleep_us;
	struct nm_csb_atok *csb_atok_base;
	struct nm_csb_ktoa *csb_ktoa_base;
	struct netmap_adapter *na;
	struct nmreq_option *opt;
	bool na_could_sleep = false;
	bool busy_wait = true;
	bool direct_tx = false;
	bool direct_rx = false;
	int err = 0;
	int i;

	if (sleep_us > 1000000) {
		/* We do not accept sleeping for more than a second. */
		return EINVAL;
	}

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	NMG_LOCK();
	/* Make sure the application is working in CSB mode. */
	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
		NMG_UNLOCK();
		nm_prerr("sync-kloop on %s requires "
			"NETMAP_REQ_OPT_CSB option", na->name);
		return EINVAL;
	}

	csb_atok_base = priv->np_csb_atok_base;
	csb_ktoa_base = priv->np_csb_ktoa_base;

	/* Make sure that no kloop is currently running.
	 */
	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
		err = EBUSY;
	}
	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
	NMG_UNLOCK();
	if (err) {
		return err;
	}

	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
	num_rings = num_tx_rings + num_rx_rings;

	args = nm_os_malloc(num_rings * sizeof(args[0]));
	if (!args) {
		err = ENOMEM;
		goto out;
	}

	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
	 * and netmap_sync_kloop_rx_ring(). */
	for (i = 0; i < num_tx_rings; i++) {
		struct sync_kloop_ring_args *a = args + i;

		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
		a->csb_atok = csb_atok_base + i;
		a->csb_ktoa = csb_ktoa_base + i;
		a->busy_wait = busy_wait;
		a->direct = direct_tx;
	}
	for (i = 0; i < num_rx_rings; i++) {
		struct sync_kloop_ring_args *a = args + num_tx_rings + i;

		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
		a->csb_atok = csb_atok_base + num_tx_rings + i;
		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
		a->busy_wait = busy_wait;
		a->direct = direct_rx;
	}

	/* Validate notification options. */
	opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
	if (opt != NULL) {
		struct nmreq_opt_sync_kloop_mode *mode_opt =
			(struct nmreq_opt_sync_kloop_mode *)opt;

		direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
		direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
		if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
				NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
			opt->nro_status = err = EINVAL;
			goto out;
		}
		opt->nro_status = 0;
	}
	opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
	if (opt != NULL) {
		if (opt->nro_size != sizeof(*eventfds_opt) +
				sizeof(eventfds_opt->eventfds[0]) * num_rings) {
			/* Option size not consistent with the number of
			 * entries. */
			opt->nro_status = err = EINVAL;
			goto out;
		}
#ifdef SYNC_KLOOP_POLL
		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
		opt->nro_status = 0;

		/* Check if some ioeventfd entry is not defined, and force sleep
		 * synchronization in that case. */
		busy_wait = false;
		for (i = 0; i < num_rings; i++) {
			if (eventfds_opt->eventfds[i].ioeventfd < 0) {
				busy_wait = true;
				break;
			}
		}

		if (busy_wait && (direct_tx || direct_rx)) {
			/* For direct processing we need all the
			 * ioeventfds to be valid. */
			opt->nro_status = err = EINVAL;
			goto out;
		}

		/* We need 2 poll entries for TX and RX notifications coming
		 * from the netmap adapter, plus one entry per ring for the
		 * notifications coming from the application. */
		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
				(num_rings + 2) * sizeof(poll_ctx->entries[0]));
		if (!poll_ctx) {
			err = ENOMEM;
			goto out;
		}
		init_poll_funcptr(&poll_ctx->wait_table,
				sync_kloop_poll_table_queue_proc);
		poll_ctx->num_entries = 2 + num_rings;
		poll_ctx->num_tx_rings = num_tx_rings;
		poll_ctx->num_rings = num_rings;
		poll_ctx->next_entry = 0;
		poll_ctx->next_wake_fun = NULL;

		if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
			/* In direct mode, VALE txsync is called from
			 * wake-up context, where it is not possible
			 * to sleep.
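			 * Clear NAF_BDG_MAYSLEEP for the lifetime of the kloop;
			 * the original value is restored on the way out
			 * (see na_could_sleep in the out: path below).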
			 */
			na->na_flags &= ~NAF_BDG_MAYSLEEP;
			na_could_sleep = true;
		}

		for (i = 0; i < num_rings + 2; i++) {
			poll_ctx->entries[i].args = args + i;
			poll_ctx->entries[i].parent = poll_ctx;
		}

		/* Poll for notifications coming from the applications through
		 * eventfds. */
		for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
			struct eventfd_ctx *irq = NULL;
			struct file *filp = NULL;
			unsigned long mask;
			bool tx_ring = (i < num_tx_rings);

			if (eventfds_opt->eventfds[i].irqfd >= 0) {
				filp = eventfd_fget(
						eventfds_opt->eventfds[i].irqfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				irq = eventfd_ctx_fileget(filp);
				if (IS_ERR(irq)) {
					err = PTR_ERR(irq);
					goto out;
				}
			}
			poll_ctx->entries[i].irq_filp = filp;
			poll_ctx->entries[i].irq_ctx = irq;
			poll_ctx->entries[i].args->busy_wait = busy_wait;
			/* Don't let netmap_sync_kloop_*x_ring() use
			 * IRQs in direct mode. */
			poll_ctx->entries[i].args->irq_ctx =
				((tx_ring && direct_tx) ||
				(!tx_ring && direct_rx)) ? NULL :
				poll_ctx->entries[i].irq_ctx;
			poll_ctx->entries[i].args->direct =
				(tx_ring ? direct_tx : direct_rx);

			if (!busy_wait) {
				filp = eventfd_fget(
						eventfds_opt->eventfds[i].ioeventfd);
				if (IS_ERR(filp)) {
					err = PTR_ERR(filp);
					goto out;
				}
				if (tx_ring && direct_tx) {
					/* Override the wake up function
					 * so that it can directly call
					 * netmap_sync_kloop_tx_ring().
					 */
					poll_ctx->next_wake_fun =
						sync_kloop_tx_kick_wake_fun;
				} else if (!tx_ring && direct_rx) {
					/* Same for direct RX. */
					poll_ctx->next_wake_fun =
						sync_kloop_rx_kick_wake_fun;
				} else {
					poll_ctx->next_wake_fun = NULL;
				}
				mask = filp->f_op->poll(filp,
						&poll_ctx->wait_table);
				if (mask & POLLERR) {
					err = EINVAL;
					goto out;
				}
			}
		}

		/* Poll for notifications coming from the netmap rings bound to
		 * this file descriptor. */
		if (!busy_wait) {
			NMG_LOCK();
			/* In direct mode, override the wake up function so
			 * that it can forward the netmap_tx_irq() to the
			 * guest. */
			poll_ctx->next_wake_fun = direct_tx ?
				sync_kloop_tx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_TX],
					&poll_ctx->wait_table);
			poll_ctx->next_entry++;

			poll_ctx->next_wake_fun = direct_rx ?
				sync_kloop_rx_irq_wake_fun : NULL;
			poll_wait(priv->np_filp, priv->np_si[NR_RX],
					&poll_ctx->wait_table);
			poll_ctx->next_entry++;
			NMG_UNLOCK();
		}
#else /* SYNC_KLOOP_POLL */
		opt->nro_status = EOPNOTSUPP;
		goto out;
#endif /* SYNC_KLOOP_POLL */
	}

	nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
		"na_could_sleep %u", busy_wait, direct_tx, direct_rx,
		na_could_sleep);

	/* Main loop. */
	for (;;) {
		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
			break;
		}

#ifdef SYNC_KLOOP_POLL
		if (!busy_wait) {
			/* It is important to set the task state as
			 * interruptible before processing any TX/RX ring,
			 * so that if a notification on ring Y comes after
			 * we have processed ring Y, but before we call
			 * schedule(), we don't miss it. This is true because
			 * the wake up function will change the task state,
			 * and therefore the schedule_timeout() call below
			 * will observe the change.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
		}
#endif /* SYNC_KLOOP_POLL */

		/* Process all the TX rings bound to this file descriptor. */
		for (i = 0; !direct_tx && i < num_tx_rings; i++) {
			struct sync_kloop_ring_args *a = args + i;
			netmap_sync_kloop_tx_ring(a);
		}

		/* Process all the RX rings bound to this file descriptor. */
		for (i = 0; !direct_rx && i < num_rx_rings; i++) {
			struct sync_kloop_ring_args *a = args + num_tx_rings + i;
			netmap_sync_kloop_rx_ring(a);
		}

		if (busy_wait) {
			/* Default synchronization method: sleep for a while. */
			usleep_range(sleep_us, sleep_us);
		}
#ifdef SYNC_KLOOP_POLL
		else {
			/* Yield to the scheduler waiting for a notification
			 * to come either from netmap or the application. */
			schedule_timeout(msecs_to_jiffies(3000));
		}
#endif /* SYNC_KLOOP_POLL */
	}
out:
#ifdef SYNC_KLOOP_POLL
	if (poll_ctx) {
		/* Stop polling from netmap and the eventfds, and deallocate
		 * the poll context. */
		if (!busy_wait) {
			__set_current_state(TASK_RUNNING);
		}
		for (i = 0; i < poll_ctx->next_entry; i++) {
			struct sync_kloop_poll_entry *entry =
				poll_ctx->entries + i;

			if (entry->wqh)
				remove_wait_queue(entry->wqh, &entry->wait);
			/* Release the reference we took on the eventfd files,
			 * but not on the netmap file descriptor, since no
			 * reference was taken on it. */
			if (entry->filp && entry->filp != priv->np_filp)
				fput(entry->filp);
			if (entry->irq_ctx)
				eventfd_ctx_put(entry->irq_ctx);
			if (entry->irq_filp)
				fput(entry->irq_filp);
		}
		nm_os_free(poll_ctx);
		poll_ctx = NULL;
	}
#endif /* SYNC_KLOOP_POLL */

	if (args) {
		nm_os_free(args);
		args = NULL;
	}

	/* Reset the kloop state. */
	NMG_LOCK();
	priv->np_kloop_state = 0;
	if (na_could_sleep) {
		na->na_flags |= NAF_BDG_MAYSLEEP;
	}
	NMG_UNLOCK();

	return err;
}

int
netmap_sync_kloop_stop(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na;
	bool running = true;
	int err = 0;

	if (priv->np_nifp == NULL) {
		return ENXIO;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;
	if (!nm_netmap_on(na)) {
		return ENXIO;
	}

	/* Set the kloop stopping flag. */
	NMG_LOCK();
	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
	NMG_UNLOCK();

	/* Send a notification to the kloop, in case it is blocked in
	 * schedule_timeout(). We can use either RX or TX, because the
	 * kloop is waiting on both. */
	nm_os_selwakeup(priv->np_si[NR_RX]);

	/* Wait for the kloop to actually terminate. */
	while (running) {
		usleep_range(1000, 1500);
		NMG_LOCK();
		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
			& NM_SYNC_KLOOP_RUNNING);
		NMG_UNLOCK();
	}

	return err;
}

#ifdef WITH_PTNETMAP
/*
 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
 * These routines are reused across the different operating systems supported
 * by netmap.
 */

/*
 * Reconcile host and guest views of the transmit ring.
 *
 * Guest user wants to transmit packets up to the one before ring->head,
 * and guest kernel knows tx_ring->hwcur is the first packet unsent
 * by the host kernel.
 *
 * We push out as many packets as possible, and possibly
 * reclaim buffers from previously completed transmissions.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no space in the ring).
 */
bool
netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
		struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: tell the host to process the new packets,
	 * updating the CSB.
	 */
	kring->nr_hwcur = ktoa->hwcur;
	nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);

	/* Ask for a kick from the guest to the host if needed. */
	if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
			&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
			(flags & NAF_FORCE_RECLAIM)) {
		atok->sync_flags = flags;
		notify = true;
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
				&kring->nr_hwcur);
	}

	/*
	 * No more room in the ring for new transmissions. The user thread will
	 * go to sleep and we need to be notified by the host when more free
	 * space is available.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Re-enable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier. */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
				&kring->nr_hwcur);
		/* If there is new free space, disable notifications. */
		if (unlikely(!nm_kr_wouldblock(kring))) {
			atok->appl_need_kick = 0;
		}
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		kring->name, atok->head, atok->cur, ktoa->hwtail,
		kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}

/*
 * Reconcile host and guest views of the receive ring.
 *
 * Update hwcur/hwtail from host (reading from CSB).
 *
 * If the guest user has released buffers up to the one before ring->head, we
 * also give them to the host.
 *
 * Notifications from the host are enabled only if the guest user would
 * block (no more completed slots in the ring).
 */
bool
netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
		struct netmap_kring *kring, int flags)
{
	bool notify = false;

	/* Disable notifications */
	atok->appl_need_kick = 0;

	/*
	 * First part: import newly received packets, by updating the kring
	 * hwtail to the hwtail known from the host (read from the CSB).
	 * This also updates the kring hwcur.
	 */
	nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
	kring->nr_kflags &= ~NKR_PENDINTR;

	/*
	 * Second part: tell the host about the slots that the guest user has
	 * released, by updating cur and head in the CSB.
	 */
	if (kring->rhead != kring->nr_hwcur) {
		nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
	}

	/*
	 * No more completed RX slots. The user thread will go to sleep and
	 * we need to be notified by the host when more RX slots have been
	 * completed.
	 */
	if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
		/* Re-enable notifications. */
		atok->appl_need_kick = 1;
		/* Double check, with store-load memory barrier.
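		 * Either the host kloop sees appl_need_kick == 1 and interrupts us,
		 * or the read below observes the new hwtail: in both cases no
		 * completion can be lost.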
		 */
		nm_stld_barrier();
		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
				&kring->nr_hwcur);
		/* If there are new slots, disable notifications. */
		if (!nm_kr_wouldblock(kring)) {
			atok->appl_need_kick = 0;
		}
	}

	/* Ask for a kick from the guest to the host if needed. */
	if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
			&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
		atok->sync_flags = flags;
		notify = true;
	}

	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
		kring->name, atok->head, atok->cur, ktoa->hwtail,
		kring->rhead, kring->rcur, kring->nr_hwtail);

	return notify;
}

/*
 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
 */
int
ptnet_nm_krings_create(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;
	int ret;

	if (ptna->backend_users) {
		return 0;
	}

	/* Create krings on the public netmap adapter. */
	ret = netmap_hw_krings_create(na_nm);
	if (ret) {
		return ret;
	}

	/* Copy krings into the netmap adapter private to the driver. */
	na_dr->tx_rings = na_nm->tx_rings;
	na_dr->rx_rings = na_nm->rx_rings;

	return 0;
}

void
ptnet_nm_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *na_nm = &ptna->hwup.up;
	struct netmap_adapter *na_dr = &ptna->dr.up;

	if (ptna->backend_users) {
		return;
	}

	na_dr->tx_rings = NULL;
	na_dr->rx_rings = NULL;

	netmap_hw_krings_delete(na_nm);
}

void
ptnet_nm_dtor(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
		(struct netmap_pt_guest_adapter *)na;

	netmap_mem_put(ptna->dr.up.nm_mem);
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
}

int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		unsigned int nifp_offset, unsigned int memid)
{
	struct netmap_pt_guest_adapter *ptna;
	if_t ifp = arg ? arg->ifp : NULL;
	int error;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
	if (error)
		return error;

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
	 * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_users = 0;

	return 0;
}

#endif /* WITH_PTNETMAP */
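
/*
 * Usage sketch (illustrative only, not compiled): a ptnet guest driver
 * typically wraps netmap_pt_guest_txsync()/netmap_pt_guest_rxsync() in its
 * nm_txsync/nm_rxsync callbacks, passing the CSB entries of the ring and
 * writing the device doorbell when the helper returns true. The
 * drv_csb_atok(), drv_csb_ktoa() and drv_kick_host() names below are
 * hypothetical placeholders for the driver's own CSB lookup and kick
 * register write.
 *
 *	static int
 *	drv_ptnet_txsync(struct netmap_kring *kring, int flags)
 *	{
 *		struct nm_csb_atok *atok = drv_csb_atok(kring);
 *		struct nm_csb_ktoa *ktoa = drv_csb_ktoa(kring);
 *
 *		if (netmap_pt_guest_txsync(atok, ktoa, kring, flags))
 *			drv_kick_host(kring);
 *		return 0;
 *	}
 */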