/*
 * Copyright (C) 2014-2016 Giuseppe Lettieri
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */

#if defined(__FreeBSD__)
#include <sys/cdefs.h>		/* prerequisite */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>		/* defines used in kernel.h */
#include <sys/kernel.h>		/* types used in module initialization */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/socket.h>		/* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/refcount.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#ifdef WITH_PIPES

#define NM_PIPE_MAXSLOTS 4096

static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0, "");
SYSEND;

/* allocate the pipe array in the parent adapter */
static int
nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
{
	size_t len;
	struct netmap_pipe_adapter **npa;

	if (npipes <= na->na_max_pipes)
		/* we already have more entries than requested */
		return 0;

	if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
		return EINVAL;

	len = sizeof(struct netmap_pipe_adapter *) * npipes;
#ifndef _WIN32
	npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
#else
	npa = realloc(na->na_pipes, len, sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes);
#endif
	if (npa == NULL)
		return ENOMEM;

	na->na_pipes = npa;
	na->na_max_pipes = npipes;

	return 0;
}

/* deallocate the pipe array in the parent adapter */
void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
	if (na->na_pipes) {
		if (na->na_next_pipe > 0) {
			D("freeing non-empty pipe array for %s (%d dangling pipes)!",
			    na->name, na->na_next_pipe);
		}
		free(na->na_pipes, M_DEVBUF);
		na->na_pipes = NULL;
		na->na_max_pipes = 0;
		na->na_next_pipe = 0;
	}
}

/* find a pipe endpoint with the given id among the parent's pipes */
static struct netmap_pipe_adapter *
netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
{
	int i;
	struct netmap_pipe_adapter *na;

	for (i = 0; i < parent->na_next_pipe; i++) {
		na = parent->na_pipes[i];
		if (na->id == pipe_id) {
			return na;
		}
	}
	return NULL;
}

/* add a new pipe endpoint to the parent array */
static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
	if (parent->na_next_pipe >= parent->na_max_pipes) {
		u_int npipes = parent->na_max_pipes ? 2*parent->na_max_pipes : 2;
		int error = nm_pipe_alloc(parent, npipes);
		if (error)
			return error;
	}

	parent->na_pipes[parent->na_next_pipe] = na;
	na->parent_slot = parent->na_next_pipe;
	parent->na_next_pipe++;
	return 0;
}

/* remove the given pipe endpoint from the parent array */
static void
netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
	u_int n;
	n = --parent->na_next_pipe;
	if (n != na->parent_slot) {
		struct netmap_pipe_adapter **p =
			&parent->na_pipes[na->parent_slot];
		*p = parent->na_pipes[n];
		(*p)->parent_slot = na->parent_slot;
	}
	parent->na_pipes[n] = NULL;
}

static int
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
	struct netmap_kring *rxkring = txkring->pipe;
	u_int limit; /* slots to transfer */
	u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
		lim_rx = rxkring->nkr_num_slots - 1;
	int m, busy;

	ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
	ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d",
	    txkring->nr_hwcur, txkring->nr_hwtail,
	    txkring->rcur, txkring->rhead, txkring->rtail);

	j = rxkring->nr_hwtail; /* RX */
	k = txkring->nr_hwcur;  /* TX */
	m = txkring->rhead - txkring->nr_hwcur; /* new slots */
	if (m < 0)
		m += txkring->nkr_num_slots;
	limit = m;
	m = lim_rx; /* max avail space on destination */
	busy = j - rxkring->nr_hwcur; /* busy slots */
	if (busy < 0)
		busy += rxkring->nkr_num_slots;
	m -= busy; /* subtract busy slots */
	ND(2, "m %d limit %d", m, limit);
	if (m < limit)
		limit = m;

	if (limit == 0) {
		/* either the rxring is full, or nothing to send */
		return 0;
	}

	while (limit-- > 0) {
		struct netmap_slot *rs = &rxkring->ring->slot[j];
		struct netmap_slot *ts = &txkring->ring->slot[k];
		struct netmap_slot tmp;

		/* swap the slots */
		tmp = *rs;
		*rs = *ts;
		*ts = tmp;

		/* report the buffer change */
		ts->flags |= NS_BUF_CHANGED;
		rs->flags |= NS_BUF_CHANGED;

		j = nm_next(j, lim_rx);
		k = nm_next(k, lim_tx);
	}

	mb(); /* make sure the slots are updated before publishing them */
	rxkring->nr_hwtail = j;
	txkring->nr_hwcur = k;
	txkring->nr_hwtail = nm_prev(k, lim_tx);

	ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d",
	    txkring->nr_hwcur, txkring->nr_hwtail,
	    txkring->rcur, txkring->rhead, txkring->rtail, j);

	mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
	rxkring->nm_notify(rxkring, 0);

	return 0;
}

static int
netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
{
	struct netmap_kring *txkring = rxkring->pipe;
	uint32_t oldhwcur = rxkring->nr_hwcur;

	ND("%s %x <- %s", rxkring->name, flags, txkring->name);
	rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
	ND(5, "hwcur %d hwtail %d cur %d head %d tail %d",
	    rxkring->nr_hwcur, rxkring->nr_hwtail,
	    rxkring->rcur, rxkring->rhead, rxkring->rtail);
	mb(); /* paired with the first mb() in txsync */

	if (oldhwcur != rxkring->nr_hwcur) {
		/* we have released some slots, notify the other end */
		mb(); /* make sure nr_hwcur is updated before notifying */
		txkring->nm_notify(txkring, 0);
	}
	return 0;
}

/* Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
 *
 * To play well with the existing netmap infrastructure (refcounts etc.), we
 * adopt the following strategy:
 *
 * 1) The first endpoint that is created also creates the other endpoint and
 *    grabs a reference to it.
 *
 *        state A)  user1 --> endpoint1 --> endpoint2
 *
 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
 *    its reference to the user:
 *
 *        state B)  user1 --> endpoint1     endpoint2 <--- user2
 *
 * 3) Assume that, starting from state B, endpoint2 is closed. In the unregister
 *    callback endpoint2 notes that endpoint1 is still active and adds a reference
 *    from endpoint1 to itself. When user2 then releases her own reference,
 *    endpoint2 is not destroyed and we are back to state A. A symmetrical state
 *    would be reached if endpoint1 were released instead.
 *
 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
 *    it owns a reference to endpoint2 and releases it.
 *
 * Something similar goes on for the creation and destruction of the krings.
 */


/* netmap_pipe_krings_create.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1. We have to create both sets
 *    of krings.
 *
 * 2) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e2. e1 is certainly registered and our
 *    krings already exist. Nothing to do.
 */
static int
netmap_pipe_krings_create(struct netmap_adapter *na)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	struct netmap_adapter *ona = &pna->peer->up;
	int error = 0;
	enum txrx t;

	if (pna->peer_ref) {
		int i;

		/* case 1) above */
		D("%p: case 1, create both ends", na);
		error = netmap_krings_create(na, 0);
		if (error)
			goto err;

		/* create the krings of the other end */
		error = netmap_krings_create(ona, 0);
		if (error)
			goto del_krings1;

		/* cross link the krings */
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
			for (i = 0; i < nma_get_nrings(na, t); i++) {
				NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
				NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
			}
		}

	}
	return 0;

del_krings1:
	netmap_krings_delete(na);
err:
	return error;
}

/* netmap_pipe_reg.
 *
 * There are two cases on registration (onoff==1)
 *
 * 1.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Create the needed rings of the
 *      other end.
 *
 * 1.b) state is
 *
 *        usr1 --> e1 --> e2 <-- usr2
 *
 *      and we are e2. Drop the ref e1 is holding.
 *
 * There are two additional cases on unregister (onoff==0)
 *
 * 2.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Nothing special to do, e2 will
 *      be cleaned up by the destructor of e1.
 *
 * 2.b) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *      and we are either e1 or e2. Add a ref from the
 *      other end and hide our rings.
 */
static int
netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	struct netmap_adapter *ona = &pna->peer->up;
	int i, error = 0;
	enum txrx t;

	ND("%p: onoff %d", na, onoff);
	if (onoff) {
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
				struct netmap_kring *kring = &NMR(na, t)[i];

				if (nm_kring_pending_on(kring)) {
					/* mark the partner ring as needed */
					kring->pipe->nr_kflags |= NKR_NEEDRING;
				}
			}
		}

		/* create all missing needed rings on the other end */
		error = netmap_mem_rings_create(ona);
		if (error)
			return error;

		/* in case of no error we put our rings in netmap mode */
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
				struct netmap_kring *kring = &NMR(na, t)[i];

				if (nm_kring_pending_on(kring)) {
					kring->nr_mode = NKR_NETMAP_ON;
				}
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
				struct netmap_kring *kring = &NMR(na, t)[i];

				if (nm_kring_pending_off(kring)) {
					kring->nr_mode = NKR_NETMAP_OFF;
					/* mark the peer ring as no longer needed by us
					 * (it may still be kept if somebody else is using it)
					 */
					kring->pipe->nr_kflags &= ~NKR_NEEDRING;
				}
			}
		}
		/* delete all the peer rings that are no longer needed */
		netmap_mem_rings_delete(ona);
	}

	if (na->active_fds) {
		D("active_fds %d", na->active_fds);
		return 0;
	}

	if (pna->peer_ref) {
		ND("%p: case 1.a or 2.a, nothing to do", na);
		return 0;
	}
	if (onoff) {
		ND("%p: case 1.b, drop peer", na);
		pna->peer->peer_ref = 0;
		netmap_adapter_put(na);
	} else {
		ND("%p: case 2.b, grab peer", na);
		netmap_adapter_get(na);
		pna->peer->peer_ref = 1;
	}
	return error;
}

/* netmap_pipe_krings_delete.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1 (e2 is not registered, so krings_delete cannot be
 *    called on it);
 *
 * 2) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *    and we are either e1 or e2.
 *
 * In the former case we have to also delete the krings of e2;
 * in the latter case we do nothing (note that our krings
 * have already been hidden in the unregister callback).
 */
static void
netmap_pipe_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	struct netmap_adapter *ona; /* na of the other end */

	if (!pna->peer_ref) {
		ND("%p: case 2, kept alive by peer", na);
		return;
	}
	/* case 1) above */
	ND("%p: case 1, deleting everything", na);
	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
	ona = &pna->peer->up;
	if (ona->tx_rings == NULL) {
		/* already deleted, we must be on a
		 * cleanup-after-error path */
		return;
	}
	netmap_krings_delete(ona);
}


static void
netmap_pipe_dtor(struct netmap_adapter *na)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	ND("%p", na);
	if (pna->peer_ref) {
		ND("%p: clean up peer", na);
		pna->peer_ref = 0;
		netmap_adapter_put(&pna->peer->up);
	}
	if (pna->role == NR_REG_PIPE_MASTER)
		netmap_pipe_remove(pna->parent, pna);
	netmap_adapter_put(pna->parent);
	pna->parent = NULL;
}

int
netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	struct nmreq pnmr;
	struct netmap_adapter *pna; /* parent adapter */
	struct netmap_pipe_adapter *mna, *sna, *req;
	struct ifnet *ifp = NULL;
	u_int pipe_id;
	int role = nmr->nr_flags & NR_REG_MASK;
	int error;

	ND("flags %x", nmr->nr_flags);

	if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
		ND("not a pipe");
		return 0;
	}
	role = nmr->nr_flags & NR_REG_MASK;

	/* first, try to find the parent adapter */
	bzero(&pnmr, sizeof(pnmr));
	memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
	/* pass to parent the requested number of pipes */
	pnmr.nr_arg1 = nmr->nr_arg1;
	error = netmap_get_na(&pnmr, &pna, &ifp, create);
	if (error) {
		ND("parent lookup failed: %d", error);
		return error;
	}
	ND("found parent: %s", pna->name);

	if (NETMAP_OWNED_BY_KERN(pna)) {
		ND("parent busy");
		error = EBUSY;
		goto put_out;
	}

	/* next, lookup the pipe id in the parent list */
	req = NULL;
	pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
	mna = netmap_pipe_find(pna, pipe_id);
	if (mna) {
		if (mna->role == role) {
			ND("found %d directly at %d", pipe_id, mna->parent_slot);
			req = mna;
		} else {
			ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
			req = mna->peer;
		}
		/* the pipe we have found already holds a ref to the parent,
		 * so we need to drop the one we got from netmap_get_na()
		 */
		netmap_adapter_put(pna);
		goto found;
	}
	ND("pipe %d not found, create %d", pipe_id, create);
	if (!create) {
		error = ENODEV;
		goto put_out;
	}
	/* we create both master and slave.
	 * The endpoint we were asked for holds a reference to
	 * the other one.
	 */
	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (mna == NULL) {
		error = ENOMEM;
		goto put_out;
	}
	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id);

	mna->id = pipe_id;
	mna->role = NR_REG_PIPE_MASTER;
	mna->parent = pna;

	mna->up.nm_txsync = netmap_pipe_txsync;
	mna->up.nm_rxsync = netmap_pipe_rxsync;
	mna->up.nm_register = netmap_pipe_reg;
	mna->up.nm_dtor = netmap_pipe_dtor;
	mna->up.nm_krings_create = netmap_pipe_krings_create;
	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
	mna->up.nm_mem = pna->nm_mem;
	mna->up.na_lut = pna->na_lut;

	mna->up.num_tx_rings = 1;
	mna->up.num_rx_rings = 1;
	mna->up.num_tx_desc = nmr->nr_tx_slots;
	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
	    1, NM_PIPE_MAXSLOTS, NULL);
	mna->up.num_rx_desc = nmr->nr_rx_slots;
	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
	    1, NM_PIPE_MAXSLOTS, NULL);
	error = netmap_attach_common(&mna->up);
	if (error)
		goto free_mna;
	/* register the master with the parent */
	error = netmap_pipe_add(pna, mna);
	if (error)
		goto free_mna;

	/* create the slave */
	sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sna == NULL) {
		error = ENOMEM;
		goto unregister_mna;
	}
	/* most fields are the same, copy from master and then fix */
	*sna = *mna;
	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
	sna->role = NR_REG_PIPE_SLAVE;
	error = netmap_attach_common(&sna->up);
	if (error)
		goto free_sna;

	/* join the two endpoints */
	mna->peer = sna;
	sna->peer = mna;

	/* we already have a reference to the parent, but we
	 * need another one for the other endpoint we created
	 */
	netmap_adapter_get(pna);

	if (role == NR_REG_PIPE_MASTER) {
		req = mna;
		mna->peer_ref = 1;
		netmap_adapter_get(&sna->up);
	} else {
		req = sna;
		sna->peer_ref = 1;
		netmap_adapter_get(&mna->up);
	}
	ND("created master %p and slave %p", mna, sna);
found:

	ND("pipe %d %s at %p", pipe_id,
	    (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
	*na = &req->up;
	netmap_adapter_get(*na);

	/* keep the reference to the parent.
	 * It will be released by the req destructor
	 */

	/* drop the ifp reference, if any */
	if (ifp) {
		if_rele(ifp);
	}

	return 0;

free_sna:
	free(sna, M_DEVBUF);
unregister_mna:
	netmap_pipe_remove(pna, mna);
free_mna:
	free(mna, M_DEVBUF);
put_out:
	netmap_unget_na(pna, ifp);
	return error;
}


#endif /* WITH_PIPES */
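
/*
 * Userspace usage sketch (not part of the build): the "{" and "}" name
 * suffixes generated above for the master and slave adapters are what an
 * application passes to open the two ends of a pipe. A minimal example,
 * assuming the nm_open()/nm_close() helpers from <net/netmap_user.h>
 * (NETMAP_WITH_LIBS) and a parent port named "vale0:p" -- the port name and
 * pipe id 3 are purely illustrative:
 *
 *	#define NETMAP_WITH_LIBS
 *	#include <net/netmap_user.h>
 *
 *	struct nm_desc *m = nm_open("vale0:p{3", NULL, 0, NULL); // master end of pipe 3
 *	struct nm_desc *s = nm_open("vale0:p}3", NULL, 0, NULL); // slave end of the same pipe
 *	// ... exchange buffers, e.g. with nm_inject()/nm_nextpkt() ...
 *	nm_close(s);
 *	nm_close(m);
 *
 * Whichever end is opened first takes the create path in netmap_get_pipe_na()
 * above, which allocates both endpoints; the second open then finds its peer
 * via netmap_pipe_find().
 */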