/*
 * Copyright (C) 2013-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */
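
/*
 * Illustrative sketch of the reservation pattern described above.
 * This is not code from this file: the forwarding path (nm_bdg_flush())
 * is implemented in the VALE datapath, and the reserve/commit/copy
 * helper names below are made up for the example:
 *
 *	BDG_RLOCK(b);				// sleepable shared lock
 *	mtx_lock(&dst_ring->q_lock);
 *	first = reserve_slots(dst_ring, n);	// only advance a lease
 *	mtx_unlock(&dst_ring->q_lock);
 *	copy_packets(src, dst_ring, first, n);	// may sleep on a page fault
 *	mtx_lock(&dst_ring->q_lock);
 *	commit_slots(dst_ring, first, n);	// update hwtail, notify readers
 *	mtx_unlock(&dst_ring->q_lock);
 *	BDG_RUNLOCK(b);
 */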

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h>		/* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>		/* defines used in kernel.h */
#include <sys/kernel.h>		/* types used in module initialization */
#include <sys/conf.h>		/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h>		/* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#include <dev/netmap/netmap_bdg.h>

const char*
netmap_bdg_name(struct netmap_vp_adapter *vp)
{
	struct nm_bridge *b = vp->na_bdg;
	if (b == NULL)
		return NULL;
	return b->bdg_basename;
}


#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */


static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||
	       (c == '_');
}

/* Validate the name of a bdg port and return the
 * position of the ":" character.
 */
static int
nm_bdg_name_validate(const char *name, size_t prefixlen)
{
	int colon_pos = -1;
	int i;

	if (!name || strlen(name) < prefixlen) {
		return -1;
	}

	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
		if (name[i] == ':') {
			colon_pos = i;
			break;
		} else if (!nm_is_id_char(name[i])) {
			return -1;
		}
	}

	if (strlen(name) - colon_pos > IFNAMSIZ) {
		/* interface name too long */
		return -1;
	}

	return colon_pos;
}
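
/*
 * For example, with the standard "vale" prefix, "vale0:1" is accepted
 * and the function returns 5 (the position of the ':'), so the bridge
 * name is "vale0" and the port name is "1".  A character outside
 * [a-zA-Z0-9_] before the ':' (e.g. "vale-0:p") makes the name invalid.
 */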

/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
struct nm_bridge *
nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
{
	int i, namelen;
	struct nm_bridge *b = NULL, *bridges;
	u_int num_bridges;

	NMG_LOCK_ASSERT();

	netmap_bns_getbridges(&bridges, &num_bridges);

	namelen = nm_bdg_name_validate(name,
			(ops != NULL ? strlen(ops->name) : 0));
	if (namelen < 0) {
		nm_prerr("invalid bridge name %s", name ? name : "(null)");
		return NULL;
	}

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < num_bridges; i++) {
		struct nm_bridge *x = bridges + i;

		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == num_bridges && b) { /* name not found, can create entry */
		/* initialize the bridge */
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		if (b->ht == NULL) {
			nm_prerr("failed to allocate hash table");
			return NULL;
		}
		strncpy(b->bdg_basename, name, namelen);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->bdg_ops = b->bdg_saved_ops = *ops;
		b->private_data = b->ht;
		b->bdg_flags = 0;
		NM_BNS_GET(b);
	}
	return b;
}


int
netmap_bdg_free(struct nm_bridge *b)
{
	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
		return EBUSY;
	}

	ND("marking bridge %s as free", b->bdg_basename);
	nm_os_free(b->ht);
	memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
	memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
	b->bdg_flags = 0;
	NM_BNS_PUT(b);
	return 0;
}

/* Called by external kernel modules (e.g., Openvswitch)
 * to modify the private data previously given to regops().
 * 'name' may be just bridge's name (including ':' if it
 * is not just NM_BDG_NAME).
 * Called without NMG_LOCK.
 */
int
netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
	void *callback_data, void *auth_token)
{
	void *private_data = NULL;
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = EINVAL;
		goto unlock_update_priv;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_update_priv;
	}
	BDG_WLOCK(b);
	private_data = callback(b->private_data, callback_data, &error);
	b->private_data = private_data;
	BDG_WUNLOCK(b);

unlock_update_priv:
	NMG_UNLOCK();
	return error;
}
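
/*
 * Illustrative caller sketch (hypothetical external-module code, not
 * part of this file).  The callback runs under BDG_WLOCK() and returns
 * the private data pointer to install:
 *
 *	static void *
 *	my_update_cb(void *current, void *arg, int *error)
 *	{
 *		struct my_state *s = current;
 *
 *		s->generation = *(uint32_t *)arg;	// mutate in place
 *		return s;				// keep the same pointer
 *	}
 *
 *	uint32_t gen = 42;
 *	err = netmap_bdg_update_private_data("vale0:", my_update_cb,
 *			&gen, my_auth_token);
 */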


/* remove from bridge b the ports in slots hw and sw
 * (sw can be -1 if not needed)
 */
void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint32_t *tmp = b->tmp_bdg_port_index;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	*/

	if (netmap_debug & NM_DEBUG_BDG)
		nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--;			/* point to last active port */
			tmp[i] = tmp[lim];	/* swap with i */
			tmp[lim] = hw;		/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	if (b->bdg_ops.dtor)
		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	netmap_bdg_free(b);
}


/* nm_bdg_ctl callback for VALE ports */
int
netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
	struct nm_bridge *b = vpna->na_bdg;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		return 0; /* nothing to do */
	}
	if (b) {
		netmap_set_all_rings(na, 0 /* disable */);
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
		vpna->na_bdg = NULL;
		netmap_set_all_rings(na, 1 /* enable */);
	}
	/* we took a reference just for the attach, drop it now */
	netmap_adapter_put(na);
	return 0;
}

int
netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
		struct nm_bridge *b)
{
	return NM_NEED_BWRAP;
}

/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
{
	char *nr_name = hdr->nr_name;
	const char *ifname;
	struct ifnet *ifp = NULL;
	int error = 0;
	struct netmap_vp_adapter *vpna, *hostna = NULL;
	struct nm_bridge *b;
	uint32_t i, j;
	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
	int needed;

	*na = NULL;     /* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
		return 0;  /* no error, but no VALE prefix */
	}

	b = nm_find_bridge(nr_name, create, ops);
	if (b == NULL) {
		ND("no bridges available for '%s'", nr_name);
		return (create ? ENOMEM : ENXIO);
	}
	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
		panic("x");

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
422 */ 423 424 /* lookup in the local list of ports */ 425 for (j = 0; j < b->bdg_active_ports; j++) { 426 i = b->bdg_port_index[j]; 427 vpna = b->bdg_ports[i]; 428 ND("checking %s", vpna->up.name); 429 if (!strcmp(vpna->up.name, nr_name)) { 430 netmap_adapter_get(&vpna->up); 431 ND("found existing if %s refs %d", nr_name) 432 *na = &vpna->up; 433 return 0; 434 } 435 } 436 /* not found, should we create it? */ 437 if (!create) 438 return ENXIO; 439 /* yes we should, see if we have space to attach entries */ 440 needed = 2; /* in some cases we only need 1 */ 441 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 442 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports); 443 return ENOMEM; 444 } 445 /* record the next two ports available, but do not allocate yet */ 446 cand = b->bdg_port_index[b->bdg_active_ports]; 447 cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 448 ND("+++ bridge %s port %s used %d avail %d %d", 449 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 450 451 /* 452 * try see if there is a matching NIC with this name 453 * (after the bridge's name) 454 */ 455 ifname = nr_name + b->bdg_namelen + 1; 456 ifp = ifunit_ref(ifname); 457 if (!ifp) { 458 /* Create an ephemeral virtual port. 459 * This block contains all the ephemeral-specific logic. 460 */ 461 462 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { 463 error = EINVAL; 464 goto out; 465 } 466 467 /* bdg_netmap_attach creates a struct netmap_adapter */ 468 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna); 469 if (error) { 470 if (netmap_debug & NM_DEBUG_BDG) 471 nm_prerr("error %d", error); 472 goto out; 473 } 474 /* shortcut - we can skip get_hw_na(), 475 * ownership check and nm_bdg_attach() 476 */ 477 478 } else { 479 struct netmap_adapter *hw; 480 481 /* the vale:nic syntax is only valid for some commands */ 482 switch (hdr->nr_reqtype) { 483 case NETMAP_REQ_VALE_ATTACH: 484 case NETMAP_REQ_VALE_DETACH: 485 case NETMAP_REQ_VALE_POLLING_ENABLE: 486 case NETMAP_REQ_VALE_POLLING_DISABLE: 487 break; /* ok */ 488 default: 489 error = EINVAL; 490 goto out; 491 } 492 493 error = netmap_get_hw_na(ifp, nmd, &hw); 494 if (error || hw == NULL) 495 goto out; 496 497 /* host adapter might not be created */ 498 error = hw->nm_bdg_attach(nr_name, hw, b); 499 if (error == NM_NEED_BWRAP) { 500 error = b->bdg_ops.bwrap_attach(nr_name, hw); 501 } 502 if (error) 503 goto out; 504 vpna = hw->na_vp; 505 hostna = hw->na_hostvp; 506 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { 507 /* Check if we need to skip the host rings. 

int
nm_is_bwrap(struct netmap_adapter *na)
{
	return na->nm_register == netmap_bwrap_reg;
}


struct nm_bdg_polling_state;
struct nm_bdg_kthread {
	struct nm_kctx *nmk;
	u_int qfirst;
	u_int qlast;
	struct nm_bdg_polling_state *bps;
};

struct nm_bdg_polling_state {
	bool configured;
	bool stopped;
	struct netmap_bwrap_adapter *bna;
	uint32_t mode;
	u_int qfirst;
	u_int qlast;
	u_int cpu_from;
	u_int ncpus;
	struct nm_bdg_kthread *kthreads;
};

static void
netmap_bwrap_polling(void *data)
{
	struct nm_bdg_kthread *nbk = data;
	struct netmap_bwrap_adapter *bna;
	u_int qfirst, qlast, i;
	struct netmap_kring **kring0, *kring;

	if (!nbk)
		return;
	qfirst = nbk->qfirst;
	qlast = nbk->qlast;
	bna = nbk->bps->bna;
	kring0 = NMR(bna->hwna, NR_RX);

	for (i = qfirst; i < qlast; i++) {
		kring = kring0[i];
		kring->nm_notify(kring, 0);
	}
}

static int
nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
{
	struct nm_kctx_cfg kcfg;
	int i, j;

	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
	if (bps->kthreads == NULL)
		return ENOMEM;

	bzero(&kcfg, sizeof(kcfg));
	kcfg.worker_fn = netmap_bwrap_polling;
	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		int all = (bps->ncpus == 1 &&
			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
		int affinity = bps->cpu_from + i;

		t->bps = bps;
		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
		t->qlast = all ? bps->qlast : t->qfirst + 1;
		if (netmap_verbose)
			nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
				t->qlast);

		kcfg.type = i;
		kcfg.worker_private = t;
		t->nmk = nm_os_kctx_create(&kcfg, NULL);
		if (t->nmk == NULL) {
			goto cleanup;
		}
		nm_os_kctx_worker_setaff(t->nmk, affinity);
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_destroy(t->nmk);
	}
	nm_os_free(bps->kthreads);
	return EFAULT;
}

/* A variant of ptnetmap_start_kthreads() */
static int
nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
{
	int error, i, j;

	if (!bps) {
		nm_prerr("polling is not configured");
		return EFAULT;
	}
	bps->stopped = false;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		error = nm_os_kctx_worker_start(t->nmk);
		if (error) {
			nm_prerr("error in nm_kthread_start(): %d", error);
			goto cleanup;
		}
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_worker_stop(t->nmk);
	}
	bps->stopped = true;
	return error;
}

static void
nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
{
	int i;

	if (!bps)
		return;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		nm_os_kctx_worker_stop(t->nmk);
		nm_os_kctx_destroy(t->nmk);
	}
	bps->stopped = true;
}

static int
get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
		struct nm_bdg_polling_state *bps)
{
	unsigned int avail_cpus, core_from;
	unsigned int qfirst, qlast;
	uint32_t i = req->nr_first_cpu_id;
	uint32_t req_cpus = req->nr_num_polling_cpus;

	avail_cpus = nm_os_ncpus();

	if (req_cpus == 0) {
		nm_prerr("req_cpus must be > 0");
		return EINVAL;
	} else if (req_cpus >= avail_cpus) {
		nm_prerr("Cannot use all the CPUs in the system");
		return EINVAL;
	}

	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
		/* Use a separate core for each ring. If nr_num_polling_cpus > 1,
		 * multiple consecutive rings are polled.
		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
		 * rings 2 and 3 are polled by cores 2 and 3, respectively.
		 */
		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
			nm_prerr("Rings %u-%u not in range (have %d rings)",
				i, i + req_cpus, nma_get_nrings(na, NR_RX));
			return EINVAL;
		}
		qfirst = i;
		qlast = qfirst + req_cpus;
		core_from = qfirst;
	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
		/* Poll all the rings using a core specified by nr_first_cpu_id.
		 * The number of cores must be 1.
		 */
		if (req_cpus != 1) {
			nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
				"(was %d)", req_cpus);
			return EINVAL;
		}
		qfirst = 0;
		qlast = nma_get_nrings(na, NR_RX);
		core_from = i;
	} else {
		nm_prerr("Invalid polling mode");
		return EINVAL;
	}

	bps->mode = req->nr_mode;
	bps->qfirst = qfirst;
	bps->qlast = qlast;
	bps->cpu_from = core_from;
	bps->ncpus = req_cpus;
	nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
		"MULTI" : "SINGLE",
		qfirst, qlast, core_from, req_cpus);
740 "MULTI" : "SINGLE", 741 qfirst, qlast, core_from, req_cpus); 742 return 0; 743 } 744 745 static int 746 nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) 747 { 748 struct nm_bdg_polling_state *bps; 749 struct netmap_bwrap_adapter *bna; 750 int error; 751 752 bna = (struct netmap_bwrap_adapter *)na; 753 if (bna->na_polling_state) { 754 nm_prerr("ERROR adapter already in polling mode"); 755 return EFAULT; 756 } 757 758 bps = nm_os_malloc(sizeof(*bps)); 759 if (!bps) 760 return ENOMEM; 761 bps->configured = false; 762 bps->stopped = true; 763 764 if (get_polling_cfg(req, na, bps)) { 765 nm_os_free(bps); 766 return EINVAL; 767 } 768 769 if (nm_bdg_create_kthreads(bps)) { 770 nm_os_free(bps); 771 return EFAULT; 772 } 773 774 bps->configured = true; 775 bna->na_polling_state = bps; 776 bps->bna = bna; 777 778 /* disable interrupts if possible */ 779 nma_intr_enable(bna->hwna, 0); 780 /* start kthread now */ 781 error = nm_bdg_polling_start_kthreads(bps); 782 if (error) { 783 nm_prerr("ERROR nm_bdg_polling_start_kthread()"); 784 nm_os_free(bps->kthreads); 785 nm_os_free(bps); 786 bna->na_polling_state = NULL; 787 nma_intr_enable(bna->hwna, 1); 788 } 789 return error; 790 } 791 792 static int 793 nm_bdg_ctl_polling_stop(struct netmap_adapter *na) 794 { 795 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; 796 struct nm_bdg_polling_state *bps; 797 798 if (!bna->na_polling_state) { 799 nm_prerr("ERROR adapter is not in polling mode"); 800 return EFAULT; 801 } 802 bps = bna->na_polling_state; 803 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); 804 bps->configured = false; 805 nm_os_free(bps); 806 bna->na_polling_state = NULL; 807 /* reenable interrupts */ 808 nma_intr_enable(bna->hwna, 1); 809 return 0; 810 } 811 812 int 813 nm_bdg_polling(struct nmreq_header *hdr) 814 { 815 struct nmreq_vale_polling *req = 816 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body; 817 struct netmap_adapter *na = NULL; 818 int error = 0; 819 820 NMG_LOCK(); 821 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0); 822 if (na && !error) { 823 if (!nm_is_bwrap(na)) { 824 error = EOPNOTSUPP; 825 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { 826 error = nm_bdg_ctl_polling_start(req, na); 827 if (!error) 828 netmap_adapter_get(na); 829 } else { 830 error = nm_bdg_ctl_polling_stop(na); 831 if (!error) 832 netmap_adapter_put(na); 833 } 834 netmap_adapter_put(na); 835 } else if (!na && !error) { 836 /* Not VALE port. */ 837 error = EINVAL; 838 } 839 NMG_UNLOCK(); 840 841 return error; 842 } 843 844 /* Called by external kernel modules (e.g., Openvswitch). 845 * to set configure/lookup/dtor functions of a VALE instance. 846 * Register callbacks to the given bridge. 'name' may be just 847 * bridge's name (including ':' if it is not just NM_BDG_NAME). 848 * 849 * Called without NMG_LOCK. 

/* Called by external kernel modules (e.g., Openvswitch)
 * to set configure/lookup/dtor functions of a VALE instance.
 * Register callbacks to the given bridge. 'name' may be just
 * bridge's name (including ':' if it is not just NM_BDG_NAME).
 *
 * Called without NMG_LOCK.
 */
int
netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
{
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = ENXIO;
		goto unlock_regops;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_regops;
	}

	BDG_WLOCK(b);
	if (!bdg_ops) {
		/* resetting the bridge */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		b->bdg_ops = b->bdg_saved_ops;
		b->private_data = b->ht;
	} else {
		/* modifying the bridge */
		b->private_data = private_data;
#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
		nm_bdg_override(lookup);
		nm_bdg_override(config);
		nm_bdg_override(dtor);
		nm_bdg_override(vp_create);
		nm_bdg_override(bwrap_attach);
#undef nm_bdg_override
	}
	BDG_WUNLOCK(b);

unlock_regops:
	NMG_UNLOCK();
	return error;
}
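
/*
 * Illustrative registration sketch (hypothetical external-module code,
 * not part of this file): only the non-NULL callbacks override the
 * defaults, and passing a NULL bdg_ops restores the saved ones.
 *
 *	static struct netmap_bdg_ops my_ops = {
 *		.lookup = my_lookup,	// choose the destination port
 *		.config = my_config,	// handle requests from netmap_bdg_config()
 *		.dtor   = my_dtor,	// per-port cleanup on detach
 *	};
 *	...
 *	err = netmap_bdg_regops("vale0:", &my_ops, my_state, my_auth_token);
 *	...
 *	err = netmap_bdg_regops("vale0:", NULL, NULL, my_auth_token); // restore
 */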


int
netmap_bdg_config(struct nm_ifreq *nr)
{
	struct nm_bridge *b;
	int error = EINVAL;

	NMG_LOCK();
	b = nm_find_bridge(nr->nifr_name, 0, NULL);
	if (!b) {
		NMG_UNLOCK();
		return error;
	}
	NMG_UNLOCK();
	/* Don't call config() with NMG_LOCK() held */
	BDG_RLOCK(b);
	if (b->bdg_ops.config != NULL)
		error = b->bdg_ops.config(nr);
	BDG_RUNLOCK(b);
	return error;
}


/* nm_register callback for VALE ports */
int
netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	enum txrx t;
	int i;

	/* persistent ports may be put in netmap mode
	 * before being attached to a bridge
	 */
	if (vpna->na_bdg)
		BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		for_rx_tx(t) {
			for (i = 0; i < netmap_real_rings(na, t); i++) {
				struct netmap_kring *kring = NMR(na, t)[i];

				if (nm_kring_pending_on(kring))
					kring->nr_mode = NKR_NETMAP_ON;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
		/* XXX on FreeBSD, persistent VALE ports should also
		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
		 */
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		for_rx_tx(t) {
			for (i = 0; i < netmap_real_rings(na, t); i++) {
				struct netmap_kring *kring = NMR(na, t)[i];

				if (nm_kring_pending_off(kring))
					kring->nr_mode = NKR_NETMAP_OFF;
			}
		}
	}
	if (vpna->na_bdg)
		BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/* rxsync code used by the VALE ports' nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	int n;

	if (head > lim) {
		nm_prerr("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = NMB(na, slot);

			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
				nm_prerr("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	n = 0;
done:
	return n;
}

/*
 * nm_rxsync callback for VALE ports
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
int
netmap_vp_rxsync(struct netmap_kring *kring, int flags)
{
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync_locked(kring, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}

int
netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
		struct netmap_bdg_ops *ops)
{
	return ops->bwrap_attach(nr_name, hwna);
}


/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 * - callbacks coming from the netmap module are intercepted by
 *   passing around the netmap_bwrap_adapter instead of the hwna
 *
 * - callbacks coming from outside of the netmap module only know
 *   about the hwna. This, however, only happens in interrupt
 *   handlers, where only the hwna->nm_notify callback is called.
 *   What the bwrap does is to overwrite the hwna->nm_notify callback
 *   with its own netmap_bwrap_intr_notify.
 *   XXX This assumes that the hwna->nm_notify callback was the
 *   standard netmap_notify(), as it is the case for nic adapters.
 *   Any additional action performed by hwna->nm_notify will not be
 *   performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */
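
/*
 * Sketch of the notify interception described above (the actual code is
 * in netmap_bwrap_reg() below; shown here only to summarize the idea):
 *
 *	// on register:
 *	hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
 *	hwna->rx_rings[i]->nm_notify   = netmap_bwrap_intr_notify;
 *	// on unregister:
 *	hwna->rx_rings[i]->nm_notify   = hwna->rx_rings[i]->save_notify;
 *	hwna->rx_rings[i]->save_notify = NULL;
 */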


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;

	if (bna->host.up.nm_mem)
		netmap_mem_put(bna->host.up.nm_mem);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			    (bh ? bna->host.bdg_port : -1));
	}

	ND("na %p", na);
	na->ifp = NULL;
	bna->host.up.ifp = NULL;
	hwna->na_vp = bna->saved_na_vp;
	hwna->na_hostvp = NULL;
	hwna->na_private = NULL;
	hwna->na_flags &= ~NAF_BUSY;
	netmap_adapter_put(hwna);
}


/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only parts of the ring that are significant are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an
 * hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_kring *bkring;
	struct netmap_vp_adapter *vpna = &bna->up;
	u_int ring_nr = kring->ring_id;
	int ret = NM_IRQ_COMPLETED;
	int error;

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s %s 0x%x", na->name, kring->name, flags);

	bkring = vpna->up.tx_rings[ring_nr];

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
		return EIO;
	}

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s head %d cur %d tail %d", na->name,
			kring->rhead, kring->rcur, kring->rtail);

	/* simulate a user wakeup on the rx ring
	 * fetch packets that have arrived.
	 */
	error = kring->nm_sync(kring, 0);
	if (error)
		goto put_out;
	if (kring->nr_hwcur == kring->nr_hwtail) {
		if (netmap_verbose)
			nm_prerr("how strange, interrupt with no packets on %s",
				na->name);
		goto put_out;
	}

	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
	 * to push all packets out.
	 */
	bkring->rhead = bkring->rcur = kring->nr_hwtail;

	bkring->nm_sync(bkring, flags);

	/* mark all buffers as released on this ring */
	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
	/* another call to actually release the buffers */
	error = kring->nm_sync(kring, 0);

	/* The second rxsync may have further advanced hwtail. If this happens,
	 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
	if (kring->rcur != kring->nr_hwtail) {
		ret = NM_IRQ_RESCHED;
	}
put_out:
	nm_kr_put(kring);

	return error ? error : ret;
}


/* nm_register callback for bwrap */
int
netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error, i;
	enum txrx t;

	ND("%s %s", na->name, onoff ? "on" : "off");

	if (onoff) {
		/* netmap_do_regif has been called on the bwrap na.
		 * We need to pass the information about the
		 * memory allocator down to the hwna before
		 * putting it in netmap mode
		 */
		hwna->na_lut = na->na_lut;

		if (hostna->na_bdg) {
			/* if the host rings have been attached to switch,
			 * we need to copy the memory allocator information
			 * in the hostna also
			 */
			hostna->up.na_lut = na->na_lut;
		}
	}

	/* pass down the pending ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
				NMR(na, t)[i]->nr_pending_mode;
		}
	}

	/* forward the request to the hwna */
	error = hwna->nm_register(hwna, onoff);
	if (error)
		return error;

	/* copy up the current ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
			NMR(na, t)[i]->nr_mode = kring->nr_mode;
		}
	}

	/* impersonate a netmap_vp_adapter */
	netmap_vp_reg(na, onoff);
	if (hostna->na_bdg)
		netmap_vp_reg(&hostna->up, onoff);

	if (onoff) {
		u_int i;
		/* intercept the hwna nm_notify callback on the hw rings */
		for (i = 0; i < hwna->num_rx_rings; i++) {
			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
		}
		i = hwna->num_rx_rings; /* for safety */
		/* save the host ring notify unconditionally */
		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->save_notify =
				hwna->rx_rings[i]->nm_notify;
			if (hostna->na_bdg) {
				/* also intercept the host ring notify */
				hwna->rx_rings[i]->nm_notify =
					netmap_bwrap_intr_notify;
				na->tx_rings[i]->nm_sync = na->nm_txsync;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		u_int i;

		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;

		/* reset all notify callbacks (including host ring) */
		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->nm_notify =
				hwna->rx_rings[i]->save_notify;
			hwna->rx_rings[i]->save_notify = NULL;
		}
		hwna->na_lut.lut = NULL;
		hwna->na_lut.plut = NULL;
		hwna->na_lut.objtotal = 0;
		hwna->na_lut.objsize = 0;

		/* pass ownership of the netmap rings to the hwna */
		for_rx_tx(t) {
			for (i = 0; i < netmap_all_rings(na, t); i++) {
				NMR(na, t)[i]->ring = NULL;
			}
		}
		/* reset the number of host rings to default */
		for_rx_tx(t) {
			nma_set_host_nrings(hwna, t, 1);
		}
	}

	return 0;
}

/* nm_config callback for bwrap */
static int
netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	int error;

	/* Forward the request to the hwna. It may happen that nobody
	 * registered hwna yet, so netmap_mem_get_lut() may have not
	 * been called yet.
	 */
	error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
	if (error)
		return error;
	netmap_update_config(hwna);
	/* swap the results and propagate */
	info->num_tx_rings = hwna->num_rx_rings;
	info->num_tx_descs = hwna->num_rx_desc;
	info->num_rx_rings = hwna->num_tx_rings;
	info->num_rx_descs = hwna->num_tx_desc;
	info->rx_buf_maxsize = hwna->rx_buf_maxsize;

	return 0;
}


/* nm_krings_create callback for bwrap */
int
netmap_bwrap_krings_create_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int i, error = 0;
	enum txrx t;

	/* also create the hwna krings */
	error = hwna->nm_krings_create(hwna);
	if (error) {
		return error;
	}

	/* increment the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users++;
		}
	}

	/* now create the actual rings */
	error = netmap_mem_rings_create(hwna);
	if (error) {
		goto err_dec_users;
	}

	/* cross-link the netmap rings
	 * The original number of rings comes from hwna,
	 * rx rings on one side equals tx rings on the other.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
		}
	}

	if (na->na_flags & NAF_HOST_RINGS) {
		/* the hostna rings are the host rings of the bwrap.
		 * The corresponding krings must point back to the
		 * hostna
		 */
		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
				NMR(hostna, t)[i]->na = hostna;
			}
		}
	}

	return 0;

err_dec_users:
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}
	hwna->nm_krings_delete(hwna);
	return error;
}


void
netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	enum txrx t;
	int i;

	ND("%s", na->name);

	/* decrement the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}

	/* delete any netmap rings that are no longer needed */
	netmap_mem_rings_delete(hwna);
	hwna->nm_krings_delete(hwna);
}


/* notify method for the bridge-->hwna direction */
int
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *hwna = bna->hwna;
	u_int ring_n = kring->ring_id;
	u_int lim = kring->nkr_num_slots - 1;
	struct netmap_kring *hw_kring;
	int error;

	ND("%s: na %s hwna %s",
		(kring ? kring->name : "NULL!"),
		(na ? na->name : "NULL!"),
		(hwna ? hwna->name : "NULL!"));
	hw_kring = hwna->tx_rings[ring_n];

	if (nm_kr_tryget(hw_kring, 0, NULL)) {
		return ENXIO;
	}

	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(kring, flags);
	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
	error = hw_kring->nm_sync(hw_kring, flags);
	if (error)
		goto put_out;

	/* third step: now we are back on the rx ring */
	/* claim ownership on all hw owned bufs */
	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */

	/* fourth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(kring, flags);
	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
	nm_kr_put(hw_kring);

	return error ? error : NM_IRQ_COMPLETED;
}


/* nm_bdg_ctl callback for the bwrap.
 * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
 * On attach, it needs to provide a fake netmap_priv_d structure and
 * perform a netmap_do_regif() on the bwrap. This will put both the
 * bwrap and the hwna in netmap mode, with the netmap rings shared
 * and cross linked. Moreover, it will start intercepting interrupts
 * directed to hwna.
 */
static int
netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	int error = 0;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		struct nmreq_vale_attach *req =
			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
		if (req->reg.nr_ringid != 0 ||
			(req->reg.nr_mode != NR_REG_ALL_NIC &&
			 req->reg.nr_mode != NR_REG_NIC_SW)) {
			/* We only support attaching all the NIC rings
			 * and/or the host stack.
			 */
			return EINVAL;
		}
		if (NETMAP_OWNED_BY_ANY(na)) {
			return EBUSY;
		}
		if (bna->na_kpriv) {
			/* nothing to do */
			return 0;
		}
		npriv = netmap_priv_new();
		if (npriv == NULL)
			return ENOMEM;
		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
					req->reg.nr_ringid, req->reg.nr_flags);
		if (error) {
			netmap_priv_delete(npriv);
			return error;
		}
		bna->na_kpriv = npriv;
		na->na_flags |= NAF_BUSY;
	} else {
		if (na->active_fds == 0) /* not registered */
			return EINVAL;
		netmap_priv_delete(bna->na_kpriv);
		bna->na_kpriv = NULL;
		na->na_flags &= ~NAF_BUSY;
	}

	return error;
}

/* attach a bridge wrapper to the 'real' device */
int
netmap_bwrap_attach_common(struct netmap_adapter *na,
		struct netmap_adapter *hwna)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *hostna = NULL;
	int error = 0;
	enum txrx t;

	/* make sure the NIC is not already in use */
	if (NETMAP_OWNED_BY_ANY(hwna)) {
		nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
		return EBUSY;
	}

	bna = (struct netmap_bwrap_adapter *)na;
	/* make bwrap ifp point to the real ifp */
	na->ifp = hwna->ifp;
	if_ref(na->ifp);
	na->na_private = bna;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
	}
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_config = netmap_bwrap_config;
	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
	na->pdev = hwna->pdev;
	na->nm_mem = netmap_mem_get(hwna->nm_mem);
	na->virt_hdr_len = hwna->virt_hdr_len;
	na->rx_buf_maxsize = hwna->rx_buf_maxsize;

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */
	bna->saved_na_vp = hwna->na_vp;
	hwna->na_vp = &bna->up;
	bna->up.up.na_vp = &(bna->up);

	if (hwna->na_flags & NAF_HOST_RINGS) {
		if (hwna->na_flags & NAF_SW_ONLY)
			na->na_flags |= NAF_SW_ONLY;
		na->na_flags |= NAF_HOST_RINGS;
		hostna = &bna->host.up;

		/* limit the number of host rings to that of hw */
		nm_bound_var(&hostna->num_tx_rings, 1, 1,
				nma_get_nrings(hwna, NR_TX), NULL);
		nm_bound_var(&hostna->num_rx_rings, 1, 1,
				nma_get_nrings(hwna, NR_RX), NULL);

		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
		hostna->ifp = hwna->ifp;
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t);
			u_int nr = nma_get_nrings(hostna, t);

			nma_set_nrings(hostna, t, nr);
			nma_set_host_nrings(na, t, nr);
			if (nma_get_host_nrings(hwna, t) < nr) {
				nma_set_host_nrings(hwna, t, nr);
			}
			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
		}
		// hostna->nm_txsync = netmap_bwrap_host_txsync;
		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
		hostna->nm_mem = netmap_mem_get(na->nm_mem);
		hostna->na_private = bna;
		hostna->na_vp = &bna->up;
		na->na_hostvp = hwna->na_hostvp =
			hostna->na_hostvp = &bna->host;
		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
	}
	if (hwna->na_flags & NAF_MOREFRAG)
		na->na_flags |= NAF_MOREFRAG;

	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
		na->name, na->ifp->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		goto err_put;
	}
	hwna->na_flags |= NAF_BUSY;
	return 0;

err_put:
	hwna->na_vp = hwna->na_hostvp = NULL;
	netmap_adapter_put(hwna);
	return error;
}

struct nm_bridge *
netmap_init_bridges2(u_int n)
{
	int i;
	struct nm_bridge *b;

	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
	if (b == NULL)
		return NULL;
	for (i = 0; i < n; i++)
		BDG_RWINIT(&b[i]);
	return b;
}

void
netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
	int i;

	if (b == NULL)
		return;

	for (i = 0; i < n; i++)
		BDG_RWDESTROY(&b[i]);
	nm_os_free(b);
}

int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
	return netmap_bns_register();
#else
	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
	if (nm_bridges == NULL)
		return ENOMEM;
	return 0;
#endif
}

void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
	netmap_bns_unregister();
#else
	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
#endif
}