/*
 * Copyright (C) 2013-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an existing
one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h>	/* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>	/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#include <dev/netmap/netmap_bdg.h>

const char*
netmap_bdg_name(struct netmap_vp_adapter *vp)
{
	struct nm_bridge *b = vp->na_bdg;
	if (b == NULL)
		return NULL;
	return b->bdg_basename;
}


#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */


static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||
	       (c == '_');
}

/* Validate the name of a bdg port and return the
 * position of the ":" character. */
static int
nm_bdg_name_validate(const char *name, size_t prefixlen)
{
	int colon_pos = -1;
	int i;

	if (!name || strlen(name) < prefixlen) {
		return -1;
	}

	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
		if (name[i] == ':') {
			colon_pos = i;
			break;
		} else if (!nm_is_id_char(name[i])) {
			return -1;
		}
	}

	if (strlen(name) - colon_pos > IFNAMSIZ) {
		/* interface name too long */
		return -1;
	}

	return colon_pos;
}

/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
struct nm_bridge *
nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
{
	int i, namelen;
	struct nm_bridge *b = NULL, *bridges;
	u_int num_bridges;

	NMG_LOCK_ASSERT();

	netmap_bns_getbridges(&bridges, &num_bridges);

	namelen = nm_bdg_name_validate(name,
			(ops != NULL ? strlen(ops->name) : 0));
	if (namelen < 0) {
		nm_prerr("invalid bridge name %s", name ? name : NULL);
		return NULL;
	}

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < num_bridges; i++) {
		struct nm_bridge *x = bridges + i;

		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			nm_prdis("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == num_bridges && b) { /* name not found, can create entry */
		/* initialize the bridge */
		nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		if (b->ht == NULL) {
			nm_prerr("failed to allocate hash table");
			return NULL;
		}
		strncpy(b->bdg_basename, name, namelen);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->bdg_ops = b->bdg_saved_ops = *ops;
		b->private_data = b->ht;
		b->bdg_flags = 0;
		NM_BNS_GET(b);
	}
	return b;
}


int
netmap_bdg_free(struct nm_bridge *b)
{
	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
		return EBUSY;
	}

	nm_prdis("marking bridge %s as free", b->bdg_basename);
	nm_os_free(b->ht);
	memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
	memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
	b->bdg_flags = 0;
	NM_BNS_PUT(b);
	return 0;
}

/* Called by external kernel modules (e.g., Openvswitch)
 * to modify the private data previously given to regops().
 * 'name' may be just bridge's name (including ':' if it
 * is not just NM_BDG_NAME).
 * Called without NMG_LOCK.
 */
int
netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
	void *callback_data, void *auth_token)
{
	void *private_data = NULL;
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = EINVAL;
		goto unlock_update_priv;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_update_priv;
	}
	BDG_WLOCK(b);
	private_data = callback(b->private_data, callback_data, &error);
	b->private_data = private_data;
	BDG_WUNLOCK(b);

unlock_update_priv:
	NMG_UNLOCK();
	return error;
}



/* remove from bridge b the ports in slots hw and sw
 * (sw can be -1 if not needed)
 */
void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint32_t *tmp = b->tmp_bdg_port_index;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	*/

	if (netmap_debug & NM_DEBUG_BDG)
		nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
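	 * (Working on the copy keeps BDG_WLOCK() held only for the final
	 * copy-back, so the forwarding path is stalled as briefly as
	 * possible.)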
	 */
	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			nm_prdis("detach hw %d at %d", hw, i);
			lim--;	/* point to last active port */
			tmp[i] = tmp[lim];	/* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			nm_prdis("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	if (b->bdg_ops.dtor)
		b->bdg_ops.dtor(b->bdg_ports[s_hw]);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	nm_prdis("now %d active ports", lim);
	netmap_bdg_free(b);
}


/* nm_bdg_ctl callback for VALE ports */
int
netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
	struct nm_bridge *b = vpna->na_bdg;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		return 0; /* nothing to do */
	}
	if (b) {
		netmap_set_all_rings(na, 0 /* disable */);
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
		vpna->na_bdg = NULL;
		netmap_set_all_rings(na, 1 /* enable */);
	}
	/* the reference was taken just for the attach */
	netmap_adapter_put(na);
	return 0;
}

int
netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
		struct nm_bridge *b)
{
	return NM_NEED_BWRAP;
}

/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
{
	char *nr_name = hdr->nr_name;
	const char *ifname;
	if_t ifp = NULL;
	int error = 0;
	struct netmap_vp_adapter *vpna, *hostna = NULL;
	struct nm_bridge *b;
	uint32_t i, j;
	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
	int needed;

	*na = NULL;	/* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
		return 0;	/* no error, but no VALE prefix */
	}

	b = nm_find_bridge(nr_name, create, ops);
	if (b == NULL) {
		nm_prdis("no bridges available for '%s'", nr_name);
		return (create ? ENOMEM : ENXIO);
	}
	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
		panic("x");

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		nm_prdis("checking %s", vpna->up.name);
		if (!strcmp(vpna->up.name, nr_name)) {
			netmap_adapter_get(&vpna->up);
			nm_prdis("found existing if %s refs %d", nr_name);
			*na = &vpna->up;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
		return ENOMEM;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	nm_prdis("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);

	/*
	 * try to see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifname = nr_name + b->bdg_namelen + 1;
	ifp = ifunit_ref(ifname);
	if (!ifp) {
		/* Create an ephemeral virtual port.
		 * This block contains all the ephemeral-specific logic.
		 */

		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
			error = EINVAL;
			goto out;
		}

		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
		if (error) {
			if (netmap_debug & NM_DEBUG_BDG)
				nm_prerr("error %d", error);
			goto out;
		}
		/* shortcut - we can skip get_hw_na(),
		 * ownership check and nm_bdg_attach()
		 */

	} else {
		struct netmap_adapter *hw;

		/* the vale:nic syntax is only valid for some commands */
		switch (hdr->nr_reqtype) {
		case NETMAP_REQ_VALE_ATTACH:
		case NETMAP_REQ_VALE_DETACH:
		case NETMAP_REQ_VALE_POLLING_ENABLE:
		case NETMAP_REQ_VALE_POLLING_DISABLE:
			break; /* ok */
		default:
			error = EINVAL;
			goto out;
		}

		error = netmap_get_hw_na(ifp, nmd, &hw);
		if (error || hw == NULL)
			goto out;

		/* host adapter might not be created */
		error = hw->nm_bdg_attach(nr_name, hw, b);
		if (error == NM_NEED_BWRAP) {
			error = b->bdg_ops.bwrap_attach(nr_name, hw);
		}
		if (error)
			goto out;
		vpna = hw->na_vp;
		hostna = hw->na_hostvp;
		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
			/* Check if we need to skip the host rings. */
			struct nmreq_vale_attach *areq =
				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
				hostna = NULL;
			}
		}
	}

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	nm_prdis("NIC %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (hostna != NULL) {
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		nm_prdis("host %p to bridge port %d", hostna, cand2);
	}
	nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = &vpna->up;
	netmap_adapter_get(*na);

out:
	if (ifp)
		if_rele(ifp);

	return error;
}

/* Process NETMAP_REQ_VALE_ATTACH.
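 * Lookup (or create) the port named in hdr->nr_name and connect it to
 * the bridge selected by the name's VALE prefix.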
 */
int
netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_attach *req =
		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na = NULL;
	struct netmap_mem_d *nmd = NULL;
	struct nm_bridge *b = NULL;
	int error;

	NMG_LOCK();
	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_exit;
	}

	if (req->reg.nr_mem_id) {
		nmd = netmap_mem_find(req->reg.nr_mem_id);
		if (nmd == NULL) {
			error = EINVAL;
			goto unlock_exit;
		}
	}

	/* check for existing one */
	error = netmap_get_vale_na(hdr, &na, nmd, 0);
	if (na) {
		error = EBUSY;
		goto unref_exit;
	}
	error = netmap_get_vale_na(hdr, &na,
		nmd, 1 /* create if not exists */);
	if (error) { /* no device */
		goto unlock_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (NETMAP_OWNED_BY_ANY(na)) {
		error = EBUSY;
		goto unref_exit;
	}

	if (na->nm_bdg_ctl) {
		/* nop for VALE ports. The bwrap needs to put the hwna
		 * in netmap mode (see netmap_bwrap_bdg_ctl)
		 */
		error = na->nm_bdg_ctl(hdr, na);
		if (error)
			goto unref_exit;
		nm_prdis("registered %s to netmap-mode", na->name);
	}
	vpna = (struct netmap_vp_adapter *)na;
	req->port_index = vpna->bdg_port;

	if (nmd)
		netmap_mem_put(nmd);

	NMG_UNLOCK();
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	if (nmd)
		netmap_mem_put(nmd);

	NMG_UNLOCK();
	return error;
}


int
nm_is_bwrap(struct netmap_adapter *na)
{
	return na->nm_register == netmap_bwrap_reg;
}

/* Process NETMAP_REQ_VALE_DETACH.
 */
int
netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token)
{
	int error;

	NMG_LOCK();
	error = netmap_bdg_detach_locked(hdr, auth_token);
	NMG_UNLOCK();
	return error;
}

int
netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	struct nm_bridge *b = NULL;
	int error;

	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto error_exit;
	}

	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto error_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto error_exit;
	} else if (nm_is_bwrap(na) &&
		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
		/* Don't detach a NIC with polling */
		error = EBUSY;
		goto unref_exit;
	}

	vpna = (struct netmap_vp_adapter *)na;
	if (na->na_vp != vpna) {
		/* trying to detach the first attach of a VALE persistent port
		 * attached to 2 bridges
		 */
		error = EBUSY;
		goto unref_exit;
	}
	nmreq_det->port_index = vpna->bdg_port;

	if (na->nm_bdg_ctl) {
		/* remove the port from bridge. The bwrap
		 * also needs to put the hwna in normal mode
		 */
		error = na->nm_bdg_ctl(hdr, na);
	}

unref_exit:
	netmap_adapter_put(na);
error_exit:
	return error;

}


struct nm_bdg_polling_state;
struct nm_bdg_kthread {
	struct nm_kctx *nmk;
	u_int qfirst;
	u_int qlast;
	struct nm_bdg_polling_state *bps;
};

struct nm_bdg_polling_state {
	bool configured;
	bool stopped;
	struct netmap_bwrap_adapter *bna;
	uint32_t mode;
	u_int qfirst;
	u_int qlast;
	u_int cpu_from;
	u_int ncpus;
	struct nm_bdg_kthread *kthreads;
};

static void
netmap_bwrap_polling(void *data)
{
	struct nm_bdg_kthread *nbk = data;
	struct netmap_bwrap_adapter *bna;
	u_int qfirst, qlast, i;
	struct netmap_kring **kring0, *kring;

	if (!nbk)
		return;
	qfirst = nbk->qfirst;
	qlast = nbk->qlast;
	bna = nbk->bps->bna;
	kring0 = NMR(bna->hwna, NR_RX);

	for (i = qfirst; i < qlast; i++) {
		kring = kring0[i];
		kring->nm_notify(kring, 0);
	}
}

static int
nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
{
	struct nm_kctx_cfg kcfg;
	int i, j;

	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
	if (bps->kthreads == NULL)
		return ENOMEM;

	bzero(&kcfg, sizeof(kcfg));
	kcfg.worker_fn = netmap_bwrap_polling;
	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		int all = (bps->ncpus == 1 &&
			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
		int affinity = bps->cpu_from + i;

		t->bps = bps;
		t->qfirst = all ? bps->qfirst /* must be 0 */ : affinity;
		t->qlast = all ? bps->qlast : t->qfirst + 1;
		if (netmap_verbose)
			nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
				t->qlast);

		kcfg.type = i;
		kcfg.worker_private = t;
		t->nmk = nm_os_kctx_create(&kcfg, NULL);
		if (t->nmk == NULL) {
			goto cleanup;
		}
		nm_os_kctx_worker_setaff(t->nmk, affinity);
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_destroy(t->nmk);
	}
	nm_os_free(bps->kthreads);
	return EFAULT;
}

/* A variant of ptnetmap_start_kthreads() */
static int
nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
{
	int error, i, j;

	if (!bps) {
		nm_prerr("polling is not configured");
		return EFAULT;
	}
	bps->stopped = false;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		error = nm_os_kctx_worker_start(t->nmk);
		if (error) {
			nm_prerr("error in nm_kthread_start(): %d", error);
			goto cleanup;
		}
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_worker_stop(t->nmk);
	}
	bps->stopped = true;
	return error;
}

static void
nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
{
	int i;

	if (!bps)
		return;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		nm_os_kctx_worker_stop(t->nmk);
		nm_os_kctx_destroy(t->nmk);
	}
	bps->stopped = true;
}

static int
get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
		struct nm_bdg_polling_state *bps)
{
	unsigned int avail_cpus, core_from;
	unsigned int qfirst, qlast;
	uint32_t i = req->nr_first_cpu_id;
	uint32_t req_cpus = req->nr_num_polling_cpus;

	avail_cpus = nm_os_ncpus();

	if (req_cpus == 0) {
		nm_prerr("req_cpus must be > 0");
		return EINVAL;
	} else if (req_cpus >= avail_cpus) {
		nm_prerr("Cannot use all the CPUs in the system");
		return EINVAL;
	}

	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
		/* Use a separate core for each ring. If nr_num_polling_cpus>1
		 * more consecutive rings are polled.
		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
			nm_prerr("Rings %u-%u not in range (have %d rings)",
				i, i + req_cpus, nma_get_nrings(na, NR_RX));
			return EINVAL;
		}
		qfirst = i;
		qlast = qfirst + req_cpus;
		core_from = qfirst;

	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
		/* Poll all the rings using a core specified by nr_first_cpu_id.
		 * The number of cores must be 1. */
		if (req_cpus != 1) {
			nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
				"(was %d)", req_cpus);
			return EINVAL;
		}
		qfirst = 0;
		qlast = nma_get_nrings(na, NR_RX);
		core_from = i;
	} else {
		nm_prerr("Invalid polling mode");
		return EINVAL;
	}

	bps->mode = req->nr_mode;
	bps->qfirst = qfirst;
	bps->qlast = qlast;
	bps->cpu_from = core_from;
	bps->ncpus = req_cpus;
	nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
		"MULTI" : "SINGLE",
		qfirst, qlast, core_from, req_cpus);
	return 0;
}

static int
nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
{
	struct nm_bdg_polling_state *bps;
	struct netmap_bwrap_adapter *bna;
	int error;

	bna = (struct netmap_bwrap_adapter *)na;
	if (bna->na_polling_state) {
		nm_prerr("ERROR adapter already in polling mode");
		return EFAULT;
	}

	bps = nm_os_malloc(sizeof(*bps));
	if (!bps)
		return ENOMEM;
	bps->configured = false;
	bps->stopped = true;

	if (get_polling_cfg(req, na, bps)) {
		nm_os_free(bps);
		return EINVAL;
	}

	if (nm_bdg_create_kthreads(bps)) {
		nm_os_free(bps);
		return EFAULT;
	}

	bps->configured = true;
	bna->na_polling_state = bps;
	bps->bna = bna;

	/* disable interrupts if possible */
	nma_intr_enable(bna->hwna, 0);
	/* start kthread now */
	error = nm_bdg_polling_start_kthreads(bps);
	if (error) {
		nm_prerr("ERROR nm_bdg_polling_start_kthread()");
		nm_os_free(bps->kthreads);
		nm_os_free(bps);
		bna->na_polling_state = NULL;
		nma_intr_enable(bna->hwna, 1);
	}
	return error;
}

static int
nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
	struct nm_bdg_polling_state *bps;

	if (!bna->na_polling_state) {
		nm_prerr("ERROR adapter is not in polling mode");
		return EFAULT;
	}
	bps = bna->na_polling_state;
	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
	bps->configured = false;
	nm_os_free(bps);
	bna->na_polling_state = NULL;
	/* re-enable interrupts */
	nma_intr_enable(bna->hwna, 1);
	return 0;
}

int
nm_bdg_polling(struct nmreq_header *hdr)
{
	struct nmreq_vale_polling *req =
		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
	struct netmap_adapter *na = NULL;
	int error = 0;

	NMG_LOCK();
	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
	if (na && !error) {
		if (!nm_is_bwrap(na)) {
			error = EOPNOTSUPP;
		} else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
			error = nm_bdg_ctl_polling_start(req, na);
			if (!error)
				netmap_adapter_get(na);
		} else {
			error = nm_bdg_ctl_polling_stop(na);
			if (!error)
				netmap_adapter_put(na);
		}
		netmap_adapter_put(na);
	} else if (!na && !error) {
		/* Not VALE port. */
		error = EINVAL;
	}
	NMG_UNLOCK();

	return error;
}

/* Called by external kernel modules (e.g., Openvswitch)
 * to set the configure/lookup/dtor functions of a VALE instance.
 * Register callbacks to the given bridge. 'name' may be just
 * bridge's name (including ':' if it is not just NM_BDG_NAME).
 *
 * Called without NMG_LOCK.
 */

int
netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
{
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = ENXIO;
		goto unlock_regops;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_regops;
	}

	BDG_WLOCK(b);
	if (!bdg_ops) {
		/* resetting the bridge */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		b->bdg_ops = b->bdg_saved_ops;
		b->private_data = b->ht;
	} else {
		/* modifying the bridge */
		b->private_data = private_data;
#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
		nm_bdg_override(lookup);
		nm_bdg_override(config);
		nm_bdg_override(dtor);
		nm_bdg_override(vp_create);
		nm_bdg_override(bwrap_attach);
#undef nm_bdg_override

	}
	BDG_WUNLOCK(b);

unlock_regops:
	NMG_UNLOCK();
	return error;
}


int
netmap_bdg_config(struct nm_ifreq *nr)
{
	struct nm_bridge *b;
	int error = EINVAL;

	NMG_LOCK();
	b = nm_find_bridge(nr->nifr_name, 0, NULL);
	if (!b) {
		NMG_UNLOCK();
		return error;
	}
	NMG_UNLOCK();
	/* Don't call config() with NMG_LOCK() held */
	BDG_RLOCK(b);
	if (b->bdg_ops.config != NULL)
		error = b->bdg_ops.config(nr);
	BDG_RUNLOCK(b);
	return error;
}


/* nm_register callback for VALE ports */
int
netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;

	/* persistent ports may be put in netmap mode
	 * before being attached to a bridge
	 */
	if (vpna->na_bdg)
		BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		netmap_krings_mode_commit(na, onoff);
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
		/* XXX on FreeBSD, persistent VALE ports should also
		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
		 */
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		netmap_krings_mode_commit(na, onoff);
	}
	if (vpna->na_bdg)
		BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	int n;

	if (head > lim) {
		nm_prerr("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = NMB(na, slot);

			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
				nm_prerr("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	n = 0;
done:
	return n;
}

/*
 * nm_rxsync callback for VALE ports
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
int
netmap_vp_rxsync(struct netmap_kring *kring, int flags)
{
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync_locked(kring, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}

int
netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
		struct netmap_bdg_ops *ops)
{
	return ops->bwrap_attach(nr_name, hwna);
}


/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 * - callbacks coming from the netmap module are intercepted by
 *   passing around the netmap_bwrap_adapter instead of the hwna
 *
 * - callbacks coming from outside of the netmap module only know
 *   about the hwna. This, however, only happens in interrupt
 *   handlers, where only the hwna->nm_notify callback is called.
 *   What the bwrap does is to overwrite the hwna->nm_notify callback
 *   with its own netmap_bwrap_intr_notify.
 *   XXX This assumes that the hwna->nm_notify callback was the
 *   standard netmap_notify(), as it is the case for nic adapters.
 *   Any additional action performed by hwna->nm_notify will not be
 *   performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;

	if (bna->host.up.nm_mem)
		netmap_mem_put(bna->host.up.nm_mem);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			(bh ? bna->host.bdg_port : -1));
	}

	nm_prdis("na %p", na);
	na->ifp = NULL;
	bna->host.up.ifp = NULL;
	hwna->na_vp = bna->saved_na_vp;
	hwna->na_hostvp = NULL;
	hwna->na_private = NULL;
	hwna->na_flags &= ~NAF_BUSY;
	netmap_adapter_put(hwna);

}


/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only part of the ring that is significant is the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an
 * hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_kring *bkring;
	struct netmap_vp_adapter *vpna = &bna->up;
	u_int ring_nr = kring->ring_id;
	int ret = NM_IRQ_COMPLETED;
	int error;

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s %s 0x%x", na->name, kring->name, flags);

	bkring = vpna->up.tx_rings[ring_nr];

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
		return EIO;
	}

	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s head %d cur %d tail %d", na->name,
			kring->rhead, kring->rcur, kring->rtail);

	/* simulate a user wakeup on the rx ring
	 * fetch packets that have arrived.
	 */
	error = kring->nm_sync(kring, 0);
	if (error)
		goto put_out;
	if (kring->nr_hwcur == kring->nr_hwtail) {
		if (netmap_verbose)
			nm_prlim(1, "interrupt with no packets on %s",
				kring->name);
		goto put_out;
	}

	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
	 * to push all packets out.
	 */
	bkring->rhead = bkring->rcur = kring->nr_hwtail;

	bkring->nm_sync(bkring, flags);

	/* mark all buffers as released on this ring */
	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
	/* another call to actually release the buffers */
	error = kring->nm_sync(kring, 0);

	/* The second rxsync may have further advanced hwtail. If this happens,
	 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED.
	 */
	if (kring->rcur != kring->nr_hwtail) {
		ret = NM_IRQ_RESCHED;
	}
put_out:
	nm_kr_put(kring);

	return error ? error : ret;
}


/* nm_register callback for bwrap */
int
netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error, i;
	enum txrx t;

	nm_prdis("%s %s", na->name, onoff ? "on" : "off");

	if (onoff) {
		/* netmap_do_regif has been called on the bwrap na.
		 * We need to pass the information about the
		 * memory allocator down to the hwna before
		 * putting it in netmap mode
		 */
		hwna->na_lut = na->na_lut;

		if (hostna->na_bdg) {
			/* if the host rings have been attached to switch,
			 * we need to copy the memory allocator information
			 * in the hostna also
			 */
			hostna->up.na_lut = na->na_lut;
		}

	}

	/* pass down the pending ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
				NMR(na, t)[i]->nr_pending_mode;
		}
	}

	/* forward the request to the hwna */
	error = hwna->nm_register(hwna, onoff);
	if (error)
		return error;

	/* copy up the current ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
			NMR(na, t)[i]->nr_mode = kring->nr_mode;
		}
	}

	/* impersonate a netmap_vp_adapter */
	netmap_vp_reg(na, onoff);
	if (hostna->na_bdg)
		netmap_vp_reg(&hostna->up, onoff);

	if (onoff) {
		u_int i;
		/* intercept the hwna nm_notify callback on the hw rings */
		for (i = 0; i < hwna->num_rx_rings; i++) {
			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
			hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify;
		}
		i = hwna->num_rx_rings; /* for safety */
		/* save the host ring notify unconditionally */
		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->save_notify =
				hwna->rx_rings[i]->nm_notify;
			if (hostna->na_bdg) {
				/* also intercept the host ring notify */
				hwna->rx_rings[i]->nm_notify =
					netmap_bwrap_intr_notify;
				na->tx_rings[i]->nm_sync = na->nm_txsync;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		u_int i;

		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;

		/* reset all notify callbacks (including host ring) */
		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->nm_notify =
				hwna->rx_rings[i]->save_notify;
			hwna->rx_rings[i]->save_notify = NULL;
		}
		hwna->na_lut.lut = NULL;
		hwna->na_lut.plut = NULL;
		hwna->na_lut.objtotal = 0;
		hwna->na_lut.objsize = 0;

		/* reset the number of host rings to default */
		for_rx_tx(t) {
			nma_set_host_nrings(hwna, t, 1);
		}

	}

	return 0;
}

/* nm_config callback for bwrap */
static int
netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	int error;

	/* cache the lut in the embedded host adapter */
	error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut);
	if (error)
		return error;

	/* Forward the request to the hwna. It may happen that nobody
	 * registered hwna yet, so netmap_mem_get_lut() may not have
	 * been called yet. */
	error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
	if (error)
		return error;
	netmap_update_config(hwna);
	/* swap the results and propagate */
	info->num_tx_rings = hwna->num_rx_rings;
	info->num_tx_descs = hwna->num_rx_desc;
	info->num_rx_rings = hwna->num_tx_rings;
	info->num_rx_descs = hwna->num_tx_desc;
	info->rx_buf_maxsize = hwna->rx_buf_maxsize;

	if (na->na_flags & NAF_HOST_RINGS) {
		struct netmap_adapter *hostna = &bna->host.up;
		enum txrx t;

		/* limit the number of host rings to that of hw */
		if (na->na_flags & NAF_HOST_ALL) {
			hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX);
			hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX);
		} else {
			nm_bound_var(&hostna->num_tx_rings, 1, 1,
				nma_get_nrings(hwna, NR_TX), NULL);
			nm_bound_var(&hostna->num_rx_rings, 1, 1,
				nma_get_nrings(hwna, NR_RX), NULL);
		}
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t);
			u_int nr = nma_get_nrings(hostna, t);

			nma_set_host_nrings(na, t, nr);
			if (nma_get_host_nrings(hwna, t) < nr) {
				nma_set_host_nrings(hwna, t, nr);
			}
			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
		}
	}

	return 0;
}

/* nm_bufcfg callback for bwrap */
static int
netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *hwkring;
	enum txrx r;
	int error;

	/* we need the hw kring that corresponds to the bwrap one:
	 * remember that rx and tx are swapped
	 */
	r = nm_txrx_swap(kring->tx);
	hwkring = NMR(hwna, r)[kring->ring_id];

	/* copy down the offset information, forward the request
	 * and copy up the results
	 */
	hwkring->offset_mask = kring->offset_mask;
	hwkring->offset_max = kring->offset_max;
	hwkring->offset_gap = kring->offset_gap;

	error = hwkring->nm_bufcfg(hwkring, target);
	if (error)
		return error;

	kring->hwbuf_len = hwkring->hwbuf_len;
	kring->buf_align = hwkring->buf_align;

	return 0;
}

/* nm_krings_create callback for bwrap */
int
netmap_bwrap_krings_create_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int i, error = 0;
	enum txrx t;

	/* also create the hwna krings */
	error = hwna->nm_krings_create(hwna);
	if (error) {
		return error;
	}

	/* increment the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users++;
			/* this is to prevent deletion of the rings through
			 * our krings, instead of through the hwna ones */
			NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING;
		}
	}

	/* now create the actual rings */
	error = netmap_mem_rings_create(hwna);
	if (error) {
		goto err_dec_users;
	}

	/* cross-link the netmap rings
	 * The original number of rings comes from hwna,
	 * rx rings on one side equal tx rings on the other.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
		}
	}

	if (na->na_flags & NAF_HOST_RINGS) {
		/* the hostna rings are the host rings of the bwrap.
		 * The corresponding krings must point back to the
		 * hostna
		 */
		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
				NMR(hostna, t)[i]->na = hostna;
			}
		}
	}

	return 0;

err_dec_users:
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
			NMR(na, t)[i]->users--;
		}
	}
	hwna->nm_krings_delete(hwna);
	return error;
}


void
netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	enum txrx t;
	int i;

	nm_prdis("%s", na->name);

	/* decrement the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
			NMR(na, t)[i]->users--;
		}
	}

	/* delete any netmap rings that are no longer needed */
	netmap_mem_rings_delete(hwna);
	hwna->nm_krings_delete(hwna);
}


/* notify method for the bridge-->hwna direction */
int
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *hwna = bna->hwna;
	u_int ring_n = kring->ring_id;
	u_int lim = kring->nkr_num_slots - 1;
	struct netmap_kring *hw_kring;
	int error;

	nm_prdis("%s: na %s hwna %s",
		(kring ? kring->name : "NULL!"),
		(na ? na->name : "NULL!"),
		(hwna ? hwna->name : "NULL!"));
	hw_kring = hwna->tx_rings[ring_n];

	if (nm_kr_tryget(hw_kring, 0, NULL)) {
		return ENXIO;
	}

	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
	error = hw_kring->nm_sync(hw_kring, flags);
	if (error)
		goto put_out;

	/* third step: now we are back on the rx ring */
	/* claim ownership on all hw owned bufs */
	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */

	/* fourth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(kring, flags);
	nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		kring->rhead, kring->rcur, kring->rtail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
	nm_kr_put(hw_kring);

	return error ? error : NM_IRQ_COMPLETED;
}


/* nm_bdg_ctl callback for the bwrap.
 * Called on bridge-attach and detach, as an effect of valectl -[ahd].
 * On attach, it needs to provide a fake netmap_priv_d structure and
 * perform a netmap_do_regif() on the bwrap. This will put both the
 * bwrap and the hwna in netmap mode, with the netmap rings shared
 * and cross linked. Moreover, it will start intercepting interrupts
 * directed to hwna.
 */
static int
netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	int error = 0;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		struct nmreq_vale_attach *req =
			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
		if (req->reg.nr_ringid != 0 ||
			(req->reg.nr_mode != NR_REG_ALL_NIC &&
			 req->reg.nr_mode != NR_REG_NIC_SW)) {
			/* We only support attaching all the NIC rings
			 * and/or the host stack.
			 */
			return EINVAL;
		}
		if (NETMAP_OWNED_BY_ANY(na)) {
			return EBUSY;
		}
		if (bna->na_kpriv) {
			/* nothing to do */
			return 0;
		}
		npriv = netmap_priv_new();
		if (npriv == NULL)
			return ENOMEM;
		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
		error = netmap_do_regif(npriv, na, hdr);
		if (error) {
			netmap_priv_delete(npriv);
			netmap_mem_restore(bna->hwna);
			return error;
		}
		bna->na_kpriv = npriv;
		na->na_flags |= NAF_BUSY;
	} else {
		if (na->active_fds == 0) /* not registered */
			return EINVAL;
		netmap_priv_delete(bna->na_kpriv);
		bna->na_kpriv = NULL;
		na->na_flags &= ~NAF_BUSY;
		netmap_mem_restore(bna->hwna);
	}

	return error;
}

/* attach a bridge wrapper to the 'real' device */
int
netmap_bwrap_attach_common(struct netmap_adapter *na,
		struct netmap_adapter *hwna)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *hostna = NULL;
	int error = 0;
	enum txrx t;

	/* make sure the NIC is not already in use */
	if (NETMAP_OWNED_BY_ANY(hwna)) {
		nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
		return EBUSY;
	}

	bna = (struct netmap_bwrap_adapter *)na;
	/* make bwrap ifp point to the real ifp */
	na->ifp = hwna->ifp;
	if_ref(na->ifp);
	na->na_private = bna;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
	}
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_config = netmap_bwrap_config;
	na->nm_bufcfg = netmap_bwrap_bufcfg;
	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
	na->pdev = hwna->pdev;
	na->nm_mem = netmap_mem_get(hwna->nm_mem);
	na->virt_hdr_len = hwna->virt_hdr_len;
	na->rx_buf_maxsize = hwna->rx_buf_maxsize;

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */
	bna->saved_na_vp = hwna->na_vp;
	hwna->na_vp = &bna->up;
	bna->up.up.na_vp = &(bna->up);

	if (hwna->na_flags & NAF_HOST_RINGS) {
		if (hwna->na_flags & NAF_SW_ONLY)
			na->na_flags |= NAF_SW_ONLY;
		na->na_flags |= NAF_HOST_RINGS;
		hostna = &bna->host.up;

		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
		hostna->ifp = hwna->ifp;
		// hostna->nm_txsync = netmap_bwrap_host_txsync;
		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
		hostna->nm_mem = netmap_mem_get(na->nm_mem);
		hostna->na_private = bna;
		hostna->na_vp = &bna->up;
		na->na_hostvp = hwna->na_hostvp =
			hostna->na_hostvp = &bna->host;
		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
		/* bwrap_config() will determine the number of host rings */
	}
	if (hwna->na_flags & NAF_MOREFRAG)
		na->na_flags |= NAF_MOREFRAG;

	nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
		na->name, if_name(na->ifp),
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		goto err_put;
	}
	hwna->na_flags |= NAF_BUSY;
	return 0;

err_put:
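	/* undo the cross references set above and drop the reference
	 * taken on hwna */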
	hwna->na_vp = hwna->na_hostvp = NULL;
	netmap_adapter_put(hwna);
	return error;

}

struct nm_bridge *
netmap_init_bridges2(u_int n)
{
	int i;
	struct nm_bridge *b;

	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
	if (b == NULL)
		return NULL;
	for (i = 0; i < n; i++)
		BDG_RWINIT(&b[i]);
	return b;
}

void
netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
	int i;

	if (b == NULL)
		return;

	for (i = 0; i < n; i++)
		BDG_RWDESTROY(&b[i]);
	nm_os_free(b);
}

int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
	return netmap_bns_register();
#else
	nm_bridges = netmap_init_bridges2(vale_max_bridges);
	if (nm_bridges == NULL)
		return ENOMEM;
	return 0;
#endif
}

void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
	netmap_bns_unregister();
#else
	netmap_uninit_bridges2(nm_bridges, vale_max_bridges);
#endif
}
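
/*
 * Usage sketch (illustrative only, never compiled in): how an external
 * kernel module could replace the private data of an existing bridge
 * through netmap_bdg_update_private_data().  The callback prototype is
 * inferred from the call site in that function; "vale0:" and the
 * my_update()/my_module_set_private() names are hypothetical, and a
 * NULL auth_token assumes an unprotected bridge.
 */
#if 0
static void *
my_update(void *current_private_data, void *callback_data, int *error)
{
	/* discard the old private data and install the one supplied by
	 * the caller; the return value becomes the new b->private_data */
	(void)current_private_data;
	*error = 0;
	return callback_data;
}

static int
my_module_set_private(void *new_data)
{
	/* called without NMG_LOCK; the bridge must already exist */
	return netmap_bdg_update_private_data("vale0:", my_update,
			new_data, NULL /* auth_token */);
}
#endif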