1 /* 2 * Copyright (C) 2013-2016 Universita` di Pisa 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 28 /* 29 * This module implements the VALE switch for netmap 30 31 --- VALE SWITCH --- 32 33 NMG_LOCK() serializes all modifications to switches and ports. 34 A switch cannot be deleted until all ports are gone. 35 36 For each switch, an SX lock (RWlock on linux) protects 37 deletion of ports. When configuring or deleting a new port, the 38 lock is acquired in exclusive mode (after holding NMG_LOCK). 39 When forwarding, the lock is acquired in shared mode (without NMG_LOCK). 40 The lock is held throughout the entire forwarding cycle, 41 during which the thread may incur in a page fault. 42 Hence it is important that sleepable shared locks are used. 43 44 On the rx ring, the per-port lock is grabbed initially to reserve 45 a number of slot in the ring, then the lock is released, 46 packets are copied from source to destination, and then 47 the lock is acquired again and the receive ring is updated. 48 (A similar thing is done on the tx ring for NIC and host stack 49 ports attached to the switch) 50 51 */ 52 53 /* 54 * OS-specific code that is used only within this file. 55 * Other OS-specific code that must be accessed by drivers 56 * is present in netmap_kern.h 57 */ 58 59 #if defined(__FreeBSD__) 60 #include <sys/cdefs.h> /* prerequisite */ 61 #include <sys/types.h> 62 #include <sys/errno.h> 63 #include <sys/param.h> /* defines used in kernel.h */ 64 #include <sys/kernel.h> /* types used in module initialization */ 65 #include <sys/conf.h> /* cdevsw struct, UID, GID */ 66 #include <sys/sockio.h> 67 #include <sys/socketvar.h> /* struct socket */ 68 #include <sys/malloc.h> 69 #include <sys/poll.h> 70 #include <sys/rwlock.h> 71 #include <sys/socket.h> /* sockaddrs */ 72 #include <sys/selinfo.h> 73 #include <sys/sysctl.h> 74 #include <net/if.h> 75 #include <net/if_var.h> 76 #include <net/bpf.h> /* BIOCIMMEDIATE */ 77 #include <machine/bus.h> /* bus_dmamap_* */ 78 #include <sys/endian.h> 79 #include <sys/refcount.h> 80 #include <sys/smp.h> 81 82 83 #elif defined(linux) 84 85 #include "bsd_glue.h" 86 87 #elif defined(__APPLE__) 88 89 #warning OSX support is only partial 90 #include "osx_glue.h" 91 92 #elif defined(_WIN32) 93 #include "win_glue.h" 94 95 #else 96 97 #error Unsupported platform 98 99 #endif /* unsupported */ 100 101 /* 102 * common headers 103 */ 104 105 #include <net/netmap.h> 106 #include <dev/netmap/netmap_kern.h> 107 #include <dev/netmap/netmap_mem2.h> 108 109 #include <dev/netmap/netmap_bdg.h> 110 111 const char* 112 netmap_bdg_name(struct netmap_vp_adapter *vp) 113 { 114 struct nm_bridge *b = vp->na_bdg; 115 if (b == NULL) 116 return NULL; 117 return b->bdg_basename; 118 } 119 120 121 #ifndef CONFIG_NET_NS 122 /* 123 * XXX in principle nm_bridges could be created dynamically 124 * Right now we have a static array and deletions are protected 125 * by an exclusive lock. 126 */ 127 struct nm_bridge *nm_bridges; 128 #endif /* !CONFIG_NET_NS */ 129 130 131 static int 132 nm_is_id_char(const char c) 133 { 134 return (c >= 'a' && c <= 'z') || 135 (c >= 'A' && c <= 'Z') || 136 (c >= '0' && c <= '9') || 137 (c == '_'); 138 } 139 140 /* Validate the name of a bdg port and return the 141 * position of the ":" character. */ 142 static int 143 nm_bdg_name_validate(const char *name, size_t prefixlen) 144 { 145 int colon_pos = -1; 146 int i; 147 148 if (!name || strlen(name) < prefixlen) { 149 return -1; 150 } 151 152 for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { 153 if (name[i] == ':') { 154 colon_pos = i; 155 break; 156 } else if (!nm_is_id_char(name[i])) { 157 return -1; 158 } 159 } 160 161 if (strlen(name) - colon_pos > IFNAMSIZ) { 162 /* interface name too long */ 163 return -1; 164 } 165 166 return colon_pos; 167 } 168 169 /* 170 * locate a bridge among the existing ones. 171 * MUST BE CALLED WITH NMG_LOCK() 172 * 173 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. 174 * We assume that this is called with a name of at least NM_NAME chars. 175 */ 176 struct nm_bridge * 177 nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops) 178 { 179 int i, namelen; 180 struct nm_bridge *b = NULL, *bridges; 181 u_int num_bridges; 182 183 NMG_LOCK_ASSERT(); 184 185 netmap_bns_getbridges(&bridges, &num_bridges); 186 187 namelen = nm_bdg_name_validate(name, 188 (ops != NULL ? strlen(ops->name) : 0)); 189 if (namelen < 0) { 190 nm_prerr("invalid bridge name %s", name ? name : NULL); 191 return NULL; 192 } 193 194 /* lookup the name, remember empty slot if there is one */ 195 for (i = 0; i < num_bridges; i++) { 196 struct nm_bridge *x = bridges + i; 197 198 if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { 199 if (create && b == NULL) 200 b = x; /* record empty slot */ 201 } else if (x->bdg_namelen != namelen) { 202 continue; 203 } else if (strncmp(name, x->bdg_basename, namelen) == 0) { 204 nm_prdis("found '%.*s' at %d", namelen, name, i); 205 b = x; 206 break; 207 } 208 } 209 if (i == num_bridges && b) { /* name not found, can create entry */ 210 /* initialize the bridge */ 211 nm_prdis("create new bridge %s with ports %d", b->bdg_basename, 212 b->bdg_active_ports); 213 b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); 214 if (b->ht == NULL) { 215 nm_prerr("failed to allocate hash table"); 216 return NULL; 217 } 218 strncpy(b->bdg_basename, name, namelen); 219 b->bdg_namelen = namelen; 220 b->bdg_active_ports = 0; 221 for (i = 0; i < NM_BDG_MAXPORTS; i++) 222 b->bdg_port_index[i] = i; 223 /* set the default function */ 224 b->bdg_ops = b->bdg_saved_ops = *ops; 225 b->private_data = b->ht; 226 b->bdg_flags = 0; 227 NM_BNS_GET(b); 228 } 229 return b; 230 } 231 232 233 int 234 netmap_bdg_free(struct nm_bridge *b) 235 { 236 if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { 237 return EBUSY; 238 } 239 240 nm_prdis("marking bridge %s as free", b->bdg_basename); 241 nm_os_free(b->ht); 242 memset(&b->bdg_ops, 0, sizeof(b->bdg_ops)); 243 memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops)); 244 b->bdg_flags = 0; 245 NM_BNS_PUT(b); 246 return 0; 247 } 248 249 /* Called by external kernel modules (e.g., Openvswitch). 250 * to modify the private data previously given to regops(). 251 * 'name' may be just bridge's name (including ':' if it 252 * is not just NM_BDG_NAME). 253 * Called without NMG_LOCK. 254 */ 255 int 256 netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, 257 void *callback_data, void *auth_token) 258 { 259 void *private_data = NULL; 260 struct nm_bridge *b; 261 int error = 0; 262 263 NMG_LOCK(); 264 b = nm_find_bridge(name, 0 /* don't create */, NULL); 265 if (!b) { 266 error = EINVAL; 267 goto unlock_update_priv; 268 } 269 if (!nm_bdg_valid_auth_token(b, auth_token)) { 270 error = EACCES; 271 goto unlock_update_priv; 272 } 273 BDG_WLOCK(b); 274 private_data = callback(b->private_data, callback_data, &error); 275 b->private_data = private_data; 276 BDG_WUNLOCK(b); 277 278 unlock_update_priv: 279 NMG_UNLOCK(); 280 return error; 281 } 282 283 284 285 /* remove from bridge b the ports in slots hw and sw 286 * (sw can be -1 if not needed) 287 */ 288 void 289 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) 290 { 291 int s_hw = hw, s_sw = sw; 292 int i, lim =b->bdg_active_ports; 293 uint32_t *tmp = b->tmp_bdg_port_index; 294 295 /* 296 New algorithm: 297 make a copy of bdg_port_index; 298 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port 299 in the array of bdg_port_index, replacing them with 300 entries from the bottom of the array; 301 decrement bdg_active_ports; 302 acquire BDG_WLOCK() and copy back the array. 303 */ 304 305 if (netmap_debug & NM_DEBUG_BDG) 306 nm_prinf("detach %d and %d (lim %d)", hw, sw, lim); 307 /* make a copy of the list of active ports, update it, 308 * and then copy back within BDG_WLOCK(). 309 */ 310 memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); 311 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { 312 if (hw >= 0 && tmp[i] == hw) { 313 nm_prdis("detach hw %d at %d", hw, i); 314 lim--; /* point to last active port */ 315 tmp[i] = tmp[lim]; /* swap with i */ 316 tmp[lim] = hw; /* now this is inactive */ 317 hw = -1; 318 } else if (sw >= 0 && tmp[i] == sw) { 319 nm_prdis("detach sw %d at %d", sw, i); 320 lim--; 321 tmp[i] = tmp[lim]; 322 tmp[lim] = sw; 323 sw = -1; 324 } else { 325 i++; 326 } 327 } 328 if (hw >= 0 || sw >= 0) { 329 nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw); 330 } 331 332 BDG_WLOCK(b); 333 if (b->bdg_ops.dtor) 334 b->bdg_ops.dtor(b->bdg_ports[s_hw]); 335 b->bdg_ports[s_hw] = NULL; 336 if (s_sw >= 0) { 337 b->bdg_ports[s_sw] = NULL; 338 } 339 memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); 340 b->bdg_active_ports = lim; 341 BDG_WUNLOCK(b); 342 343 nm_prdis("now %d active ports", lim); 344 netmap_bdg_free(b); 345 } 346 347 348 /* nm_bdg_ctl callback for VALE ports */ 349 int 350 netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) 351 { 352 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; 353 struct nm_bridge *b = vpna->na_bdg; 354 355 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { 356 return 0; /* nothing to do */ 357 } 358 if (b) { 359 netmap_set_all_rings(na, 0 /* disable */); 360 netmap_bdg_detach_common(b, vpna->bdg_port, -1); 361 vpna->na_bdg = NULL; 362 netmap_set_all_rings(na, 1 /* enable */); 363 } 364 /* I have took reference just for attach */ 365 netmap_adapter_put(na); 366 return 0; 367 } 368 369 int 370 netmap_default_bdg_attach(const char *name, struct netmap_adapter *na, 371 struct nm_bridge *b) 372 { 373 return NM_NEED_BWRAP; 374 } 375 376 /* Try to get a reference to a netmap adapter attached to a VALE switch. 377 * If the adapter is found (or is created), this function returns 0, a 378 * non NULL pointer is returned into *na, and the caller holds a 379 * reference to the adapter. 380 * If an adapter is not found, then no reference is grabbed and the 381 * function returns an error code, or 0 if there is just a VALE prefix 382 * mismatch. Therefore the caller holds a reference when 383 * (*na != NULL && return == 0). 384 */ 385 int 386 netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, 387 struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops) 388 { 389 char *nr_name = hdr->nr_name; 390 const char *ifname; 391 if_t ifp = NULL; 392 int error = 0; 393 struct netmap_vp_adapter *vpna, *hostna = NULL; 394 struct nm_bridge *b; 395 uint32_t i, j; 396 uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; 397 int needed; 398 399 *na = NULL; /* default return value */ 400 401 /* first try to see if this is a bridge port. */ 402 NMG_LOCK_ASSERT(); 403 if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) { 404 return 0; /* no error, but no VALE prefix */ 405 } 406 407 b = nm_find_bridge(nr_name, create, ops); 408 if (b == NULL) { 409 nm_prdis("no bridges available for '%s'", nr_name); 410 return (create ? ENOMEM : ENXIO); 411 } 412 if (strlen(nr_name) < b->bdg_namelen) /* impossible */ 413 panic("x"); 414 415 /* Now we are sure that name starts with the bridge's name, 416 * lookup the port in the bridge. We need to scan the entire 417 * list. It is not important to hold a WLOCK on the bridge 418 * during the search because NMG_LOCK already guarantees 419 * that there are no other possible writers. 420 */ 421 422 /* lookup in the local list of ports */ 423 for (j = 0; j < b->bdg_active_ports; j++) { 424 i = b->bdg_port_index[j]; 425 vpna = b->bdg_ports[i]; 426 nm_prdis("checking %s", vpna->up.name); 427 if (!strcmp(vpna->up.name, nr_name)) { 428 netmap_adapter_get(&vpna->up); 429 nm_prdis("found existing if %s refs %d", nr_name) 430 *na = &vpna->up; 431 return 0; 432 } 433 } 434 /* not found, should we create it? */ 435 if (!create) 436 return ENXIO; 437 /* yes we should, see if we have space to attach entries */ 438 needed = 2; /* in some cases we only need 1 */ 439 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { 440 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports); 441 return ENOMEM; 442 } 443 /* record the next two ports available, but do not allocate yet */ 444 cand = b->bdg_port_index[b->bdg_active_ports]; 445 cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; 446 nm_prdis("+++ bridge %s port %s used %d avail %d %d", 447 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); 448 449 /* 450 * try see if there is a matching NIC with this name 451 * (after the bridge's name) 452 */ 453 ifname = nr_name + b->bdg_namelen + 1; 454 ifp = ifunit_ref(ifname); 455 if (!ifp) { 456 /* Create an ephemeral virtual port. 457 * This block contains all the ephemeral-specific logic. 458 */ 459 460 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { 461 error = EINVAL; 462 goto out; 463 } 464 465 /* bdg_netmap_attach creates a struct netmap_adapter */ 466 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna); 467 if (error) { 468 if (netmap_debug & NM_DEBUG_BDG) 469 nm_prerr("error %d", error); 470 goto out; 471 } 472 /* shortcut - we can skip get_hw_na(), 473 * ownership check and nm_bdg_attach() 474 */ 475 476 } else { 477 struct netmap_adapter *hw; 478 479 /* the vale:nic syntax is only valid for some commands */ 480 switch (hdr->nr_reqtype) { 481 case NETMAP_REQ_VALE_ATTACH: 482 case NETMAP_REQ_VALE_DETACH: 483 case NETMAP_REQ_VALE_POLLING_ENABLE: 484 case NETMAP_REQ_VALE_POLLING_DISABLE: 485 break; /* ok */ 486 default: 487 error = EINVAL; 488 goto out; 489 } 490 491 error = netmap_get_hw_na(ifp, nmd, &hw); 492 if (error || hw == NULL) 493 goto out; 494 495 /* host adapter might not be created */ 496 error = hw->nm_bdg_attach(nr_name, hw, b); 497 if (error == NM_NEED_BWRAP) { 498 error = b->bdg_ops.bwrap_attach(nr_name, hw); 499 } 500 if (error) 501 goto out; 502 vpna = hw->na_vp; 503 hostna = hw->na_hostvp; 504 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { 505 /* Check if we need to skip the host rings. */ 506 struct nmreq_vale_attach *areq = 507 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; 508 if (areq->reg.nr_mode != NR_REG_NIC_SW) { 509 hostna = NULL; 510 } 511 } 512 } 513 514 BDG_WLOCK(b); 515 vpna->bdg_port = cand; 516 nm_prdis("NIC %p to bridge port %d", vpna, cand); 517 /* bind the port to the bridge (virtual ports are not active) */ 518 b->bdg_ports[cand] = vpna; 519 vpna->na_bdg = b; 520 b->bdg_active_ports++; 521 if (hostna != NULL) { 522 /* also bind the host stack to the bridge */ 523 b->bdg_ports[cand2] = hostna; 524 hostna->bdg_port = cand2; 525 hostna->na_bdg = b; 526 b->bdg_active_ports++; 527 nm_prdis("host %p to bridge port %d", hostna, cand2); 528 } 529 nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount); 530 BDG_WUNLOCK(b); 531 *na = &vpna->up; 532 netmap_adapter_get(*na); 533 534 out: 535 if (ifp) 536 if_rele(ifp); 537 538 return error; 539 } 540 541 /* Process NETMAP_REQ_VALE_ATTACH. 542 */ 543 int 544 netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token) 545 { 546 struct nmreq_vale_attach *req = 547 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; 548 struct netmap_vp_adapter * vpna; 549 struct netmap_adapter *na = NULL; 550 struct netmap_mem_d *nmd = NULL; 551 struct nm_bridge *b = NULL; 552 int error; 553 554 NMG_LOCK(); 555 /* permission check for modified bridges */ 556 b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); 557 if (b && !nm_bdg_valid_auth_token(b, auth_token)) { 558 error = EACCES; 559 goto unlock_exit; 560 } 561 562 if (req->reg.nr_mem_id) { 563 nmd = netmap_mem_find(req->reg.nr_mem_id); 564 if (nmd == NULL) { 565 error = EINVAL; 566 goto unlock_exit; 567 } 568 } 569 570 /* check for existing one */ 571 error = netmap_get_vale_na(hdr, &na, nmd, 0); 572 if (na) { 573 error = EBUSY; 574 goto unref_exit; 575 } 576 error = netmap_get_vale_na(hdr, &na, 577 nmd, 1 /* create if not exists */); 578 if (error) { /* no device */ 579 goto unlock_exit; 580 } 581 582 if (na == NULL) { /* VALE prefix missing */ 583 error = EINVAL; 584 goto unlock_exit; 585 } 586 587 if (NETMAP_OWNED_BY_ANY(na)) { 588 error = EBUSY; 589 goto unref_exit; 590 } 591 592 if (na->nm_bdg_ctl) { 593 /* nop for VALE ports. The bwrap needs to put the hwna 594 * in netmap mode (see netmap_bwrap_bdg_ctl) 595 */ 596 error = na->nm_bdg_ctl(hdr, na); 597 if (error) 598 goto unref_exit; 599 nm_prdis("registered %s to netmap-mode", na->name); 600 } 601 vpna = (struct netmap_vp_adapter *)na; 602 req->port_index = vpna->bdg_port; 603 604 if (nmd) 605 netmap_mem_put(nmd); 606 607 NMG_UNLOCK(); 608 return 0; 609 610 unref_exit: 611 netmap_adapter_put(na); 612 unlock_exit: 613 if (nmd) 614 netmap_mem_put(nmd); 615 616 NMG_UNLOCK(); 617 return error; 618 } 619 620 621 int 622 nm_is_bwrap(struct netmap_adapter *na) 623 { 624 return na->nm_register == netmap_bwrap_reg; 625 } 626 627 /* Process NETMAP_REQ_VALE_DETACH. 628 */ 629 int 630 netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token) 631 { 632 int error; 633 634 NMG_LOCK(); 635 error = netmap_bdg_detach_locked(hdr, auth_token); 636 NMG_UNLOCK(); 637 return error; 638 } 639 640 int 641 netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token) 642 { 643 struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; 644 struct netmap_vp_adapter *vpna; 645 struct netmap_adapter *na; 646 struct nm_bridge *b = NULL; 647 int error; 648 649 /* permission check for modified bridges */ 650 b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); 651 if (b && !nm_bdg_valid_auth_token(b, auth_token)) { 652 error = EACCES; 653 goto error_exit; 654 } 655 656 error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); 657 if (error) { /* no device, or another bridge or user owns the device */ 658 goto error_exit; 659 } 660 661 if (na == NULL) { /* VALE prefix missing */ 662 error = EINVAL; 663 goto error_exit; 664 } else if (nm_is_bwrap(na) && 665 ((struct netmap_bwrap_adapter *)na)->na_polling_state) { 666 /* Don't detach a NIC with polling */ 667 error = EBUSY; 668 goto unref_exit; 669 } 670 671 vpna = (struct netmap_vp_adapter *)na; 672 if (na->na_vp != vpna) { 673 /* trying to detach first attach of VALE persistent port attached 674 * to 2 bridges 675 */ 676 error = EBUSY; 677 goto unref_exit; 678 } 679 nmreq_det->port_index = vpna->bdg_port; 680 681 if (na->nm_bdg_ctl) { 682 /* remove the port from bridge. The bwrap 683 * also needs to put the hwna in normal mode 684 */ 685 error = na->nm_bdg_ctl(hdr, na); 686 } 687 688 unref_exit: 689 netmap_adapter_put(na); 690 error_exit: 691 return error; 692 693 } 694 695 696 struct nm_bdg_polling_state; 697 struct 698 nm_bdg_kthread { 699 struct nm_kctx *nmk; 700 u_int qfirst; 701 u_int qlast; 702 struct nm_bdg_polling_state *bps; 703 }; 704 705 struct nm_bdg_polling_state { 706 bool configured; 707 bool stopped; 708 struct netmap_bwrap_adapter *bna; 709 uint32_t mode; 710 u_int qfirst; 711 u_int qlast; 712 u_int cpu_from; 713 u_int ncpus; 714 struct nm_bdg_kthread *kthreads; 715 }; 716 717 static void 718 netmap_bwrap_polling(void *data) 719 { 720 struct nm_bdg_kthread *nbk = data; 721 struct netmap_bwrap_adapter *bna; 722 u_int qfirst, qlast, i; 723 struct netmap_kring **kring0, *kring; 724 725 if (!nbk) 726 return; 727 qfirst = nbk->qfirst; 728 qlast = nbk->qlast; 729 bna = nbk->bps->bna; 730 kring0 = NMR(bna->hwna, NR_RX); 731 732 for (i = qfirst; i < qlast; i++) { 733 kring = kring0[i]; 734 kring->nm_notify(kring, 0); 735 } 736 } 737 738 static int 739 nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) 740 { 741 struct nm_kctx_cfg kcfg; 742 int i, j; 743 744 bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); 745 if (bps->kthreads == NULL) 746 return ENOMEM; 747 748 bzero(&kcfg, sizeof(kcfg)); 749 kcfg.worker_fn = netmap_bwrap_polling; 750 for (i = 0; i < bps->ncpus; i++) { 751 struct nm_bdg_kthread *t = bps->kthreads + i; 752 int all = (bps->ncpus == 1 && 753 bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); 754 int affinity = bps->cpu_from + i; 755 756 t->bps = bps; 757 t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; 758 t->qlast = all ? bps->qlast : t->qfirst + 1; 759 if (netmap_verbose) 760 nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, 761 t->qlast); 762 763 kcfg.type = i; 764 kcfg.worker_private = t; 765 t->nmk = nm_os_kctx_create(&kcfg, NULL); 766 if (t->nmk == NULL) { 767 goto cleanup; 768 } 769 nm_os_kctx_worker_setaff(t->nmk, affinity); 770 } 771 return 0; 772 773 cleanup: 774 for (j = 0; j < i; j++) { 775 struct nm_bdg_kthread *t = bps->kthreads + i; 776 nm_os_kctx_destroy(t->nmk); 777 } 778 nm_os_free(bps->kthreads); 779 return EFAULT; 780 } 781 782 /* A variant of ptnetmap_start_kthreads() */ 783 static int 784 nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) 785 { 786 int error, i, j; 787 788 if (!bps) { 789 nm_prerr("polling is not configured"); 790 return EFAULT; 791 } 792 bps->stopped = false; 793 794 for (i = 0; i < bps->ncpus; i++) { 795 struct nm_bdg_kthread *t = bps->kthreads + i; 796 error = nm_os_kctx_worker_start(t->nmk); 797 if (error) { 798 nm_prerr("error in nm_kthread_start(): %d", error); 799 goto cleanup; 800 } 801 } 802 return 0; 803 804 cleanup: 805 for (j = 0; j < i; j++) { 806 struct nm_bdg_kthread *t = bps->kthreads + i; 807 nm_os_kctx_worker_stop(t->nmk); 808 } 809 bps->stopped = true; 810 return error; 811 } 812 813 static void 814 nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) 815 { 816 int i; 817 818 if (!bps) 819 return; 820 821 for (i = 0; i < bps->ncpus; i++) { 822 struct nm_bdg_kthread *t = bps->kthreads + i; 823 nm_os_kctx_worker_stop(t->nmk); 824 nm_os_kctx_destroy(t->nmk); 825 } 826 bps->stopped = true; 827 } 828 829 static int 830 get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, 831 struct nm_bdg_polling_state *bps) 832 { 833 unsigned int avail_cpus, core_from; 834 unsigned int qfirst, qlast; 835 uint32_t i = req->nr_first_cpu_id; 836 uint32_t req_cpus = req->nr_num_polling_cpus; 837 838 avail_cpus = nm_os_ncpus(); 839 840 if (req_cpus == 0) { 841 nm_prerr("req_cpus must be > 0"); 842 return EINVAL; 843 } else if (req_cpus >= avail_cpus) { 844 nm_prerr("Cannot use all the CPUs in the system"); 845 return EINVAL; 846 } 847 848 if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { 849 /* Use a separate core for each ring. If nr_num_polling_cpus>1 850 * more consecutive rings are polled. 851 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, 852 * ring 2 and 3 are polled by core 2 and 3, respectively. */ 853 if (i + req_cpus > nma_get_nrings(na, NR_RX)) { 854 nm_prerr("Rings %u-%u not in range (have %d rings)", 855 i, i + req_cpus, nma_get_nrings(na, NR_RX)); 856 return EINVAL; 857 } 858 qfirst = i; 859 qlast = qfirst + req_cpus; 860 core_from = qfirst; 861 862 } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { 863 /* Poll all the rings using a core specified by nr_first_cpu_id. 864 * the number of cores must be 1. */ 865 if (req_cpus != 1) { 866 nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " 867 "(was %d)", req_cpus); 868 return EINVAL; 869 } 870 qfirst = 0; 871 qlast = nma_get_nrings(na, NR_RX); 872 core_from = i; 873 } else { 874 nm_prerr("Invalid polling mode"); 875 return EINVAL; 876 } 877 878 bps->mode = req->nr_mode; 879 bps->qfirst = qfirst; 880 bps->qlast = qlast; 881 bps->cpu_from = core_from; 882 bps->ncpus = req_cpus; 883 nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u", 884 req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? 885 "MULTI" : "SINGLE", 886 qfirst, qlast, core_from, req_cpus); 887 return 0; 888 } 889 890 static int 891 nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) 892 { 893 struct nm_bdg_polling_state *bps; 894 struct netmap_bwrap_adapter *bna; 895 int error; 896 897 bna = (struct netmap_bwrap_adapter *)na; 898 if (bna->na_polling_state) { 899 nm_prerr("ERROR adapter already in polling mode"); 900 return EFAULT; 901 } 902 903 bps = nm_os_malloc(sizeof(*bps)); 904 if (!bps) 905 return ENOMEM; 906 bps->configured = false; 907 bps->stopped = true; 908 909 if (get_polling_cfg(req, na, bps)) { 910 nm_os_free(bps); 911 return EINVAL; 912 } 913 914 if (nm_bdg_create_kthreads(bps)) { 915 nm_os_free(bps); 916 return EFAULT; 917 } 918 919 bps->configured = true; 920 bna->na_polling_state = bps; 921 bps->bna = bna; 922 923 /* disable interrupts if possible */ 924 nma_intr_enable(bna->hwna, 0); 925 /* start kthread now */ 926 error = nm_bdg_polling_start_kthreads(bps); 927 if (error) { 928 nm_prerr("ERROR nm_bdg_polling_start_kthread()"); 929 nm_os_free(bps->kthreads); 930 nm_os_free(bps); 931 bna->na_polling_state = NULL; 932 nma_intr_enable(bna->hwna, 1); 933 } 934 return error; 935 } 936 937 static int 938 nm_bdg_ctl_polling_stop(struct netmap_adapter *na) 939 { 940 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; 941 struct nm_bdg_polling_state *bps; 942 943 if (!bna->na_polling_state) { 944 nm_prerr("ERROR adapter is not in polling mode"); 945 return EFAULT; 946 } 947 bps = bna->na_polling_state; 948 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); 949 bps->configured = false; 950 nm_os_free(bps); 951 bna->na_polling_state = NULL; 952 /* re-enable interrupts */ 953 nma_intr_enable(bna->hwna, 1); 954 return 0; 955 } 956 957 int 958 nm_bdg_polling(struct nmreq_header *hdr) 959 { 960 struct nmreq_vale_polling *req = 961 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body; 962 struct netmap_adapter *na = NULL; 963 int error = 0; 964 965 NMG_LOCK(); 966 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0); 967 if (na && !error) { 968 if (!nm_is_bwrap(na)) { 969 error = EOPNOTSUPP; 970 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { 971 error = nm_bdg_ctl_polling_start(req, na); 972 if (!error) 973 netmap_adapter_get(na); 974 } else { 975 error = nm_bdg_ctl_polling_stop(na); 976 if (!error) 977 netmap_adapter_put(na); 978 } 979 netmap_adapter_put(na); 980 } else if (!na && !error) { 981 /* Not VALE port. */ 982 error = EINVAL; 983 } 984 NMG_UNLOCK(); 985 986 return error; 987 } 988 989 /* Called by external kernel modules (e.g., Openvswitch). 990 * to set configure/lookup/dtor functions of a VALE instance. 991 * Register callbacks to the given bridge. 'name' may be just 992 * bridge's name (including ':' if it is not just NM_BDG_NAME). 993 * 994 * Called without NMG_LOCK. 995 */ 996 997 int 998 netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) 999 { 1000 struct nm_bridge *b; 1001 int error = 0; 1002 1003 NMG_LOCK(); 1004 b = nm_find_bridge(name, 0 /* don't create */, NULL); 1005 if (!b) { 1006 error = ENXIO; 1007 goto unlock_regops; 1008 } 1009 if (!nm_bdg_valid_auth_token(b, auth_token)) { 1010 error = EACCES; 1011 goto unlock_regops; 1012 } 1013 1014 BDG_WLOCK(b); 1015 if (!bdg_ops) { 1016 /* resetting the bridge */ 1017 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); 1018 b->bdg_ops = b->bdg_saved_ops; 1019 b->private_data = b->ht; 1020 } else { 1021 /* modifying the bridge */ 1022 b->private_data = private_data; 1023 #define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m 1024 nm_bdg_override(lookup); 1025 nm_bdg_override(config); 1026 nm_bdg_override(dtor); 1027 nm_bdg_override(vp_create); 1028 nm_bdg_override(bwrap_attach); 1029 #undef nm_bdg_override 1030 1031 } 1032 BDG_WUNLOCK(b); 1033 1034 unlock_regops: 1035 NMG_UNLOCK(); 1036 return error; 1037 } 1038 1039 1040 int 1041 netmap_bdg_config(struct nm_ifreq *nr) 1042 { 1043 struct nm_bridge *b; 1044 int error = EINVAL; 1045 1046 NMG_LOCK(); 1047 b = nm_find_bridge(nr->nifr_name, 0, NULL); 1048 if (!b) { 1049 NMG_UNLOCK(); 1050 return error; 1051 } 1052 NMG_UNLOCK(); 1053 /* Don't call config() with NMG_LOCK() held */ 1054 BDG_RLOCK(b); 1055 if (b->bdg_ops.config != NULL) 1056 error = b->bdg_ops.config(nr); 1057 BDG_RUNLOCK(b); 1058 return error; 1059 } 1060 1061 1062 /* nm_register callback for VALE ports */ 1063 int 1064 netmap_vp_reg(struct netmap_adapter *na, int onoff) 1065 { 1066 struct netmap_vp_adapter *vpna = 1067 (struct netmap_vp_adapter*)na; 1068 1069 /* persistent ports may be put in netmap mode 1070 * before being attached to a bridge 1071 */ 1072 if (vpna->na_bdg) 1073 BDG_WLOCK(vpna->na_bdg); 1074 if (onoff) { 1075 netmap_krings_mode_commit(na, onoff); 1076 if (na->active_fds == 0) 1077 na->na_flags |= NAF_NETMAP_ON; 1078 /* XXX on FreeBSD, persistent VALE ports should also 1079 * toggle IFCAP_NETMAP in na->ifp (2014-03-16) 1080 */ 1081 } else { 1082 if (na->active_fds == 0) 1083 na->na_flags &= ~NAF_NETMAP_ON; 1084 netmap_krings_mode_commit(na, onoff); 1085 } 1086 if (vpna->na_bdg) 1087 BDG_WUNLOCK(vpna->na_bdg); 1088 return 0; 1089 } 1090 1091 1092 /* rxsync code used by VALE ports nm_rxsync callback and also 1093 * internally by the brwap 1094 */ 1095 static int 1096 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) 1097 { 1098 struct netmap_adapter *na = kring->na; 1099 struct netmap_ring *ring = kring->ring; 1100 u_int nm_i, lim = kring->nkr_num_slots - 1; 1101 u_int head = kring->rhead; 1102 int n; 1103 1104 if (head > lim) { 1105 nm_prerr("ouch dangerous reset!!!"); 1106 n = netmap_ring_reinit(kring); 1107 goto done; 1108 } 1109 1110 /* First part, import newly received packets. */ 1111 /* actually nothing to do here, they are already in the kring */ 1112 1113 /* Second part, skip past packets that userspace has released. */ 1114 nm_i = kring->nr_hwcur; 1115 if (nm_i != head) { 1116 /* consistency check, but nothing really important here */ 1117 for (n = 0; likely(nm_i != head); n++) { 1118 struct netmap_slot *slot = &ring->slot[nm_i]; 1119 void *addr = NMB(na, slot); 1120 1121 if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ 1122 nm_prerr("bad buffer index %d, ignore ?", 1123 slot->buf_idx); 1124 } 1125 slot->flags &= ~NS_BUF_CHANGED; 1126 nm_i = nm_next(nm_i, lim); 1127 } 1128 kring->nr_hwcur = head; 1129 } 1130 1131 n = 0; 1132 done: 1133 return n; 1134 } 1135 1136 /* 1137 * nm_rxsync callback for VALE ports 1138 * user process reading from a VALE switch. 1139 * Already protected against concurrent calls from userspace, 1140 * but we must acquire the queue's lock to protect against 1141 * writers on the same queue. 1142 */ 1143 int 1144 netmap_vp_rxsync(struct netmap_kring *kring, int flags) 1145 { 1146 int n; 1147 1148 mtx_lock(&kring->q_lock); 1149 n = netmap_vp_rxsync_locked(kring, flags); 1150 mtx_unlock(&kring->q_lock); 1151 return n; 1152 } 1153 1154 int 1155 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna, 1156 struct netmap_bdg_ops *ops) 1157 { 1158 return ops->bwrap_attach(nr_name, hwna); 1159 } 1160 1161 1162 /* Bridge wrapper code (bwrap). 1163 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a 1164 * VALE switch. 1165 * The main task is to swap the meaning of tx and rx rings to match the 1166 * expectations of the VALE switch code (see nm_bdg_flush). 1167 * 1168 * The bwrap works by interposing a netmap_bwrap_adapter between the 1169 * rest of the system and the hwna. The netmap_bwrap_adapter looks like 1170 * a netmap_vp_adapter to the rest the system, but, internally, it 1171 * translates all callbacks to what the hwna expects. 1172 * 1173 * Note that we have to intercept callbacks coming from two sides: 1174 * 1175 * - callbacks coming from the netmap module are intercepted by 1176 * passing around the netmap_bwrap_adapter instead of the hwna 1177 * 1178 * - callbacks coming from outside of the netmap module only know 1179 * about the hwna. This, however, only happens in interrupt 1180 * handlers, where only the hwna->nm_notify callback is called. 1181 * What the bwrap does is to overwrite the hwna->nm_notify callback 1182 * with its own netmap_bwrap_intr_notify. 1183 * XXX This assumes that the hwna->nm_notify callback was the 1184 * standard netmap_notify(), as it is the case for nic adapters. 1185 * Any additional action performed by hwna->nm_notify will not be 1186 * performed by netmap_bwrap_intr_notify. 1187 * 1188 * Additionally, the bwrap can optionally attach the host rings pair 1189 * of the wrapped adapter to a different port of the switch. 1190 */ 1191 1192 1193 static void 1194 netmap_bwrap_dtor(struct netmap_adapter *na) 1195 { 1196 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1197 struct netmap_adapter *hwna = bna->hwna; 1198 struct nm_bridge *b = bna->up.na_bdg, 1199 *bh = bna->host.na_bdg; 1200 1201 if (bna->host.up.nm_mem) 1202 netmap_mem_put(bna->host.up.nm_mem); 1203 1204 if (b) { 1205 netmap_bdg_detach_common(b, bna->up.bdg_port, 1206 (bh ? bna->host.bdg_port : -1)); 1207 } 1208 1209 nm_prdis("na %p", na); 1210 na->ifp = NULL; 1211 bna->host.up.ifp = NULL; 1212 hwna->na_vp = bna->saved_na_vp; 1213 hwna->na_hostvp = NULL; 1214 hwna->na_private = NULL; 1215 hwna->na_flags &= ~NAF_BUSY; 1216 netmap_adapter_put(hwna); 1217 1218 } 1219 1220 1221 /* 1222 * Intr callback for NICs connected to a bridge. 1223 * Simply ignore tx interrupts (maybe we could try to recover space ?) 1224 * and pass received packets from nic to the bridge. 1225 * 1226 * XXX TODO check locking: this is called from the interrupt 1227 * handler so we should make sure that the interface is not 1228 * disconnected while passing down an interrupt. 1229 * 1230 * Note, no user process can access this NIC or the host stack. 1231 * The only part of the ring that is significant are the slots, 1232 * and head/cur/tail are set from the kring as needed 1233 * (part as a receive ring, part as a transmit ring). 1234 * 1235 * callback that overwrites the hwna notify callback. 1236 * Packets come from the outside or from the host stack and are put on an 1237 * hwna rx ring. 1238 * The bridge wrapper then sends the packets through the bridge. 1239 */ 1240 int 1241 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) 1242 { 1243 struct netmap_adapter *na = kring->na; 1244 struct netmap_bwrap_adapter *bna = na->na_private; 1245 struct netmap_kring *bkring; 1246 struct netmap_vp_adapter *vpna = &bna->up; 1247 u_int ring_nr = kring->ring_id; 1248 int ret = NM_IRQ_COMPLETED; 1249 int error; 1250 1251 if (netmap_debug & NM_DEBUG_RXINTR) 1252 nm_prinf("%s %s 0x%x", na->name, kring->name, flags); 1253 1254 bkring = vpna->up.tx_rings[ring_nr]; 1255 1256 /* make sure the ring is not disabled */ 1257 if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { 1258 return EIO; 1259 } 1260 1261 if (netmap_debug & NM_DEBUG_RXINTR) 1262 nm_prinf("%s head %d cur %d tail %d", na->name, 1263 kring->rhead, kring->rcur, kring->rtail); 1264 1265 /* simulate a user wakeup on the rx ring 1266 * fetch packets that have arrived. 1267 */ 1268 error = kring->nm_sync(kring, 0); 1269 if (error) 1270 goto put_out; 1271 if (kring->nr_hwcur == kring->nr_hwtail) { 1272 if (netmap_verbose) 1273 nm_prlim(1, "interrupt with no packets on %s", 1274 kring->name); 1275 goto put_out; 1276 } 1277 1278 /* new packets are kring->rcur to kring->nr_hwtail, and the bkring 1279 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail 1280 * to push all packets out. 1281 */ 1282 bkring->rhead = bkring->rcur = kring->nr_hwtail; 1283 1284 bkring->nm_sync(bkring, flags); 1285 1286 /* mark all buffers as released on this ring */ 1287 kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; 1288 /* another call to actually release the buffers */ 1289 error = kring->nm_sync(kring, 0); 1290 1291 /* The second rxsync may have further advanced hwtail. If this happens, 1292 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ 1293 if (kring->rcur != kring->nr_hwtail) { 1294 ret = NM_IRQ_RESCHED; 1295 } 1296 put_out: 1297 nm_kr_put(kring); 1298 1299 return error ? error : ret; 1300 } 1301 1302 1303 /* nm_register callback for bwrap */ 1304 int 1305 netmap_bwrap_reg(struct netmap_adapter *na, int onoff) 1306 { 1307 struct netmap_bwrap_adapter *bna = 1308 (struct netmap_bwrap_adapter *)na; 1309 struct netmap_adapter *hwna = bna->hwna; 1310 struct netmap_vp_adapter *hostna = &bna->host; 1311 int error, i; 1312 enum txrx t; 1313 1314 nm_prdis("%s %s", na->name, onoff ? "on" : "off"); 1315 1316 if (onoff) { 1317 /* netmap_do_regif has been called on the bwrap na. 1318 * We need to pass the information about the 1319 * memory allocator down to the hwna before 1320 * putting it in netmap mode 1321 */ 1322 hwna->na_lut = na->na_lut; 1323 1324 if (hostna->na_bdg) { 1325 /* if the host rings have been attached to switch, 1326 * we need to copy the memory allocator information 1327 * in the hostna also 1328 */ 1329 hostna->up.na_lut = na->na_lut; 1330 } 1331 1332 } 1333 1334 /* pass down the pending ring state information */ 1335 for_rx_tx(t) { 1336 for (i = 0; i < netmap_all_rings(na, t); i++) { 1337 NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode = 1338 NMR(na, t)[i]->nr_pending_mode; 1339 } 1340 } 1341 1342 /* forward the request to the hwna */ 1343 error = hwna->nm_register(hwna, onoff); 1344 if (error) 1345 return error; 1346 1347 /* copy up the current ring state information */ 1348 for_rx_tx(t) { 1349 for (i = 0; i < netmap_all_rings(na, t); i++) { 1350 struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i]; 1351 NMR(na, t)[i]->nr_mode = kring->nr_mode; 1352 } 1353 } 1354 1355 /* impersonate a netmap_vp_adapter */ 1356 netmap_vp_reg(na, onoff); 1357 if (hostna->na_bdg) 1358 netmap_vp_reg(&hostna->up, onoff); 1359 1360 if (onoff) { 1361 u_int i; 1362 /* intercept the hwna nm_nofify callback on the hw rings */ 1363 for (i = 0; i < hwna->num_rx_rings; i++) { 1364 hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; 1365 hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify; 1366 } 1367 i = hwna->num_rx_rings; /* for safety */ 1368 /* save the host ring notify unconditionally */ 1369 for (; i < netmap_real_rings(hwna, NR_RX); i++) { 1370 hwna->rx_rings[i]->save_notify = 1371 hwna->rx_rings[i]->nm_notify; 1372 if (hostna->na_bdg) { 1373 /* also intercept the host ring notify */ 1374 hwna->rx_rings[i]->nm_notify = 1375 netmap_bwrap_intr_notify; 1376 na->tx_rings[i]->nm_sync = na->nm_txsync; 1377 } 1378 } 1379 if (na->active_fds == 0) 1380 na->na_flags |= NAF_NETMAP_ON; 1381 } else { 1382 u_int i; 1383 1384 if (na->active_fds == 0) 1385 na->na_flags &= ~NAF_NETMAP_ON; 1386 1387 /* reset all notify callbacks (including host ring) */ 1388 for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) { 1389 hwna->rx_rings[i]->nm_notify = 1390 hwna->rx_rings[i]->save_notify; 1391 hwna->rx_rings[i]->save_notify = NULL; 1392 } 1393 hwna->na_lut.lut = NULL; 1394 hwna->na_lut.plut = NULL; 1395 hwna->na_lut.objtotal = 0; 1396 hwna->na_lut.objsize = 0; 1397 1398 /* reset the number of host rings to default */ 1399 for_rx_tx(t) { 1400 nma_set_host_nrings(hwna, t, 1); 1401 } 1402 1403 } 1404 1405 return 0; 1406 } 1407 1408 /* nm_config callback for bwrap */ 1409 static int 1410 netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) 1411 { 1412 struct netmap_bwrap_adapter *bna = 1413 (struct netmap_bwrap_adapter *)na; 1414 struct netmap_adapter *hwna = bna->hwna; 1415 int error; 1416 1417 /* cache the lut in the embedded host adapter */ 1418 error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut); 1419 if (error) 1420 return error; 1421 1422 /* Forward the request to the hwna. It may happen that nobody 1423 * registered hwna yet, so netmap_mem_get_lut() may have not 1424 * been called yet. */ 1425 error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut); 1426 if (error) 1427 return error; 1428 netmap_update_config(hwna); 1429 /* swap the results and propagate */ 1430 info->num_tx_rings = hwna->num_rx_rings; 1431 info->num_tx_descs = hwna->num_rx_desc; 1432 info->num_rx_rings = hwna->num_tx_rings; 1433 info->num_rx_descs = hwna->num_tx_desc; 1434 info->rx_buf_maxsize = hwna->rx_buf_maxsize; 1435 1436 if (na->na_flags & NAF_HOST_RINGS) { 1437 struct netmap_adapter *hostna = &bna->host.up; 1438 enum txrx t; 1439 1440 /* limit the number of host rings to that of hw */ 1441 if (na->na_flags & NAF_HOST_ALL) { 1442 hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX); 1443 hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX); 1444 } else { 1445 nm_bound_var(&hostna->num_tx_rings, 1, 1, 1446 nma_get_nrings(hwna, NR_TX), NULL); 1447 nm_bound_var(&hostna->num_rx_rings, 1, 1, 1448 nma_get_nrings(hwna, NR_RX), NULL); 1449 } 1450 for_rx_tx(t) { 1451 enum txrx r = nm_txrx_swap(t); 1452 u_int nr = nma_get_nrings(hostna, t); 1453 1454 nma_set_host_nrings(na, t, nr); 1455 if (nma_get_host_nrings(hwna, t) < nr) { 1456 nma_set_host_nrings(hwna, t, nr); 1457 } 1458 nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); 1459 } 1460 } 1461 1462 return 0; 1463 } 1464 1465 /* nm_bufcfg callback for bwrap */ 1466 static int 1467 netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target) 1468 { 1469 struct netmap_adapter *na = kring->na; 1470 struct netmap_bwrap_adapter *bna = 1471 (struct netmap_bwrap_adapter *)na; 1472 struct netmap_adapter *hwna = bna->hwna; 1473 struct netmap_kring *hwkring; 1474 enum txrx r; 1475 int error; 1476 1477 /* we need the hw kring that corresponds to the bwrap one: 1478 * remember that rx and tx are swapped 1479 */ 1480 r = nm_txrx_swap(kring->tx); 1481 hwkring = NMR(hwna, r)[kring->ring_id]; 1482 1483 /* copy down the offset information, forward the request 1484 * and copy up the results 1485 */ 1486 hwkring->offset_mask = kring->offset_mask; 1487 hwkring->offset_max = kring->offset_max; 1488 hwkring->offset_gap = kring->offset_gap; 1489 1490 error = hwkring->nm_bufcfg(hwkring, target); 1491 if (error) 1492 return error; 1493 1494 kring->hwbuf_len = hwkring->hwbuf_len; 1495 kring->buf_align = hwkring->buf_align; 1496 1497 return 0; 1498 } 1499 1500 /* nm_krings_create callback for bwrap */ 1501 int 1502 netmap_bwrap_krings_create_common(struct netmap_adapter *na) 1503 { 1504 struct netmap_bwrap_adapter *bna = 1505 (struct netmap_bwrap_adapter *)na; 1506 struct netmap_adapter *hwna = bna->hwna; 1507 struct netmap_adapter *hostna = &bna->host.up; 1508 int i, error = 0; 1509 enum txrx t; 1510 1511 /* also create the hwna krings */ 1512 error = hwna->nm_krings_create(hwna); 1513 if (error) { 1514 return error; 1515 } 1516 1517 /* increment the usage counter for all the hwna krings */ 1518 for_rx_tx(t) { 1519 for (i = 0; i < netmap_all_rings(hwna, t); i++) { 1520 NMR(hwna, t)[i]->users++; 1521 /* this to prevent deletion of the rings through 1522 * our krings, instead of through the hwna ones */ 1523 NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING; 1524 } 1525 } 1526 1527 /* now create the actual rings */ 1528 error = netmap_mem_rings_create(hwna); 1529 if (error) { 1530 goto err_dec_users; 1531 } 1532 1533 /* cross-link the netmap rings 1534 * The original number of rings comes from hwna, 1535 * rx rings on one side equals tx rings on the other. 1536 */ 1537 for_rx_tx(t) { 1538 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 1539 for (i = 0; i < netmap_all_rings(hwna, r); i++) { 1540 NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; 1541 NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; 1542 } 1543 } 1544 1545 if (na->na_flags & NAF_HOST_RINGS) { 1546 /* the hostna rings are the host rings of the bwrap. 1547 * The corresponding krings must point back to the 1548 * hostna 1549 */ 1550 hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; 1551 hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; 1552 for_rx_tx(t) { 1553 for (i = 0; i < nma_get_nrings(hostna, t); i++) { 1554 NMR(hostna, t)[i]->na = hostna; 1555 } 1556 } 1557 } 1558 1559 return 0; 1560 1561 err_dec_users: 1562 for_rx_tx(t) { 1563 for (i = 0; i < netmap_all_rings(hwna, t); i++) { 1564 NMR(hwna, t)[i]->users--; 1565 NMR(na, t)[i]->users--; 1566 } 1567 } 1568 hwna->nm_krings_delete(hwna); 1569 return error; 1570 } 1571 1572 1573 void 1574 netmap_bwrap_krings_delete_common(struct netmap_adapter *na) 1575 { 1576 struct netmap_bwrap_adapter *bna = 1577 (struct netmap_bwrap_adapter *)na; 1578 struct netmap_adapter *hwna = bna->hwna; 1579 enum txrx t; 1580 int i; 1581 1582 nm_prdis("%s", na->name); 1583 1584 /* decrement the usage counter for all the hwna krings */ 1585 for_rx_tx(t) { 1586 for (i = 0; i < netmap_all_rings(hwna, t); i++) { 1587 NMR(hwna, t)[i]->users--; 1588 NMR(na, t)[i]->users--; 1589 } 1590 } 1591 1592 /* delete any netmap rings that are no longer needed */ 1593 netmap_mem_rings_delete(hwna); 1594 hwna->nm_krings_delete(hwna); 1595 } 1596 1597 1598 /* notify method for the bridge-->hwna direction */ 1599 int 1600 netmap_bwrap_notify(struct netmap_kring *kring, int flags) 1601 { 1602 struct netmap_adapter *na = kring->na; 1603 struct netmap_bwrap_adapter *bna = na->na_private; 1604 struct netmap_adapter *hwna = bna->hwna; 1605 u_int ring_n = kring->ring_id; 1606 u_int lim = kring->nkr_num_slots - 1; 1607 struct netmap_kring *hw_kring; 1608 int error; 1609 1610 nm_prdis("%s: na %s hwna %s", 1611 (kring ? kring->name : "NULL!"), 1612 (na ? na->name : "NULL!"), 1613 (hwna ? hwna->name : "NULL!")); 1614 hw_kring = hwna->tx_rings[ring_n]; 1615 1616 if (nm_kr_tryget(hw_kring, 0, NULL)) { 1617 return ENXIO; 1618 } 1619 1620 /* first step: simulate a user wakeup on the rx ring */ 1621 netmap_vp_rxsync(kring, flags); 1622 nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 1623 na->name, ring_n, 1624 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 1625 kring->rhead, kring->rcur, kring->rtail, 1626 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 1627 /* second step: the new packets are sent on the tx ring 1628 * (which is actually the same ring) 1629 */ 1630 hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; 1631 error = hw_kring->nm_sync(hw_kring, flags); 1632 if (error) 1633 goto put_out; 1634 1635 /* third step: now we are back the rx ring */ 1636 /* claim ownership on all hw owned bufs */ 1637 kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ 1638 1639 /* fourth step: the user goes to sleep again, causing another rxsync */ 1640 netmap_vp_rxsync(kring, flags); 1641 nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", 1642 na->name, ring_n, 1643 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, 1644 kring->rhead, kring->rcur, kring->rtail, 1645 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); 1646 put_out: 1647 nm_kr_put(hw_kring); 1648 1649 return error ? error : NM_IRQ_COMPLETED; 1650 } 1651 1652 1653 /* nm_bdg_ctl callback for the bwrap. 1654 * Called on bridge-attach and detach, as an effect of valectl -[ahd]. 1655 * On attach, it needs to provide a fake netmap_priv_d structure and 1656 * perform a netmap_do_regif() on the bwrap. This will put both the 1657 * bwrap and the hwna in netmap mode, with the netmap rings shared 1658 * and cross linked. Moroever, it will start intercepting interrupts 1659 * directed to hwna. 1660 */ 1661 static int 1662 netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) 1663 { 1664 struct netmap_priv_d *npriv; 1665 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; 1666 int error = 0; 1667 1668 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { 1669 struct nmreq_vale_attach *req = 1670 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; 1671 if (req->reg.nr_ringid != 0 || 1672 (req->reg.nr_mode != NR_REG_ALL_NIC && 1673 req->reg.nr_mode != NR_REG_NIC_SW)) { 1674 /* We only support attaching all the NIC rings 1675 * and/or the host stack. */ 1676 return EINVAL; 1677 } 1678 if (NETMAP_OWNED_BY_ANY(na)) { 1679 return EBUSY; 1680 } 1681 if (bna->na_kpriv) { 1682 /* nothing to do */ 1683 return 0; 1684 } 1685 npriv = netmap_priv_new(); 1686 if (npriv == NULL) 1687 return ENOMEM; 1688 npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ 1689 error = netmap_do_regif(npriv, na, hdr); 1690 if (error) { 1691 netmap_priv_delete(npriv); 1692 netmap_mem_restore(bna->hwna); 1693 return error; 1694 } 1695 bna->na_kpriv = npriv; 1696 na->na_flags |= NAF_BUSY; 1697 } else { 1698 if (na->active_fds == 0) /* not registered */ 1699 return EINVAL; 1700 netmap_priv_delete(bna->na_kpriv); 1701 bna->na_kpriv = NULL; 1702 na->na_flags &= ~NAF_BUSY; 1703 netmap_mem_restore(bna->hwna); 1704 } 1705 1706 return error; 1707 } 1708 1709 /* attach a bridge wrapper to the 'real' device */ 1710 int 1711 netmap_bwrap_attach_common(struct netmap_adapter *na, 1712 struct netmap_adapter *hwna) 1713 { 1714 struct netmap_bwrap_adapter *bna; 1715 struct netmap_adapter *hostna = NULL; 1716 int error = 0; 1717 enum txrx t; 1718 1719 /* make sure the NIC is not already in use */ 1720 if (NETMAP_OWNED_BY_ANY(hwna)) { 1721 nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name); 1722 return EBUSY; 1723 } 1724 1725 bna = (struct netmap_bwrap_adapter *)na; 1726 /* make bwrap ifp point to the real ifp */ 1727 na->ifp = hwna->ifp; 1728 if_ref(na->ifp); 1729 na->na_private = bna; 1730 /* fill the ring data for the bwrap adapter with rx/tx meanings 1731 * swapped. The real cross-linking will be done during register, 1732 * when all the krings will have been created. 1733 */ 1734 for_rx_tx(t) { 1735 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ 1736 nma_set_nrings(na, t, nma_get_nrings(hwna, r)); 1737 nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); 1738 } 1739 na->nm_dtor = netmap_bwrap_dtor; 1740 na->nm_config = netmap_bwrap_config; 1741 na->nm_bufcfg = netmap_bwrap_bufcfg; 1742 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; 1743 na->pdev = hwna->pdev; 1744 na->nm_mem = netmap_mem_get(hwna->nm_mem); 1745 na->virt_hdr_len = hwna->virt_hdr_len; 1746 na->rx_buf_maxsize = hwna->rx_buf_maxsize; 1747 1748 bna->hwna = hwna; 1749 netmap_adapter_get(hwna); 1750 hwna->na_private = bna; /* weak reference */ 1751 bna->saved_na_vp = hwna->na_vp; 1752 hwna->na_vp = &bna->up; 1753 bna->up.up.na_vp = &(bna->up); 1754 1755 if (hwna->na_flags & NAF_HOST_RINGS) { 1756 if (hwna->na_flags & NAF_SW_ONLY) 1757 na->na_flags |= NAF_SW_ONLY; 1758 na->na_flags |= NAF_HOST_RINGS; 1759 hostna = &bna->host.up; 1760 1761 snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name); 1762 hostna->ifp = hwna->ifp; 1763 // hostna->nm_txsync = netmap_bwrap_host_txsync; 1764 // hostna->nm_rxsync = netmap_bwrap_host_rxsync; 1765 hostna->nm_mem = netmap_mem_get(na->nm_mem); 1766 hostna->na_private = bna; 1767 hostna->na_vp = &bna->up; 1768 na->na_hostvp = hwna->na_hostvp = 1769 hostna->na_hostvp = &bna->host; 1770 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ 1771 hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; 1772 /* bwrap_config() will determine the number of host rings */ 1773 } 1774 if (hwna->na_flags & NAF_MOREFRAG) 1775 na->na_flags |= NAF_MOREFRAG; 1776 1777 nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d", 1778 na->name, if_name(ifp), 1779 na->num_tx_rings, na->num_tx_desc, 1780 na->num_rx_rings, na->num_rx_desc); 1781 1782 error = netmap_attach_common(na); 1783 if (error) { 1784 goto err_put; 1785 } 1786 hwna->na_flags |= NAF_BUSY; 1787 return 0; 1788 1789 err_put: 1790 hwna->na_vp = hwna->na_hostvp = NULL; 1791 netmap_adapter_put(hwna); 1792 return error; 1793 1794 } 1795 1796 struct nm_bridge * 1797 netmap_init_bridges2(u_int n) 1798 { 1799 int i; 1800 struct nm_bridge *b; 1801 1802 b = nm_os_malloc(sizeof(struct nm_bridge) * n); 1803 if (b == NULL) 1804 return NULL; 1805 for (i = 0; i < n; i++) 1806 BDG_RWINIT(&b[i]); 1807 return b; 1808 } 1809 1810 void 1811 netmap_uninit_bridges2(struct nm_bridge *b, u_int n) 1812 { 1813 int i; 1814 1815 if (b == NULL) 1816 return; 1817 1818 for (i = 0; i < n; i++) 1819 BDG_RWDESTROY(&b[i]); 1820 nm_os_free(b); 1821 } 1822 1823 int 1824 netmap_init_bridges(void) 1825 { 1826 #ifdef CONFIG_NET_NS 1827 return netmap_bns_register(); 1828 #else 1829 nm_bridges = netmap_init_bridges2(vale_max_bridges); 1830 if (nm_bridges == NULL) 1831 return ENOMEM; 1832 return 0; 1833 #endif 1834 } 1835 1836 void 1837 netmap_uninit_bridges(void) 1838 { 1839 #ifdef CONFIG_NET_NS 1840 netmap_bns_unregister(); 1841 #else 1842 netmap_uninit_bridges2(nm_bridges, vale_max_bridges); 1843 #endif 1844 } 1845