/*
 * Copyright (C) 2013-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

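/*
 * Illustrative sketch (not part of the driver) of the forwarding-side
 * locking pattern described above. reserve_slots(), copy_frames() and
 * publish_slots() are hypothetical helpers used only for illustration;
 * the bridge lock and the per-ring q_lock are the real ones used below.
 *
 *	BDG_RLOCK(b);				// sleepable shared lock,
 *						// held for the whole cycle
 *	mtx_lock(&rxkring->q_lock);
 *	first = reserve_slots(rxkring, n);	// reserve n rx slots
 *	mtx_unlock(&rxkring->q_lock);
 *	copy_frames(src, rxkring, first, n);	// may take a page fault
 *	mtx_lock(&rxkring->q_lock);
 *	publish_slots(rxkring, first, n);	// update the receive ring
 *	mtx_unlock(&rxkring->q_lock);
 *	BDG_RUNLOCK(b);
 */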
/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#include <dev/netmap/netmap_bdg.h>

const char*
netmap_bdg_name(struct netmap_vp_adapter *vp)
{
	struct nm_bridge *b = vp->na_bdg;
	if (b == NULL)
		return NULL;
	return b->bdg_basename;
}


#ifndef CONFIG_NET_NS
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */


static int
nm_is_id_char(const char c)
{
	return (c >= 'a' && c <= 'z') ||
	       (c >= 'A' && c <= 'Z') ||
	       (c >= '0' && c <= '9') ||
	       (c == '_');
}

/* Validate the name of a VALE bridge port and return the
 * position of the ":" character. */
static int
nm_vale_name_validate(const char *name)
{
	int colon_pos = -1;
	int i;

	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
		return -1;
	}

	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
		if (name[i] == ':') {
			colon_pos = i;
			break;
		} else if (!nm_is_id_char(name[i])) {
			return -1;
		}
	}

	if (strlen(name) - colon_pos > IFNAMSIZ) {
		/* interface name too long */
		return -1;
	}

	return colon_pos;
}

/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
struct nm_bridge *
nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
{
	int i, namelen;
	struct nm_bridge *b = NULL, *bridges;
	u_int num_bridges;

	NMG_LOCK_ASSERT();

	netmap_bns_getbridges(&bridges, &num_bridges);

	namelen = nm_vale_name_validate(name);
	if (namelen < 0) {
		D("invalid bridge name %s", name ? name : NULL);
		return NULL;
	}

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < num_bridges; i++) {
		struct nm_bridge *x = bridges + i;

		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == num_bridges && b) { /* name not found, can create entry */
		/* initialize the bridge */
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		if (b->ht == NULL) {
			D("failed to allocate hash table");
			return NULL;
		}
		strncpy(b->bdg_basename, name, namelen);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->bdg_ops = ops;
		b->private_data = b->ht;
		b->bdg_flags = 0;
		NM_BNS_GET(b);
	}
	return b;
}


int
netmap_bdg_free(struct nm_bridge *b)
{
	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
		return EBUSY;
	}

	ND("marking bridge %s as free", b->bdg_basename);
	nm_os_free(b->ht);
	b->bdg_ops = NULL;
	b->bdg_flags = 0;
	NM_BNS_PUT(b);
	return 0;
}


/* remove from bridge b the ports in slots hw and sw
 * (sw can be -1 if not needed)
 */
void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint32_t *tmp = b->tmp_bdg_port_index;

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	 */

	if (netmap_verbose)
		D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--; /* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	if (b->bdg_ops->dtor)
		b->bdg_ops->dtor(b->bdg_ports[s_hw]);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	netmap_bdg_free(b);
}


/* nm_bdg_ctl callback for VALE ports */
int
netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
	struct nm_bridge *b = vpna->na_bdg;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		return 0; /* nothing to do */
	}
	if (b) {
		netmap_set_all_rings(na, 0 /* disable */);
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
		vpna->na_bdg = NULL;
		netmap_set_all_rings(na, 1 /* enable */);
	}
	/* we took the reference just for the attach */
	netmap_adapter_put(na);
	return 0;
}

int
netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
		struct nm_bridge *b)
{
	return NM_NEED_BWRAP;
}

/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
{
	char *nr_name = hdr->nr_name;
	const char *ifname;
	struct ifnet *ifp = NULL;
	int error = 0;
	struct netmap_vp_adapter *vpna, *hostna = NULL;
	struct nm_bridge *b;
	uint32_t i, j;
	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
	int needed;

	*na = NULL;     /* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
		return 0;  /* no error, but no VALE prefix */
	}

	b = nm_find_bridge(nr_name, create, ops);
	if (b == NULL) {
		ND("no bridges available for '%s'", nr_name);
		return (create ? ENOMEM : ENXIO);
	}
	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
		panic("x");

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		ND("checking %s", vpna->up.name);
		if (!strcmp(vpna->up.name, nr_name)) {
			netmap_adapter_get(&vpna->up);
			ND("found existing if %s refs %d", nr_name);
			*na = &vpna->up;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return ENOMEM;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);

	/*
	 * try to see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifname = nr_name + b->bdg_namelen + 1;
	ifp = ifunit_ref(ifname);
	if (!ifp) {
		/* Create an ephemeral virtual port.
		 * This block contains all the ephemeral-specific logic.
		 */

		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
			error = EINVAL;
			goto out;
		}

		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
		if (error) {
			D("error %d", error);
			goto out;
		}
		/* shortcut - we can skip get_hw_na(),
		 * ownership check and nm_bdg_attach()
		 */

	} else {
		struct netmap_adapter *hw;

		/* the vale:nic syntax is only valid for some commands */
		switch (hdr->nr_reqtype) {
		case NETMAP_REQ_VALE_ATTACH:
		case NETMAP_REQ_VALE_DETACH:
		case NETMAP_REQ_VALE_POLLING_ENABLE:
		case NETMAP_REQ_VALE_POLLING_DISABLE:
			break; /* ok */
		default:
			error = EINVAL;
			goto out;
		}

		error = netmap_get_hw_na(ifp, nmd, &hw);
		if (error || hw == NULL)
			goto out;

		/* host adapter might not be created */
		error = hw->nm_bdg_attach(nr_name, hw, b);
		if (error == NM_NEED_BWRAP) {
			error = b->bdg_ops->bwrap_attach(nr_name, hw);
		}
		if (error)
			goto out;
		vpna = hw->na_vp;
		hostna = hw->na_hostvp;
		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
			/* Check if we need to skip the host rings. */
			struct nmreq_vale_attach *areq =
				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
				hostna = NULL;
			}
		}
	}

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	ND("NIC %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (hostna != NULL) {
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", hostna, cand2);
	}
	ND("if %s refs %d", ifname, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = &vpna->up;
	netmap_adapter_get(*na);

out:
	if (ifp)
		if_rele(ifp);

	return error;
}

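/*
 * Illustrative caller sketch (not part of the driver) for the reference
 * rules documented above netmap_get_bdg_na(): a reference is held only
 * when the call returns 0 and *na is non-NULL. ops and hdr are whatever
 * the hypothetical caller already has at hand.
 *
 *	NMG_LOCK();
 *	error = netmap_get_bdg_na(hdr, &na, NULL, 0 [> don't create <], ops);
 *	if (!error && na != NULL) {
 *		... use the port; we hold a reference ...
 *		netmap_adapter_put(na);		// drop it when done
 *	} else if (!error) {
 *		... no VALE prefix: nothing was referenced ...
 *	}
 *	NMG_UNLOCK();
 */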
/* Process NETMAP_REQ_VALE_ATTACH.
 */
int
nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_attach *req =
		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter * vpna;
	struct netmap_adapter *na = NULL;
	struct netmap_mem_d *nmd = NULL;
	struct nm_bridge *b = NULL;
	int error;

	NMG_LOCK();
	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_exit;
	}

	if (req->reg.nr_mem_id) {
		nmd = netmap_mem_find(req->reg.nr_mem_id);
		if (nmd == NULL) {
			error = EINVAL;
			goto unlock_exit;
		}
	}

	/* check for existing one */
	error = netmap_get_vale_na(hdr, &na, nmd, 0);
	if (na) {
		error = EBUSY;
		goto unref_exit;
	}
	error = netmap_get_vale_na(hdr, &na,
				nmd, 1 /* create if not exists */);
	if (error) { /* no device */
		goto unlock_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (NETMAP_OWNED_BY_ANY(na)) {
		error = EBUSY;
		goto unref_exit;
	}

	if (na->nm_bdg_ctl) {
		/* nop for VALE ports. The bwrap needs to put the hwna
		 * in netmap mode (see netmap_bwrap_bdg_ctl)
		 */
		error = na->nm_bdg_ctl(hdr, na);
		if (error)
			goto unref_exit;
		ND("registered %s to netmap-mode", na->name);
	}
	vpna = (struct netmap_vp_adapter *)na;
	req->port_index = vpna->bdg_port;
	NMG_UNLOCK();
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;
}

static inline int
nm_is_bwrap(struct netmap_adapter *na)
{
	return na->nm_register == netmap_bwrap_reg;
}

/* Process NETMAP_REQ_VALE_DETACH.
 */
int
nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
{
	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	struct nm_bridge *b = NULL;
	int error;

	NMG_LOCK();
	/* permission check for modified bridges */
	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_exit;
	}

	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	} else if (nm_is_bwrap(na) &&
		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
		/* Don't detach a NIC with polling */
		error = EBUSY;
		goto unref_exit;
	}

	vpna = (struct netmap_vp_adapter *)na;
	if (na->na_vp != vpna) {
		/* trying to detach the first attachment of a persistent
		 * VALE port that is attached to 2 bridges
		 */
		error = EBUSY;
		goto unref_exit;
	}
	nmreq_det->port_index = vpna->bdg_port;

	if (na->nm_bdg_ctl) {
		/* remove the port from bridge. The bwrap
		 * also needs to put the hwna in normal mode
		 */
		error = na->nm_bdg_ctl(hdr, na);
	}

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;

}

struct nm_bdg_polling_state;
struct nm_bdg_kthread {
	struct nm_kctx *nmk;
	u_int qfirst;
	u_int qlast;
	struct nm_bdg_polling_state *bps;
};

struct nm_bdg_polling_state {
	bool configured;
	bool stopped;
	struct netmap_bwrap_adapter *bna;
	uint32_t mode;
	u_int qfirst;
	u_int qlast;
	u_int cpu_from;
	u_int ncpus;
	struct nm_bdg_kthread *kthreads;
};

static void
netmap_bwrap_polling(void *data, int is_kthread)
{
	struct nm_bdg_kthread *nbk = data;
	struct netmap_bwrap_adapter *bna;
	u_int qfirst, qlast, i;
	struct netmap_kring **kring0, *kring;

	if (!nbk)
		return;
	qfirst = nbk->qfirst;
	qlast = nbk->qlast;
	bna = nbk->bps->bna;
	kring0 = NMR(bna->hwna, NR_RX);

	for (i = qfirst; i < qlast; i++) {
		kring = kring0[i];
		kring->nm_notify(kring, 0);
	}
}

static int
nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
{
	struct nm_kctx_cfg kcfg;
	int i, j;

	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
	if (bps->kthreads == NULL)
		return ENOMEM;

	bzero(&kcfg, sizeof(kcfg));
	kcfg.worker_fn = netmap_bwrap_polling;
	kcfg.use_kthread = 1;
	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		int all = (bps->ncpus == 1 &&
			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
		int affinity = bps->cpu_from + i;

		t->bps = bps;
		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
		t->qlast = all ? bps->qlast : t->qfirst + 1;
		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
			t->qlast);

		kcfg.type = i;
		kcfg.worker_private = t;
		t->nmk = nm_os_kctx_create(&kcfg, NULL);
		if (t->nmk == NULL) {
			goto cleanup;
		}
		nm_os_kctx_worker_setaff(t->nmk, affinity);
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_destroy(t->nmk);
	}
	nm_os_free(bps->kthreads);
	return EFAULT;
}

/* A variant of ptnetmap_start_kthreads() */
static int
nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
{
	int error, i, j;

	if (!bps) {
		D("polling is not configured");
		return EFAULT;
	}
	bps->stopped = false;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		error = nm_os_kctx_worker_start(t->nmk);
		if (error) {
			D("error in nm_kthread_start()");
			goto cleanup;
		}
	}
	return 0;

cleanup:
	for (j = 0; j < i; j++) {
		struct nm_bdg_kthread *t = bps->kthreads + j;
		nm_os_kctx_worker_stop(t->nmk);
	}
	bps->stopped = true;
	return error;
}

static void
nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
{
	int i;

	if (!bps)
		return;

	for (i = 0; i < bps->ncpus; i++) {
		struct nm_bdg_kthread *t = bps->kthreads + i;
		nm_os_kctx_worker_stop(t->nmk);
		nm_os_kctx_destroy(t->nmk);
	}
	bps->stopped = true;
}

static int
get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
		struct nm_bdg_polling_state *bps)
{
	unsigned int avail_cpus, core_from;
	unsigned int qfirst, qlast;
	uint32_t i = req->nr_first_cpu_id;
	uint32_t req_cpus = req->nr_num_polling_cpus;

	avail_cpus = nm_os_ncpus();

	if (req_cpus == 0) {
		D("req_cpus must be > 0");
		return EINVAL;
	} else if (req_cpus >= avail_cpus) {
		D("Cannot use all the CPUs in the system");
		return EINVAL;
	}

	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
		/* Use a separate core for each ring. If nr_num_polling_cpus>1
		 * more consecutive rings are polled.
		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
			D("Rings %u-%u not in range (have %d rings)",
				i, i + req_cpus, nma_get_nrings(na, NR_RX));
			return EINVAL;
		}
		qfirst = i;
		qlast = qfirst + req_cpus;
		core_from = qfirst;

	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
		/* Poll all the rings using a core specified by nr_first_cpu_id.
		 * the number of cores must be 1. */
		if (req_cpus != 1) {
			D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
				"(was %d)", req_cpus);
			return EINVAL;
		}
		qfirst = 0;
		qlast = nma_get_nrings(na, NR_RX);
		core_from = i;
	} else {
		D("Invalid polling mode");
		return EINVAL;
	}

	bps->mode = req->nr_mode;
	bps->qfirst = qfirst;
	bps->qlast = qlast;
	bps->cpu_from = core_from;
	bps->ncpus = req_cpus;
	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
		"MULTI" : "SINGLE",
		qfirst, qlast, core_from, req_cpus);
	return 0;
}

static int
nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
{
	struct nm_bdg_polling_state *bps;
	struct netmap_bwrap_adapter *bna;
	int error;

	bna = (struct netmap_bwrap_adapter *)na;
	if (bna->na_polling_state) {
		D("ERROR adapter already in polling mode");
		return EFAULT;
	}

	bps = nm_os_malloc(sizeof(*bps));
	if (!bps)
		return ENOMEM;
	bps->configured = false;
	bps->stopped = true;

	if (get_polling_cfg(req, na, bps)) {
		nm_os_free(bps);
		return EINVAL;
	}

	if (nm_bdg_create_kthreads(bps)) {
		nm_os_free(bps);
		return EFAULT;
	}

	bps->configured = true;
	bna->na_polling_state = bps;
	bps->bna = bna;

	/* disable interrupts if possible */
	nma_intr_enable(bna->hwna, 0);
	/* start kthread now */
	error = nm_bdg_polling_start_kthreads(bps);
	if (error) {
		D("ERROR nm_bdg_polling_start_kthread()");
		nm_os_free(bps->kthreads);
		nm_os_free(bps);
		bna->na_polling_state = NULL;
		nma_intr_enable(bna->hwna, 1);
	}
	return error;
}

static int
nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
	struct nm_bdg_polling_state *bps;

	if (!bna->na_polling_state) {
		D("ERROR adapter is not in polling mode");
		return EFAULT;
	}
	bps = bna->na_polling_state;
	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
	bps->configured = false;
	nm_os_free(bps);
	bna->na_polling_state = NULL;
	/* reenable interrupts */
	nma_intr_enable(bna->hwna, 1);
	return 0;
}

int
nm_bdg_polling(struct nmreq_header *hdr)
{
	struct nmreq_vale_polling *req =
		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
	struct netmap_adapter *na = NULL;
	int error = 0;

	NMG_LOCK();
	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
	if (na && !error) {
		if (!nm_is_bwrap(na)) {
			error = EOPNOTSUPP;
		} else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
			error = nm_bdg_ctl_polling_start(req, na);
			if (!error)
				netmap_adapter_get(na);
		} else {
			error = nm_bdg_ctl_polling_stop(na);
			if (!error)
				netmap_adapter_put(na);
		}
		netmap_adapter_put(na);
	} else if (!na && !error) {
		/* Not VALE port. */
		error = EINVAL;
	}
	NMG_UNLOCK();

	return error;
}

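/*
 * Illustrative sketch (not part of the driver): how a userspace control
 * program might fill the request handled by nm_bdg_polling(), following
 * the MULTI_CPU example in get_polling_cfg(). This assumes the
 * nmreq_header/nmreq_vale_polling layout and the NIOCCTRL control ioctl
 * of the current netmap API; fd and the port name are hypothetical.
 *
 *	struct nmreq_vale_polling req = {
 *		.nr_mode = NETMAP_POLLING_MODE_MULTI_CPU,
 *		.nr_first_cpu_id = 2,		// poll rings 2 and 3 ...
 *		.nr_num_polling_cpus = 2,	// ... from cores 2 and 3
 *	};
 *	struct nmreq_header hdr = { .nr_version = NETMAP_API };
 *	strncpy(hdr.nr_name, "valeA:eth0", sizeof(hdr.nr_name) - 1);
 *	hdr.nr_reqtype = NETMAP_REQ_VALE_POLLING_ENABLE;
 *	hdr.nr_body = (uintptr_t)&req;
 *	ioctl(fd, NIOCCTRL, &hdr);	// fd is an open /dev/netmap
 */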
/* Process NETMAP_REQ_VALE_LIST.
 */
int
netmap_bdg_list(struct nmreq_header *hdr)
{
	struct nmreq_vale_list *req =
		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
	int namelen = strlen(hdr->nr_name);
	struct nm_bridge *b, *bridges;
	struct netmap_vp_adapter *vpna;
	int error = 0, i, j;
	u_int num_bridges;

	netmap_bns_getbridges(&bridges, &num_bridges);

	/* this is used to enumerate bridges and ports */
	if (namelen) { /* look up indexes of bridge and port */
		if (strncmp(hdr->nr_name, NM_BDG_NAME,
					strlen(NM_BDG_NAME))) {
			return EINVAL;
		}
		NMG_LOCK();
		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
		if (!b) {
			NMG_UNLOCK();
			return ENOENT;
		}

		req->nr_bridge_idx = b - bridges; /* bridge index */
		req->nr_port_idx = NM_BDG_NOPORT;
		for (j = 0; j < b->bdg_active_ports; j++) {
			i = b->bdg_port_index[j];
			vpna = b->bdg_ports[i];
			if (vpna == NULL) {
				D("This should not happen");
				continue;
			}
			/* the former and the latter identify a
			 * virtual port and a NIC, respectively
			 */
			if (!strcmp(vpna->up.name, hdr->nr_name)) {
				req->nr_port_idx = i; /* port index */
				break;
			}
		}
		NMG_UNLOCK();
	} else {
		/* return the first non-empty entry starting from
		 * bridge nr_bridge_idx and port nr_port_idx.
		 *
		 * Users can detect the end of the same bridge by
		 * seeing the new and old value of nr_bridge_idx, and can
		 * detect the end of all the bridges by error != 0
		 */
		i = req->nr_bridge_idx;
		j = req->nr_port_idx;

		NMG_LOCK();
		for (error = ENOENT; i < NM_BRIDGES; i++) {
			b = bridges + i;
			for ( ; j < NM_BDG_MAXPORTS; j++) {
				if (b->bdg_ports[j] == NULL)
					continue;
				vpna = b->bdg_ports[j];
				/* write back the VALE switch name */
				strncpy(hdr->nr_name, vpna->up.name,
					(size_t)IFNAMSIZ);
				error = 0;
				goto out;
			}
			j = 0; /* following bridges scan from 0 */
		}
	out:
		req->nr_bridge_idx = i;
		req->nr_port_idx = j;
		NMG_UNLOCK();
	}

	return error;
}

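/*
 * Illustrative sketch (not part of the driver): enumerating every port of
 * every VALE switch from userspace with NETMAP_REQ_VALE_LIST, using the
 * convention described in netmap_bdg_list() above. Assumes the NIOCCTRL
 * control ioctl of the current netmap API; fd is hypothetical.
 *
 *	struct nmreq_vale_list req = { .nr_bridge_idx = 0, .nr_port_idx = 0 };
 *	struct nmreq_header hdr = { .nr_version = NETMAP_API };
 *	hdr.nr_reqtype = NETMAP_REQ_VALE_LIST;
 *	hdr.nr_body = (uintptr_t)&req;
 *	for (;;) {
 *		hdr.nr_name[0] = '\0';	// empty name: enumerate
 *		if (ioctl(fd, NIOCCTRL, &hdr) < 0)
 *			break;		// error means no more entries
 *		printf("bridge %u port %u: %s\n",
 *			req.nr_bridge_idx, req.nr_port_idx, hdr.nr_name);
 *		req.nr_port_idx++;	// continue after the port just seen
 *	}
 */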
/* Called by external kernel modules (e.g., Openvswitch)
 * to set the configure/lookup/dtor functions of a VALE instance.
 * Register callbacks to the given bridge. 'name' may be just
 * bridge's name (including ':' if it is not just NM_BDG_NAME).
 *
 * Called without NMG_LOCK.
 */

int
netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
{
	struct nm_bridge *b;
	int error = 0;

	NMG_LOCK();
	b = nm_find_bridge(name, 0 /* don't create */, NULL);
	if (!b) {
		error = ENXIO;
		goto unlock_regops;
	}
	if (!nm_bdg_valid_auth_token(b, auth_token)) {
		error = EACCES;
		goto unlock_regops;
	}

	BDG_WLOCK(b);
	if (!bdg_ops) {
		/* resetting the bridge */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
		b->bdg_ops = NULL;
		b->private_data = b->ht;
	} else {
		/* modifying the bridge */
		b->private_data = private_data;
		b->bdg_ops = bdg_ops;
	}
	BDG_WUNLOCK(b);

unlock_regops:
	NMG_UNLOCK();
	return error;
}


int
netmap_bdg_config(struct nm_ifreq *nr)
{
	struct nm_bridge *b;
	int error = EINVAL;

	NMG_LOCK();
	b = nm_find_bridge(nr->nifr_name, 0, NULL);
	if (!b) {
		NMG_UNLOCK();
		return error;
	}
	NMG_UNLOCK();
	/* Don't call config() with NMG_LOCK() held */
	BDG_RLOCK(b);
	if (b->bdg_ops->config != NULL)
		error = b->bdg_ops->config(nr);
	BDG_RUNLOCK(b);
	return error;
}


/* nm_register callback for VALE ports */
int
netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	enum txrx t;
	int i;

	/* persistent ports may be put in netmap mode
	 * before being attached to a bridge
	 */
	if (vpna->na_bdg)
		BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		for_rx_tx(t) {
			for (i = 0; i < netmap_real_rings(na, t); i++) {
				struct netmap_kring *kring = NMR(na, t)[i];

				if (nm_kring_pending_on(kring))
					kring->nr_mode = NKR_NETMAP_ON;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
		/* XXX on FreeBSD, persistent VALE ports should also
		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
		 */
	} else {
		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;
		for_rx_tx(t) {
			for (i = 0; i < netmap_real_rings(na, t); i++) {
				struct netmap_kring *kring = NMR(na, t)[i];

				if (nm_kring_pending_off(kring))
					kring->nr_mode = NKR_NETMAP_OFF;
			}
		}
	}
	if (vpna->na_bdg)
		BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/* rxsync code used by VALE ports nm_rxsync callback and also
 * internally by the bwrap
 */
static int
netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = NMB(na, slot);

			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	n = 0;
done:
	return n;
}

/*
 * nm_rxsync callback for VALE ports
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
int
netmap_vp_rxsync(struct netmap_kring *kring, int flags)
{
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync_locked(kring, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}

int
netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
		struct netmap_bdg_ops *ops)
{
	return ops->bwrap_attach(nr_name, hwna);
}


/* Bridge wrapper code (bwrap).
 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
 * VALE switch.
 * The main task is to swap the meaning of tx and rx rings to match the
 * expectations of the VALE switch code (see nm_bdg_flush).
 *
 * The bwrap works by interposing a netmap_bwrap_adapter between the
 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
 * a netmap_vp_adapter to the rest of the system, but, internally, it
 * translates all callbacks to what the hwna expects.
 *
 * Note that we have to intercept callbacks coming from two sides:
 *
 *  - callbacks coming from the netmap module are intercepted by
 *    passing around the netmap_bwrap_adapter instead of the hwna
 *
 *  - callbacks coming from outside of the netmap module only know
 *    about the hwna. This, however, only happens in interrupt
 *    handlers, where only the hwna->nm_notify callback is called.
 *    What the bwrap does is to overwrite the hwna->nm_notify callback
 *    with its own netmap_bwrap_intr_notify.
 *    XXX This assumes that the hwna->nm_notify callback was the
 *    standard netmap_notify(), as it is the case for nic adapters.
 *    Any additional action performed by hwna->nm_notify will not be
 *    performed by netmap_bwrap_intr_notify.
 *
 * Additionally, the bwrap can optionally attach the host rings pair
 * of the wrapped adapter to a different port of the switch.
 */


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;

	if (bna->host.up.nm_mem)
		netmap_mem_put(bna->host.up.nm_mem);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
				(bh ? bna->host.bdg_port : -1));
	}

	ND("na %p", na);
	na->ifp = NULL;
	bna->host.up.ifp = NULL;
	hwna->na_vp = bna->saved_na_vp;
	hwna->na_hostvp = NULL;
	hwna->na_private = NULL;
	hwna->na_flags &= ~NAF_BUSY;
	netmap_adapter_put(hwna);

}

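/*
 * Illustrative sketch (not part of the driver) of the tx/rx swap that the
 * bwrap comment above describes: the bwrap shares its netmap rings with
 * the hwna, but with the directions exchanged, so a frame sitting on an
 * hwna rx ring is what the switch sees on the corresponding bwrap tx
 * ring. The real cross-linking is done in
 * netmap_bwrap_krings_create_common() below.
 *
 *	for_rx_tx(t) {
 *		enum txrx r = nm_txrx_swap(t);	// NR_TX <-> NR_RX
 *		for (i = 0; i < netmap_all_rings(hwna, r); i++)
 *			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
 *	}
 */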
/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only parts of the ring that are significant are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an
 * hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_kring *bkring;
	struct netmap_vp_adapter *vpna = &bna->up;
	u_int ring_nr = kring->ring_id;
	int ret = NM_IRQ_COMPLETED;
	int error;

	if (netmap_verbose)
		D("%s %s 0x%x", na->name, kring->name, flags);

	bkring = vpna->up.tx_rings[ring_nr];

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
		return EIO;
	}

	if (netmap_verbose)
		D("%s head %d cur %d tail %d", na->name,
			kring->rhead, kring->rcur, kring->rtail);

	/* simulate a user wakeup on the rx ring
	 * fetch packets that have arrived.
	 */
	error = kring->nm_sync(kring, 0);
	if (error)
		goto put_out;
	if (kring->nr_hwcur == kring->nr_hwtail) {
		if (netmap_verbose)
			D("how strange, interrupt with no packets on %s",
				na->name);
		goto put_out;
	}

	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
	 * to push all packets out.
	 */
	bkring->rhead = bkring->rcur = kring->nr_hwtail;

	bkring->nm_sync(bkring, flags);

	/* mark all buffers as released on this ring */
	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
	/* another call to actually release the buffers */
	error = kring->nm_sync(kring, 0);

	/* The second rxsync may have further advanced hwtail. If this happens,
	 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
	if (kring->rcur != kring->nr_hwtail) {
		ret = NM_IRQ_RESCHED;
	}
put_out:
	nm_kr_put(kring);

	return error ? error : ret;
}


/* nm_register callback for bwrap */
int
netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error, i;
	enum txrx t;

	ND("%s %s", na->name, onoff ? "on" : "off");

	if (onoff) {
		/* netmap_do_regif has been called on the bwrap na.
		 * We need to pass the information about the
		 * memory allocator down to the hwna before
		 * putting it in netmap mode
		 */
		hwna->na_lut = na->na_lut;

		if (hostna->na_bdg) {
			/* if the host rings have been attached to switch,
			 * we need to copy the memory allocator information
			 * in the hostna also
			 */
			hostna->up.na_lut = na->na_lut;
		}

	}

	/* pass down the pending ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
				NMR(na, t)[i]->nr_pending_mode;
		}
	}

	/* forward the request to the hwna */
	error = hwna->nm_register(hwna, onoff);
	if (error)
		return error;

	/* copy up the current ring state information */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
			NMR(na, t)[i]->nr_mode = kring->nr_mode;
		}
	}

	/* impersonate a netmap_vp_adapter */
	netmap_vp_reg(na, onoff);
	if (hostna->na_bdg)
		netmap_vp_reg(&hostna->up, onoff);

	if (onoff) {
		u_int i;
		/* intercept the hwna nm_notify callback on the hw rings */
		for (i = 0; i < hwna->num_rx_rings; i++) {
			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
		}
		i = hwna->num_rx_rings; /* for safety */
		/* save the host ring notify unconditionally */
		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->save_notify =
				hwna->rx_rings[i]->nm_notify;
			if (hostna->na_bdg) {
				/* also intercept the host ring notify */
				hwna->rx_rings[i]->nm_notify =
					netmap_bwrap_intr_notify;
				na->tx_rings[i]->nm_sync = na->nm_txsync;
			}
		}
		if (na->active_fds == 0)
			na->na_flags |= NAF_NETMAP_ON;
	} else {
		u_int i;

		if (na->active_fds == 0)
			na->na_flags &= ~NAF_NETMAP_ON;

		/* reset all notify callbacks (including host ring) */
		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
			hwna->rx_rings[i]->nm_notify =
				hwna->rx_rings[i]->save_notify;
			hwna->rx_rings[i]->save_notify = NULL;
		}
		hwna->na_lut.lut = NULL;
		hwna->na_lut.plut = NULL;
		hwna->na_lut.objtotal = 0;
		hwna->na_lut.objsize = 0;

		/* pass ownership of the netmap rings to the hwna */
		for_rx_tx(t) {
			for (i = 0; i < netmap_all_rings(na, t); i++) {
				NMR(na, t)[i]->ring = NULL;
			}
		}
		/* reset the number of host rings to default */
		for_rx_tx(t) {
			nma_set_host_nrings(hwna, t, 1);
		}

	}

	return 0;
}

/* nm_config callback for bwrap */
static int
netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	int error;

	/* Forward the request to the hwna. It may happen that nobody
	 * registered hwna yet, so netmap_mem_get_lut() may not have
	 * been called yet.
	 */
	error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
	if (error)
		return error;
	netmap_update_config(hwna);
	/* swap the results and propagate */
	info->num_tx_rings = hwna->num_rx_rings;
	info->num_tx_descs = hwna->num_rx_desc;
	info->num_rx_rings = hwna->num_tx_rings;
	info->num_rx_descs = hwna->num_tx_desc;
	info->rx_buf_maxsize = hwna->rx_buf_maxsize;

	return 0;
}


/* nm_krings_create callback for bwrap */
int
netmap_bwrap_krings_create_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int i, error = 0;
	enum txrx t;

	/* also create the hwna krings */
	error = hwna->nm_krings_create(hwna);
	if (error) {
		return error;
	}

	/* increment the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users++;
		}
	}

	/* now create the actual rings */
	error = netmap_mem_rings_create(hwna);
	if (error) {
		goto err_dec_users;
	}

	/* cross-link the netmap rings
	 * The original number of rings comes from hwna,
	 * rx rings on one side equals tx rings on the other.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
		}
	}

	if (na->na_flags & NAF_HOST_RINGS) {
		/* the hostna rings are the host rings of the bwrap.
		 * The corresponding krings must point back to the
		 * hostna
		 */
		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
		for_rx_tx(t) {
			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
				NMR(hostna, t)[i]->na = hostna;
			}
		}
	}

	return 0;

err_dec_users:
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}
	hwna->nm_krings_delete(hwna);
	return error;
}


void
netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	enum txrx t;
	int i;

	ND("%s", na->name);

	/* decrement the usage counter for all the hwna krings */
	for_rx_tx(t) {
		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
			NMR(hwna, t)[i]->users--;
		}
	}

	/* delete any netmap rings that are no longer needed */
	netmap_mem_rings_delete(hwna);
	hwna->nm_krings_delete(hwna);
}


/* notify method for the bridge-->hwna direction */
int
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *hwna = bna->hwna;
	u_int ring_n = kring->ring_id;
	u_int lim = kring->nkr_num_slots - 1;
	struct netmap_kring *hw_kring;
	int error;

	ND("%s: na %s hwna %s",
		(kring ? kring->name : "NULL!"),
		(na ? na->name : "NULL!"),
		(hwna ? hwna->name : "NULL!"));
	hw_kring = hwna->tx_rings[ring_n];

	if (nm_kr_tryget(hw_kring, 0, NULL)) {
		return ENXIO;
	}

	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(kring, flags);
	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
	error = hw_kring->nm_sync(hw_kring, flags);
	if (error)
		goto put_out;

	/* third step: now we are back on the rx ring */
	/* claim ownership on all hw owned bufs */
	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */

	/* fourth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(kring, flags);
	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		na->name, ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
	nm_kr_put(hw_kring);

	return error ? error : NM_IRQ_COMPLETED;
}


/* nm_bdg_ctl callback for the bwrap.
 * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
 * On attach, it needs to provide a fake netmap_priv_d structure and
 * perform a netmap_do_regif() on the bwrap. This will put both the
 * bwrap and the hwna in netmap mode, with the netmap rings shared
 * and cross linked. Moreover, it will start intercepting interrupts
 * directed to hwna.
 */
static int
netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	int error = 0;

	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
		struct nmreq_vale_attach *req =
			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
		if (req->reg.nr_ringid != 0 ||
			(req->reg.nr_mode != NR_REG_ALL_NIC &&
			 req->reg.nr_mode != NR_REG_NIC_SW)) {
			/* We only support attaching all the NIC rings
			 * and/or the host stack.
			 */
			return EINVAL;
		}
		if (NETMAP_OWNED_BY_ANY(na)) {
			return EBUSY;
		}
		if (bna->na_kpriv) {
			/* nothing to do */
			return 0;
		}
		npriv = netmap_priv_new();
		if (npriv == NULL)
			return ENOMEM;
		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
					req->reg.nr_ringid, req->reg.nr_flags);
		if (error) {
			netmap_priv_delete(npriv);
			return error;
		}
		bna->na_kpriv = npriv;
		na->na_flags |= NAF_BUSY;
	} else {
		if (na->active_fds == 0) /* not registered */
			return EINVAL;
		netmap_priv_delete(bna->na_kpriv);
		bna->na_kpriv = NULL;
		na->na_flags &= ~NAF_BUSY;
	}

	return error;
}

/* attach a bridge wrapper to the 'real' device */
int
netmap_bwrap_attach_common(struct netmap_adapter *na,
		struct netmap_adapter *hwna)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *hostna = NULL;
	int error = 0;
	enum txrx t;

	/* make sure the NIC is not already in use */
	if (NETMAP_OWNED_BY_ANY(hwna)) {
		D("NIC %s busy, cannot attach to bridge", hwna->name);
		return EBUSY;
	}

	bna = (struct netmap_bwrap_adapter *)na;
	/* make bwrap ifp point to the real ifp */
	na->ifp = hwna->ifp;
	if_ref(na->ifp);
	na->na_private = bna;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	for_rx_tx(t) {
		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
	}
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_config = netmap_bwrap_config;
	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
	na->pdev = hwna->pdev;
	na->nm_mem = netmap_mem_get(hwna->nm_mem);
	na->virt_hdr_len = hwna->virt_hdr_len;
	na->rx_buf_maxsize = hwna->rx_buf_maxsize;

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */
	bna->saved_na_vp = hwna->na_vp;
	hwna->na_vp = &bna->up;
	bna->up.up.na_vp = &(bna->up);

	if (hwna->na_flags & NAF_HOST_RINGS) {
		if (hwna->na_flags & NAF_SW_ONLY)
			na->na_flags |= NAF_SW_ONLY;
		na->na_flags |= NAF_HOST_RINGS;
		hostna = &bna->host.up;

		/* limit the number of host rings to that of hw */
		nm_bound_var(&hostna->num_tx_rings, 1, 1,
				nma_get_nrings(hwna, NR_TX), NULL);
		nm_bound_var(&hostna->num_rx_rings, 1, 1,
				nma_get_nrings(hwna, NR_RX), NULL);

		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
		hostna->ifp = hwna->ifp;
		for_rx_tx(t) {
			enum txrx r = nm_txrx_swap(t);
			u_int nr = nma_get_nrings(hostna, t);

			nma_set_nrings(hostna, t, nr);
			nma_set_host_nrings(na, t, nr);
			if (nma_get_host_nrings(hwna, t) < nr) {
				nma_set_host_nrings(hwna, t, nr);
			}
			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
		}
		// hostna->nm_txsync = netmap_bwrap_host_txsync;
		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
		hostna->nm_mem = netmap_mem_get(na->nm_mem);
		hostna->na_private = bna;
		hostna->na_vp = &bna->up;
		na->na_hostvp = hwna->na_hostvp =
			hostna->na_hostvp = &bna->host;
		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
	}

	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
		na->name, ifp->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		goto err_put;
	}
	hwna->na_flags |= NAF_BUSY;
	return 0;

err_put:
	hwna->na_vp = hwna->na_hostvp = NULL;
	netmap_adapter_put(hwna);
	return error;

}

struct nm_bridge *
netmap_init_bridges2(u_int n)
{
	int i;
	struct nm_bridge *b;

	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
	if (b == NULL)
		return NULL;
	for (i = 0; i < n; i++)
		BDG_RWINIT(&b[i]);
	return b;
}

void
netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
	int i;

	if (b == NULL)
		return;

	for (i = 0; i < n; i++)
		BDG_RWDESTROY(&b[i]);
	nm_os_free(b);
}

int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
	return netmap_bns_register();
#else
	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
	if (nm_bridges == NULL)
		return ENOMEM;
	return 0;
#endif
}

void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
	netmap_bns_unregister();
#else
	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
#endif
}
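
/*
 * Illustrative sketch (not part of the driver): how an external kernel
 * module could take over the callbacks of an existing bridge through
 * netmap_bdg_regops(), as described in the comment above that function.
 * The .lookup member name, my_lookup, my_state and my_token are
 * assumptions used only for illustration.
 *
 *	static struct netmap_bdg_ops my_ops = {
 *		.lookup = my_lookup,	// forwarding decision
 *		.config = NULL,
 *		.dtor = NULL,
 *	};
 *
 *	error = netmap_bdg_regops("vale0:", &my_ops, my_state, my_token);
 *	...
 *	// passing NULL ops resets the bridge and its private_data
 *	netmap_bdg_regops("vale0:", NULL, NULL, my_token);
 */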