/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */
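/*
 * Illustrative only: a minimal sketch of the locking discipline
 * described above, assuming a process-context source port (so the
 * shared lock may be taken with BDG_RLOCK() rather than a trylock).
 * The names mirror macros and fields used later in this file; this
 * block is never compiled.
 */
#if 0
static void
example_forwarding_cycle(struct nm_bridge *b)
{
	BDG_RLOCK(b);	/* shared: forwarders may run concurrently */
	/* ... reserve slots on the destination rx ring under the
	 * per-queue lock, release it, copy packets (this may fault
	 * and sleep, hence the sleepable shared lock), then take the
	 * per-queue lock again and update the receive ring ...
	 */
	BDG_RUNLOCK(b);
}

static void
example_reconfigure(struct nm_bridge *b)
{
	NMG_LOCK();	/* serialize all switch/port modifications */
	BDG_WLOCK(b);	/* exclusive: wait for forwarders to drain */
	/* ... add or delete a port ... */
	BDG_WUNLOCK(b);
	NMG_UNLOCK();
}
#endif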
/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h>	/* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>


#define BDG_RWLOCK_T		struct rwlock

#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#ifdef WITH_VALE

/*
 * system parameters (most of them in netmap_kern.h)
 * NM_NAME		prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES		max number of switches in the system.
 *	XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue locks instead of the core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */


/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger as the
 * last packet in the block may overflow the size.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");


static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size; an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * do not mind if they are slow.
 *
 * The bridge is non-blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either the index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
	 * forward this packet. ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdgctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};
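/*
 * Illustrative only: a minimal custom lookup function matching
 * bdg_lookup_fn_t, of the kind that can be installed with
 * netmap_bdg_ctl() and NETMAP_BDG_LOOKUP_REG (see below). This toy
 * policy floods every packet; a real function would parse buf and
 * return a port index. Never compiled.
 */
#if 0
static u_int
example_flood_lookup(char *buf, u_int buf_len, uint8_t *dst_ring,
	struct netmap_vp_adapter *na)
{
	(void)buf; (void)buf_len; (void)na;
	*dst_ring = 0;			/* broadcasts always use ring 0 */
	return NM_BDG_BROADCAST;	/* or a port index, or NM_BDG_NOPORT */
}
#endif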
/*
 * XXX in principle nm_bridges could be created dynamically.
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * this is a slightly optimized copy routine which rounds
 * to multiples of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapping.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
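/*
 * Illustrative only: pkt_copy moves whole 64-byte chunks, so for a
 * length l it actually touches roundup(l, 64) bytes in both buffers;
 * callers (see nm_bdg_flush() below) must size the buffers
 * accordingly. A sketch of the intended usage, never compiled:
 */
#if 0
static void
example_pkt_copy_usage(void *src, void *dst, u_int len)
{
	/* both buffers must have room for the rounded-up length */
	u_int copy_len = (len + 63) & ~63;

	pkt_copy(src, dst, (int)copy_len);
}
#endif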
/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
	int i, l, namelen;
	struct nm_bridge *b = NULL;

	NMG_LOCK_ASSERT();

	namelen = strlen(NM_NAME);	/* base length */
	l = name ? strlen(name) : 0;	/* actual length */
	if (l < namelen) {
		D("invalid bridge name %s", name ? name : "(null)");
		return NULL;
	}
	for (i = namelen + 1; i < l; i++) {
		if (name[i] == ':') {
			namelen = i;
			break;
		}
	}
	if (namelen >= IFNAMSIZ)
		namelen = IFNAMSIZ;
	ND("--- prefix is '%.*s' ---", namelen, name);

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < NM_BRIDGES; i++) {
		struct nm_bridge *x = nm_bridges + i;

		if (x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
		/* initialize the bridge */
		strncpy(b->bdg_basename, name, namelen);
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->nm_bdg_lookup = netmap_bdg_learning;
		/* reset the MAC address table */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
	}
	return b;
}


/*
 * Free the forwarding tables for rings attached to switch ports.
 */
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
	int nrings, i;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	nrings = na->num_tx_rings;
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		if (kring[i].nkr_ft) {
			free(kring[i].nkr_ft, M_DEVBUF);
			kring[i].nkr_ft = NULL; /* protect from freeing twice */
		}
	}
}


/*
 * Allocate the forwarding tables for the rings attached to the bridge ports.
 */
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
	int nrings, l, i, num_dstq;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	/* all port:rings + broadcast */
	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
	l += sizeof(struct nm_bdg_q) * num_dstq;
	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;

	nrings = na->num_tx_rings + 1;
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		struct nm_bdg_fwd *ft;
		struct nm_bdg_q *dstq;
		int j;

		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ft) {
			nm_free_bdgfwd(na);
			return ENOMEM;
		}
		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
		for (j = 0; j < num_dstq; j++) {
			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
			dstq[j].bq_len = 0;
		}
		kring[i].nkr_ft = ft;
	}
	return 0;
}
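/*
 * The per-ring scratch area allocated above is a single block laid
 * out as
 *
 *	struct nm_bdg_fwd  ft[NM_BDG_BATCH_MAX];    batch being forwarded
 *	struct nm_bdg_q    dstq[num_dstq];          one queue per
 *	                                            port:ring + broadcast
 *	uint16_t           dsts[NM_BDG_BATCH_MAX];  active queue indexes
 *
 * nm_bdg_flush() below recovers the three regions with pointer
 * arithmetic from ft alone.
 */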
static void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint8_t tmp[NM_BDG_MAXPORTS];

	/*
	 * New algorithm:
	 * make a copy of bdg_port_index;
	 * lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	 * in the array of bdg_port_index, replacing them with
	 * entries from the bottom of the array;
	 * decrement bdg_active_ports;
	 * acquire BDG_WLOCK() and copy back the array.
	 */

	D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--;			/* point to last active port */
			tmp[i] = tmp[lim];	/* swap with i */
			tmp[lim] = hw;		/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;
	}
}


static void
netmap_adapter_vp_dtor(struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	struct nm_bridge *b = vpna->na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);

	if (b) {
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
	}

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}


/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	const char *name = nmr->nr_name;
	struct ifnet *ifp;
	int error = 0;
	struct netmap_adapter *ret;
	struct netmap_vp_adapter *vpna;
	struct nm_bridge *b;
	int i, j, cand = -1, cand2 = -1;
	int needed;

	*na = NULL;	/* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		return 0;	/* no error, but no VALE prefix */
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (create ? ENOMEM : ENXIO);
	}

	/* Now we are sure that the name starts with the bridge's name;
	 * look up the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		// KASSERT(na != NULL);
		ifp = vpna->up.ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(NM_IFPNAME(ifp), name)) {
			netmap_adapter_get(&vpna->up);
			ND("found existing if %s refs %d", name,
				vpna->na_bdg_refcount);
			*na = (struct netmap_adapter *)vpna;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return ENOMEM;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * check whether there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifp = ifunit_ref(name + b->bdg_namelen + 1);
	if (!ifp) { /* this is a virtual port */
		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}

		/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under NMG_LOCK
		 */
		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ifp)
			return ENOMEM;

		strcpy(ifp->if_xname, name);
		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = bdg_netmap_attach(nmr, ifp);
		if (error) {
			D("error %d", error);
			free(ifp, M_DEVBUF);
			return error;
		}
		ret = NA(ifp);
		cand2 = -1;	/* only need one port */
	} else {	/* this is a NIC */
		struct ifnet *fake_ifp;

		error = netmap_get_hw_na(ifp, &ret);
		if (error || ret == NULL)
			goto out;

		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(ret)) {
			D("NIC %s busy, cannot attach to bridge",
				NM_IFPNAME(ifp));
			error = EBUSY;
			goto out;
		}
		/* create a fake interface */
		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!fake_ifp) {
			error = ENOMEM;
			goto out;
		}
		strcpy(fake_ifp->if_xname, name);
		error = netmap_bwrap_attach(fake_ifp, ifp);
		if (error) {
			free(fake_ifp, M_DEVBUF);
			goto out;
		}
		ret = NA(fake_ifp);
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
		if_rele(ifp);
	}
	vpna = (struct netmap_vp_adapter *)ret;

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	ND("NIC %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		struct netmap_vp_adapter *hostna = vpna + 1;
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", hostna, cand2);
	}
	ND("if %s refs %d", name, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = ret;
	netmap_adapter_get(ret);
	return 0;

out:
	if_rele(ifp);

	return error;
}


/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna;
	int error;

	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;

	NMG_LOCK();

	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (na->active_fds > 0) { /* already registered */
		error = EBUSY;
		goto unref_exit;
	}

	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
	if (!nifp) {
		goto unref_exit;
	}

	bna = (struct netmap_bwrap_adapter*)na;
	bna->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}
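/*
 * Illustrative only: a userspace sketch (in the spirit of the
 * vale-ctl tool) of how the attach request above may be issued,
 * assuming the standard netmap control path dispatches NIOCREGIF
 * requests with a non-zero nr_cmd to netmap_bdg_ctl(). Error
 * handling trimmed; never compiled in the kernel.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/netmap_user.h>

static int
example_bdg_attach_nic(const char *port /* e.g. "vale0:em1" */)
{
	struct nmreq nmr;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&nmr, 0, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	strncpy(nmr.nr_name, port, sizeof(nmr.nr_name) - 1);
	nmr.nr_cmd = NETMAP_BDG_ATTACH;
	/* nmr.nr_arg1 = NETMAP_BDG_HOST;  also attach the host stack */
	return ioctl(fd, NIOCREGIF, &nmr); /* NETMAP_BDG_DETACH to undo */
}
#endif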
static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	int error;
	struct netmap_bwrap_adapter *bna;
	int last_instance;

	NMG_LOCK();
	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}

	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	bna = (struct netmap_bwrap_adapter *)na;

	if (na->active_fds == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
	if (!last_instance) {
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		struct netmap_priv_d *npriv = bna->na_kpriv;

		bna->na_kpriv = NULL;
		D("deleting priv");

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;
}


/* exported to kernel callers, e.g. OVS ?
 * Entry point.
 * Called without NMG_LOCK.
 */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct netmap_vp_adapter *vpna;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				vpna = b->bdg_ports[i];
				if (vpna == NULL) {
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = vpna->up.ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name)) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridges by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				vpna = b->bdg_ports[j];
				iter = vpna->up.ifp;
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just the bridge's name (including
		 * the ':' if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	case NETMAP_BDG_OFFSET:
		NMG_LOCK();
		error = netmap_get_bdg_na(nmr, &na, 0);
		if (na && !error) {
			vpna = (struct netmap_vp_adapter *)na;
			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
			vpna->offset = nmr->nr_arg1;
			D("Using offset %d for %p", vpna->offset, vpna);
			netmap_adapter_put(na);
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}
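/*
 * Illustrative only: a userspace sketch of the NETMAP_BDG_LIST
 * enumeration protocol implemented above (empty name, then resume
 * from the returned nr_arg1/nr_arg2 until the ioctl fails),
 * assuming, as in the vale-ctl utility, that the request travels
 * over NIOCGINFO. Never compiled in the kernel.
 */
#if 0
static void
example_bdg_list(int fd /* an open("/dev/netmap") descriptor */)
{
	struct nmreq nmr;

	memset(&nmr, 0, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	nmr.nr_cmd = NETMAP_BDG_LIST;	/* nr_name empty: enumerate */
	while (ioctl(fd, NIOCGINFO, &nmr) == 0) {
		printf("bridge %d port %d name %s\n",
			nmr.nr_arg1, nmr.nr_arg2, nmr.nr_name);
		nmr.nr_arg2++;		/* resume from the next port */
		nmr.nr_name[0] = '\0';	/* stay in enumeration mode */
	}
}
#endif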
static int
netmap_vp_krings_create(struct netmap_adapter *na)
{
	u_int ntx, nrx, tailroom;
	int error, i;
	uint32_t *leases;

	/* XXX vps do not need host rings,
	 * but we crash if we don't have one
	 */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	/*
	 * Leases are attached to RX rings on vale ports
	 */
	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;

	error = netmap_krings_create(na, ntx, nrx, tailroom);
	if (error)
		return error;

	leases = na->tailroom;

	for (i = 0; i < nrx; i++) { /* Receive rings */
		na->rx_rings[i].nkr_leases = leases;
		leases += na->num_rx_desc;
	}

	error = nm_alloc_bdgfwd(na);
	if (error) {
		netmap_krings_delete(na);
		return error;
	}

	return 0;
}


static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}


static int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
	struct netmap_vp_adapter *na, u_int ring_nr);


/*
 * Grab packets from a kring, move them into the ft structure
 * associated to the tx (input) port. Max one instance per port,
 * filtered on input (ioctl, poll or XXX).
 * Returns the next position in the ring.
 */
static int
nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
	struct netmap_kring *kring, u_int end)
{
	struct netmap_ring *ring = kring->ring;
	struct nm_bdg_fwd *ft;
	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
	u_int ft_i = 0;	/* start from 0 */
	u_int frags = 1; /* how many frags ? */
	struct nm_bridge *b = na->na_bdg;

	/* To protect against modifications to the bridge we acquire a
	 * shared lock, waiting if we can sleep (if the source port is
	 * attached to a user process) or with a trylock otherwise (NICs).
	 */
	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
		BDG_RLOCK(b);
	else if (!BDG_RTRYLOCK(b))
		return 0;
	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	ft = kring->nkr_ft;

	for (; likely(j != end); j = nm_next(j, lim)) {
		struct netmap_slot *slot = &ring->slot[j];
		char *buf;

		ft[ft_i].ft_len = slot->len;
		ft[ft_i].ft_flags = slot->flags;

		ND("flags is 0x%x", slot->flags);
		/* this slot goes into a list so initialize the link field */
		ft[ft_i].ft_next = NM_FT_NULL;
		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
		__builtin_prefetch(buf);
		++ft_i;
		if (slot->flags & NS_MOREFRAG) {
			frags++;
			continue;
		}
		if (unlikely(netmap_verbose && frags > 1))
			RD(5, "%d frags at %d", frags, ft_i - frags);
		ft[ft_i - frags].ft_frags = frags;
		frags = 1;
		if (unlikely((int)ft_i >= bridge_batch))
			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	}
	if (frags > 1) {
		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
		ft[ft_i - frags].ft_frags = frags - 1;
	}
	if (ft_i)
		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	BDG_RUNLOCK(b);
	return j;
}


/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)							\
do {									\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (/*CONSTCOND*/0)


static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix


static int
bdg_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	struct ifnet *ifp = na->ifp;

	/* the interface is already attached to the bridge,
	 * so we only need to toggle IFCAP_NETMAP.
	 */
	BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/*
 * Lookup function for a learning bridge.
 * Update the hash table with the source address,
 * then return the destination port index, and the
 * ring in *dst_ring (at the moment we always use ring 0).
 */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_vp_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ? */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
			D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
				s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}


/*
 * Available space in the ring. Only used in VALE code
 * and only with is_rx = 1
 */
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
	int space;

	if (is_rx) {
		int busy = k->nkr_hwlease - k->nr_hwcur;
		if (busy < 0)
			busy += k->nkr_num_slots;
		space = k->nkr_num_slots - 1 - busy;
	} else {
		/* XXX never used in this branch */
		space = k->nr_hwtail - k->nkr_hwlease;
		if (space < 0)
			space += k->nkr_num_slots;
	}
#if 0
	// sanity check
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_tail >= k->nkr_num_slots ||
		busy < 0 ||
		busy >= k->nkr_num_slots) {
		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
#endif
	return space;
}




/* make a lease on the kring for N positions. return the
 * lease index
 * XXX only used in VALE code and with is_rx = 1
 */
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
	uint32_t lim = k->nkr_num_slots - 1;
	uint32_t lease_idx = k->nkr_lease_idx;

	k->nkr_leases[lease_idx] = NR_NOSLOT;
	k->nkr_lease_idx = nm_next(lease_idx, lim);

	if (n > nm_kr_space(k, is_rx)) {
		D("invalid request for %d slots", n);
		panic("x");
	}
	/* XXX verify that there are n slots */
	k->nkr_hwlease += n;
	if (k->nkr_hwlease > lim)
		k->nkr_hwlease -= lim + 1;

	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwtail >= k->nkr_num_slots ||
		k->nkr_lease_idx >= k->nkr_num_slots) {
		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
			k->na->ifp->if_xname,
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
	return lease_idx;
}
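/*
 * Illustrative only: the lease protocol used by nm_bdg_flush() below.
 * A sender reserves slots under the queue lock, copies without the
 * lock, then reports completion; nr_hwtail only advances once all
 * earlier leases have been reported. A condensed sketch of that
 * sequence, never compiled:
 */
#if 0
static void
example_lease_cycle(struct netmap_kring *kring, u_int needed)
{
	u_int j, howmany;
	uint32_t lease_idx;

	mtx_lock(&kring->q_lock);
	j = kring->nkr_hwlease;			/* first reserved slot */
	howmany = nm_kr_space(kring, 1);	/* cap the reservation */
	if (needed < howmany)
		howmany = needed;
	lease_idx = nm_kr_lease(kring, howmany, 1);
	mtx_unlock(&kring->q_lock);

	/* ... copy packets into the reserved slots, advancing j with
	 * nm_next(); this may sleep, other leases may complete ...
	 */

	mtx_lock(&kring->q_lock);
	kring->nkr_leases[lease_idx] = j; /* report the final position */
	/* nm_bdg_flush() then advances nr_hwtail once every lease
	 * preceding ours has also been reported.
	 */
	mtx_unlock(&kring->q_lock);
}
#endif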
/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;
		uint8_t *buf = ft[i].ft_buf;
		u_int len = ft[i].ft_len;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the offset is not within the first
		 * fragment nor at the very beginning of the second.
		 */
		if (unlikely(na->offset > len))
			continue;
		if (len == na->offset) {
			buf = ft[i+1].ft_buf;
			len = ft[i+1].ft_len;
		} else {
			buf += na->offset;
			len -= na->offset;
		}
		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}
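	/* At this point every packet in the batch sits on exactly one
	 * queue: dst_ents[dst_port * NM_BDG_MAXRINGS + dst_ring] for
	 * unicast, or the single broadcast queue expanded right below.
	 * Note that bq_len counts fragments, not packets, since slots
	 * on the destination ring are consumed per fragment, and that
	 * only unicast queues were recorded in dsts[] above.
	 */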
	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int offset_mismatch;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->up.ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		offset_mismatch = (dst_na->offset != na->offset);

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* We need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a chance
		 * that we will not use all of the slots we have claimed,
		 * and we will need to handle the leftover ones when we
		 * regain the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		if (dst_na->retry) {
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;
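		/* The loop below drains two lists at once: the
		 * per-destination unicast list and the broadcast list.
		 * Picking the smaller of 'next' and 'brd_next' at each
		 * step replays the packets in their original batch order,
		 * since both lists are sorted by construction.
		 */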
		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;
			int fix_mismatch = offset_mismatch;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
				break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			do {
				char *dst, *src = ft_p->ft_buf;
				size_t copy_len = ft_p->ft_len, dst_len = copy_len;

				slot = &ring->slot[j];
				dst = BDG_NMB(&dst_na->up, slot);

				if (unlikely(fix_mismatch)) {
					/* We are processing the first fragment
					 * and there is a mismatch between source
					 * and destination offsets. Create a zeroed
					 * header for the destination, independently
					 * of the source header length and content.
					 */
					src += na->offset;
					copy_len -= na->offset;
					bzero(dst, dst_na->offset);
					dst += dst_na->offset;
					dst_len = dst_na->offset + copy_len;
					/* fix the first fragment only */
					fix_mismatch = 0;
					/* Here it could be copy_len == dst_len == 0,
					 * and so a zero length fragment is passed.
					 */
				}

				ND("send [%d] %d(%d) bytes at %s:%d",
					i, (int)copy_len, (int)dst_len,
					NM_IFPNAME(dst_ifp), j);
				/* round to a multiple of 64 */
				copy_len = (copy_len + 63) & ~63;

				if (ft_p->ft_flags & NS_INDIRECT) {
					if (copyin(src, dst, copy_len)) {
						// invalid user pointer, pretend len is 0
						dst_len = 0;
					}
				} else {
					//memcpy(dst, src, copy_len);
					pkt_copy(src, dst, (int)copy_len);
				}
				slot->len = dst_len;
				slot->flags = (cnt << 8) | NS_MOREFRAG;
				j = nm_next(j, lim);
				ft_p++;
				sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
			/* current position */
			uint32_t *p = kring->nkr_leases; /* shorthand */
			uint32_t update_pos;
			int still_locked = 1;

			mtx_lock(&kring->q_lock);
			if (unlikely(howmany > 0)) {
				/* not all buffers were used. If I am the
				 * last one I can recover the slots, otherwise
				 * I must fill them with len 0 to mark empty
				 * packets.
				 */
				ND("leftover %d bufs", howmany);
				if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
					/* yes i am the last one */
					ND("roll back nkr_hwlease to %d", j);
					kring->nkr_hwlease = j;
				} else {
					while (howmany-- > 0) {
						ring->slot[j].len = 0;
						ring->slot[j].flags = 0;
						j = nm_next(j, lim);
					}
				}
			}
			p[lease_idx] = j; /* report I am done */

			update_pos = kring->nr_hwtail;

			if (my_start == update_pos) {
				/* all slots before my_start have been reported,
				 * so scan subsequent leases to see if other ranges
				 * have been completed, and do a selwakeup or txsync.
				 */
				while (lease_idx != kring->nkr_lease_idx &&
					p[lease_idx] != NR_NOSLOT) {
					j = p[lease_idx];
					p[lease_idx] = NR_NOSLOT;
					lease_idx = nm_next(lease_idx, lim);
				}
				/* j is the new 'write' position. j != my_start
				 * means there are new buffers to report
				 */
				if (likely(j != my_start)) {
					kring->nr_hwtail = j;
					dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
					still_locked = 0;
					mtx_unlock(&kring->q_lock);
					if (dst_na->retry && retry--)
						goto retry;
				}
			}
			if (still_locked)
				mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}


static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = kring->rcur;

	if (bridge_batch <= 0) { /* testing only */
		done = cur; // used all
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(na, ring_nr, kring, cur);
done:
	if (done != cur)
		D("early break at %d/%d, tail %d", done, cur, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	kring->nr_hwtail = nm_prev(done, lim);
	nm_txsync_finalize(kring);
	if (netmap_verbose)
		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
	return 0;
}


/*
 * main dispatch routine for the bridge.
 * We already know that only one thread is running this.
 * we must run nm_bdg_preflush without lock.
 */
static int
bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	return netmap_vp_txsync(vpna, ring_nr, flags);
}

static int
netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = nm_rxsync_prologue(kring);
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = BDG_NMB(na, slot);

			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	/* tell userspace that there are new packets */
	nm_rxsync_finalize(kring);
	n = 0;
done:
	return n;
}

/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
static int
bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync(na, ring_nr, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}


static int
bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
{
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	int error;

	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vpna == NULL)
		return ENOMEM;

	na = &vpna->up;

	na->ifp = ifp;

	/* bound checking */
	na->num_tx_rings = nmr->nr_tx_rings;
	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_tx_rings = na->num_tx_rings; // write back
	na->num_rx_rings = nmr->nr_rx_rings;
	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_rx_rings = na->num_rx_rings; // write back
	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_tx_desc = nmr->nr_tx_slots;
	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_rx_desc = nmr->nr_rx_slots;
	vpna->offset = 0;

	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
	na->nm_txsync = bdg_netmap_txsync;
	na->nm_rxsync = bdg_netmap_rxsync;
	na->nm_register = bdg_netmap_reg;
	na->nm_dtor = netmap_adapter_vp_dtor;
	na->nm_krings_create = netmap_vp_krings_create;
	na->nm_krings_delete = netmap_vp_krings_delete;
	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
	/* other nmd fields are set in the common routine */
	error = netmap_attach_common(na);
	if (error) {
		free(vpna, M_DEVBUF);
		return error;
	}
	return 0;
}


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("na %p", na);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			(bh ? bna->host.bdg_port : -1));
	}

	hwna->na_private = NULL;
	netmap_adapter_put(hwna);

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}
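/*
 * A bwrap couples three adapters: the hardware adapter (hwna), a VALE
 * port (bna->up) facing the bridge, and an optional second port
 * (bna->host) for the host stack. The rings are shared with rx/tx
 * roles swapped, so a packet received on an hwna rx ring is pushed
 * into the bridge through the corresponding bwrap tx ring, and vice
 * versa; see netmap_bwrap_intr_notify() and netmap_bwrap_notify()
 * below for the two directions.
 */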
/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only parts of the ring that are significant are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put
 * on an hwna rx ring. The bridge wrapper then sends the packets
 * through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_vp_adapter *hostna = &bna->host;
	struct netmap_kring *kring, *bkring;
	struct netmap_ring *ring;
	int is_host_ring = ring_nr == na->num_rx_rings;
	struct netmap_vp_adapter *vpna = &bna->up;
	int error = 0;

	if (netmap_verbose)
		D("%s %s%d 0x%x", NM_IFPNAME(ifp),
			(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);

	if (flags & NAF_DISABLE_NOTIFY) {
		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
		if (kring[ring_nr].nkr_stopped)
			netmap_disable_ring(&bkring[ring_nr]);
		else
			bkring[ring_nr].nkr_stopped = 0;
		return 0;
	}

	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
		return 0;

	/* we only care about receive interrupts */
	if (tx == NR_TX)
		return 0;

	kring = &na->rx_rings[ring_nr];
	ring = kring->ring;

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring))
		return 0;

	if (is_host_ring && hostna->na_bdg == NULL) {
		error = bna->save_notify(na, ring_nr, tx, flags);
		goto put_out;
	}

	/* Here we expect ring->head = ring->cur = ring->tail
	 * because everything has been released from the previous round.
	 * However the ring is shared and we might have info from
	 * the wrong side (the tx ring). Hence we overwrite with
	 * the info from the rx kring.
	 */
	if (netmap_verbose)
		D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp),
			ring->head, ring->cur, ring->tail,
			kring->rhead, kring->rcur, kring->rtail);

	ring->head = kring->rhead;
	ring->cur = kring->rcur;
	ring->tail = kring->rtail;

	/* simulate a user wakeup on the rx ring */
	if (is_host_ring) {
		netmap_rxsync_from_host(na, NULL, NULL);
		vpna = hostna;
		ring_nr = 0;
	} else {
		/* fetch packets that have arrived.
		 * XXX maybe do this in a loop ?
		 */
		error = na->nm_rxsync(na, ring_nr, 0);
		if (error)
			goto put_out;
	}
	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			NM_IFPNAME(ifp));
		goto put_out;
	}

	/* new packets are ring->cur to ring->tail, and the bkring
	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
	 * to push all packets out.
	 */
	ring->head = ring->cur = ring->tail;

	/* also set tail to what the bwrap expects */
	bkring = &vpna->up.tx_rings[ring_nr];
	ring->tail = bkring->nr_hwtail; // rtail too ?

	/* pass packets to the switch */
	nm_txsync_prologue(bkring); // XXX error checking ?
	netmap_vp_txsync(vpna, ring_nr, flags);

	/* mark all buffers as released on this ring */
	ring->head = ring->cur = kring->nr_hwtail;
	ring->tail = kring->rtail;
	/* another call to actually release the buffers */
	if (!is_host_ring) {
		error = na->nm_rxsync(na, ring_nr, 0);
	} else {
		/* mark all packets as released, as in the
		 * second part of netmap_rxsync_from_host()
		 */
		kring->nr_hwcur = kring->nr_hwtail;
		nm_rxsync_finalize(kring);
	}

put_out:
	nm_kr_put(kring);
	return error;
}


static int
netmap_bwrap_register(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error;

	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");

	if (onoff) {
		int i;

		hwna->na_lut = na->na_lut;
		hwna->na_lut_objtotal = na->na_lut_objtotal;

		if (hostna->na_bdg) {
			hostna->up.na_lut = na->na_lut;
			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
		}

		/* cross-link the netmap rings.
		 * The original number of rings comes from hwna,
		 * rx rings on one side equal tx rings on the other.
		 */
		for (i = 0; i <= na->num_rx_rings; i++) {
			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
		}
		for (i = 0; i <= na->num_tx_rings; i++) {
			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
		}
	}

	if (hwna->ifp) {
		error = hwna->nm_register(hwna, onoff);
		if (error)
			return error;
	}

	bdg_netmap_reg(na, onoff);

	if (onoff) {
		bna->save_notify = hwna->nm_notify;
		hwna->nm_notify = netmap_bwrap_intr_notify;
	} else {
		hwna->nm_notify = bna->save_notify;
		hwna->na_lut = NULL;
		hwna->na_lut_objtotal = 0;
	}

	return 0;
}


static int
netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
	u_int *rxr, u_int *rxd)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	/* forward the request */
	netmap_update_config(hwna);
	/* swap the results */
	*txr = hwna->num_rx_rings;
	*txd = hwna->num_rx_desc;
	*rxr = hwna->num_tx_rings;
	*rxd = hwna->num_tx_desc;

	return 0;
}


static int
netmap_bwrap_krings_create(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int error;

	ND("%s", NM_IFPNAME(na->ifp));

	error = netmap_vp_krings_create(na);
	if (error)
		return error;

	error = hwna->nm_krings_create(hwna);
	if (error) {
		netmap_vp_krings_delete(na);
		return error;
	}

	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
	hostna->rx_rings = na->rx_rings + na->num_rx_rings;

	return 0;
}


static void
netmap_bwrap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	ND("%s", NM_IFPNAME(na->ifp));
	hwna->nm_krings_delete(hwna);
	netmap_vp_krings_delete(na);
}


/* notify method for the bridge-->hwna direction */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *kring, *hw_kring;
	struct netmap_ring *ring;
	u_int lim;
	int error = 0;

	if (tx == NR_TX)
		return EINVAL;

	kring = &na->rx_rings[ring_n];
	hw_kring = &hwna->tx_rings[ring_n];
	ring = kring->ring;
	lim = kring->nkr_num_slots - 1;

	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
		return 0;
	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(na, ring_n, flags);
	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the simulated user consumes all new packets */
	ring->head = ring->cur = ring->tail;

	/* third step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	/* set tail to what the hw expects */
	ring->tail = hw_kring->rtail;
	if (ring_n == na->num_rx_rings) {
		netmap_txsync_to_host(hwna);
	} else {
		nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}

	/* fourth step: now we are back at the rx ring */
	/* claim ownership on all hw owned bufs */
	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
	ring->tail = kring->rtail; /* restore saved value of tail, for safety */

	/* fifth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(na, ring_n, flags);
	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);

	return error;
}


static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *port_na = &bna->up.up;
	if (tx == NR_TX || ring_n != 0)
		return EINVAL;
	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}
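/*
 * In the constructor below the ring counts are copied from hwna with
 * tx and rx swapped: the bwrap's rx side is fed by the NIC's tx side
 * and vice versa. The host-stack adapter always exposes a single tx
 * and a single rx ring, and its krings are carved out of the bwrap's
 * arrays in netmap_bwrap_krings_create() above.
 */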
/* attach a bridge wrapper to the 'real' device */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *na;
	struct netmap_adapter *hwna = NA(real);
	struct netmap_adapter *hostna;
	int error;


	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (bna == NULL)
		return ENOMEM;

	na = &bna->up.up;
	na->ifp = fake;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	na->num_rx_rings = hwna->num_tx_rings;
	na->num_tx_rings = hwna->num_rx_rings;
	na->num_tx_desc = hwna->num_rx_desc;
	na->num_rx_desc = hwna->num_tx_desc;
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_register = netmap_bwrap_register;
	// na->nm_txsync = netmap_bwrap_txsync;
	// na->nm_rxsync = netmap_bwrap_rxsync;
	na->nm_config = netmap_bwrap_config;
	na->nm_krings_create = netmap_bwrap_krings_create;
	na->nm_krings_delete = netmap_bwrap_krings_delete;
	na->nm_notify = netmap_bwrap_notify;
	na->nm_mem = hwna->nm_mem;
	na->na_private = na; /* prevent NIOCREGIF */
	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */

	hostna = &bna->host.up;
	hostna->ifp = hwna->ifp;
	hostna->num_tx_rings = 1;
	hostna->num_tx_desc = hwna->num_rx_desc;
	hostna->num_rx_rings = 1;
	hostna->num_rx_desc = hwna->num_tx_desc;
	// hostna->nm_txsync = netmap_bwrap_host_txsync;
	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
	hostna->nm_notify = netmap_bwrap_host_notify;
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;

	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
		fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		netmap_adapter_put(hwna);
		free(bna, M_DEVBUF);
		return error;
	}
	return 0;
}


void
netmap_init_bridges(void)
{
	int i;
	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
	for (i = 0; i < NM_BRIDGES; i++)
		BDG_RWINIT(&nm_bridges[i]);
}
#endif /* WITH_VALE */