/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring a new port or deleting an
existing one, the lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>


#define BDG_RWLOCK_T		struct rwlock

#define	BDG_RWINIT(b)		\
	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
#define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
#define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
#define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
#define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
#define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#ifdef WITH_VALE

/*
 * system parameters (most of them in netmap_kern.h)
 * NM_NAME		prefix for switch port names, default "vale"
 * NM_BDG_MAXPORTS	number of ports
 * NM_BRIDGES		max number of switches in the system.
 *	XXX should become a sysctl or tunable
 *
 * Switch ports are named valeX:Y where X is the switch name and Y
 * is the port. If Y matches a physical interface name, the port is
 * connected to a physical device.
 *
 * Unlike physical interfaces, switch ports use their own memory region
 * for rings and buffers.
 * The virtual interfaces use per-queue locks instead of the core lock.
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
#define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
#define NM_BDG_HASH		1024	/* forwarding table entries */
#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
#define NM_MULTISEG		64	/* max size of a chain of bufs */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL		NM_BDG_BATCH_MAX
#define	NM_BRIDGES		8	/* number of bridges */

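/*
 * Example (sketch only, not compiled): how a user process would open
 * port "p1" on switch "vale0" following the naming scheme above, using
 * the standard netmap userspace API. The names, error handling and the
 * use of <net/netmap_user.h> are illustrative assumptions, not part of
 * this module.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

static int
open_vale_port(void)
{
	struct nmreq req;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	/* "vale0" selects (or creates) the switch, "p1" the port */
	strncpy(req.nr_name, "vale0:p1", sizeof(req.nr_name));
	if (ioctl(fd, NIOCREGIF, &req) < 0)
		return -1;	/* e.g. bridge full or invalid name */
	/* rings and buffers live in the port's own memory region,
	 * reachable by mmap()ing req.nr_memsize bytes of fd.
	 */
	return fd;
}
#endif
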
/*
 * bridge_batch is set via sysctl to the max batch size to be
 * used in the bridge. The actual value may be larger because the
 * last packet in the block may carry additional fragments.
 */
int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");


static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
int kern_netmap_regif(struct nmreq *nmr);

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/*
 * For each output interface, nm_bdg_q is used to construct a list.
 * bq_len is the number of output buffers (we can have coalescing
 * during the copy).
 */
struct nm_bdg_q {
	uint16_t bq_head;
	uint16_t bq_tail;
	uint32_t bq_len;	/* number of buffers */
};

/* XXX revise this */
struct nm_hash_ent {
	uint64_t	mac;	/* the top 2 bytes are the epoch */
	uint64_t	ports;
};

/*
 * nm_bridge is a descriptor for a VALE switch.
 * Interfaces for a bridge are all in bdg_ports[].
 * The array has fixed size, an empty entry does not terminate
 * the search, but lookups only occur on attach/detach so we
 * don't mind if they are slow.
 *
 * The bridge is non blocking on the transmit ports: excess
 * packets are dropped if there is no room on the output port.
 *
 * bdg_lock protects accesses to the bdg_ports array.
 * This is a rw lock (or equivalent).
 */
struct nm_bridge {
	/* XXX what is the proper alignment/layout ? */
	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
	int		bdg_namelen;
	uint32_t	bdg_active_ports; /* 0 means free */
	char		bdg_basename[IFNAMSIZ];

	/* Indexes of active ports (up to active_ports)
	 * and all other remaining ports.
	 */
	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];

	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];


	/*
	 * The function to decide the destination port.
	 * It returns either an index of the destination port,
	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT
	 * to drop it. ring_nr is the source ring index, and the
	 * function may overwrite this value to forward this packet to a
	 * different ring index.
	 * This function must be set by netmap_bdg_ctl().
	 */
	bdg_lookup_fn_t nm_bdg_lookup;

	/* the forwarding table, MAC+ports.
	 * XXX should be changed to an argument to be passed to
	 * the lookup function, and allocated on attach
	 */
	struct nm_hash_ent ht[NM_BDG_HASH];
};

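/*
 * Example (sketch only, not compiled): a trivial lookup function that
 * honours the nm_bdg_lookup contract described above, sending every
 * packet to port 2 on ring 0. The function name is made up; a real
 * module would install it with netmap_bdg_ctl() and the
 * NETMAP_BDG_LOOKUP_REG command (see below).
 */
#if 0
static u_int
example_bdg_lookup(char *buf, u_int buf_len, uint8_t *dst_ring,
	struct netmap_vp_adapter *vpna)
{
	(void)buf; (void)buf_len; (void)vpna;
	*dst_ring = 0;	/* ring index on the destination port */
	return 2;	/* or NM_BDG_BROADCAST / NM_BDG_NOPORT */
}
#endif
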
/*
 * XXX in principle nm_bridges could be created dynamically
 * Right now we have a static array and deletions are protected
 * by an exclusive lock.
 */
struct nm_bridge nm_bridges[NM_BRIDGES];


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapping.
 */
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
	uint64_t *src = _src;
	uint64_t *dst = _dst;
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}


/*
 * locate a bridge among the existing ones.
 * MUST BE CALLED WITH NMG_LOCK()
 *
 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
 * We assume that this is called with a name of at least NM_NAME chars.
 */
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
	int i, l, namelen;
	struct nm_bridge *b = NULL;

	NMG_LOCK_ASSERT();

	namelen = strlen(NM_NAME);	/* base length */
	l = name ? strlen(name) : 0;	/* actual length */
	if (l < namelen) {
		D("invalid bridge name %s", name ? name : "(null)");
		return NULL;
	}
	for (i = namelen + 1; i < l; i++) {
		if (name[i] == ':') {
			namelen = i;
			break;
		}
	}
	if (namelen >= IFNAMSIZ)
		namelen = IFNAMSIZ;
	ND("--- prefix is '%.*s' ---", namelen, name);

	/* lookup the name, remember empty slot if there is one */
	for (i = 0; i < NM_BRIDGES; i++) {
		struct nm_bridge *x = nm_bridges + i;

		if (x->bdg_active_ports == 0) {
			if (create && b == NULL)
				b = x;	/* record empty slot */
		} else if (x->bdg_namelen != namelen) {
			continue;
		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
			ND("found '%.*s' at %d", namelen, name, i);
			b = x;
			break;
		}
	}
	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
		/* initialize the bridge */
		strncpy(b->bdg_basename, name, namelen);
		ND("create new bridge %s with ports %d", b->bdg_basename,
			b->bdg_active_ports);
		b->bdg_namelen = namelen;
		b->bdg_active_ports = 0;
		for (i = 0; i < NM_BDG_MAXPORTS; i++)
			b->bdg_port_index[i] = i;
		/* set the default function */
		b->nm_bdg_lookup = netmap_bdg_learning;
		/* reset the MAC address table */
		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
	}
	return b;
}


/*
 * Free the forwarding tables for rings attached to switch ports.
 */
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
	int nrings, i;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	nrings = na->num_tx_rings;
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		if (kring[i].nkr_ft) {
			free(kring[i].nkr_ft, M_DEVBUF);
			kring[i].nkr_ft = NULL; /* protect from freeing twice */
		}
	}
}


/*
 * Allocate the forwarding tables for the rings attached to the bridge ports.
 */
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
	int nrings, l, i, num_dstq;
	struct netmap_kring *kring;

	NMG_LOCK_ASSERT();
	/* all port:rings + broadcast */
	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
	l += sizeof(struct nm_bdg_q) * num_dstq;
	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;

	nrings = na->num_tx_rings + 1;
	kring = na->tx_rings;
	for (i = 0; i < nrings; i++) {
		struct nm_bdg_fwd *ft;
		struct nm_bdg_q *dstq;
		int j;

		ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ft) {
			nm_free_bdgfwd(na);
			return ENOMEM;
		}
		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
		for (j = 0; j < num_dstq; j++) {
			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
			dstq[j].bq_len = 0;
		}
		kring[i].nkr_ft = ft;
	}
	return 0;
}


static void
netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
	int s_hw = hw, s_sw = sw;
	int i, lim = b->bdg_active_ports;
	uint8_t tmp[NM_BDG_MAXPORTS];

	/*
	New algorithm:
	make a copy of bdg_port_index;
	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
	in the array of bdg_port_index, replacing them with
	entries from the bottom of the array;
	decrement bdg_active_ports;
	acquire BDG_WLOCK() and copy back the array.
	*/

	D("detach %d and %d (lim %d)", hw, sw, lim);
	/* make a copy of the list of active ports, update it,
	 * and then copy back within BDG_WLOCK().
	 */
	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
		if (hw >= 0 && tmp[i] == hw) {
			ND("detach hw %d at %d", hw, i);
			lim--;	/* point to last active port */
			tmp[i] = tmp[lim]; /* swap with i */
			tmp[lim] = hw;	/* now this is inactive */
			hw = -1;
		} else if (sw >= 0 && tmp[i] == sw) {
			ND("detach sw %d at %d", sw, i);
			lim--;
			tmp[i] = tmp[lim];
			tmp[lim] = sw;
			sw = -1;
		} else {
			i++;
		}
	}
	if (hw >= 0 || sw >= 0) {
		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
	}

	BDG_WLOCK(b);
	b->bdg_ports[s_hw] = NULL;
	if (s_sw >= 0) {
		b->bdg_ports[s_sw] = NULL;
	}
	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
	b->bdg_active_ports = lim;
	BDG_WUNLOCK(b);

	ND("now %d active ports", lim);
	if (lim == 0) {
		ND("marking bridge %s as free", b->bdg_basename);
		b->nm_bdg_lookup = NULL;
	}
}


static void
netmap_adapter_vp_dtor(struct netmap_adapter *na)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	struct nm_bridge *b = vpna->na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);

	if (b) {
		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
	}

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;
}


/* Try to get a reference to a netmap adapter attached to a VALE switch.
 * If the adapter is found (or is created), this function returns 0, a
 * non NULL pointer is returned into *na, and the caller holds a
 * reference to the adapter.
 * If an adapter is not found, then no reference is grabbed and the
 * function returns an error code, or 0 if there is just a VALE prefix
 * mismatch. Therefore the caller holds a reference when
 * (*na != NULL && return == 0).
 */
int
netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	const char *name = nmr->nr_name;
	struct ifnet *ifp;
	int error = 0;
	struct netmap_adapter *ret;
	struct netmap_vp_adapter *vpna;
	struct nm_bridge *b;
	int i, j, cand = -1, cand2 = -1;
	int needed;

	*na = NULL;	/* default return value */

	/* first try to see if this is a bridge port. */
	NMG_LOCK_ASSERT();
	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
		return 0;	/* no error, but no VALE prefix */
	}

	b = nm_find_bridge(name, create);
	if (b == NULL) {
		D("no bridges available for '%s'", name);
		return (ENXIO);
	}

	/* Now we are sure that name starts with the bridge's name,
	 * lookup the port in the bridge. We need to scan the entire
	 * list. It is not important to hold a WLOCK on the bridge
	 * during the search because NMG_LOCK already guarantees
	 * that there are no other possible writers.
	 */

	/* lookup in the local list of ports */
	for (j = 0; j < b->bdg_active_ports; j++) {
		i = b->bdg_port_index[j];
		vpna = b->bdg_ports[i];
		// KASSERT(na != NULL);
		ifp = vpna->up.ifp;
		/* XXX make sure the name only contains one : */
		if (!strcmp(NM_IFPNAME(ifp), name)) {
			netmap_adapter_get(&vpna->up);
			ND("found existing if %s refs %d", name,
				vpna->na_bdg_refcount);
			*na = (struct netmap_adapter *)vpna;
			return 0;
		}
	}
	/* not found, should we create it? */
	if (!create)
		return ENXIO;
	/* yes we should, see if we have space to attach entries */
	needed = 2; /* in some cases we only need 1 */
	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
		D("bridge full %d, cannot create new port", b->bdg_active_ports);
		return EINVAL;
	}
	/* record the next two ports available, but do not allocate yet */
	cand = b->bdg_port_index[b->bdg_active_ports];
	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
	ND("+++ bridge %s port %s used %d avail %d %d",
		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);

	/*
	 * try to see if there is a matching NIC with this name
	 * (after the bridge's name)
	 */
	ifp = ifunit_ref(name + b->bdg_namelen + 1);
	if (!ifp) { /* this is a virtual port */
		if (nmr->nr_cmd) {
			/* nr_cmd must be 0 for a virtual port */
			return EINVAL;
		}

		/* create a struct ifnet for the new port.
		 * need M_NOWAIT as we are under nma_lock
		 */
		ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!ifp)
			return ENOMEM;

		strcpy(ifp->if_xname, name);
		/* bdg_netmap_attach creates a struct netmap_adapter */
		error = bdg_netmap_attach(nmr, ifp);
		if (error) {
			D("error %d", error);
			free(ifp, M_DEVBUF);
			return error;
		}
		ret = NA(ifp);
		cand2 = -1; /* only need one port */
	} else { /* this is a NIC */
		struct ifnet *fake_ifp;

		error = netmap_get_hw_na(ifp, &ret);
		if (error || ret == NULL)
			goto out;

		/* make sure the NIC is not already in use */
		if (NETMAP_OWNED_BY_ANY(ret)) {
			D("NIC %s busy, cannot attach to bridge",
				NM_IFPNAME(ifp));
			error = EINVAL;
			goto out;
		}
		/* create a fake interface */
		fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!fake_ifp) {
			error = ENOMEM;
			goto out;
		}
		strcpy(fake_ifp->if_xname, name);
		error = netmap_bwrap_attach(fake_ifp, ifp);
		if (error) {
			free(fake_ifp, M_DEVBUF);
			goto out;
		}
		ret = NA(fake_ifp);
		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
			cand2 = -1; /* only need one port */
		if_rele(ifp);
	}
	vpna = (struct netmap_vp_adapter *)ret;

	BDG_WLOCK(b);
	vpna->bdg_port = cand;
	ND("NIC %p to bridge port %d", vpna, cand);
	/* bind the port to the bridge (virtual ports are not active) */
	b->bdg_ports[cand] = vpna;
	vpna->na_bdg = b;
	b->bdg_active_ports++;
	if (cand2 >= 0) {
		struct netmap_vp_adapter *hostna = vpna + 1;
		/* also bind the host stack to the bridge */
		b->bdg_ports[cand2] = hostna;
		hostna->bdg_port = cand2;
		hostna->na_bdg = b;
		b->bdg_active_ports++;
		ND("host %p to bridge port %d", hostna, cand2);
	}
	ND("if %s refs %d", name, vpna->up.na_refcount);
	BDG_WUNLOCK(b);
	*na = ret;
	netmap_adapter_get(ret);
	return 0;

out:
	if_rele(ifp);

	return error;
}


/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
static int
nm_bdg_attach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	struct netmap_if *nifp;
	struct netmap_priv_d *npriv;
	struct netmap_bwrap_adapter *bna;
	int error;

	npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (npriv == NULL)
		return ENOMEM;
	NMG_LOCK();
	/* XXX probably netmap_get_bdg_na() */
	error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
	if (error) /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	if (na->active_fds > 0) { /* already registered */
		error = EBUSY;
		goto unref_exit;
	}

	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
	if (!nifp) {
		goto unref_exit;
	}

	bna = (struct netmap_bwrap_adapter*)na;
	bna->na_kpriv = npriv;
	NMG_UNLOCK();
	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
	return 0;

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	bzero(npriv, sizeof(*npriv));
	free(npriv, M_DEVBUF);
	return error;
}

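/*
 * Example (sketch only, not compiled): how a control program would ask
 * for the attach/detach operations above. fd is an open /dev/netmap
 * descriptor; the interface and switch names are illustrative.
 */
#if 0
	struct nmreq req;

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "vale0:em0", sizeof(req.nr_name)); /* NIC em0 on switch vale0 */
	req.nr_cmd = NETMAP_BDG_ATTACH;
	req.nr_arg1 = NETMAP_BDG_HOST;	/* also attach the host stack port */
	if (ioctl(fd, NIOCREGIF, &req) < 0)
		perror("attach");
	/* ... the same request with nr_cmd = NETMAP_BDG_DETACH removes it */
#endif
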
static int
nm_bdg_detach(struct nmreq *nmr)
{
	struct netmap_adapter *na;
	int error;
	struct netmap_bwrap_adapter *bna;
	int last_instance;

	NMG_LOCK();
	error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
	if (error) { /* no device, or another bridge or user owns the device */
		goto unlock_exit;
	}
	if (na == NULL) { /* VALE prefix missing */
		error = EINVAL;
		goto unlock_exit;
	}

	bna = (struct netmap_bwrap_adapter *)na;

	if (na->active_fds == 0) { /* not registered */
		error = EINVAL;
		goto unref_exit;
	}

	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
	if (!last_instance) {
		D("--- error, trying to detach an entry with active mmaps");
		error = EINVAL;
	} else {
		struct netmap_priv_d *npriv = bna->na_kpriv;

		bna->na_kpriv = NULL;
		D("deleting priv");

		bzero(npriv, sizeof(*npriv));
		free(npriv, M_DEVBUF);
	}

unref_exit:
	netmap_adapter_put(na);
unlock_exit:
	NMG_UNLOCK();
	return error;

}


/* exported to kernel callers, e.g. OVS ?
 * Entry point.
 * Called without NMG_LOCK.
 */
int
netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
{
	struct nm_bridge *b;
	struct netmap_adapter *na;
	struct netmap_vp_adapter *vpna;
	struct ifnet *iter;
	char *name = nmr->nr_name;
	int cmd = nmr->nr_cmd, namelen = strlen(name);
	int error = 0, i, j;

	switch (cmd) {
	case NETMAP_BDG_ATTACH:
		error = nm_bdg_attach(nmr);
		break;

	case NETMAP_BDG_DETACH:
		error = nm_bdg_detach(nmr);
		break;

	case NETMAP_BDG_LIST:
		/* this is used to enumerate bridges and ports */
		if (namelen) { /* look up indexes of bridge and port */
			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			b = nm_find_bridge(name, 0 /* don't create */);
			if (!b) {
				error = ENOENT;
				NMG_UNLOCK();
				break;
			}

			error = ENOENT;
			for (j = 0; j < b->bdg_active_ports; j++) {
				i = b->bdg_port_index[j];
				vpna = b->bdg_ports[i];
				if (vpna == NULL) {
					D("---AAAAAAAAARGH-------");
					continue;
				}
				iter = vpna->up.ifp;
				/* the former and the latter identify a
				 * virtual port and a NIC, respectively
				 */
				if (!strcmp(iter->if_xname, name)) {
					/* bridge index */
					nmr->nr_arg1 = b - nm_bridges;
					nmr->nr_arg2 = i; /* port index */
					error = 0;
					break;
				}
			}
			NMG_UNLOCK();
		} else {
			/* return the first non-empty entry starting from
			 * bridge nr_arg1 and port nr_arg2.
			 *
			 * Users can detect the end of the same bridge by
			 * seeing the new and old value of nr_arg1, and can
			 * detect the end of all the bridges by error != 0
			 */
			i = nmr->nr_arg1;
			j = nmr->nr_arg2;

			NMG_LOCK();
			for (error = ENOENT; i < NM_BRIDGES; i++) {
				b = nm_bridges + i;
				if (j >= b->bdg_active_ports) {
					j = 0; /* following bridges scan from 0 */
					continue;
				}
				nmr->nr_arg1 = i;
				nmr->nr_arg2 = j;
				j = b->bdg_port_index[j];
				vpna = b->bdg_ports[j];
				iter = vpna->up.ifp;
				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
				error = 0;
				break;
			}
			NMG_UNLOCK();
		}
		break;

	case NETMAP_BDG_LOOKUP_REG:
		/* register a lookup function to the given bridge.
		 * nmr->nr_name may be just the bridge's name (including
		 * the ':' if it is not just NM_NAME).
		 */
		if (!func) {
			error = EINVAL;
			break;
		}
		NMG_LOCK();
		b = nm_find_bridge(name, 0 /* don't create */);
		if (!b) {
			error = EINVAL;
		} else {
			b->nm_bdg_lookup = func;
		}
		NMG_UNLOCK();
		break;

	case NETMAP_BDG_OFFSET:
		NMG_LOCK();
		error = netmap_get_bdg_na(nmr, &na, 0);
		if (na && !error) {
			vpna = (struct netmap_vp_adapter *)na;
			if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
				nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
			vpna->offset = nmr->nr_arg1;
			D("Using offset %d for %p", vpna->offset, vpna);
			netmap_adapter_put(na);
		}
		NMG_UNLOCK();
		break;

	default:
		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
		error = EINVAL;
		break;
	}
	return error;
}

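/*
 * Example (sketch only, not compiled): enumerating all switches and
 * ports with NETMAP_BDG_LIST, following the nr_arg1/nr_arg2 protocol
 * described in the case above. The use of NIOCGINFO mirrors what the
 * vale-ctl tool does; fd and the output format are illustrative.
 */
#if 0
	struct nmreq req;

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_BDG_LIST;
	req.nr_arg1 = req.nr_arg2 = 0;	/* start from bridge 0, port 0 */
	for (; ioctl(fd, NIOCGINFO, &req) == 0; req.nr_arg2++) {
		printf("bridge %d port %d name %s\n",
			req.nr_arg1, req.nr_arg2, req.nr_name);
		req.nr_name[0] = '\0';	/* empty name selects enumeration */
	}
	/* the loop ends when the kernel runs out of entries (ENOENT) */
#endif
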

static int
netmap_vp_krings_create(struct netmap_adapter *na)
{
	u_int ntx, nrx, tailroom;
	int error, i;
	uint32_t *leases;

	/* XXX vps do not need host rings,
	 * but we crash if we don't have one
	 */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	/*
	 * Leases are attached to RX rings on vale ports
	 */
	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;

	error = netmap_krings_create(na, ntx, nrx, tailroom);
	if (error)
		return error;

	leases = na->tailroom;

	for (i = 0; i < nrx; i++) { /* Receive rings */
		na->rx_rings[i].nkr_leases = leases;
		leases += na->num_rx_desc;
	}

	error = nm_alloc_bdgfwd(na);
	if (error) {
		netmap_krings_delete(na);
		return error;
	}

	return 0;
}


static void
netmap_vp_krings_delete(struct netmap_adapter *na)
{
	nm_free_bdgfwd(na);
	netmap_krings_delete(na);
}


static int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
	struct netmap_vp_adapter *na, u_int ring_nr);


/*
 * Grab packets from a kring, move them into the ft structure
 * associated to the tx (input) port. Max one instance per port,
 * filtered on input (ioctl, poll or XXX).
 * Returns the next position in the ring.
 */
static int
nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
	struct netmap_kring *kring, u_int end)
{
	struct netmap_ring *ring = kring->ring;
	struct nm_bdg_fwd *ft;
	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
	u_int ft_i = 0;	/* start from 0 */
	u_int frags = 1; /* how many frags ? */
	struct nm_bridge *b = na->na_bdg;

	/* To protect against modifications to the bridge we acquire a
	 * shared lock, waiting if we can sleep (if the source port is
	 * attached to a user process) or with a trylock otherwise (NICs).
	 */
	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
		BDG_RLOCK(b);
	else if (!BDG_RTRYLOCK(b))
		return 0;
	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
	ft = kring->nkr_ft;

	for (; likely(j != end); j = nm_next(j, lim)) {
		struct netmap_slot *slot = &ring->slot[j];
		char *buf;

		ft[ft_i].ft_len = slot->len;
		ft[ft_i].ft_flags = slot->flags;

		ND("flags is 0x%x", slot->flags);
		/* this slot goes into a list so initialize the link field */
		ft[ft_i].ft_next = NM_FT_NULL;
		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
		__builtin_prefetch(buf);
		++ft_i;
		if (slot->flags & NS_MOREFRAG) {
			frags++;
			continue;
		}
		if (unlikely(netmap_verbose && frags > 1))
			RD(5, "%d frags at %d", frags, ft_i - frags);
		ft[ft_i - frags].ft_frags = frags;
		frags = 1;
		if (unlikely((int)ft_i >= bridge_batch))
			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	}
	if (frags > 1) {
		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
		ft[ft_i - frags].ft_frags = frags - 1;
	}
	if (ft_i)
		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
	BDG_RUNLOCK(b);
	return j;
}


/* ----- FreeBSD if_bridge hash function ------- */

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 *
 * http://www.burtleburtle.net/bob/hash/spooky.html
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (/*CONSTCOND*/0)


static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key

	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

	mix(a, b, c);
#define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
	return (c & BRIDGE_RTHASH_MASK);
}

#undef mix


static int
bdg_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_vp_adapter *vpna =
		(struct netmap_vp_adapter*)na;
	struct ifnet *ifp = na->ifp;

	/* the interface is already attached to the bridge,
	 * so we only need to toggle IFCAP_NETMAP.
	 */
	BDG_WLOCK(vpna->na_bdg);
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	BDG_WUNLOCK(vpna->na_bdg);
	return 0;
}


/*
 * Lookup function for a learning bridge.
 * Update the hash table with the source address, then return
 * the destination port index, and the ring in *dst_ring
 * (at the moment, always use ring 0)
 */
u_int
netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
		struct netmap_vp_adapter *na)
{
	struct nm_hash_ent *ht = na->na_bdg->ht;
	uint32_t sh, dh;
	u_int dst, mysrc = na->bdg_port;
	uint64_t smac, dmac;

	if (buf_len < 14) {
		D("invalid buf length %d", buf_len);
		return NM_BDG_NOPORT;
	}
	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
	smac = le64toh(*(uint64_t *)(buf + 4));
	smac >>= 16;

	/*
	 * The hash is somewhat expensive, there might be some
	 * worthwhile optimizations here.
	 */
	if ((buf[6] & 1) == 0) { /* valid src */
		uint8_t *s = buf+6;
		sh = nm_bridge_rthash(s); // XXX hash of source
		/* update source port forwarding entry */
		ht[sh].mac = smac;	/* XXX expire ?
					 */
		ht[sh].ports = mysrc;
		if (netmap_verbose)
			D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
				s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
	}
	dst = NM_BDG_BROADCAST;
	if ((buf[0] & 1) == 0) { /* unicast */
		dh = nm_bridge_rthash(buf); // XXX hash of dst
		if (ht[dh].mac == dmac) {	/* found dst */
			dst = ht[dh].ports;
		}
		/* XXX otherwise return NM_BDG_UNKNOWN ? */
	}
	*dst_ring = 0;
	return dst;
}


/*
 * Available space in the ring. Only used in VALE code
 * and only with is_rx = 1
 */
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
	int space;

	if (is_rx) {
		int busy = k->nkr_hwlease - k->nr_hwcur;
		if (busy < 0)
			busy += k->nkr_num_slots;
		space = k->nkr_num_slots - 1 - busy;
	} else {
		/* XXX never used in this branch */
		space = k->nr_hwtail - k->nkr_hwlease;
		if (space < 0)
			space += k->nkr_num_slots;
	}
#if 0
	// sanity check
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_tail >= k->nkr_num_slots ||
		busy < 0 ||
		busy >= k->nkr_num_slots) {
		D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
#endif
	return space;
}




/* make a lease on the kring for N positions. return the
 * lease index
 * XXX only used in VALE code and with is_rx = 1
 */
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
	uint32_t lim = k->nkr_num_slots - 1;
	uint32_t lease_idx = k->nkr_lease_idx;

	k->nkr_leases[lease_idx] = NR_NOSLOT;
	k->nkr_lease_idx = nm_next(lease_idx, lim);

	if (n > nm_kr_space(k, is_rx)) {
		D("invalid request for %d slots", n);
		panic("x");
	}
	/* XXX verify that there are n slots */
	k->nkr_hwlease += n;
	if (k->nkr_hwlease > lim)
		k->nkr_hwlease -= lim + 1;

	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwtail >= k->nkr_num_slots ||
		k->nkr_lease_idx >= k->nkr_num_slots) {
		D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
			k->na->ifp->if_xname,
			k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
	return lease_idx;
}

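/*
 * Worked example of the lease mechanism above (numbers are illustrative).
 * Take an rx ring with 10 slots (lim = 9), nr_hwcur = nr_hwtail = 2, and
 * two senders A and B arriving concurrently:
 *  - A leases 3 slots: lease_idx 0, nkr_hwlease 2 -> 5;
 *  - B leases 2 slots: lease_idx 1, nkr_hwlease 5 -> 7.
 * If B completes first it writes p[1] = 7, but my_start (5) != nr_hwtail (2)
 * so it cannot advance the ring. When A completes it writes p[0] = 5, sees
 * my_start == nr_hwtail, scans the leases (5, then 7), advances nr_hwtail
 * to 7 and issues a single notify covering both ranges.
 */
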
/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch functions.
 */
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
		u_int ring_nr)
{
	struct nm_bdg_q *dst_ents, *brddst;
	uint16_t num_dsts = 0, *dsts;
	struct nm_bridge *b = na->na_bdg;
	u_int i, j, me = na->bdg_port;

	/*
	 * The work area (pointed by ft) is followed by an array of
	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
	 * queues per port plus one for the broadcast traffic.
	 * Then we have an array of destination indexes.
	 */
	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);

	/* first pass: find a destination for each packet in the batch */
	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
		uint16_t dst_port, d_i;
		struct nm_bdg_q *d;
		uint8_t *buf = ft[i].ft_buf;
		u_int len = ft[i].ft_len;

		ND("slot %d frags %d", i, ft[i].ft_frags);
		/* Drop the packet if the offset is not into the first
		   fragment nor at the very beginning of the second. */
		if (unlikely(na->offset > len))
			continue;
		if (len == na->offset) {
			buf = ft[i+1].ft_buf;
			len = ft[i+1].ft_len;
		} else {
			buf += na->offset;
			len -= na->offset;
		}
		dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
		if (netmap_verbose > 255)
			RD(5, "slot %d port %d -> %d", i, me, dst_port);
		if (dst_port == NM_BDG_NOPORT)
			continue; /* this packet is identified to be dropped */
		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
			continue;
		else if (dst_port == NM_BDG_BROADCAST)
			dst_ring = 0; /* broadcasts always go to ring 0 */
		else if (unlikely(dst_port == me ||
		    !b->bdg_ports[dst_port]))
			continue;

		/* get a position in the scratch pad */
		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
		d = dst_ents + d_i;

		/* append the first fragment to the list */
		if (d->bq_head == NM_FT_NULL) { /* new destination */
			d->bq_head = d->bq_tail = i;
			/* remember this position to be scanned later */
			if (dst_port != NM_BDG_BROADCAST)
				dsts[num_dsts++] = d_i;
		} else {
			ft[d->bq_tail].ft_next = i;
			d->bq_tail = i;
		}
		d->bq_len += ft[i].ft_frags;
	}

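	/*
	 * Worked example of the lists built by the first pass (illustrative).
	 * With a batch of 5 single-fragment packets where slots 0, 2 and 4
	 * go to port 3 ring 0 and slots 1 and 3 are broadcast:
	 *  - dst_ents[3 * NM_BDG_MAXRINGS] has bq_head = 0, bq_tail = 4,
	 *    bq_len = 3, with ft[0].ft_next = 2 and ft[2].ft_next = 4;
	 *  - the broadcast queue has bq_head = 1, bq_tail = 3, bq_len = 2,
	 *    with ft[1].ft_next = 3.
	 * Only the unicast destination is recorded in dsts[]; broadcast
	 * rings are added in the loop below.
	 */
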
	/*
	 * Broadcast traffic goes to ring 0 on all destinations.
	 * So we need to add these rings to the list of ports to scan.
	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
	 * expensive. We should keep a compact list of active destinations
	 * so we could shorten this loop.
	 */
	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
	if (brddst->bq_head != NM_FT_NULL) {
		for (j = 0; likely(j < b->bdg_active_ports); j++) {
			uint16_t d_i;
			i = b->bdg_port_index[j];
			if (unlikely(i == me))
				continue;
			d_i = i * NM_BDG_MAXRINGS;
			if (dst_ents[d_i].bq_head == NM_FT_NULL)
				dsts[num_dsts++] = d_i;
		}
	}

	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
	/* second pass: scan destinations (XXX will be modular somehow) */
	for (i = 0; i < num_dsts; i++) {
		struct ifnet *dst_ifp;
		struct netmap_vp_adapter *dst_na;
		struct netmap_kring *kring;
		struct netmap_ring *ring;
		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
		u_int needed, howmany;
		int retry = netmap_txsync_retry;
		struct nm_bdg_q *d;
		uint32_t my_start = 0, lease_idx = 0;
		int nrings;
		int offset_mismatch;

		d_i = dsts[i];
		ND("second pass %d port %d", i, d_i);
		d = dst_ents + d_i;
		// XXX fix the division
		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
		/* protect from the lookup function returning an inactive
		 * destination port
		 */
		if (unlikely(dst_na == NULL))
			goto cleanup;
		if (dst_na->up.na_flags & NAF_SW_ONLY)
			goto cleanup;
		dst_ifp = dst_na->up.ifp;
		/*
		 * The interface may be in !netmap mode in two cases:
		 * - when na is attached but not activated yet;
		 * - when na is being deactivated but is still attached.
		 */
		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
			ND("not in netmap mode!");
			goto cleanup;
		}

		offset_mismatch = (dst_na->offset != na->offset);

		/* there is at least one either unicast or broadcast packet */
		brd_next = brddst->bq_head;
		next = d->bq_head;
		/* we need to reserve this many slots. If fewer are
		 * available, some packets will be dropped.
		 * Packets may have multiple fragments, so there is a chance
		 * that we may not use all of the slots we have claimed, and
		 * we will need to handle the leftover ones when we regain
		 * the lock.
		 */
		needed = d->bq_len + brddst->bq_len;

		ND(5, "pass 2 dst %d is %x %s",
			i, d_i, is_vp ? "virtual" : "nic/host");
		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
		nrings = dst_na->up.num_rx_rings;
		if (dst_nr >= nrings)
			dst_nr = dst_nr % nrings;
		kring = &dst_na->up.rx_rings[dst_nr];
		ring = kring->ring;
		lim = kring->nkr_num_slots - 1;

retry:

		/* reserve the buffers in the queue and an entry
		 * to report completion, and drop lock.
		 * XXX this might become a helper function.
		 */
		mtx_lock(&kring->q_lock);
		if (kring->nkr_stopped) {
			mtx_unlock(&kring->q_lock);
			goto cleanup;
		}
		if (dst_na->retry) {
			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
		}
		my_start = j = kring->nkr_hwlease;
		howmany = nm_kr_space(kring, 1);
		if (needed < howmany)
			howmany = needed;
		lease_idx = nm_kr_lease(kring, howmany, 1);
		mtx_unlock(&kring->q_lock);

		/* only retry if we need more than available slots */
		if (retry && needed <= howmany)
			retry = 0;

		/* copy to the destination queue */
		while (howmany > 0) {
			struct netmap_slot *slot;
			struct nm_bdg_fwd *ft_p, *ft_end;
			u_int cnt;
			int fix_mismatch = offset_mismatch;

			/* find the queue from which we pick next packet.
			 * NM_FT_NULL is always higher than valid indexes
			 * so we never dereference it if the other list
			 * has packets (and if both are empty we never
			 * get here).
			 */
			if (next < brd_next) {
				ft_p = ft + next;
				next = ft_p->ft_next;
			} else { /* insert broadcast */
				ft_p = ft + brd_next;
				brd_next = ft_p->ft_next;
			}
			cnt = ft_p->ft_frags; // cnt > 0
			if (unlikely(cnt > howmany))
				break; /* no more space */
			howmany -= cnt;
			if (netmap_verbose && cnt > 1)
				RD(5, "rx %d frags to %d", cnt, j);
			ft_end = ft_p + cnt;
			do {
			    char *dst, *src = ft_p->ft_buf;
			    size_t copy_len = ft_p->ft_len, dst_len = copy_len;

			    slot = &ring->slot[j];
			    dst = BDG_NMB(&dst_na->up, slot);

			    if (unlikely(fix_mismatch)) {
				/* We are processing the first fragment
				 * and there is a mismatch between source
				 * and destination offsets. Create a zeroed
				 * header for the destination, independently
				 * of the source header length and content.
				 */
				src += na->offset;
				copy_len -= na->offset;
				bzero(dst, dst_na->offset);
				dst += dst_na->offset;
				dst_len = dst_na->offset + copy_len;
				/* fix the first fragment only */
				fix_mismatch = 0;
				/* Here it could be copy_len == dst_len == 0,
				 * and so a zero length fragment is passed.
				 */
			    }

			    ND("send [%d] %d(%d) bytes at %s:%d",
				i, (int)copy_len, (int)dst_len,
				NM_IFPNAME(dst_ifp), j);
			    /* round to a multiple of 64 */
			    copy_len = (copy_len + 63) & ~63;

			    if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, copy_len)) {
					// invalid user pointer, pretend len is 0
					dst_len = 0;
				}
			    } else {
				//memcpy(dst, src, copy_len);
				pkt_copy(src, dst, (int)copy_len);
			    }
			    slot->len = dst_len;
			    slot->flags = (cnt << 8) | NS_MOREFRAG;
			    j = nm_next(j, lim);
			    ft_p++;
			    sent++;
			} while (ft_p != ft_end);
			slot->flags = (cnt << 8); /* clear flag on last entry */
			/* are we done ? */
			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
				break;
		}
		{
		    /* current position */
		    uint32_t *p = kring->nkr_leases; /* shorthand */
		    uint32_t update_pos;
		    int still_locked = 1;

		    mtx_lock(&kring->q_lock);
		    if (unlikely(howmany > 0)) {
			/* we have not used all the buffers. If I am the last
			 * one I can recover the slots, otherwise I must
			 * fill them with len 0 to mark empty packets.
			 */
			ND("leftover %d bufs", howmany);
			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
			    /* yes i am the last one */
			    ND("roll back nkr_hwlease to %d", j);
			    kring->nkr_hwlease = j;
			} else {
			    while (howmany-- > 0) {
				ring->slot[j].len = 0;
				ring->slot[j].flags = 0;
				j = nm_next(j, lim);
			    }
			}
		    }
		    p[lease_idx] = j; /* report I am done */

		    update_pos = kring->nr_hwtail;

		    if (my_start == update_pos) {
			/* all slots before my_start have been reported,
			 * so scan subsequent leases to see if other ranges
			 * have been completed, and do a selwakeup or txsync.
			 */
			while (lease_idx != kring->nkr_lease_idx &&
				p[lease_idx] != NR_NOSLOT) {
			    j = p[lease_idx];
			    p[lease_idx] = NR_NOSLOT;
			    lease_idx = nm_next(lease_idx, lim);
			}
			/* j is the new 'write' position. j != my_start
			 * means there are new buffers to report
			 */
			if (likely(j != my_start)) {
				kring->nr_hwtail = j;
				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
				still_locked = 0;
				mtx_unlock(&kring->q_lock);
				if (dst_na->retry && retry--)
					goto retry;
			}
		    }
		    if (still_locked)
			mtx_unlock(&kring->q_lock);
		}
cleanup:
		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
		d->bq_len = 0;
	}
	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
	brddst->bq_len = 0;
	return 0;
}


static int
netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
	u_int done;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = kring->rcur;

	if (bridge_batch <= 0) { /* testing only */
		done = cur; // used all
		goto done;
	}
	if (bridge_batch > NM_BDG_BATCH)
		bridge_batch = NM_BDG_BATCH;

	done = nm_bdg_preflush(na, ring_nr, kring, cur);
done:
	if (done != cur)
		D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
	/*
	 * packets between 'done' and 'cur' are left unsent.
	 */
	kring->nr_hwcur = done;
	kring->nr_hwtail = nm_prev(done, lim);
	nm_txsync_finalize(kring);
	if (netmap_verbose)
		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
	return 0;
}


/*
 * main dispatch routine for the bridge.
 * We already know that only one thread is running this.
 * we must run nm_bdg_preflush without lock.
 */
static int
bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
	return netmap_vp_txsync(vpna, ring_nr, flags);
}

static int
netmap_vp_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, lim = kring->nkr_num_slots - 1;
	u_int head = nm_rxsync_prologue(kring);
	int n;

	if (head > lim) {
		D("ouch dangerous reset!!!");
		n = netmap_ring_reinit(kring);
		goto done;
	}

	/* First part, import newly received packets. */
	/* actually nothing to do here, they are already in the kring */

	/* Second part, skip past packets that userspace has released. */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* consistency check, but nothing really important here */
		for (n = 0; likely(nm_i != head); n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = BDG_NMB(na, slot);

			if (addr == netmap_buffer_base) { /* bad buf */
				D("bad buffer index %d, ignore ?",
					slot->buf_idx);
			}
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}

	/* tell userspace that there are new packets */
	nm_rxsync_finalize(kring);
	n = 0;
done:
	return n;
}

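/*
 * Example (sketch only, not compiled): the userspace side of the tx path
 * above. After NIOCREGIF and mmap() (mem is the mapped region), writing a
 * frame and issuing NIOCTXSYNC ends up in bdg_netmap_txsync() and from
 * there in nm_bdg_preflush()/nm_bdg_flush(). fd, req, frame and frame_len
 * are assumed to be provided by the caller.
 */
#if 0
	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
	struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
	uint32_t i = ring->cur;
	char *buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);

	memcpy(buf, frame, frame_len);
	ring->slot[i].len = frame_len;
	ring->head = ring->cur = nm_ring_next(ring, i);
	ioctl(fd, NIOCTXSYNC, NULL);
#endif
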
/*
 * user process reading from a VALE switch.
 * Already protected against concurrent calls from userspace,
 * but we must acquire the queue's lock to protect against
 * writers on the same queue.
 */
static int
bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	int n;

	mtx_lock(&kring->q_lock);
	n = netmap_vp_rxsync(na, ring_nr, flags);
	mtx_unlock(&kring->q_lock);
	return n;
}


static int
bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
{
	struct netmap_vp_adapter *vpna;
	struct netmap_adapter *na;
	int error;

	vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vpna == NULL)
		return ENOMEM;

	na = &vpna->up;

	na->ifp = ifp;

	/* bound checking */
	na->num_tx_rings = nmr->nr_tx_rings;
	nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_tx_rings = na->num_tx_rings; // write back
	na->num_rx_rings = nmr->nr_rx_rings;
	nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
	nmr->nr_rx_rings = na->num_rx_rings; // write back
	nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_tx_desc = nmr->nr_tx_slots;
	nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
			1, NM_BDG_MAXSLOTS, NULL);
	na->num_rx_desc = nmr->nr_rx_slots;
	vpna->offset = 0;

	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
	na->nm_txsync = bdg_netmap_txsync;
	na->nm_rxsync = bdg_netmap_rxsync;
	na->nm_register = bdg_netmap_reg;
	na->nm_dtor = netmap_adapter_vp_dtor;
	na->nm_krings_create = netmap_vp_krings_create;
	na->nm_krings_delete = netmap_vp_krings_delete;
	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
	/* other nmd fields are set in the common routine */
	error = netmap_attach_common(na);
	if (error) {
		free(vpna, M_DEVBUF);
		return error;
	}
	return 0;
}


static void
netmap_bwrap_dtor(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct nm_bridge *b = bna->up.na_bdg,
		*bh = bna->host.na_bdg;
	struct ifnet *ifp = na->ifp;

	ND("na %p", na);

	if (b) {
		netmap_bdg_detach_common(b, bna->up.bdg_port,
			(bh ? bna->host.bdg_port : -1));
	}

	hwna->na_private = NULL;
	netmap_adapter_put(hwna);

	bzero(ifp, sizeof(*ifp));
	free(ifp, M_DEVBUF);
	na->ifp = NULL;

}


/*
 * Intr callback for NICs connected to a bridge.
 * Simply ignore tx interrupts (maybe we could try to recover space ?)
 * and pass received packets from nic to the bridge.
 *
 * XXX TODO check locking: this is called from the interrupt
 * handler so we should make sure that the interface is not
 * disconnected while passing down an interrupt.
 *
 * Note, no user process can access this NIC or the host stack.
 * The only significant parts of the ring are the slots,
 * and head/cur/tail are set from the kring as needed
 * (part as a receive ring, part as a transmit ring).
 *
 * callback that overwrites the hwna notify callback.
 * Packets come from the outside or from the host stack and are put on an hwna rx ring.
 * The bridge wrapper then sends the packets through the bridge.
 */
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_vp_adapter *hostna = &bna->host;
	struct netmap_kring *kring, *bkring;
	struct netmap_ring *ring;
	int is_host_ring = ring_nr == na->num_rx_rings;
	struct netmap_vp_adapter *vpna = &bna->up;
	int error = 0;

	if (netmap_verbose)
		D("%s %s%d 0x%x", NM_IFPNAME(ifp),
			(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);

	if (flags & NAF_DISABLE_NOTIFY) {
		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
		if (kring[ring_nr].nkr_stopped)
			netmap_disable_ring(&bkring[ring_nr]);
		else
			bkring[ring_nr].nkr_stopped = 0;
		return 0;
	}

	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
		return 0;

	/* we only care about receive interrupts */
	if (tx == NR_TX)
		return 0;

	kring = &na->rx_rings[ring_nr];
	ring = kring->ring;

	/* make sure the ring is not disabled */
	if (nm_kr_tryget(kring))
		return 0;

	if (is_host_ring && hostna->na_bdg == NULL) {
		error = bna->save_notify(na, ring_nr, tx, flags);
		goto put_out;
	}

	/* Here we expect ring->head = ring->cur = ring->tail
	 * because everything has been released from the previous round.
	 * However the ring is shared and we might have info from
	 * the wrong side (the tx ring). Hence we overwrite with
	 * the info from the rx kring.
	 */
	if (netmap_verbose)
		D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp),
			ring->head, ring->cur, ring->tail,
			kring->rhead, kring->rcur, kring->rtail);

	ring->head = kring->rhead;
	ring->cur = kring->rcur;
	ring->tail = kring->rtail;

	/* simulate a user wakeup on the rx ring */
	if (is_host_ring) {
		netmap_rxsync_from_host(na, NULL, NULL);
		vpna = hostna;
		ring_nr = 0;
	} else {
		/* fetch packets that have arrived.
		 * XXX maybe do this in a loop ?
		 */
		error = na->nm_rxsync(na, ring_nr, 0);
		if (error)
			goto put_out;
	}
	if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
		D("how strange, interrupt with no packets on %s",
			NM_IFPNAME(ifp));
		goto put_out;
	}

	/* new packets are ring->cur to ring->tail, and the bkring
	 * had hwcur == ring->cur. So advance ring->cur to ring->tail
	 * to push all packets out.
	 */
	ring->head = ring->cur = ring->tail;

	/* also set tail to what the bwrap expects */
	bkring = &vpna->up.tx_rings[ring_nr];
	ring->tail = bkring->nr_hwtail; // rtail too ?

	/* pass packets to the switch */
	nm_txsync_prologue(bkring); // XXX error checking ?
	netmap_vp_txsync(vpna, ring_nr, flags);

	/* mark all buffers as released on this ring */
	ring->head = ring->cur = kring->nr_hwtail;
	ring->tail = kring->rtail;
	/* another call to actually release the buffers */
	if (!is_host_ring) {
		error = na->nm_rxsync(na, ring_nr, 0);
	} else {
		/* mark all packets as released, as in the
		 * second part of netmap_rxsync_from_host()
		 */
		kring->nr_hwcur = kring->nr_hwtail;
		nm_rxsync_finalize(kring);
	}

put_out:
	nm_kr_put(kring);
	return error;
}


static int
netmap_bwrap_register(struct netmap_adapter *na, int onoff)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_vp_adapter *hostna = &bna->host;
	int error;

	ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off");

	if (onoff) {
		int i;

		hwna->na_lut = na->na_lut;
		hwna->na_lut_objtotal = na->na_lut_objtotal;

		if (hostna->na_bdg) {
			hostna->up.na_lut = na->na_lut;
			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
		}

		/* cross-link the netmap rings
		 * The original number of rings comes from hwna,
		 * rx rings on one side equals tx rings on the other.
		 */
		for (i = 0; i <= na->num_rx_rings; i++) {
			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
		}
		for (i = 0; i <= na->num_tx_rings; i++) {
			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
		}
	}

	if (hwna->ifp) {
		error = hwna->nm_register(hwna, onoff);
		if (error)
			return error;
	}

	bdg_netmap_reg(na, onoff);

	if (onoff) {
		bna->save_notify = hwna->nm_notify;
		hwna->nm_notify = netmap_bwrap_intr_notify;
	} else {
		hwna->nm_notify = bna->save_notify;
		hwna->na_lut = NULL;
		hwna->na_lut_objtotal = 0;
	}

	return 0;
}


static int
netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
				    u_int *rxr, u_int *rxd)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	/* forward the request */
	netmap_update_config(hwna);
	/* swap the results */
	*txr = hwna->num_rx_rings;
	*txd = hwna->num_rx_desc;
	*rxr = hwna->num_tx_rings;
	*rxd = hwna->num_tx_desc;

	return 0;
}


static int
netmap_bwrap_krings_create(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_adapter *hostna = &bna->host.up;
	int error;

	ND("%s", NM_IFPNAME(na->ifp));

	error = netmap_vp_krings_create(na);
	if (error)
		return error;

	error = hwna->nm_krings_create(hwna);
	if (error) {
		netmap_vp_krings_delete(na);
		return error;
	}

	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
	hostna->rx_rings = na->rx_rings + na->num_rx_rings;

	return 0;
}


static void
netmap_bwrap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;

	ND("%s",
		NM_IFPNAME(na->ifp));

	hwna->nm_krings_delete(hwna);
	netmap_vp_krings_delete(na);
}


/* notify method for the bridge-->hwna direction */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna =
		(struct netmap_bwrap_adapter *)na;
	struct netmap_adapter *hwna = bna->hwna;
	struct netmap_kring *kring, *hw_kring;
	struct netmap_ring *ring;
	u_int lim;
	int error = 0;

	if (tx == NR_TX)
		return ENXIO;

	kring = &na->rx_rings[ring_n];
	hw_kring = &hwna->tx_rings[ring_n];
	ring = kring->ring;
	lim = kring->nkr_num_slots - 1;

	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
		return 0;
	/* first step: simulate a user wakeup on the rx ring */
	netmap_vp_rxsync(na, ring_n, flags);
	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
	/* second step: the simulated user consumes all new packets */
	ring->head = ring->cur = ring->tail;

	/* third step: the new packets are sent on the tx ring
	 * (which is actually the same ring)
	 */
	/* set tail to what the hw expects */
	ring->tail = hw_kring->rtail;
	if (ring_n == na->num_rx_rings) {
		netmap_txsync_to_host(hwna);
	} else {
		nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
		error = hwna->nm_txsync(hwna, ring_n, flags);
	}

	/* fourth step: now we are back to the rx ring */
	/* claim ownership on all hw owned bufs */
	ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
	ring->tail = kring->rtail; /* restore saved value of tail, for safety */

	/* fifth step: the user goes to sleep again, causing another rxsync */
	netmap_vp_rxsync(na, ring_n, flags);
	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
		NM_IFPNAME(na->ifp), ring_n,
		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
		ring->head, ring->cur, ring->tail,
		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);

	return error;
}


static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
	struct netmap_bwrap_adapter *bna = na->na_private;
	struct netmap_adapter *port_na = &bna->up.up;
	if (tx == NR_TX || ring_n != 0)
		return ENXIO;
	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}


/* attach a bridge wrapper to the 'real' device */
static int
netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
{
	struct netmap_bwrap_adapter *bna;
	struct netmap_adapter *na;
	struct netmap_adapter *hwna = NA(real);
	struct netmap_adapter *hostna;
	int error;


	bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (bna == NULL)
		return ENOMEM;

	na = &bna->up.up;
	na->ifp = fake;
	/* fill the ring data for the bwrap adapter with rx/tx meanings
	 * swapped. The real cross-linking will be done during register,
	 * when all the krings will have been created.
	 */
	na->num_rx_rings = hwna->num_tx_rings;
	na->num_tx_rings = hwna->num_rx_rings;
	na->num_tx_desc = hwna->num_rx_desc;
	na->num_rx_desc = hwna->num_tx_desc;
	na->nm_dtor = netmap_bwrap_dtor;
	na->nm_register = netmap_bwrap_register;
	// na->nm_txsync = netmap_bwrap_txsync;
	// na->nm_rxsync = netmap_bwrap_rxsync;
	na->nm_config = netmap_bwrap_config;
	na->nm_krings_create = netmap_bwrap_krings_create;
	na->nm_krings_delete = netmap_bwrap_krings_delete;
	na->nm_notify = netmap_bwrap_notify;
	na->nm_mem = hwna->nm_mem;
	na->na_private = na; /* prevent NIOCREGIF */
	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */

	bna->hwna = hwna;
	netmap_adapter_get(hwna);
	hwna->na_private = bna; /* weak reference */

	hostna = &bna->host.up;
	hostna->ifp = hwna->ifp;
	hostna->num_tx_rings = 1;
	hostna->num_tx_desc = hwna->num_rx_desc;
	hostna->num_rx_rings = 1;
	hostna->num_rx_desc = hwna->num_tx_desc;
	// hostna->nm_txsync = netmap_bwrap_host_txsync;
	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
	hostna->nm_notify = netmap_bwrap_host_notify;
	hostna->nm_mem = na->nm_mem;
	hostna->na_private = bna;

	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
		fake->if_xname, real->if_xname,
		na->num_tx_rings, na->num_tx_desc,
		na->num_rx_rings, na->num_rx_desc);

	error = netmap_attach_common(na);
	if (error) {
		netmap_adapter_put(hwna);
		free(bna, M_DEVBUF);
		return error;
	}
	return 0;
}


void
netmap_init_bridges(void)
{
	int i;
	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
	for (i = 0; i < NM_BRIDGES; i++)
		BDG_RWINIT(&nm_bridges[i]);
}
#endif /* WITH_VALE */