/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD$
 *
 * This module supports memory mapped access to network devices,
 * see netmap(4).
 *
 * The module uses a large memory pool allocated by the kernel
 * and accessible as mmapped memory by multiple userspace threads/processes.
 * The memory pool contains packet buffers and "netmap rings",
 * i.e. user-accessible copies of the interface's queues.
 *
 * Access to the network card works like this:
 * 1. a process/thread issues one or more open() on /dev/netmap, to create
 *    a select()able file descriptor on which events are reported.
 * 2. on each descriptor, the process issues an ioctl() to identify
 *    the interface that should report events to the file descriptor.
 * 3. on each descriptor, the process issues an mmap() request to
 *    map the shared memory region within the process' address space.
 *    The list of interesting queues is indicated by a location in
 *    the shared memory region.
 * 4. using the functions in the netmap(4) userspace API, a process
 *    can look up the occupation state of a queue, access memory buffers,
 *    and retrieve received packets or enqueue packets to transmit.
 * 5. using some ioctl()s the process can synchronize the userspace view
 *    of the queue with the actual status in the kernel. This includes both
 *    receiving the notification of new packets, and transmitting new
 *    packets on the output interface.
 * 6. select() or poll() can be used to wait for events on individual
 *    transmit or receive queues (or all queues for a given interface).
 *    A minimal usage sketch based on these steps is shown further below.
 *

 SYNCHRONIZATION (USER)

 The netmap rings and data structures may be shared among multiple
 user threads or even independent processes.
 Any synchronization among those threads/processes is delegated
 to the threads themselves. Only one thread at a time can be in
 a system call on the same netmap ring. The OS does not enforce
 this and only guarantees against system crashes in case of
 invalid usage.
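
 USAGE SKETCH (ILLUSTRATIVE)

 A minimal, hedged sketch of steps 1..6 above as seen from userspace.
 This is not code from this module; it assumes the nmreq/netmap_if
 layout exported by net/netmap.h and the NETMAP_IF()/NETMAP_RXRING()/
 NETMAP_BUF()/nm_ring_empty()/nm_ring_next() helpers from
 net/netmap_user.h, uses "em0" only as an example interface name,
 and omits all error handling.

	struct nmreq req = { .nr_version = NETMAP_API };
	struct netmap_if *nifp;
	struct netmap_ring *rxr;
	void *mem;
	int fd;

	strncpy(req.nr_name, "em0", sizeof(req.nr_name));	// example NIC name
	fd = open("/dev/netmap", O_RDWR);			// step 1
	ioctl(fd, NIOCREGIF, &req);				// step 2: bind fd to em0
	mem = mmap(NULL, req.nr_memsize,			// step 3: map shared region
	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	nifp = NETMAP_IF(mem, req.nr_offset);
	rxr = NETMAP_RXRING(nifp, 0);				// step 4: first RX ring
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		poll(&pfd, 1, -1);				// step 6: wait for packets
		while (!nm_ring_empty(rxr)) {			// step 5: consume slots
			struct netmap_slot *slot = &rxr->slot[rxr->cur];
			// process NETMAP_BUF(rxr, slot->buf_idx), slot->len bytes
			rxr->head = rxr->cur = nm_ring_next(rxr, rxr->cur);
		}
		// an ioctl(fd, NIOCRXSYNC, NULL) would sync without sleeping
	}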

 LOCKING (INTERNAL)

 Within the kernel, access to the netmap rings is protected as follows:

 - a spinlock on each ring, to handle producer/consumer races on
   RX rings attached to the host stack (against multiple host
   threads writing from the host stack to the same ring),
   and on 'destination' rings attached to a VALE switch
   (i.e. RX rings in VALE ports, and TX rings in NIC/host ports),
   protecting multiple active senders for the same destination.

 - an atomic variable to guarantee that there is at most one
   instance of *_*xsync() on the ring at any time.
   For rings connected to user file
   descriptors, an atomic_test_and_set() protects this, and the
   lock on the ring is not actually used.
   For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
   is also used to prevent multiple executions (the driver might indeed
   already guarantee this).
   For NIC TX rings connected to a VALE switch, the lock arbitrates
   access to the queue (both when allocating buffers and when pushing
   them out).

 - *xsync() should be protected against initializations of the card.
   On FreeBSD most devices have the reset routine protected by
   a RING lock (ixgbe, igb, em) or core lock (re). lem is missing
   the RING protection on rx_reset(); this should be added.

   On linux there is an external lock on the tx path, which probably
   also arbitrates access to the reset routine. XXX to be revised

 - a per-interface core_lock protecting access from the host stack
   while interfaces may be detached from netmap mode.
   XXX there should be no need for this lock if we detach the interfaces
   only while they are down.


 --- VALE SWITCH ---

 NMG_LOCK() serializes all modifications to switches and ports.
 A switch cannot be deleted until all ports are gone.

 For each switch, an SX lock (RWlock on linux) protects
 deletion of ports. When configuring or deleting a port, the
 lock is acquired in exclusive mode (after holding NMG_LOCK).
 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
 The lock is held throughout the entire forwarding cycle,
 during which the thread may incur a page fault.
 Hence it is important that sleepable shared locks are used.

 On the rx ring, the per-port lock is grabbed initially to reserve
 a number of slots in the ring, then the lock is released,
 packets are copied from source to destination, and then
 the lock is acquired again and the receive ring is updated.
 (A similar thing is done on the tx ring for NIC and host stack
 ports attached to the switch)

 */

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h>		/* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>		/* defines used in kernel.h */
#include <sys/kernel.h>		/* types used in module initialization */
#include <sys/conf.h>		/* cdevsw struct, UID, GID */
#include <sys/filio.h>		/* FIONBIO */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h>		/* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <net/vnet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>


/* reduce conditional code */
// linux API, use for the knlist in FreeBSD
#define init_waitqueue_head(x)	knlist_init_mtx(&(x)->si_note, NULL)

void freebsd_selwakeup(struct selinfo *si, int pri);
#define OS_selwakeup(a, b)	freebsd_selwakeup(a, b)

#elif defined(linux)

#include "bsd_glue.h"



#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");

/*
 * The following variables are used by the drivers and replicate
 * fields in the global memory pool. They only refer to buffers
 * used by physical interfaces.
 */
u_int netmap_total_buffers;
u_int netmap_buf_size;
char *netmap_buffer_base;	/* also address of an invalid buffer */

/* user-controlled variables */
int netmap_verbose;

static int netmap_no_timestamp; /* don't timestamp on rxsync */

SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");

int netmap_flags = 0;	/* debug flags */
int netmap_fwd = 0;	/* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */

/*
 * netmap_admode selects the netmap mode to use.
 * Invalid values are reset to NETMAP_ADMODE_BEST
 */
enum { NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
	NETMAP_ADMODE_NATIVE,	/* either native or none */
	NETMAP_ADMODE_GENERIC,	/* force generic */
	NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;

int netmap_generic_mit = 100*1000;	/* Generic mitigation interval in nanoseconds. */
int netmap_generic_ringsize = 1024;	/* Generic ringsize. */
int netmap_generic_rings = 1;		/* number of queues in generic. */

SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0, "");

NMG_LOCK_T	netmap_global_lock;


static void
nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}


/*
 * mark the ring as stopped, and run through the locks
 * to make sure other users get to see it.
 */
void
netmap_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;
	nm_kr_get(kr);
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}


/* stop or enable all the rings of na */
static void
netmap_set_all_rings(struct ifnet *ifp, int stopped)
{
	struct netmap_adapter *na;
	int i;
	u_int ntx, nrx;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return;

	na = NA(ifp);

	ntx = netmap_real_tx_rings(na);
	nrx = netmap_real_rx_rings(na);

	for (i = 0; i < ntx; i++) {
		if (stopped)
			netmap_disable_ring(na->tx_rings + i);
		else
			na->tx_rings[i].nkr_stopped = 0;
		na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY);
	}

	for (i = 0; i < nrx; i++) {
		if (stopped)
			netmap_disable_ring(na->rx_rings + i);
		else
			na->rx_rings[i].nkr_stopped = 0;
		na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY);
	}
}


/*
 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
 * to finish and prevents any new one from starting. Call this before turning
 * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload). As a rule of thumb for linux drivers, this should be placed near
 * each napi_disable().
 */
void
netmap_disable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(ifp, 1 /* stopped */);
}


/*
 * Convenience function used in drivers. Re-enables rxsync and txsync on the
 * adapter's rings. In linux drivers, this should be placed near each
 * napi_enable().
 */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(ifp, 0 /* enabled */);
}


/*
 * generic bound_checking function
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int oldv = *v;
	const char *op = NULL;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;
		op = "Clamp";
	}
	if (op && msg)
		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
	return *v;
}


/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] = "0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		for (j = 0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		for (j = 0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}


/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with NMG_LOCK held */
int
netmap_update_config(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	u_int txr, txd, rxr, rxd;

	txr = txd = rxr = rxd = 0;
	if (na->nm_config) {
		na->nm_config(na, &txr, &txd, &rxr, &rxd);
	} else {
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->active_fds > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			NM_IFPNAME(ifp),
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			NM_IFPNAME(ifp), txr, txd, rxr, rxd);
	}
	if (na->active_fds == 0) {
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	D("configuration changed while active, this is bad...");
	return 1;
}

static int
netmap_txsync_compat(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	return na->nm_txsync(na, kring->ring_id, flags);
}

static int
netmap_rxsync_compat(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	return na->nm_rxsync(na, kring->ring_id, flags);
}

/* kring->nm_sync callback for the host tx ring */
static int
netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_txsync_to_host(kring->na);
	return 0;
}

/* kring->nm_sync callback for the host rx ring */
static int
netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_rxsync_from_host(kring->na, NULL, NULL);
	return 0;
}



/* create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                    +----------+
 * na->tx_rings ----->|          | \
 *                    |          |  } na->num_tx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host tx kring
 * na->rx_rings ----> +----------+
 *                    |          | \
 *                    |          |  } na->num_rx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host rx kring
 *                    +----------+
 * na->tailroom ----->|          | \
 *                    |          |  } tailroom bytes
 *                    |          | /
 *                    +----------+
 *
 * Note: for compatibility, host krings are created even when not needed.
 * The tailroom space is currently used by vale ports for allocating leases.
 */
/* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int ntx, nrx;

	/* account for the (possibly fake) host rings */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	na->rx_rings = na->tx_rings + ntx;

	/*
	 * All fields in krings are 0 except the ones initialized below,
	 * but better be explicit on important kring fields.
	 */
	ndesc = na->num_tx_desc;
	for (i = 0; i < ntx; i++) { /* Transmit rings */
		kring = &na->tx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_tx_rings) {
			kring->nm_sync = netmap_txsync_compat; // XXX
		} else if (i == na->num_tx_rings) {
			kring->nm_sync = netmap_txsync_to_host_compat;
		}
		/*
		 * IMPORTANT: Always keep one slot empty.
		 */
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = ndesc - 1;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i);
		ND("ktx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}

	ndesc = na->num_rx_desc;
	for (i = 0; i < nrx; i++) { /* Receive rings */
		kring = &na->rx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_rx_rings) {
			kring->nm_sync = netmap_rxsync_compat; // XXX
		} else if (i == na->num_rx_rings) {
			kring->nm_sync = netmap_rxsync_from_host_compat;
		}
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = 0;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i);
		ND("krx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}
	init_waitqueue_head(&na->tx_si);
	init_waitqueue_head(&na->rx_si);

	na->tailroom = na->rx_rings + nrx;

	return 0;
}


/* undo the actions performed by netmap_krings_create */
/* call with NMG_LOCK held */
void
netmap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_kring *kring = na->tx_rings;

	/* we rely on the krings layout described above */
	for ( ; kring != na->tailroom; kring++) {
		mtx_destroy(&kring->q_lock);
	}
	free(na->tx_rings, M_DEVBUF);
	na->tx_rings = na->rx_rings = na->tailroom = NULL;
}


/*
 * Destructor for NIC ports. They also have an mbuf queue
 * on the rings connected to the host so we need to purge
 * them first.
 */
/* call with NMG_LOCK held */
static void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;

	ND("destroy sw mbq with len %d", mbq_len(q));
	mbq_purge(q);
	mbq_safe_destroy(q);
	netmap_krings_delete(na);
}


/* create a new netmap_if for a newly registered fd.
 * If this is the first registration of the adapter,
 * also create the netmap rings and their in-kernel view,
 * the netmap krings.
 */
/* call with NMG_LOCK held */
static struct netmap_if*
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
	struct netmap_if *nifp;

	if (netmap_update_config(na)) {
		/* configuration mismatch, report and fail */
		return NULL;
	}

	if (na->active_fds)	/* already registered */
		goto final;

	/* create and init the krings arrays.
	 * Depending on the adapter, this may also create
	 * the netmap rings themselves
	 */
	if (na->nm_krings_create(na))
		goto cleanup;

	/* create all missing netmap rings */
	if (netmap_mem_rings_create(na))
		goto cleanup;

final:

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(ifname, na);
	if (nifp == NULL)
		goto cleanup;

	return (nifp);

cleanup:

	if (na->active_fds == 0) {
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}

	return NULL;
}


/* grab a reference to the memory allocator, if we don't have one already. The
 * reference is taken from the netmap_adapter registered with the priv.
 */
/* call with NMG_LOCK held */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
	struct netmap_mem_d *nmd;
	int error = 0;

	if (p->np_na == NULL) {
		if (!netmap_mmap_unreg)
			return ENODEV;
		/* for compatibility with older versions of the API
		 * we use the global allocator when no interface has been
		 * registered
		 */
		nmd = &nm_mem;
	} else {
		nmd = p->np_na->nm_mem;
	}
	if (p->np_mref == NULL) {
		error = netmap_mem_finalize(nmd);
		if (!error)
			p->np_mref = nmd;
	} else if (p->np_mref != nmd) {
		/* a virtual port has been registered, but previous
		 * syscalls already used the global allocator.
		 * We cannot continue
		 */
		error = ENODEV;
	}
	return error;
}


/* call with NMG_LOCK *not* held */
int
netmap_get_memory(struct netmap_priv_d* p)
{
	int error;
	NMG_LOCK();
	error = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return error;
}


/* call with NMG_LOCK held */
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
	return p->np_mref != NULL;
}


/* call with NMG_LOCK held */
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
	if (p->np_mref) {
		netmap_mem_deref(p->np_mref);
		p->np_mref = NULL;
	}
}


/*
 * File descriptor's private data destructor.
 *
 * Call nm_register(ifp,0) to stop netmap mode on the interface and
 * revert to normal operation. We expect that np_na->ifp has not gone.
 * The second argument is the nifp to work on. In some cases it is
 * not attached yet to the netmap_priv_d so we need to pass it as
 * a separate argument.
 */
/* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct netmap_adapter *na = priv->np_na;
	struct ifnet *ifp = na->ifp;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	if (na->active_fds <= 0) {	/* last instance */

		if (netmap_verbose)
			D("deleting last instance for %s", NM_IFPNAME(ifp));
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		if (ifp)
			na->nm_register(na, 0); /* off, clear flags */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		/* XXX kqueue(9) needed; these will mirror knlist_init. */
		/* knlist_destroy(&na->tx_si.si_note); */
		/* knlist_destroy(&na->rx_si.si_note); */

		/* delete rings and buffers */
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}
	/* delete the nifp */
	netmap_mem_if_delete(na, nifp);
}

/* call with NMG_LOCK held */
static __inline int
nm_tx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_txqlast - priv->np_txqfirst > 1));
}

/* call with NMG_LOCK held */
static __inline int
nm_rx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_rxqlast - priv->np_rxqfirst > 1));
}


/*
 * Destructor of the netmap_priv_d, called when the fd has
 * no active open() and mmap(). Also called in error paths.
 *
 * returns 1 if this is the last instance and we can free priv
 */
/* call with NMG_LOCK held */
int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

#ifdef __FreeBSD__
	/*
	 * np_refcount is the number of active mmaps on
	 * this file descriptor
	 */
	if (--priv->np_refcount > 0) {
		return 0;
	}
#endif /* __FreeBSD__ */
	if (!na) {
		return 1; //XXX is it correct?
	}
	netmap_do_unregif(priv, priv->np_nifp);
	priv->np_nifp = NULL;
	netmap_drop_memory_locked(priv);
	if (priv->np_na) {
		if (nm_tx_si_user(priv))
			na->tx_si_users--;
		if (nm_rx_si_user(priv))
			na->rx_si_users--;
		netmap_adapter_put(na);
		priv->np_na = NULL;
	}
	return 1;
}


/* call with NMG_LOCK *not* held */
void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = data;
	int last_instance;

	NMG_LOCK();
	last_instance = netmap_dtor_locked(priv);
	NMG_UNLOCK();
	if (last_instance) {
		bzero(priv, sizeof(*priv)); /* for safety */
		free(priv, M_DEVBUF);
	}
}




/*
 * Handlers for synchronization of the queues from/to the host.
 * Netmap has two operating modes:
 * - in the default mode, the rings connected to the host stack are
 *   just another ring pair managed by userspace;
 * - in transparent mode (XXX to be defined) incoming packets
 *   (from the host or the NIC) are marked as NS_FORWARD upon
 *   arrival, and the user application has a chance to reset the
 *   flag for packets that should be dropped.
 *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
 *   to the other side.
 * The transfer NIC --> host is relatively easy, just encapsulate
 * into mbufs and we are done. The host --> NIC side is slightly
 * harder because there might not be room in the tx ring so it
 * might take a while before releasing the buffer.
 */


/*
 * pass a chain of buffers to the host stack as coming from 'dst'
 * We do not need to lock because the queue is private.
 */
static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
{
	struct mbuf *m;

	/* send packets up, outside the lock */
	while ((m = mbq_dequeue(q)) != NULL) {
		if (netmap_verbose & NM_VERB_HOST)
			D("sending up pkt %p size %d", m, MBUF_LEN(m));
		NM_SEND_UP(dst, m);
	}
	mbq_destroy(q);
}


/*
 * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
 * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
 * and pass them up. Drop remaining packets in the unlikely event
 * of an mbuf shortage.
 */
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->ring->head;
	u_int n;
	struct netmap_adapter *na = kring->na;

	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
		struct mbuf *m;
		struct netmap_slot *slot = &kring->ring->slot[n];

		if ((slot->flags & NS_FORWARD) == 0 && !force)
			continue;
		if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) {
			RD(5, "bad pkt at %d len %d", n, slot->len);
			continue;
		}
		slot->flags &= ~NS_FORWARD; // XXX needed ?
		/* XXX TODO: adapt to the case of a multisegment packet */
		m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL);

		if (m == NULL)
			break;
		mbq_enqueue(q, m);
	}
}


/*
 * Send to the NIC rings packets marked NS_FORWARD between
 * kring->nr_hwcur and kring->rhead.
 * Called under kring->rx_queue.lock on the sw rx ring.
 */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_slot *rxslot = kring->ring->slot;
	u_int i, rxcur = kring->nr_hwcur;
	u_int const head = kring->rhead;
	u_int const src_lim = kring->nkr_num_slots - 1;
	u_int sent = 0;

	/* scan rings to find space, then fill as much as possible */
	for (i = 0; i < na->num_tx_rings; i++) {
		struct netmap_kring *kdst = &na->tx_rings[i];
		struct netmap_ring *rdst = kdst->ring;
		u_int const dst_lim = kdst->nkr_num_slots - 1;

		/* XXX do we trust ring or kring->rcur,rtail ? */
		for (; rxcur != head && !nm_ring_empty(rdst);
		     rxcur = nm_next(rxcur, src_lim) ) {
			struct netmap_slot *src, *dst, tmp;
			u_int dst_cur = rdst->cur;

			src = &rxslot[rxcur];
			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
				continue;

			sent++;

			dst = &rdst->slot[dst_cur];

			tmp = *src;

			src->buf_idx = dst->buf_idx;
			src->flags = NS_BUF_CHANGED;

			dst->buf_idx = tmp.buf_idx;
			dst->len = tmp.len;
			dst->flags = NS_BUF_CHANGED;

			rdst->cur = nm_next(dst_cur, dst_lim);
		}
		/* if (sent) XXX txsync ? */
	}
	return sent;
}


/*
 * netmap_txsync_to_host() passes packets up. We are called from a
 * system call in user process context, and the only contention
 * can be among multiple user threads erroneously calling
 * this routine concurrently.
 */
void
netmap_txsync_to_host(struct netmap_adapter *na)
{
	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
	struct netmap_ring *ring = kring->ring;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct mbq q;

	/* Take packets from hwcur to head and pass them up.
	 * force head = cur since netmap_grab_packets() stops at head
	 * In case of no buffers we give up. At the end of the loop,
	 * the queue is drained in all cases.
	 */
	mbq_init(&q);
	ring->cur = head;
	netmap_grab_packets(kring, &q, 1 /* force */);
	ND("have %d pkts in queue", mbq_len(&q));
	kring->nr_hwcur = head;
	kring->nr_hwtail = head + lim;
	if (kring->nr_hwtail > lim)
		kring->nr_hwtail -= lim + 1;
	nm_txsync_finalize(kring);

	netmap_send_up(na->ifp, &q);
}


/*
 * rxsync backend for packets coming from the host stack.
 * They have been put in kring->rx_queue by netmap_transmit().
 * We protect access to the kring using kring->rx_queue.lock
 *
 * This routine also does the selrecord if called from the poll handler
 * (we know because td != NULL).
 *
 * NOTE: on linux, selrecord() is defined as a macro and uses pwait
 * as an additional hidden argument.
 * returns the number of packets delivered to tx queues in
 * transparent mode, or a negative value if error
 */
int
netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int ret = 0;
	struct mbq *q = &kring->rx_queue;

	(void)pwait;	/* disable unused warnings */
	(void)td;

	mbq_lock(q);

	/* First part: import newly received packets */
	n = mbq_len(q);
	if (n) { /* grab packets from the queue */
		struct mbuf *m;
		uint32_t stop_i;

		nm_i = kring->nr_hwtail;
		stop_i = nm_prev(nm_i, lim);
		while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
			int len = MBUF_LEN(m);
			struct netmap_slot *slot = &ring->slot[nm_i];

			m_copydata(m, 0, len, BDG_NMB(na, slot));
			ND("nm %d len %d", nm_i, len);
			if (netmap_verbose)
				D("%s", nm_dump_buf(BDG_NMB(na, slot), len, 128, NULL));

			slot->len = len;
			slot->flags = kring->nkr_slot_flags;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwtail = nm_i;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) { /* something was released */
		if (netmap_fwd || kring->ring->flags & NR_FORWARD)
			ret = netmap_sw_to_nic(na);
		kring->nr_hwcur = head;
	}

	nm_rxsync_finalize(kring);

	/* access copies of cur,tail in the kring */
	if (kring->rcur == kring->rtail && td) /* no bufs available */
		selrecord(td, &kring->si);

	mbq_unlock(q);
	return ret;
}


/* Get a netmap adapter for the port.
 *
 * If it is possible to satisfy the request, return 0
 * with *na containing the netmap adapter found.
 * Otherwise return an error code, with *na containing NULL.
 *
 * When the port is attached to a bridge, we always return
 * EBUSY.
 * Otherwise, if the port is already bound to a file descriptor,
 * then we unconditionally return the existing adapter into *na.
 * In all the other cases, we return (into *na) either native,
 * generic or NULL, according to the following table:
 *
 *                                       native_support
 * active_fds   dev.netmap.admode         YES     NO
 * -------------------------------------------------------
 *    >0              *                 NA(ifp) NA(ifp)
 *
 *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
 *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
 *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
 *
 */

int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
{
	/* generic support */
	int i = netmap_admode;	/* Take a snapshot. */
	int error = 0;
	struct netmap_adapter *prev_na;
	struct netmap_generic_adapter *gna;

	*na = NULL; /* default */

	/* reset in case of invalid value */
	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
		i = netmap_admode = NETMAP_ADMODE_BEST;

	if (NETMAP_CAPABLE(ifp)) {
		/* If an adapter already exists, but is
		 * attached to a vale port, we report that the
		 * port is busy.
1143 */ 1144 if (NETMAP_OWNED_BY_KERN(NA(ifp))) 1145 return EBUSY; 1146 1147 /* If an adapter already exists, return it if 1148 * there are active file descriptors or if 1149 * netmap is not forced to use generic 1150 * adapters. 1151 */ 1152 if (NA(ifp)->active_fds > 0 || 1153 i != NETMAP_ADMODE_GENERIC) { 1154 *na = NA(ifp); 1155 return 0; 1156 } 1157 } 1158 1159 /* If there isn't native support and netmap is not allowed 1160 * to use generic adapters, we cannot satisfy the request. 1161 */ 1162 if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE) 1163 return EOPNOTSUPP; 1164 1165 /* Otherwise, create a generic adapter and return it, 1166 * saving the previously used netmap adapter, if any. 1167 * 1168 * Note that here 'prev_na', if not NULL, MUST be a 1169 * native adapter, and CANNOT be a generic one. This is 1170 * true because generic adapters are created on demand, and 1171 * destroyed when not used anymore. Therefore, if the adapter 1172 * currently attached to an interface 'ifp' is generic, it 1173 * must be that 1174 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))). 1175 * Consequently, if NA(ifp) is generic, we will enter one of 1176 * the branches above. This ensures that we never override 1177 * a generic adapter with another generic adapter. 1178 */ 1179 prev_na = NA(ifp); 1180 error = generic_netmap_attach(ifp); 1181 if (error) 1182 return error; 1183 1184 *na = NA(ifp); 1185 gna = (struct netmap_generic_adapter*)NA(ifp); 1186 gna->prev = prev_na; /* save old na */ 1187 if (prev_na != NULL) { 1188 ifunit_ref(ifp->if_xname); 1189 // XXX add a refcount ? 1190 netmap_adapter_get(prev_na); 1191 } 1192 ND("Created generic NA %p (prev %p)", gna, gna->prev); 1193 1194 return 0; 1195 } 1196 1197 1198 /* 1199 * MUST BE CALLED UNDER NMG_LOCK() 1200 * 1201 * Get a refcounted reference to a netmap adapter attached 1202 * to the interface specified by nmr. 1203 * This is always called in the execution of an ioctl(). 1204 * 1205 * Return ENXIO if the interface specified by the request does 1206 * not exist, ENOTSUP if netmap is not supported by the interface, 1207 * EBUSY if the interface is already attached to a bridge, 1208 * EINVAL if parameters are invalid, ENOMEM if needed resources 1209 * could not be allocated. 1210 * If successful, hold a reference to the netmap adapter. 1211 * 1212 * No reference is kept on the real interface, which may then 1213 * disappear at any time. 1214 */ 1215 int 1216 netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 1217 { 1218 struct ifnet *ifp = NULL; 1219 int error = 0; 1220 struct netmap_adapter *ret = NULL; 1221 1222 *na = NULL; /* default return value */ 1223 1224 /* first try to see if this is a bridge port. */ 1225 NMG_LOCK_ASSERT(); 1226 1227 error = netmap_get_pipe_na(nmr, na, create); 1228 if (error || *na != NULL) 1229 return error; 1230 1231 error = netmap_get_bdg_na(nmr, na, create); 1232 if (error) 1233 return error; 1234 1235 if (*na != NULL) /* valid match in netmap_get_bdg_na() */ 1236 goto pipes; 1237 1238 /* 1239 * This must be a hardware na, lookup the name in the system. 1240 * Note that by hardware we actually mean "it shows up in ifconfig". 1241 * This may still be a tap, a veth/epair, or even a 1242 * persistent VALE port. 
	 */
	ifp = ifunit_ref(nmr->nr_name);
	if (ifp == NULL) {
		return ENXIO;
	}

	error = netmap_get_hw_na(ifp, &ret);
	if (error)
		goto out;

	/* Users cannot use the NIC attached to a bridge directly */
	if (NETMAP_OWNED_BY_KERN(ret)) {
		error = EBUSY;
		goto out;
	}
	*na = ret;
	netmap_adapter_get(ret);

pipes:
	/*
	 * If we are opening a pipe whose parent was not in netmap mode,
	 * we have to allocate the pipe array now.
	 * XXX get rid of this clumsiness (2014-03-15)
	 */
	error = netmap_pipe_alloc(*na, nmr);

out:
	if (error && ret != NULL)
		netmap_adapter_put(ret);

	if (ifp)
		if_rele(ifp); /* allow live unloading of driver modules */

	return error;
}


/*
 * validate parameters on entry for *_txsync()
 * Returns ring->cur if ok, or something >= kring->nkr_num_slots
 * in case of error.
 *
 * rhead, rcur and rtail=hwtail are stored from previous round.
 * hwcur is the next packet to send to the ring.
 *
 * We want
 *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
 *
 * hwcur, rhead, rtail and hwtail are reliable
 */
u_int
nm_txsync_prologue(struct netmap_kring *kring)
{
	struct netmap_ring *ring = kring->ring;
	u_int head = ring->head; /* read only once */
	u_int cur = ring->cur; /* read only once */
	u_int n = kring->nkr_num_slots;

	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
	if (kring->nr_hwcur >= n || kring->rhead >= n ||
	    kring->rtail >= n || kring->nr_hwtail >= n)
		goto error;
#endif /* kernel sanity checks */
	/*
	 * user sanity checks. We only use 'cur',
	 * A, B, ... are possible positions for cur:
	 *
	 *  0    A  cur   B  tail  C  n-1
	 *  0    D  tail  E  cur   F  n-1
	 *
	 * B, F, D are valid. A, C, E are wrong
	 */
	if (kring->rtail >= kring->rhead) {
		/* want rhead <= head <= rtail */
		if (head < kring->rhead || head > kring->rtail)
			goto error;
		/* and also head <= cur <= rtail */
		if (cur < head || cur > kring->rtail)
			goto error;
	} else { /* here rtail < rhead */
		/* we need head outside rtail .. rhead */
		if (head > kring->rtail && head < kring->rhead)
			goto error;

		/* two cases now: head <= rtail or head >= rhead  */
		if (head <= kring->rtail) {
			/* want head <= cur <= rtail */
			if (cur < head || cur > kring->rtail)
				goto error;
		} else { /* head >= rhead */
			/* cur must be outside rtail..head */
			if (cur > kring->rtail && cur < head)
				goto error;
		}
	}
	if (ring->tail != kring->rtail) {
		RD(5, "tail overwritten was %d need %d",
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	kring->rhead = head;
	kring->rcur = cur;
	return head;

error:
	RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
		kring->name,
		kring->nr_hwcur,
		kring->rcur, kring->nr_hwtail,
		cur, ring->tail);
	return n;
}


/*
 * validate parameters on entry for *_rxsync()
 * Returns ring->head if ok, kring->nkr_num_slots on error.
 *
 * For a valid configuration,
 *    hwcur <= head <= cur <= tail <= hwtail
 *
 * We only consider head and cur.
 * hwcur and hwtail are reliable.
 *
 */
u_int
nm_rxsync_prologue(struct netmap_kring *kring)
{
	struct netmap_ring *ring = kring->ring;
	uint32_t const n = kring->nkr_num_slots;
	uint32_t head, cur;

	ND("%s kc %d kt %d h %d c %d t %d",
		kring->name,
		kring->nr_hwcur, kring->nr_hwtail,
		ring->head, ring->cur, ring->tail);
	/*
	 * Before storing the new values, we should check they do not
	 * move backwards. However:
	 * - head is not an issue because the previous value is hwcur;
	 * - cur could in principle go back, however it does not matter
	 *   because we are processing a brand new rxsync()
	 */
	cur = kring->rcur = ring->cur;		/* read only once */
	head = kring->rhead = ring->head;	/* read only once */
#if 1 /* kernel sanity checks */
	if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
		goto error;
#endif /* kernel sanity checks */
	/* user sanity checks */
	if (kring->nr_hwtail >= kring->nr_hwcur) {
		/* want hwcur <= rhead <= hwtail */
		if (head < kring->nr_hwcur || head > kring->nr_hwtail)
			goto error;
		/* and also rhead <= rcur <= hwtail */
		if (cur < head || cur > kring->nr_hwtail)
			goto error;
	} else {
		/* we need rhead outside hwtail..hwcur */
		if (head < kring->nr_hwcur && head > kring->nr_hwtail)
			goto error;
		/* two cases now: head <= hwtail or head >= hwcur  */
		if (head <= kring->nr_hwtail) {
			/* want head <= cur <= hwtail */
			if (cur < head || cur > kring->nr_hwtail)
				goto error;
		} else {
			/* cur must be outside hwtail..head */
			if (cur < head && cur > kring->nr_hwtail)
				goto error;
		}
	}
	if (ring->tail != kring->rtail) {
		RD(5, "%s tail overwritten was %d need %d",
			kring->name,
			ring->tail, kring->rtail);
		ring->tail = kring->rtail;
	}
	return head;

error:
	RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
		kring->nr_hwcur,
		kring->rcur, kring->nr_hwtail,
		kring->rhead, kring->rcur, ring->tail);
	return n;
}


/*
 * Error routine called when txsync/rxsync detects an error.
 * Can't do much more than resetting head = cur = hwcur, tail = hwtail
 * Return 1 on reinit.
 *
 * This routine is only called by the upper half of the kernel.
 * It only reads hwcur (which is changed only by the upper half, too)
 * and hwtail (which may be changed by the lower half, but only on
 * a tx ring and only to increase it, so any error will be recovered
 * on the next call). For the above, we don't strictly need to call
 * it under lock.
 */
int
netmap_ring_reinit(struct netmap_kring *kring)
{
	struct netmap_ring *ring = kring->ring;
	u_int i, lim = kring->nkr_num_slots - 1;
	int errors = 0;

	// XXX KASSERT nm_kr_tryget
	RD(10, "called for %s", NM_IFPNAME(kring->na->ifp));
	// XXX probably wrong to trust userspace
	kring->rhead = ring->head;
	kring->rcur  = ring->cur;
	kring->rtail = ring->tail;

	if (ring->cur > lim)
		errors++;
	if (ring->head > lim)
		errors++;
	if (ring->tail > lim)
		errors++;
	for (i = 0; i <= lim; i++) {
		u_int idx = ring->slot[i].buf_idx;
		u_int len = ring->slot[i].len;
		if (idx < 2 || idx >= netmap_total_buffers) {
			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
			ring->slot[i].buf_idx = 0;
			ring->slot[i].len = 0;
		} else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) {
			ring->slot[i].len = 0;
			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
		}
	}
	if (errors) {
		RD(10, "total %d errors", errors);
		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
			kring->name,
			ring->cur, kring->nr_hwcur,
			ring->tail, kring->nr_hwtail);
		ring->head = kring->rhead = kring->nr_hwcur;
		ring->cur  = kring->rcur  = kring->nr_hwcur;
		ring->tail = kring->rtail = kring->nr_hwtail;
	}
	return (errors ? 1 : 0);
}


/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 */
static int
netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
{
	struct netmap_adapter *na = priv->np_na;
	u_int j, i = ringid & NETMAP_RING_MASK;
	u_int reg = flags & NR_REG_MASK;

	if (reg == NR_REG_DEFAULT) {
		/* convert from old ringid to flags */
		if (ringid & NETMAP_SW_RING) {
			reg = NR_REG_SW;
		} else if (ringid & NETMAP_HW_RING) {
			reg = NR_REG_ONE_NIC;
		} else {
			reg = NR_REG_ALL_NIC;
		}
		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
	}
	switch (reg) {
	case NR_REG_ALL_NIC:
	case NR_REG_PIPE_MASTER:
	case NR_REG_PIPE_SLAVE:
		priv->np_txqfirst = 0;
		priv->np_txqlast = na->num_tx_rings;
		priv->np_rxqfirst = 0;
		priv->np_rxqlast = na->num_rx_rings;
		ND("%s %d %d", "ALL/PIPE",
			priv->np_rxqfirst, priv->np_rxqlast);
		break;
	case NR_REG_SW:
	case NR_REG_NIC_SW:
		if (!(na->na_flags & NAF_HOST_RINGS)) {
			D("host rings not supported");
			return EINVAL;
		}
		priv->np_txqfirst = (reg == NR_REG_SW ?
			na->num_tx_rings : 0);
		priv->np_txqlast = na->num_tx_rings + 1;
		priv->np_rxqfirst = (reg == NR_REG_SW ?
			na->num_rx_rings : 0);
		priv->np_rxqlast = na->num_rx_rings + 1;
		ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
			priv->np_rxqfirst, priv->np_rxqlast);
		break;
	case NR_REG_ONE_NIC:
		if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
			D("invalid ring id %d", i);
			return EINVAL;
		}
		/* if not enough rings, use the first one */
		j = i;
		if (j >= na->num_tx_rings)
			j = 0;
		priv->np_txqfirst = j;
		priv->np_txqlast = j + 1;
		j = i;
		if (j >= na->num_rx_rings)
			j = 0;
		priv->np_rxqfirst = j;
		priv->np_rxqlast = j + 1;
		break;
	default:
		D("invalid regif type %d", reg);
		return EINVAL;
	}
	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
	if (nm_tx_si_user(priv))
		na->tx_si_users++;
	if (nm_rx_si_user(priv))
		na->rx_si_users++;
	if (netmap_verbose) {
		D("%s: tx [%d,%d) rx [%d,%d) id %d",
			NM_IFPNAME(na->ifp),
			priv->np_txqfirst,
			priv->np_txqlast,
			priv->np_rxqfirst,
			priv->np_rxqlast,
			i);
	}
	return 0;
}

/*
 * possibly move the interface to netmap-mode.
 * On success it returns a pointer to netmap_if, otherwise NULL.
 * This must be called with NMG_LOCK held.
 */
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags, int *err)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_if *nifp = NULL;
	int error, need_mem = 0;

	NMG_LOCK_ASSERT();
	/* ring configuration may have changed, fetch from the card */
	netmap_update_config(na);
	priv->np_na = na; /* store the reference */
	error = netmap_set_ringid(priv, ringid, flags);
	if (error)
		goto out;
	/* ensure allocators are ready */
	need_mem = !netmap_have_memory_locked(priv);
	if (need_mem) {
		error = netmap_get_memory_locked(priv);
		ND("get_memory returned %d", error);
		if (error)
			goto out;
	}
	nifp = netmap_if_new(NM_IFPNAME(ifp), na);

	/* Allocate a netmap_if and, if necessary, all the netmap_ring's */
	if (nifp == NULL) { /* allocation failed */
		error = ENOMEM;
		goto out;
	}
	na->active_fds++;
	if (ifp->if_capenable & IFCAP_NETMAP) {
		/* was already set */
	} else {
		/* Otherwise set the card in netmap mode
		 * and make it use the shared buffers.
		 */
		/* cache the allocator info in the na */
		na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut;
		ND("%p->na_lut == %p", na, na->na_lut);
		na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal;
		error = na->nm_register(na, 1); /* mode on */
		if (error) {
			netmap_do_unregif(priv, nifp);
			nifp = NULL;
		}
	}
out:
	*err = error;
	if (error) {
		priv->np_na = NULL;
		/* we should drop the allocator, but only
		 * if we were the ones who grabbed it
		 */
		if (need_mem)
			netmap_drop_memory_locked(priv);
	}
	if (nifp != NULL) {
		/*
		 * advertise that the interface is ready by setting np_nifp.
		 * The barrier is needed because readers (poll and *SYNC)
		 * check for priv->np_nifp != NULL without locking
		 */
		wmb(); /* make sure previous writes are visible to all CPUs */
		priv->np_nifp = nifp;
	}
	return nifp;
}



/*
 * ioctl(2) support for the "netmap" device.
 *
 * Following is a list of accepted commands:
 * - NIOCGINFO
 * - SIOCGIFADDR	just for convenience
 * - NIOCREGIF
 * - NIOCTXSYNC
 * - NIOCRXSYNC
 *
 * Return 0 on success, errno otherwise.
 */
int
netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
	int fflag, struct thread *td)
{
	struct netmap_priv_d *priv = NULL;
	struct ifnet *ifp = NULL;
	struct nmreq *nmr = (struct nmreq *) data;
	struct netmap_adapter *na = NULL;
	int error;
	u_int i, qfirst, qlast;
	struct netmap_if *nifp;
	struct netmap_kring *krings;

	(void)dev;	/* UNUSED */
	(void)fflag;	/* UNUSED */

	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
		/* truncate name */
		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
		if (nmr->nr_version != NETMAP_API) {
			D("API mismatch for %s got %d need %d",
				nmr->nr_name,
				nmr->nr_version, NETMAP_API);
			nmr->nr_version = NETMAP_API;
		}
		if (nmr->nr_version < NETMAP_MIN_API ||
		    nmr->nr_version > NETMAP_MAX_API) {
			return EINVAL;
		}
	}
	CURVNET_SET(TD_TO_VNET(td));

	error = devfs_get_cdevpriv((void **)&priv);
	if (error) {
		CURVNET_RESTORE();
		/* XXX ENOENT should be impossible, since the priv
		 * is now created in the open */
		return (error == ENOENT ? ENXIO : error);
	}

	switch (cmd) {
	case NIOCGINFO:		/* return capabilities etc */
		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
			error = netmap_bdg_ctl(nmr, NULL);
			break;
		}

		NMG_LOCK();
		do {
			/* memsize is always valid */
			struct netmap_mem_d *nmd = &nm_mem;
			u_int memflags;

			if (nmr->nr_name[0] != '\0') {
				/* get a refcount */
				error = netmap_get_na(nmr, &na, 1 /* create */);
				if (error)
					break;
				nmd = na->nm_mem; /* get memory allocator */
			}

			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
				&nmr->nr_arg2);
			if (error)
				break;
			if (na == NULL) /* only memory info */
				break;
			nmr->nr_offset = 0;
			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
			netmap_update_config(na);
			nmr->nr_rx_rings = na->num_rx_rings;
			nmr->nr_tx_rings = na->num_tx_rings;
			nmr->nr_rx_slots = na->num_rx_desc;
			nmr->nr_tx_slots = na->num_tx_desc;
			netmap_adapter_put(na);
		} while (0);
		NMG_UNLOCK();
		break;

	case NIOCREGIF:
		/* possibly attach/detach NIC and VALE switch */
		i = nmr->nr_cmd;
		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
				|| i == NETMAP_BDG_VNET_HDR) {
			error = netmap_bdg_ctl(nmr, NULL);
			break;
		} else if (i != 0) {
			D("nr_cmd must be 0 not %d", i);
			error = EINVAL;
			break;
		}

		/* protect access to priv from concurrent NIOCREGIF */
		NMG_LOCK();
		do {
			u_int memflags;

			if (priv->np_na != NULL) {	/* thread already registered */
				error = EBUSY;
				break;
			}
			/* find the interface and a reference */
			error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
			if (error)
				break;
			ifp = na->ifp;
			if (NETMAP_OWNED_BY_KERN(na)) {
				netmap_adapter_put(na);
				error = EBUSY;
				break;
			}
			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
			if (!nifp) {	/* reg. failed, release priv and ref */
				netmap_adapter_put(na);
				priv->np_nifp = NULL;
				break;
			}
			priv->np_td = td; // XXX kqueue, debugging only

			/* return the offset of the netmap_if object */
			nmr->nr_rx_rings = na->num_rx_rings;
			nmr->nr_tx_rings = na->num_tx_rings;
			nmr->nr_rx_slots = na->num_rx_desc;
			nmr->nr_tx_slots = na->num_tx_desc;
			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
				&nmr->nr_arg2);
			if (error) {
				netmap_adapter_put(na);
				break;
			}
			if (memflags & NETMAP_MEM_PRIVATE) {
				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
			}
			priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
				&na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
			priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
				&na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;

			if (nmr->nr_arg3) {
				D("requested %d extra buffers", nmr->nr_arg3);
				nmr->nr_arg3 = netmap_extra_alloc(na,
					&nifp->ni_bufs_head, nmr->nr_arg3);
				D("got %d extra buffers", nmr->nr_arg3);
			}
			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
		} while (0);
		NMG_UNLOCK();
		break;

	case NIOCTXSYNC:
	case NIOCRXSYNC:
		nifp = priv->np_nifp;

		if (nifp == NULL) {
			error = ENXIO;
			break;
		}
		rmb(); /* make sure following reads are not from cache */

		na = priv->np_na;	/* we have a reference */

		if (na == NULL) {
			D("Internal error: nifp != NULL && na == NULL");
			error = ENXIO;
			break;
		}

		ifp = na->ifp;
		if (ifp == NULL) {
			RD(1, "the ifp is gone");
			error = ENXIO;
			break;
		}

		if (cmd == NIOCTXSYNC) {
			krings = na->tx_rings;
			qfirst = priv->np_txqfirst;
			qlast = priv->np_txqlast;
		} else {
			krings = na->rx_rings;
			qfirst = priv->np_rxqfirst;
			qlast = priv->np_rxqlast;
		}

		for (i = qfirst; i < qlast; i++) {
			struct netmap_kring *kring = krings + i;
			if (nm_kr_tryget(kring)) {
				error = EBUSY;
				goto out;
			}
			if (cmd == NIOCTXSYNC) {
				if (netmap_verbose & NM_VERB_TXSYNC)
					D("pre txsync ring %d cur %d hwcur %d",
					    i, kring->ring->cur,
					    kring->nr_hwcur);
				if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
					netmap_ring_reinit(kring);
				} else {
					kring->nm_sync(kring, NAF_FORCE_RECLAIM);
				}
				if (netmap_verbose & NM_VERB_TXSYNC)
					D("post txsync ring %d cur %d hwcur %d",
					    i, kring->ring->cur,
					    kring->nr_hwcur);
			} else {
				kring->nm_sync(kring, NAF_FORCE_READ);
				microtime(&na->rx_rings[i].ring->ts);
			}
			nm_kr_put(kring);
		}

		break;

#ifdef __FreeBSD__
	case FIONBIO:
	case FIOASYNC:
		ND("FIONBIO/FIOASYNC are no-ops");
		break;

	case BIOCIMMEDIATE:
	case BIOCGHDRCMPLT:
	case BIOCSHDRCMPLT:
	case BIOCSSEESENT:
		D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
		break;

	default:	/* allow device-specific ioctls */
		{
			struct socket so;

			bzero(&so, sizeof(so));
			NMG_LOCK();
			error = netmap_get_na(nmr, &na, 0 /* don't create */); /* keep reference */
			if (error) {
				netmap_adapter_put(na);
				NMG_UNLOCK();
				break;
			}
			ifp = na->ifp;
			so.so_vnet = ifp->if_vnet;
			// so->so_proto not null.
1910 error = ifioctl(&so, cmd, data, td); 1911 netmap_adapter_put(na); 1912 NMG_UNLOCK(); 1913 break; 1914 } 1915 1916 #else /* linux */ 1917 default: 1918 error = EOPNOTSUPP; 1919 #endif /* linux */ 1920 } 1921 out: 1922 1923 CURVNET_RESTORE(); 1924 return (error); 1925 } 1926 1927 1928 /* 1929 * select(2) and poll(2) handlers for the "netmap" device. 1930 * 1931 * Can be called for one or more queues. 1932 * Return the event mask corresponding to ready events. 1933 * If there are no ready events, do a selrecord on either individual 1934 * selinfo or on the global one. 1935 * Device-dependent parts (locking and sync of tx/rx rings) 1936 * are done through callbacks. 1937 * 1938 * On linux, the arguments are really pwait, the poll table, and 'td' is a struct file *. 1939 * The first one is remapped to pwait as selrecord() uses the name as a 1940 * hidden argument. 1941 */ 1942 int 1943 netmap_poll(struct cdev *dev, int events, struct thread *td) 1944 { 1945 struct netmap_priv_d *priv = NULL; 1946 struct netmap_adapter *na; 1947 struct ifnet *ifp; 1948 struct netmap_kring *kring; 1949 u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; 1950 struct mbq q; /* packets from hw queues to host stack */ 1951 void *pwait = dev; /* linux compatibility */ 1952 int is_kevent = 0; 1953 1954 /* 1955 * In order to avoid nested locks, we need to "double check" 1956 * txsync and rxsync if we decide to do a selrecord(). 1957 * retry_tx (and retry_rx, later) prevent looping forever. 1958 */ 1959 int retry_tx = 1, retry_rx = 1; 1960 1961 (void)pwait; 1962 mbq_init(&q); 1963 1964 /* 1965 * XXX kevent has curthread->td_fpop == NULL, 1966 * so devfs_get_cdevpriv() fails. We circumvent this by passing 1967 * priv as the first argument, which also lets us skip 1968 * the selrecord() calls, which are not necessary in that case. 1969 */ 1970 if (devfs_get_cdevpriv((void **)&priv) != 0) { 1971 is_kevent = 1; 1972 if (netmap_verbose) 1973 D("called from kevent"); 1974 priv = (struct netmap_priv_d *)dev; 1975 } 1976 if (priv == NULL) 1977 return POLLERR; 1978 1979 if (priv->np_nifp == NULL) { 1980 D("No if registered"); 1981 return POLLERR; 1982 } 1983 rmb(); /* make sure following reads are not from cache */ 1984 1985 na = priv->np_na; 1986 ifp = na->ifp; 1987 // check for deleted 1988 if (ifp == NULL) { 1989 RD(1, "the ifp is gone"); 1990 return POLLERR; 1991 } 1992 1993 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) 1994 return POLLERR; 1995 1996 if (netmap_verbose & 0x8000) 1997 D("device %s events 0x%x", NM_IFPNAME(ifp), events); 1998 want_tx = events & (POLLOUT | POLLWRNORM); 1999 want_rx = events & (POLLIN | POLLRDNORM); 2000 2001 2002 /* 2003 * check_all_{tx|rx} are set if the card has more than one queue AND 2004 * the file descriptor is bound to all of them. If so, we sleep on 2005 * the "global" selinfo, otherwise we sleep on individual selinfo 2006 * (FreeBSD only allows two selinfo's per file descriptor). 2007 * The interrupt routine in the driver wakes one or the other 2008 * (or both) depending on which clients are active. 2009 * 2010 * rxsync() is only called if we run out of buffers on a POLLIN. 2011 * txsync() is called if we run out of buffers on POLLOUT, or 2012 * there are pending packets to send. The latter can be disabled 2013 * by passing NETMAP_NO_TX_POLL in the NIOCREGIF call. 2014 */ 2015 check_all_tx = nm_tx_si_user(priv); 2016 check_all_rx = nm_rx_si_user(priv); 2017 2018 /* 2019 * We start with a lock-free round which is cheap if we have 2020 * slots available.
If this fails, then lock and call the sync 2021 * routines. 2022 */ 2023 for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) { 2024 kring = &na->rx_rings[i]; 2025 /* XXX compare ring->cur and kring->tail */ 2026 if (!nm_ring_empty(kring->ring)) { 2027 revents |= want_rx; 2028 want_rx = 0; /* also breaks the loop */ 2029 } 2030 } 2031 for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) { 2032 kring = &na->tx_rings[i]; 2033 /* XXX compare ring->cur and kring->tail */ 2034 if (!nm_ring_empty(kring->ring)) { 2035 revents |= want_tx; 2036 want_tx = 0; /* also breaks the loop */ 2037 } 2038 } 2039 2040 /* 2041 * If we want to push packets out (priv->np_txpoll) or 2042 * want_tx is still set, we must issue txsync calls 2043 * (on all rings, to avoid that the tx rings stall). 2044 * XXX should also check cur != hwcur on the tx rings. 2045 * Fortunately, normal tx mode has np_txpoll set. 2046 */ 2047 if (priv->np_txpoll || want_tx) { 2048 /* 2049 * The first round checks if anyone is ready, if not 2050 * do a selrecord and another round to handle races. 2051 * want_tx goes to 0 if any space is found, and is 2052 * used to skip rings with no pending transmissions. 2053 */ 2054 flush_tx: 2055 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { 2056 int found = 0; 2057 2058 kring = &na->tx_rings[i]; 2059 if (!want_tx && kring->ring->cur == kring->nr_hwcur) 2060 continue; 2061 /* only one thread does txsync */ 2062 if (nm_kr_tryget(kring)) { 2063 /* either busy or stopped 2064 * XXX if the ring is stopped, sleeping would 2065 * be better. In current code, however, we only 2066 * stop the rings for brief intervals (2014-03-14) 2067 */ 2068 2069 if (netmap_verbose) 2070 RD(2, "%p lost race on txring %d, ok", 2071 priv, i); 2072 continue; 2073 } 2074 if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { 2075 netmap_ring_reinit(kring); 2076 revents |= POLLERR; 2077 } else { 2078 if (kring->nm_sync(kring, 0)) 2079 revents |= POLLERR; 2080 } 2081 2082 /* 2083 * If we found new slots, notify potential 2084 * listeners on the same ring. 2085 * Since we just did a txsync, look at the copies 2086 * of cur,tail in the kring. 2087 */ 2088 found = kring->rcur != kring->rtail; 2089 nm_kr_put(kring); 2090 if (found) { /* notify other listeners */ 2091 revents |= want_tx; 2092 want_tx = 0; 2093 na->nm_notify(na, i, NR_TX, 0); 2094 } 2095 } 2096 if (want_tx && retry_tx && !is_kevent) { 2097 selrecord(td, check_all_tx ? 2098 &na->tx_si : &na->tx_rings[priv->np_txqfirst].si); 2099 retry_tx = 0; 2100 goto flush_tx; 2101 } 2102 } 2103 2104 /* 2105 * If want_rx is still set scan receive rings. 2106 * Do it on all rings because otherwise we starve. 2107 */ 2108 if (want_rx) { 2109 int send_down = 0; /* transparent mode */ 2110 /* two rounds here for race avoidance */ 2111 do_retry_rx: 2112 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { 2113 int found = 0; 2114 2115 kring = &na->rx_rings[i]; 2116 2117 if (nm_kr_tryget(kring)) { 2118 if (netmap_verbose) 2119 RD(2, "%p lost race on rxring %d, ok", 2120 priv, i); 2121 continue; 2122 } 2123 2124 /* 2125 * transparent mode support: collect packets 2126 * from the rxring(s). 
2127 * XXX NR_FORWARD should only be read on 2128 * physical or NIC ports 2129 */ 2130 if (netmap_fwd || kring->ring->flags & NR_FORWARD) { 2131 ND(10, "forwarding some buffers up %d to %d", 2132 kring->nr_hwcur, kring->ring->cur); 2133 netmap_grab_packets(kring, &q, netmap_fwd); 2134 } 2135 2136 if (kring->nm_sync(kring, 0)) 2137 revents |= POLLERR; 2138 if (netmap_no_timestamp == 0 || 2139 kring->ring->flags & NR_TIMESTAMP) { 2140 microtime(&kring->ring->ts); 2141 } 2142 /* after an rxsync we can use kring->rcur, rtail */ 2143 found = kring->rcur != kring->rtail; 2144 nm_kr_put(kring); 2145 if (found) { 2146 revents |= want_rx; 2147 retry_rx = 0; 2148 na->nm_notify(na, i, NR_RX, 0); 2149 } 2150 } 2151 2152 /* transparent mode XXX only during first pass ? */ 2153 if (na->na_flags & NAF_HOST_RINGS) { 2154 kring = &na->rx_rings[na->num_rx_rings]; 2155 if (check_all_rx 2156 && (netmap_fwd || kring->ring->flags & NR_FORWARD)) { 2157 /* XXX fix to use kring fields */ 2158 if (nm_ring_empty(kring->ring)) 2159 send_down = netmap_rxsync_from_host(na, td, dev); 2160 if (!nm_ring_empty(kring->ring)) 2161 revents |= want_rx; 2162 } 2163 } 2164 2165 if (retry_rx && !is_kevent) 2166 selrecord(td, check_all_rx ? 2167 &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si); 2168 if (send_down > 0 || retry_rx) { 2169 retry_rx = 0; 2170 if (send_down) 2171 goto flush_tx; /* and retry_rx */ 2172 else 2173 goto do_retry_rx; 2174 } 2175 } 2176 2177 /* 2178 * Transparent mode: marked bufs on rx rings between 2179 * kring->nr_hwcur and ring->head 2180 * are passed to the other endpoint. 2181 * 2182 * In this mode we also scan the sw rxring, which in 2183 * turn passes packets up. 2184 * 2185 * XXX Transparent mode at the moment requires binding all 2186 * rings to a single file descriptor. 2187 */ 2188 2189 if (q.head) 2190 netmap_send_up(na->ifp, &q); 2191 2192 return (revents); 2193 } 2194 2195 2196 /*-------------------- driver support routines -------------------*/ 2197 2198 static int netmap_hw_krings_create(struct netmap_adapter *); 2199 2200 /* default notify callback */ 2201 static int 2202 netmap_notify(struct netmap_adapter *na, u_int n_ring, 2203 enum txrx tx, int flags) 2204 { 2205 struct netmap_kring *kring; 2206 2207 if (tx == NR_TX) { 2208 kring = na->tx_rings + n_ring; 2209 OS_selwakeup(&kring->si, PI_NET); 2210 /* optimization: avoid a wake up on the global 2211 * queue if nobody has registered for more 2212 * than one ring 2213 */ 2214 if (na->tx_si_users > 0) 2215 OS_selwakeup(&na->tx_si, PI_NET); 2216 } else { 2217 kring = na->rx_rings + n_ring; 2218 OS_selwakeup(&kring->si, PI_NET); 2219 /* optimization: same as above */ 2220 if (na->rx_si_users > 0) 2221 OS_selwakeup(&na->rx_si, PI_NET); 2222 } 2223 return 0; 2224 } 2225 2226 2227 /* Called by all routines that create netmap_adapters. 2228 * Attach na to the ifp (if any) and provide defaults 2229 * for optional callbacks. Defaults assume that we 2230 * are creating a hardware netmap_adapter. 2231 */ 2232 int 2233 netmap_attach_common(struct netmap_adapter *na) 2234 { 2235 struct ifnet *ifp = na->ifp; 2236 2237 if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { 2238 D("%s: invalid rings tx %d rx %d", 2239 ifp->if_xname, na->num_tx_rings, na->num_rx_rings); 2240 return EINVAL; 2241 } 2242 WNA(ifp) = na; 2243 2244 /* the following is only needed for na that use the host port. 2245 * XXX do we have something similar for linux ?
2246 */ 2247 #ifdef __FreeBSD__ 2248 na->if_input = ifp->if_input; /* for netmap_send_up */ 2249 #endif /* __FreeBSD__ */ 2250 2251 NETMAP_SET_CAPABLE(ifp); 2252 if (na->nm_krings_create == NULL) { 2253 /* we assume that we have been called by a driver, 2254 * since other port types all provide their own 2255 * nm_krings_create 2256 */ 2257 na->nm_krings_create = netmap_hw_krings_create; 2258 na->nm_krings_delete = netmap_hw_krings_delete; 2259 } 2260 if (na->nm_notify == NULL) 2261 na->nm_notify = netmap_notify; 2262 na->active_fds = 0; 2263 2264 if (na->nm_mem == NULL) 2265 na->nm_mem = &nm_mem; 2266 return 0; 2267 } 2268 2269 2270 /* standard cleanup, called by all destructors */ 2271 void 2272 netmap_detach_common(struct netmap_adapter *na) 2273 { 2274 if (na->ifp != NULL) 2275 WNA(na->ifp) = NULL; /* XXX do we need this? */ 2276 2277 if (na->tx_rings) { /* XXX should not happen */ 2278 D("freeing leftover tx_rings"); 2279 na->nm_krings_delete(na); 2280 } 2281 netmap_pipe_dealloc(na); 2282 if (na->na_flags & NAF_MEM_OWNER) 2283 netmap_mem_private_delete(na->nm_mem); 2284 bzero(na, sizeof(*na)); 2285 free(na, M_DEVBUF); 2286 } 2287 2288 2289 /* 2290 * Initialize a ``netmap_adapter`` object created by driver on attach. 2291 * We allocate a block of memory with room for a struct netmap_adapter 2292 * plus two sets of N+2 struct netmap_kring (where N is the number 2293 * of hardware rings): 2294 * krings 0..N-1 are for the hardware queues. 2295 * kring N is for the host stack queue 2296 * kring N+1 is only used for the selinfo for all queues. // XXX still true ? 2297 * Return 0 on success, ENOMEM otherwise. 2298 */ 2299 int 2300 netmap_attach(struct netmap_adapter *arg) 2301 { 2302 struct netmap_hw_adapter *hwna = NULL; 2303 // XXX when is arg == NULL ? 2304 struct ifnet *ifp = arg ? arg->ifp : NULL; 2305 2306 if (arg == NULL || ifp == NULL) 2307 goto fail; 2308 hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO); 2309 if (hwna == NULL) 2310 goto fail; 2311 hwna->up = *arg; 2312 hwna->up.na_flags |= NAF_HOST_RINGS; 2313 if (netmap_attach_common(&hwna->up)) { 2314 free(hwna, M_DEVBUF); 2315 goto fail; 2316 } 2317 netmap_adapter_get(&hwna->up); 2318 2319 #ifdef linux 2320 if (ifp->netdev_ops) { 2321 /* prepare a clone of the netdev ops */ 2322 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) 2323 hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops; 2324 #else 2325 hwna->nm_ndo = *ifp->netdev_ops; 2326 #endif 2327 } 2328 hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; 2329 #endif /* linux */ 2330 2331 D("success for %s tx %d/%d rx %d/%d queues/slots", 2332 NM_IFPNAME(ifp), 2333 hwna->up.num_tx_rings, hwna->up.num_tx_desc, 2334 hwna->up.num_rx_rings, hwna->up.num_rx_desc 2335 ); 2336 return 0; 2337 2338 fail: 2339 D("fail, arg %p ifp %p na %p", arg, ifp, hwna); 2340 if (ifp) 2341 netmap_detach(ifp); 2342 return (hwna ? 
EINVAL : ENOMEM); 2343 } 2344 2345 2346 void 2347 NM_DBG(netmap_adapter_get)(struct netmap_adapter *na) 2348 { 2349 if (!na) { 2350 return; 2351 } 2352 2353 refcount_acquire(&na->na_refcount); 2354 } 2355 2356 2357 /* returns 1 iff the netmap_adapter is destroyed */ 2358 int 2359 NM_DBG(netmap_adapter_put)(struct netmap_adapter *na) 2360 { 2361 if (!na) 2362 return 1; 2363 2364 if (!refcount_release(&na->na_refcount)) 2365 return 0; 2366 2367 if (na->nm_dtor) 2368 na->nm_dtor(na); 2369 2370 netmap_detach_common(na); 2371 2372 return 1; 2373 } 2374 2375 /* nm_krings_create callback for all hardware native adapters */ 2376 int 2377 netmap_hw_krings_create(struct netmap_adapter *na) 2378 { 2379 int ret = netmap_krings_create(na, 0); 2380 if (ret == 0) { 2381 /* initialize the mbq for the sw rx ring */ 2382 mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); 2383 ND("initialized sw rx queue %d", na->num_rx_rings); 2384 } 2385 return ret; 2386 } 2387 2388 2389 2390 /* 2391 * Called on module unload by the netmap-enabled drivers 2392 */ 2393 void 2394 netmap_detach(struct ifnet *ifp) 2395 { 2396 struct netmap_adapter *na = NA(ifp); 2397 2398 if (!na) 2399 return; 2400 2401 NMG_LOCK(); 2402 netmap_disable_all_rings(ifp); 2403 if (!netmap_adapter_put(na)) { 2404 /* someone is still using the adapter, 2405 * tell them that the interface is gone 2406 */ 2407 na->ifp = NULL; 2408 /* give them a chance to notice */ 2409 netmap_enable_all_rings(ifp); 2410 } 2411 NMG_UNLOCK(); 2412 } 2413 2414 2415 /* 2416 * Intercept packets from the network stack and pass them 2417 * to netmap as incoming packets on the 'software' ring. 2418 * 2419 * We only store packets in a bounded mbq and then copy them 2420 * in the relevant rxsync routine. 2421 * 2422 * We rely on the OS to make sure that the ifp and na do not go 2423 * away (typically the caller checks for IFF_DRV_RUNNING or the like). 2424 * In nm_register() or whenever there is a reinitialization, 2425 * we make sure to make the mode change visible here. 2426 */ 2427 int 2428 netmap_transmit(struct ifnet *ifp, struct mbuf *m) 2429 { 2430 struct netmap_adapter *na = NA(ifp); 2431 struct netmap_kring *kring; 2432 u_int len = MBUF_LEN(m); 2433 u_int error = ENOBUFS; 2434 struct mbq *q; 2435 int space; 2436 2437 // XXX [Linux] we do not need this lock 2438 // if we follow the down/configure/up protocol -gl 2439 // mtx_lock(&na->core_lock); 2440 2441 if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { 2442 D("%s not in netmap mode anymore", NM_IFPNAME(ifp)); 2443 error = ENXIO; 2444 goto done; 2445 } 2446 2447 kring = &na->rx_rings[na->num_rx_rings]; 2448 q = &kring->rx_queue; 2449 2450 // XXX reconsider long packets if we handle fragments 2451 if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ 2452 D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp), 2453 len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); 2454 goto done; 2455 } 2456 2457 /* protect against rxsync_from_host(), netmap_sw_to_nic() 2458 * and maybe other instances of netmap_transmit (the latter 2459 * not possible on Linux). 2460 * Also avoid overflowing the queue. 
2461 */ 2462 mbq_lock(q); 2463 2464 space = kring->nr_hwtail - kring->nr_hwcur; 2465 if (space < 0) 2466 space += kring->nkr_num_slots; 2467 if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX 2468 RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p", 2469 NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q), 2470 len, m); 2471 } else { 2472 mbq_enqueue(q, m); 2473 ND(10, "%s %d bufs in queue len %d m %p", 2474 NM_IFPNAME(ifp), mbq_len(q), len, m); 2475 /* notify outside the lock */ 2476 m = NULL; 2477 error = 0; 2478 } 2479 mbq_unlock(q); 2480 2481 done: 2482 if (m) 2483 m_freem(m); 2484 /* unconditionally wake up listeners */ 2485 na->nm_notify(na, na->num_rx_rings, NR_RX, 0); 2486 /* this is normally netmap_notify(), but for nics 2487 * connected to a bridge it is netmap_bwrap_intr_notify(), 2488 * that possibly forwards the frames through the switch 2489 */ 2490 2491 return (error); 2492 } 2493 2494 2495 /* 2496 * netmap_reset() is called by the driver routines when reinitializing 2497 * a ring. The driver is in charge of locking to protect the kring. 2498 * If native netmap mode is not set just return NULL. 2499 */ 2500 struct netmap_slot * 2501 netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, 2502 u_int new_cur) 2503 { 2504 struct netmap_kring *kring; 2505 int new_hwofs, lim; 2506 2507 if (na == NULL) { 2508 D("NULL na, should not happen"); 2509 return NULL; /* no netmap support here */ 2510 } 2511 if (!(na->ifp->if_capenable & IFCAP_NETMAP)) { 2512 ND("interface not in netmap mode"); 2513 return NULL; /* nothing to reinitialize */ 2514 } 2515 2516 /* XXX note- in the new scheme, we are not guaranteed to be 2517 * under lock (e.g. when called on a device reset). 2518 * In this case, we should set a flag and do not trust too 2519 * much the values. In practice: TODO 2520 * - set a RESET flag somewhere in the kring 2521 * - do the processing in a conservative way 2522 * - let the *sync() fixup at the end. 2523 */ 2524 if (tx == NR_TX) { 2525 if (n >= na->num_tx_rings) 2526 return NULL; 2527 kring = na->tx_rings + n; 2528 // XXX check whether we should use hwcur or rcur 2529 new_hwofs = kring->nr_hwcur - new_cur; 2530 } else { 2531 if (n >= na->num_rx_rings) 2532 return NULL; 2533 kring = na->rx_rings + n; 2534 new_hwofs = kring->nr_hwtail - new_cur; 2535 } 2536 lim = kring->nkr_num_slots - 1; 2537 if (new_hwofs > lim) 2538 new_hwofs -= lim + 1; 2539 2540 /* Always set the new offset value and realign the ring. */ 2541 if (netmap_verbose) 2542 D("%s %s%d hwofs %d -> %d, hwtail %d -> %d", 2543 NM_IFPNAME(na->ifp), 2544 tx == NR_TX ? "TX" : "RX", n, 2545 kring->nkr_hwofs, new_hwofs, 2546 kring->nr_hwtail, 2547 tx == NR_TX ? lim : kring->nr_hwtail); 2548 kring->nkr_hwofs = new_hwofs; 2549 if (tx == NR_TX) { 2550 kring->nr_hwtail = kring->nr_hwcur + lim; 2551 if (kring->nr_hwtail > lim) 2552 kring->nr_hwtail -= lim + 1; 2553 } 2554 2555 #if 0 // def linux 2556 /* XXX check that the mappings are correct */ 2557 /* need ring_nr, adapter->pdev, direction */ 2558 buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); 2559 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { 2560 D("error mapping rx netmap buffer %d", i); 2561 // XXX fix error handling 2562 } 2563 2564 #endif /* linux */ 2565 /* 2566 * Wakeup on the individual and global selwait 2567 * We do the wakeup here, but the ring is not yet reconfigured. 2568 * However, we are under lock so there are no races. 
2569 */ 2570 na->nm_notify(na, n, tx, 0); 2571 return kring->ring->slot; 2572 } 2573 2574 2575 /* 2576 * Dispatch rx/tx interrupts to the netmap rings. 2577 * 2578 * "work_done" is non-null on the RX path, NULL for the TX path. 2579 * We rely on the OS to make sure that there is only one active 2580 * instance per queue, and that there is appropriate locking. 2581 * 2582 * The 'notify' routine depends on what the ring is attached to. 2583 * - for a netmap file descriptor, do a selwakeup on the individual 2584 * waitqueue, plus one on the global one if needed 2585 * - for a switch, call the proper forwarding routine 2586 * - XXX more ? 2587 */ 2588 void 2589 netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done) 2590 { 2591 struct netmap_adapter *na = NA(ifp); 2592 struct netmap_kring *kring; 2593 2594 q &= NETMAP_RING_MASK; 2595 2596 if (netmap_verbose) { 2597 RD(5, "received %s queue %d", work_done ? "RX" : "TX", q); 2598 } 2599 2600 if (work_done) { /* RX path */ 2601 if (q >= na->num_rx_rings) 2602 return; // not a physical queue 2603 kring = na->rx_rings + q; 2604 kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ? 2605 na->nm_notify(na, q, NR_RX, 0); 2606 *work_done = 1; /* do not fire napi again */ 2607 } else { /* TX path */ 2608 if (q >= na->num_tx_rings) 2609 return; // not a physical queue 2610 kring = na->tx_rings + q; 2611 na->nm_notify(na, q, NR_TX, 0); 2612 } 2613 } 2614 2615 2616 /* 2617 * Default functions to handle rx/tx interrupts from a physical device. 2618 * "work_done" is non-null on the RX path, NULL for the TX path. 2619 * 2620 * If the card is not in netmap mode, simply return 0, 2621 * so that the caller proceeds with regular processing. 2622 * Otherwise call netmap_common_irq() and return 1. 2623 * 2624 * If the card is connected to a netmap file descriptor, 2625 * do a selwakeup on the individual queue, plus one on the global one 2626 * if needed (multiqueue card _and_ there are multiqueue listeners), 2627 * and return 1. 2628 * 2629 * Finally, if called on rx from an interface connected to a switch, 2630 * call the proper forwarding routine and return 1. 2631 */ 2632 int 2633 netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done) 2634 { 2635 // XXX could we check NAF_NATIVE_ON ? 2636 if (!(ifp->if_capenable & IFCAP_NETMAP)) 2637 return 0; 2638 2639 if (NA(ifp)->na_flags & NAF_SKIP_INTR) { 2640 ND("use regular interrupt"); 2641 return 0; 2642 } 2643 2644 netmap_common_irq(ifp, q, work_done); 2645 return 1; 2646 } 2647 2648 2649 /* 2650 * Module loader and unloader 2651 * 2652 * netmap_init() creates the /dev/netmap device and initializes 2653 * all global variables. Returns 0 on success, errno on failure 2654 * (though failure is not really expected). 2655 * 2656 * netmap_fini() destroys everything. 2657 */ 2658 2659 static struct cdev *netmap_dev; /* /dev/netmap character device. */ 2660 extern struct cdevsw netmap_cdevsw; 2661 2662 2663 void 2664 netmap_fini(void) 2665 { 2666 // XXX destroy_bridges() ?
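/* Teardown order matters: remove the device node first, so that no
 * new open()/ioctl() can come in, then release the memory pools and
 * finally destroy the global lock.
 */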
2667 if (netmap_dev) 2668 destroy_dev(netmap_dev); 2669 netmap_mem_fini(); 2670 NMG_LOCK_DESTROY(); 2671 printf("netmap: unloaded module.\n"); 2672 } 2673 2674 2675 int 2676 netmap_init(void) 2677 { 2678 int error; 2679 2680 NMG_LOCK_INIT(); 2681 2682 error = netmap_mem_init(); 2683 if (error != 0) 2684 goto fail; 2685 /* XXX could use make_dev_credv() to get error number */ 2686 netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, 2687 "netmap"); 2688 if (!netmap_dev) 2689 goto fail; 2690 2691 netmap_init_bridges(); 2692 printf("netmap: loaded module\n"); 2693 return (0); 2694 fail: 2695 netmap_fini(); 2696 return (EINVAL); /* may be incorrect */ 2697 } 2698
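/*
 * Example: a minimal userspace sketch of the open()/NIOCREGIF/mmap()/
 * poll()/NIOCTXSYNC sequence that netmap_ioctl() and netmap_poll()
 * above implement. This is an illustration only, kept out of the build
 * with #if 0: error handling is abbreviated, the interface name and the
 * frame passed in are placeholders, and the authoritative definitions of
 * the macros and helpers used here are in <net/netmap_user.h> and
 * netmap(4).
 */
#if 0
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

static int
tx_one_frame(const char *ifname, const void *frame, size_t len)
{
	struct nmreq nmr;
	struct netmap_if *nifp;
	struct netmap_ring *ring;
	struct pollfd pfd;
	void *mem;
	int fd;

	fd = open("/dev/netmap", O_RDWR);
	if (fd < 0)
		return (-1);

	/* NIOCREGIF: bind the descriptor to all rings of ifname;
	 * on return the kernel has filled nr_memsize and nr_offset.
	 */
	memset(&nmr, 0, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name) - 1);
	if (ioctl(fd, NIOCREGIF, &nmr) < 0)
		goto fail;

	/* map the shared region, then locate our netmap_if and tx ring 0 */
	mem = mmap(NULL, nmr.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED)
		goto fail;
	nifp = NETMAP_IF(mem, nmr.nr_offset);
	ring = NETMAP_TXRING(nifp, 0);

	/* wait for a free slot, fill it, advance head/cur, then txsync */
	pfd.fd = fd;
	pfd.events = POLLOUT;
	poll(&pfd, 1, -1);
	if (nm_ring_space(ring)) {
		struct netmap_slot *slot = &ring->slot[ring->cur];
		char *buf = NETMAP_BUF(ring, slot->buf_idx);

		memcpy(buf, frame, len);
		slot->len = (uint16_t)len;
		ring->head = ring->cur = nm_ring_next(ring, ring->cur);
		ioctl(fd, NIOCTXSYNC, NULL);
	}
	munmap(mem, nmr.nr_memsize);
	close(fd);
	return (0);
fail:
	close(fd);
	return (-1);
}
#endif /* example */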