/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * The header contains the definitions of constants and function
 * prototypes used only in kernelspace.
 */

#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_

#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC

#if defined(__FreeBSD__)

#define likely(x)	__builtin_expect((long)!!(x), 1L)
#define unlikely(x)	__builtin_expect((long)!!(x), 0L)

#define	NM_LOCK_T	struct mtx

/* netmap global lock */
#define	NMG_LOCK_T	struct sx
#define NMG_LOCK_INIT()		sx_init(&netmap_global_lock, \
					"netmap global lock")
#define NMG_LOCK_DESTROY()	sx_destroy(&netmap_global_lock)
#define NMG_LOCK()		sx_xlock(&netmap_global_lock)
#define NMG_UNLOCK()		sx_xunlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	sx_assert(&netmap_global_lock, SA_XLOCKED)

#define	NM_SELINFO_T	struct selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
#define	NM_SEND_UP(ifp, m)	((NA(ifp))->if_input)(ifp, m)

#define NM_ATOMIC_T	volatile int	// XXX ?
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)

#if __FreeBSD_version >= 1100030
#define	WNA(_ifp)	(_ifp)->if_netmap
#else /* older FreeBSD */
#define	WNA(_ifp)	(_ifp)->if_pspare[0]
#endif /* older FreeBSD */

#if __FreeBSD_version >= 1100005
struct netmap_adapter *netmap_getna(if_t ifp);
#endif

#if __FreeBSD_version >= 1100027
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	*((m)->m_ext.ext_cnt) = x
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt)
#else
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	*((m)->m_ext.ref_cnt) = x
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt)
#endif

MALLOC_DECLARE(M_NETMAP);

// XXX linux structs, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
};

#elif defined (linux)

#define	NM_LOCK_T	safe_spinlock_t	// see bsd_glue.h
#define	NM_SELINFO_T	wait_queue_head_t
#define	MBUF_LEN(m)	((m)->len)
#define	MBUF_IFP(m)	((m)->dev)
#define	NM_SEND_UP(ifp, m) \
			do { \
			    m->priority = NM_MAGIC_PRIORITY_RX; \
			    netif_rx(m); \
			} while (0)

#define NM_ATOMIC_T	volatile long unsigned int

#define NM_MTX_T	struct mutex
#define NM_MTX_INIT(m, s)	do { (void)s; mutex_init(&(m)); } while (0)
#define NM_MTX_DESTROY(m)	do { (void)m; } while (0)
#define NM_MTX_LOCK(m)		mutex_lock(&(m))
#define NM_MTX_UNLOCK(m)	mutex_unlock(&(m))
#define NM_MTX_LOCK_ASSERT(m)	mutex_is_locked(&(m))

#define NMG_LOCK_T		NM_MTX_T
#define NMG_LOCK_INIT()		NM_MTX_INIT(netmap_global_lock, \
					"netmap_global_lock")
#define NMG_LOCK_DESTROY()	NM_MTX_DESTROY(netmap_global_lock)
#define NMG_LOCK()		NM_MTX_LOCK(netmap_global_lock)
#define NMG_UNLOCK()		NM_MTX_UNLOCK(netmap_global_lock)
#define NMG_LOCK_ASSERT()	NM_MTX_LOCK_ASSERT(netmap_global_lock)

#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */

#elif defined (__APPLE__)

#warning apple support is incomplete.
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define	NM_LOCK_T	IOLock *
#define	NM_SELINFO_T	struct selinfo
#define	MBUF_LEN(m)	((m)->m_pkthdr.len)
#define	NM_SEND_UP(ifp, m)	((ifp)->if_input)(ifp, m)

#else

#error unsupported platform

#endif /* end - platform-specific code */

#define ND(format, ...)
#define D(format, ...)						\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		printf("%03d.%06d [%4d] %-25s " format "\n",	\
		(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
		__LINE__, __FUNCTION__, ##__VA_ARGS__);		\
	} while (0)

/* rate limited, lps indicates how many per second */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < lps)				\
			D(format, ##__VA_ARGS__);		\
	} while (0)
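/*
 * Illustrative only (not compiled): a hypothetical call site for the
 * macros above. ND() compiles to nothing, D() always logs, and
 * RD(5, ...) logs at most 5 times per second; 'dropped' is a made-up
 * variable.
 */
#if 0
	D("ring %u reinit, hwcur %u", kring->ring_id, kring->nr_hwcur);
	RD(5, "dropped %d mbufs", dropped);
#endif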
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;

const char *nm_dump_buf(char *p, int len, int lim, char *dst);

#include "netmap_mbq.h"

extern NMG_LOCK_T	netmap_global_lock;

/*
 * private, kernel view of a ring. Keeps track of the status of
 * a ring across system calls.
 *
 *	nr_hwcur	index of the next buffer to refill.
 *			It corresponds to ring->head
 *			at the time the system call returns.
 *
 *	nr_hwtail	index of the first buffer owned by the kernel.
 *			On RX, hwcur->hwtail are receive buffers
 *			not yet released. hwcur is advanced following
 *			ring->head, hwtail is advanced on incoming packets,
 *			and a wakeup is generated when hwtail passes ring->cur
 *			On TX, hwcur->rcur have been filled by the sender
 *			but not sent yet to the NIC; rcur->hwtail are available
 *			for new transmissions, and hwtail->hwcur-1 are pending
 *			transmissions not yet acknowledged.
 *
 * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
 * This is so that, on a reset, buffers owned by userspace are not
 * modified by the kernel. In particular:
 * RX rings: the next empty buffer (hwtail + hwofs) coincides with
 *	the next empty buffer as known by the hardware (next_to_check or so).
 * TX rings: hwcur + hwofs coincides with next_to_send
 *
 * For received packets, slot->flags is set to nkr_slot_flags
 * so we can provide a proper initial value (e.g. set NS_FORWARD
 * when operating in 'transparent' mode).
 *
 * The following fields are used to implement lock-free copy of packets
 * from input to output ports in VALE switch:
 *	nkr_hwlease	buffer after the last one being copied.
 *			A writer in nm_bdg_flush reserves N buffers
 *			from nr_hwlease, advances it, then does the
 *			copy outside the lock.
 *			In RX rings (used for VALE ports),
 *			nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
 *			In TX rings (used for NIC or host stack ports)
 *			nkr_hwcur <= nkr_hwlease < nkr_hwtail
 *	nkr_leases	array of nkr_num_slots where writers can report
 *			completion of their block. NR_NOSLOT (~0) indicates
 *			that the writer has not finished yet
 *	nkr_lease_idx	index of next free slot in nr_leases, to be assigned
 *
 * The kring is manipulated by txsync/rxsync and the generic netmap functions.
 *
 * Concurrent rxsync or txsync on the same ring are prevented
 * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
 * for NIC rings, and for TX rings attached to the host stack.
 *
 * RX rings attached to the host stack use an mbq (rx_queue) on both
 * rxsync_from_host() and netmap_transmit(). The mbq is protected
 * by its internal lock.
 *
 * RX rings attached to the VALE switch are accessed by both senders
 * and receiver. They are protected through the q_lock on the RX ring.
 */
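/*
 * Illustrative sketch (not part of this header): given the fields
 * described above, a native driver's txsync typically walks the new
 * slots from nr_hwcur to rhead, then updates nr_hwcur. nm_next() and
 * NMB() are defined later in this file; "my_hw_send" is a hypothetical
 * device-specific routine.
 */
#if 0
	u_int nm_i = kring->nr_hwcur;
	u_int lim = kring->nkr_num_slots - 1;

	while (nm_i != kring->rhead) {
		struct netmap_slot *slot = &kring->ring->slot[nm_i];

		my_hw_send(NMB(kring->na, slot), slot->len);
		nm_i = nm_next(nm_i, lim);
	}
	kring->nr_hwcur = kring->rhead;
#endif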
struct netmap_kring {
	struct netmap_ring	*ring;

	uint32_t	nr_hwcur;
	uint32_t	nr_hwtail;

	/*
	 * Copies of values in user rings, so we do not need to look
	 * at the ring (which could be modified). These are set in the
	 * *sync_prologue()/finalize() routines.
	 */
	uint32_t	rhead;
	uint32_t	rcur;
	uint32_t	rtail;

	uint32_t	nr_kflags;	/* private driver flags */
#define NKR_PENDINTR	0x1		// Pending interrupt.
	uint32_t	nkr_num_slots;

	/*
	 * On a NIC reset, the NIC ring indexes may be reset but the
	 * indexes in the netmap rings remain the same. nkr_hwofs
	 * keeps track of the offset between the two.
	 */
	int32_t		nkr_hwofs;

	uint16_t	nkr_slot_flags;	/* initial value for flags */

	/* last_reclaim is an opaque marker to help reduce the frequency
	 * of operations such as reclaiming tx buffers. A possible use
	 * is to set it to ticks and do the reclaim only once per tick.
	 */
	uint64_t	last_reclaim;


	NM_SELINFO_T	si;		/* poll/select wait queue */
	NM_LOCK_T	q_lock;		/* protects kring and ring. */
	NM_ATOMIC_T	nr_busy;	/* prevent concurrent syscalls */

	struct netmap_adapter *na;

	/* The following fields are for VALE switch support */
	struct nm_bdg_fwd *nkr_ft;
	uint32_t	*nkr_leases;
#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
	uint32_t	nkr_hwlease;
	uint32_t	nkr_lease_idx;

	/* while nkr_stopped is set, no new [tr]xsync operations can
	 * be started on this kring.
	 * This is used by netmap_disable_all_rings()
	 * to find a synchronization point where critical data
	 * structures pointed to by the kring can be added or removed
	 */
	volatile int nkr_stopped;

	/* Support for adapters without native netmap support.
	 * On tx rings we preallocate an array of tx buffers
	 * (same size as the netmap ring), on rx rings we
	 * store incoming mbufs in a queue that is drained by
	 * a rxsync.
	 */
	struct mbuf **tx_pool;
	// u_int nr_ntc;	/* Emulation of a next-to-clean RX ring pointer. */
	struct mbq rx_queue;	/* intercepted rx mbufs. */

	uint32_t	ring_id;	/* debugging */
	char name[64];			/* diagnostic */

	/* [tx]sync callback for this kring.
	 * The default nm_kring_create callback (netmap_krings_create)
	 * sets the nm_sync callback of each hardware tx(rx) kring to
	 * the corresponding nm_txsync(nm_rxsync) taken from the
	 * netmap_adapter; moreover, it sets the sync callback
	 * of the host tx(rx) ring to netmap_txsync_to_host
	 * (netmap_rxsync_from_host).
	 *
	 * Overrides: the above configuration is not changed by
	 * any of the nm_krings_create callbacks.
	 */
	int (*nm_sync)(struct netmap_kring *kring, int flags);

#ifdef WITH_PIPES
	struct netmap_kring *pipe;	/* if this is a pipe ring,
					 * pointer to the other end
					 */
	struct netmap_ring *save_ring;	/* pointer to hidden rings
					 * (see netmap_pipe.c for details)
					 */
#endif /* WITH_PIPES */

#ifdef WITH_MONITOR
	/* pointer to the adapter that is monitoring this kring (if any) */
	struct netmap_monitor_adapter *monitor;
	/*
	 * Monitors work by intercepting the txsync and/or rxsync of the
	 * monitored krings. This is implemented by replacing
	 * the nm_sync pointer above and saving the previous
	 * one in save_sync below.
	 */
	int (*save_sync)(struct netmap_kring *kring, int flags);
#endif
} __attribute__((__aligned__(64)));


/* return the next index, with wraparound */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	return unlikely (i == lim) ? 0 : i + 1;
}


/* return the previous index, with wraparound */
static inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	return unlikely (i == 0) ? lim : i - 1;
}
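/*
 * Example (informative): with nkr_num_slots == 4 (so lim == 3),
 * nm_next(3, 3) == 0 and nm_prev(0, 3) == 3.
 */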
/*
 *
 * Here is the layout for the Rx and Tx rings.

       RxRING                            TxRING

      +-----------------+            +-----------------+
      |                 |            |                 |
      |XXX free slot XXX|            |XXX free slot XXX|
      +-----------------+            +-----------------+
head->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
      |                 |            | yet             |
      +-----------------+            |                 |
 cur->| available to    |            |                 |
      | user, not read  |            +-----------------+
      | yet             |       cur->| (being          |
      |                 |            |  prepared)      |
      |                 |            |                 |
      +-----------------+            +     ------      +
tail->|                 |<-hwtail    |                 |<-hwlease
      | (being          | ...        |                 | ...
      |  prepared)      | ...        |                 | ...
      +-----------------+ ...        |                 | ...
      |                 |<-hwlease   +-----------------+
      |                 |      tail->|                 |<-hwtail
      |                 |            |                 |
      |                 |            |                 |
      |                 |            |                 |
      +-----------------+            +-----------------+

 * The cur/tail (user view) and hwcur/hwtail (kernel view)
 * are used in the normal operation of the card.
 *
 * When a ring is the output of a switch port (Rx ring for
 * a VALE port, Tx ring for the host stack or NIC), slots
 * are reserved in blocks through 'hwlease' which points
 * to the next unused slot.
 * On an Rx ring, hwlease is always after hwtail,
 * and completions cause hwtail to advance.
 * On a Tx ring, hwlease is always between cur and hwtail,
 * and completions cause cur to advance.
 *
 * nm_kr_space() returns the maximum number of slots that
 * can be assigned.
 * nm_kr_lease() reserves the required number of buffers,
 *    advances nkr_hwlease and also returns an entry in
 *    a circular array where completions should be reported.
 */



enum txrx { NR_RX = 0, NR_TX = 1 };
struct netmap_vp_adapter; // forward

/*
 * The "struct netmap_adapter" extends the "struct adapter"
 * (or equivalent) device descriptor.
 * It contains all base fields needed to support netmap operation.
 * There are in fact different types of netmap adapters
 * (native, generic, VALE switch...) so a netmap_adapter is
 * just the first field in the derived type.
 */
struct netmap_adapter {
	/*
	 * On linux we do not have a good way to tell if an interface
	 * is netmap-capable. So we always use the following trick:
	 * NA(ifp) points here, and the first entry (which hopefully
	 * always exists and is at least 32 bits) contains a magic
	 * value which we can use to detect that the interface is good.
	 */
	uint32_t magic;
	uint32_t na_flags;	/* enabled, and other flags */
#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
				 * useful during initialization
				 */
#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
				 * forwarding packets coming from this
				 * interface
				 */
#define NAF_MEM_OWNER	8	/* the adapter is responsible for the
				 * deallocation of the memory allocator
				 */
#define NAF_NATIVE_ON	16	/* the adapter is native and the attached
				 * interface is in netmap mode.
				 * Virtual ports (vale, pipe, monitor...)
				 * should never use this flag.
				 */
#define	NAF_NETMAP_ON	32	/* netmap is active (either native or
				 * emulated). Where possible (e.g. FreeBSD)
				 * IFCAP_NETMAP also mirrors this flag.
				 */
#define NAF_HOST_RINGS	64	/* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
#define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
				  * cannot be registered from userspace
				  */
	int active_fds; /* number of user-space descriptors using this
			 interface, which is equal to the number of
			 struct netmap_if objs in the mapped region. */

	u_int num_rx_rings; /* number of adapter receive rings */
	u_int num_tx_rings; /* number of adapter transmit rings */

	u_int num_tx_desc;  /* number of descriptors in each queue */
	u_int num_rx_desc;

	/* tx_rings and rx_rings are private but allocated
	 * as a contiguous chunk of memory. Each array has
	 * N+1 entries, for the adapter queues and for the host queue.
	 */
	struct netmap_kring *tx_rings; /* array of TX rings. */
	struct netmap_kring *rx_rings; /* array of RX rings. */

	void *tailroom;		       /* space below the rings array */
				       /* (used for leases) */


	NM_SELINFO_T tx_si, rx_si;	/* global wait queues */

	/* count users of the global wait queues */
	int tx_si_users, rx_si_users;

	void *pdev; /* used to store pci device */

	/* copy of if_qflush and if_transmit pointers, to intercept
	 * packets from the network stack when netmap is active.
	 */
	int	(*if_transmit)(struct ifnet *, struct mbuf *);

	/* copy of if_input for netmap_send_up() */
	void	(*if_input)(struct ifnet *, struct mbuf *);

	/* references to the ifnet and device routines, used by
	 * the generic netmap functions.
	 */
	struct ifnet *ifp; /* adapter is ifp->if_softc */

	/*---- callbacks for this netmap adapter -----*/
	/*
	 * nm_dtor() is the cleanup routine called when destroying
	 *	the adapter.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_register() is called on NIOCREGIF and close() to enter
	 *	or exit netmap mode on the NIC
	 *	Called with NMG_LOCK held.
	 *
	 * nm_txsync() pushes packets to the underlying hw/switch
	 *
	 * nm_rxsync() collects packets from the underlying hw/switch
	 *
	 * nm_config() returns configuration information from the OS
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_create() create and init the tx_rings and
	 *	rx_rings arrays of kring structures. In particular,
	 *	set the nm_sync callbacks for each ring.
	 *	There is no need to also allocate the corresponding
	 *	netmap_rings, since netmap_mem_rings_create() will always
	 *	be called to provide the missing ones.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_delete() cleanup and delete the tx_rings and rx_rings
	 *	arrays
	 *	Called with NMG_LOCK held.
	 *
	 * nm_notify() is used to act after data have become available
	 *	(or the stopped state of the ring has changed)
	 *	For hw devices this is typically a selwakeup(),
	 *	but for NIC/host ports attached to a switch (or vice-versa)
	 *	we also need to invoke the 'txsync' code downstream.
	 */
	void (*nm_dtor)(struct netmap_adapter *);

	int (*nm_register)(struct netmap_adapter *, int onoff);

	int (*nm_txsync)(struct netmap_kring *kring, int flags);
	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ    1
#define NAF_FORCE_RECLAIM 2
	/* return configuration information */
	int (*nm_config)(struct netmap_adapter *,
		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
	int (*nm_krings_create)(struct netmap_adapter *);
	void (*nm_krings_delete)(struct netmap_adapter *);
	int (*nm_notify)(struct netmap_adapter *,
		u_int ring, enum txrx, int flags);
#define NAF_DISABLE_NOTIFY 8	/* notify that the stopped state of the
				 * ring has changed (kring->nkr_stopped)
				 */

#ifdef WITH_VALE
	/*
	 * nm_bdg_attach() initializes the na_vp field to point
	 *	to an adapter that can be attached to a VALE switch. If the
	 *	current adapter is already a VALE port, na_vp is simply a cast;
	 *	otherwise, na_vp points to a netmap_bwrap_adapter.
	 *	If applicable, this callback also initializes na_hostvp,
	 *	that can be used to connect the adapter host rings to the
	 *	switch.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_bdg_ctl() is called on the actual attach/detach
	 *	to/from the switch, to perform adapter-specific
	 *	initializations
	 *	Called with NMG_LOCK held.
	 */
	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
	int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);

	/* adapter used to attach this adapter to a VALE switch (if any) */
	struct netmap_vp_adapter *na_vp;
	/* adapter used to attach the host rings of this adapter
	 * to a VALE switch (if any) */
	struct netmap_vp_adapter *na_hostvp;
#endif

	/* standard refcount to control the lifetime of the adapter
	 * (it should be equal to the lifetime of the corresponding ifp)
	 */
	int na_refcount;

	/* memory allocator (opaque)
	 * We also cache a pointer to the lut_entry for translating
	 * buffer addresses, and the total number of buffers.
	 */
	struct netmap_mem_d *nm_mem;
	struct lut_entry *na_lut;
	uint32_t na_lut_objtotal;	/* max buffer index */
	uint32_t na_lut_objsize;	/* buffer size */

	/* additional information attached to this adapter
	 * by other netmap subsystems. Currently used by
	 * bwrap and LINUX/v1000.
	 */
	void *na_private;

#ifdef WITH_PIPES
	/* array of pipes that have this adapter as a parent */
	struct netmap_pipe_adapter **na_pipes;
	int na_next_pipe;	/* next free slot in the array */
	int na_max_pipes;	/* size of the array */
#endif /* WITH_PIPES */

	char name[64];
};
/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
#define NETMAP_OWNED_BY_ANY(na) \
	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))


/*
 * derived netmap adapters for various types of ports
 */
struct netmap_vp_adapter {	/* VALE software port */
	struct netmap_adapter up;

	/*
	 * Bridge support:
	 *
	 * bdg_port is the port number used in the bridge;
	 * na_bdg points to the bridge this NA is attached to.
	 */
	int bdg_port;
	struct nm_bridge *na_bdg;
	int retry;

	/* Offset of ethernet header for each packet. */
	u_int virt_hdr_len;
	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
	u_int mfs;
};


struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;

	struct net_device_ops nm_ndo;	// XXX linux only
	struct ethtool_ops    nm_eto;	// XXX linux only
	const struct ethtool_ops *save_ethtool;

	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
};

#ifdef WITH_GENERIC
/* Mitigation support. */
struct nm_generic_mit {
	struct hrtimer mit_timer;
	int mit_pending;
	int mit_ring_idx;	/* index of the ring being mitigated */
	struct netmap_adapter *mit_na;	/* backpointer */
};

struct netmap_generic_adapter {	/* emulated device */
	struct netmap_hw_adapter up;

	/* Pointer to a previously used netmap adapter. */
	struct netmap_adapter *prev;

	/* generic netmap adapters support:
	 * a net_device_ops struct overrides ndo_select_queue(),
	 * save_if_input saves the if_input hook (FreeBSD),
	 * mit implements rx interrupt mitigation,
	 */
	struct net_device_ops generic_ndo;
	void (*save_if_input)(struct ifnet *, struct mbuf *);

	struct nm_generic_mit *mit;
#ifdef linux
	netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
};
#endif /* WITH_GENERIC */

static __inline int
netmap_real_tx_rings(struct netmap_adapter *na)
{
	return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
}

static __inline int
netmap_real_rx_rings(struct netmap_adapter *na)
{
	return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
}
#ifdef WITH_VALE

/*
 * Bridge wrapper for non VALE ports attached to a VALE switch.
 *
 * The real device must already have its own netmap adapter (hwna).
 * The bridge wrapper and the hwna adapter share the same set of
 * netmap rings and buffers, but they have two separate sets of
 * krings descriptors, with tx/rx meanings swapped:
 *
 *                                  netmap
 *           bwrap     krings       rings       krings        hwna
 *         +------+   +------+     +-----+     +------+     +------+
 *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
 *         |      |   +------+ \ / +-----+    +------+     |      |
 *         |      |             X                          |      |
 *         |      |            / \                         |      |
 *         |      |   +------+/   \+-----+    +------+     |      |
 *         |rx_rings->|      |     |     |----|      |<-rx_rings|
 *         |      |   +------+     +-----+    +------+     |      |
 *         +------+                                        +------+
 *
 * - packets coming from the bridge go to the bwrap rx rings,
 *   which are also the hwna tx rings.  The bwrap notify callback
 *   will then complete the hwna tx (see netmap_bwrap_notify).
 *
 * - packets coming from the outside go to the hwna rx rings,
 *   which are also the bwrap tx rings.  The (overwritten) hwna
 *   notify method will then complete the bridge tx
 *   (see netmap_bwrap_intr_notify).
 *
 * The bridge wrapper may optionally connect the hwna 'host' rings
 * to the bridge. This is done by using a second port in the
 * bridge and connecting it to the 'host' netmap_vp_adapter
 * contained in the netmap_bwrap_adapter. The bwrap host adapter
 * cross-links the hwna host rings in the same way as shown above.
 *
 * - packets coming from the bridge and directed to the host stack
 *   are handled by the bwrap host notify callback
 *   (see netmap_bwrap_host_notify)
 *
 * - packets coming from the host stack are still handled by the
 *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
 *   but are diverted to the host adapter depending on the ring number.
 *
 */
struct netmap_bwrap_adapter {
	struct netmap_vp_adapter up;
	struct netmap_vp_adapter host;	/* for host rings */
	struct netmap_adapter *hwna;	/* the underlying device */

	/* backup of the hwna notify callback */
	int (*save_notify)(struct netmap_adapter *,
			u_int ring, enum txrx, int flags);
	/* backup of the hwna memory allocator */
	struct netmap_mem_d *save_nmd;

	/*
	 * When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
	 * This is only done when physical interfaces
	 * are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);


#endif /* WITH_VALE */

#ifdef WITH_PIPES

#define NM_MAXPIPES	64	/* max number of pipes per adapter */

struct netmap_pipe_adapter {
	struct netmap_adapter up;

	u_int id;	/* pipe identifier */
	int role;	/* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */

	struct netmap_adapter *parent; /* adapter that owns the memory */
	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
	int peer_ref;	/* 1 iff we are holding a ref to the peer */

	u_int parent_slot; /* index in the parent pipe array */
};

#endif /* WITH_PIPES */
/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
{
	int space = k->nr_hwtail - k->nr_hwcur;
	if (space < 0)
		space += k->nkr_num_slots;
	ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);

	return space;
}
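/*
 * Example (informative): with nr_hwcur == 10, nr_hwtail == 3 and
 * nkr_num_slots == 16, the difference wraps and nm_kr_rxspace()
 * returns 9.
 */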
/* True if no space in the tx ring. only valid after txsync_prologue */
static inline int
nm_kr_txempty(struct netmap_kring *kring)
{
	return kring->rcur == kring->nr_hwtail;
}


/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped.
 * We only care for 0 or !=0 as a return code.
 */
#define NM_KR_BUSY	1
#define NM_KR_STOPPED	2


static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}


static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}
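/*
 * Illustrative usage (hypothetical caller): bracket a sync operation
 * so that it is skipped when the ring is stopped or another thread
 * holds it.
 */
#if 0
	if (nm_kr_tryget(kring) == 0) {
		kring->nm_sync(kring, 0);
		nm_kr_put(kring);
	}
#endif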
/*
 * The following functions are used by individual drivers to
 * support netmap operation.
 *
 * netmap_attach() initializes a struct netmap_adapter, allocating the
 *	struct netmap_ring's and the struct selinfo.
 *
 * netmap_detach() frees the memory allocated by netmap_attach().
 *
 * netmap_transmit() replaces the if_transmit routine of the interface,
 *	and is used to intercept packets coming from the stack.
 *
 * netmap_load_map/netmap_reload_map are helper routines to set/reset
 *	the dmamap for a packet buffer
 *
 * netmap_reset() is a helper routine to be called in the hw driver
 *	when reinitializing a ring. It should not be called by
 *	virtual ports (vale, pipes, monitor)
 */
int netmap_attach(struct netmap_adapter *);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
	enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
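/*
 * Minimal attach sketch (illustrative only): a native driver fills a
 * netmap_adapter and registers it. "my_txsync", "my_rxsync",
 * "my_register", "ntxd" and "nrxd" are hypothetical driver names.
 */
#if 0
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = ifp;
	na.num_tx_desc = ntxd;	/* descriptor counts from the device */
	na.num_rx_desc = nrxd;
	na.nm_txsync = my_txsync;
	na.nm_rxsync = my_rxsync;
	na.nm_register = my_register;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
#endif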
/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);


#ifdef WITH_VALE
/* functions used by external modules to interface with VALE */
#define netmap_vp_to_ifp(_vp)		((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp)		(NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp)	(NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp)		((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#else /* !WITH_VALE */
#define netmap_vp_to_ifp(_vp)		NULL
#define netmap_ifp_to_vp(_ifp)		NULL
#define netmap_ifp_to_host_vp(_ifp)	NULL
#define netmap_bdg_idx(_vp)		-1
#define netmap_bdg_name(_vp)		NULL
#endif /* WITH_VALE */

static inline int
nm_native_on(struct netmap_adapter *na)
{
	return na && na->na_flags & NAF_NATIVE_ON;
}

static inline int
nm_netmap_on(struct netmap_adapter *na)
{
	return na && na->na_flags & NAF_NETMAP_ON;
}

/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

	na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable |= IFCAP_NETMAP;
#endif
#ifdef __FreeBSD__
	na->if_transmit = ifp->if_transmit;
	ifp->if_transmit = netmap_transmit;
#else
	na->if_transmit = (void *)ifp->netdev_ops;
	ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
	((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
	ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
}


static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

#ifdef __FreeBSD__
	ifp->if_transmit = na->if_transmit;
#else
	ifp->netdev_ops = (void *)na->if_transmit;
	ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
	na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}


/*
 * validates parameters in the ring/kring, returns a value for head
 * If any error, returns ring_size to force a reinit.
 */
uint32_t nm_txsync_prologue(struct netmap_kring *);


/*
 * validates parameters in the ring/kring, returns a value for head,
 * and the 'reserved' value in the argument.
 * If any error, returns ring_size to force a reinit.
 */
uint32_t nm_rxsync_prologue(struct netmap_kring *);


/*
 * update kring and ring at the end of txsync.
 */
static inline void
nm_txsync_finalize(struct netmap_kring *kring)
{
	/* update ring tail to what the kernel knows */
	kring->ring->tail = kring->rtail = kring->nr_hwtail;

	/* note, head/rhead/hwcur might be behind cur/rcur
	 * if no carrier
	 */
	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
		kring->name, kring->nr_hwcur, kring->nr_hwtail,
		kring->rhead, kring->rcur, kring->rtail);
}


/*
 * update kring and ring at the end of rxsync
 */
static inline void
nm_rxsync_finalize(struct netmap_kring *kring)
{
	/* tell userspace that there might be new packets */
	//struct netmap_ring *ring = kring->ring;
	ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
		kring->nr_hwtail);
	kring->ring->tail = kring->rtail = kring->nr_hwtail;
	/* make a copy of the state for next round */
	kring->rhead = kring->ring->head;
	kring->rcur = kring->ring->cur;
}
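/*
 * Illustrative flow (hypothetical native driver): the prologue
 * validates the user-visible ring and returns the new head, the body
 * processes the slots, and finalize publishes the kernel state back
 * to the ring.
 */
#if 0
	u_int head = nm_txsync_prologue(kring);

	if (head >= kring->nkr_num_slots)
		return netmap_ring_reinit(kring);
	/* ... send slots from kring->nr_hwcur to head, reclaim ... */
	nm_txsync_finalize(kring);
	return 0;
#endif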
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
	if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {	\
		RD(5, "bad addr/len ring %d slot %d idx %d len %d",	\
			kring->ring_id, nm_i, slot->buf_idx, len);	\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} } while (0)
#else /* no debug version */
#define	NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} while (0)
#endif


/*---------------------------------------------------------------*/
/*
 * Support routines used by netmap subsystems
 * (native drivers, VALE, generic, pipes, monitors, ...)
 */


/* common routine for all functions that create a netmap adapter. It performs
 * two main tasks:
 * - if the na points to an ifp, mark the ifp as netmap capable
 *   using na as its native adapter;
 * - provide defaults for the setup callbacks and the memory allocator
 */
int netmap_attach_common(struct netmap_adapter *);
/* common actions to be performed on netmap adapter destruction */
void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
 * coming from a struct nmreq
 */
int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
/* update the ring parameters (number and size of tx and rx rings).
 * It calls the nm_config callback, if available.
 */
int netmap_update_config(struct netmap_adapter *na);
/* create and initialize the common fields of the krings array,
 * using the information that must be already available in the na.
 * tailroom can be used to request the allocation of additional
 * tailroom bytes after the krings array. This is used by
 * netmap_vp_adapter's (i.e., VALE ports) to make room for
 * leasing-related data structures
 */
int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
/* deletes the kring array of the adapter. The array must have
 * been created using netmap_krings_create
 */
void netmap_krings_delete(struct netmap_adapter *na);

/* set the stopped/enabled status of ring
 * When stopping, they also wait for all current activity on the ring to
 * terminate. The status change is then notified using the na nm_notify
 * callback.
 */
void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);

int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);

struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags, int *err);



u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);


#ifdef WITH_VALE
/*
 * The following bridge-related functions are used by other
 * kernel modules.
 *
 * VALE only supports unicast or broadcast. The lookup
 * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
 * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
 * XXX in practice "unknown" might be handled the same as broadcast.
 */
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
		const struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
	bdg_lookup_fn_t lookup;
	bdg_config_fn_t config;
	bdg_dtor_fn_t	dtor;
};

u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
		const struct netmap_vp_adapter *);

#define	NM_BDG_MAXPORTS		254	/* up to 254 */
#define	NM_BDG_BROADCAST	NM_BDG_MAXPORTS
#define	NM_BDG_NOPORT		(NM_BDG_MAXPORTS+1)

#define	NM_NAME			"vale"	/* prefix for bridge port name */

/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_init_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);

#else /* !WITH_VALE */
#define	netmap_get_bdg_na(_1, _2, _3)	0
#define netmap_init_bridges(_1)
#define	netmap_bdg_ctl(_1, _2)	EINVAL
#endif /* !WITH_VALE */

#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES	64	/* XXX how many? */
/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES	0
#define netmap_pipe_alloc(_1, _2)	EOPNOTSUPP
#define netmap_pipe_dealloc(_1)
#define netmap_get_pipe_na(_1, _2, _3)	0
#endif

#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else
#define netmap_get_monitor_na(_1, _2, _3) 0
#endif

/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
int netmap_dtor_locked(struct netmap_priv_d *priv);

int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);

/* netmap_adapter creation/destruction */

// #define NM_DEBUG_PUTGET 1

#ifdef NM_DEBUG_PUTGET

#define NM_DBG(f) __##f

void __netmap_adapter_get(struct netmap_adapter *na);

#define netmap_adapter_get(na)				\
	do {						\
		struct netmap_adapter *__na = na;	\
		D("getting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_get(__na);		\
	} while (0)

int __netmap_adapter_put(struct netmap_adapter *na);

#define netmap_adapter_put(na)				\
	({						\
		struct netmap_adapter *__na = na;	\
		D("putting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_put(__na);		\
	})

#else /* !NM_DEBUG_PUTGET */

#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);

#endif /* !NM_DEBUG_PUTGET */


/*
 * module variables
 */
#define NETMAP_BUF_BASE(na)	((na)->na_lut[0].vaddr)
#define NETMAP_BUF_SIZE(na)	((na)->na_lut_objsize)
extern int netmap_mitigate;	// XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose;	// XXX debugging
enum {                                  /* verbose flags */
	NM_VERB_ON = 1,                 /* generic verbose */
	NM_VERB_HOST = 0x2,             /* verbose host stack */
	NM_VERB_RXSYNC = 0x10,          /* verbose on rxsync/txsync */
	NM_VERB_TXSYNC = 0x20,
	NM_VERB_RXINTR = 0x100,         /* verbose on rx/tx intr (driver) */
	NM_VERB_TXINTR = 0x200,
	NM_VERB_NIC_RXSYNC = 0x1000,    /* verbose on rx/tx intr (driver) */
	NM_VERB_NIC_TXSYNC = 0x2000,
};

extern int netmap_txsync_retry;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;

/*
 * NA returns a pointer to the struct netmap_adapter from the ifp,
 * WNA is used to write it.
 */
#define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))

/*
 * Macros to determine if an interface is netmap capable or netmap enabled.
 * See the magic field in struct netmap_adapter.
 */
#ifdef __FreeBSD__
/*
 * on FreeBSD just use if_capabilities and if_capenable.
 */
#define NETMAP_CAPABLE(ifp)	(NA(ifp) && \
	(ifp)->if_capabilities & IFCAP_NETMAP )

#define	NETMAP_SET_CAPABLE(ifp)				\
	(ifp)->if_capabilities |= IFCAP_NETMAP

#else	/* linux */

/*
 * on linux:
 * we check if NA(ifp) is set and its first element has a related
 * magic value. The capenable is within the struct netmap_adapter.
 */
#define	NETMAP_MAGIC	0x52697a7a

#define NETMAP_CAPABLE(ifp)	(NA(ifp) && \
	((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )

#define	NETMAP_SET_CAPABLE(ifp)				\
	NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC

#endif	/* linux */

#ifdef __FreeBSD__

/* Assigns the device IOMMU domain to an allocator.
 * Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (0)

/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
    __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
{
}

/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
 * XXX can we do it without a callback ?
 */
static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map)
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	if (map)
		bus_dmamap_unload(tag, map);
}

/* update the map when a buffer changes. */
static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		bus_dmamap_unload(tag, map);
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
	}
}

#else	/* linux */

int nm_iommu_group_id(bus_dma_tag_t dev);
extern size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
#include <linux/dma-mapping.h>

static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		*map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	u_int sz = netmap_mem_get_bufsize(na->nm_mem);

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	u_int sz = netmap_mem_get_bufsize(na->nm_mem);

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}

	*map = dma_map_single(na->pdev, buf, sz,
			DMA_BIDIRECTIONAL);
}

/*
 * XXX How do we redefine these functions:
 *
 * on linux we need
 *	dma_map_single(&pdev->dev, virt_addr, len, direction)
 *	dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
 * The len can be implicit (on netmap it is NETMAP_BUF_SIZE)
 * unfortunately the direction is not, so we need to change
 * something to have a cross API
 */

#if 0
	struct e1000_buffer *buffer_info =  &tx_ring->buffer_info[l];
	/* set time_stamp *before* dma to help avoid a possible race */
	buffer_info->time_stamp = jiffies;
	buffer_info->mapped_as_page = false;
	buffer_info->length = len;
	//buffer_info->next_to_watch = l;
	/* reload dma map */
	dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
			NETMAP_BUF_SIZE, DMA_TO_DEVICE);
	buffer_info->dma = dma_map_single(&adapter->pdev->dev,
			addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);

	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("dma mapping error");
		/* goto dma_error; See e1000_put_txbuf() */
		/* XXX reset */
	}
	tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX

#endif

/*
 * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction.
 */
#define bus_dmamap_sync(_a, _b, _c)

#endif /* linux */


/*
 * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
 */
static inline int
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx += kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


static inline int
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx -= kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}
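/*
 * Example (informative): with nkr_num_slots == 256 and nkr_hwofs == 3,
 * netmap_idx_n2k(kr, 255) == 2 (wraps past the end) and
 * netmap_idx_k2n(kr, 2) == 255.
 */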
/* Entries of the look-up table. */
struct lut_entry {
	void *vaddr;		/* virtual address. */
	vm_paddr_t paddr;	/* physical address. */
};

struct netmap_obj_pool;

/*
 * NMB returns the virtual address of a buffer (buffer 0 on bad index)
 * PNMB also fills the physical address
 */
static inline void *
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
	struct lut_entry *lut = na->na_lut;
	uint32_t i = slot->buf_idx;
	return (unlikely(i >= na->na_lut_objtotal)) ?
		lut[0].vaddr : lut[i].vaddr;
}

static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
	uint32_t i = slot->buf_idx;
	struct lut_entry *lut = na->na_lut;
	void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;

	*pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
	return ret;
}
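/*
 * Illustrative use in a driver rxsync (hypothetical device fields
 * "rxr", "rxbuf" and "curr"): translate a slot to both addresses and
 * program the NIC descriptor with the physical one.
 */
#if 0
	uint64_t paddr;
	void *addr = PNMB(na, slot, &paddr);

	netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
	curr->read.pkt_addr = htole64(paddr);
#endif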
/* Generic version of NMB, which uses device-specific memory. */



void netmap_txsync_to_host(struct netmap_adapter *na);


/*
 * Structure associated to each thread which registered an interface.
 *
 * The first 4 fields of this structure are written by NIOCREGIF and
 * read by poll() and NIOC?XSYNC.
 *
 * There is low contention among writers (a correct user program
 * should have none) and among writers and readers, so we use a
 * single global lock to protect the structure initialization;
 * since initialization involves the allocation of memory,
 * we reuse the memory allocator lock.
 *
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
 * If np_nifp is NULL initialization has not been performed,
 * so they should return an error to userspace.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
 * each open("/dev/netmap"). The increment is performed by the first
 * function that calls netmap_get_memory() (currently called by
 * mmap(), NIOCGINFO and NIOCREGIF).
 * If the refcount is incremented, it is then decremented when the
 * private structure is destroyed.
 */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct netmap_adapter	*np_na;
	uint32_t	np_flags;	/* from the ioctl */
	u_int		np_txqfirst, np_txqlast; /* range of tx rings to scan */
	u_int		np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
	uint16_t	np_txpoll;	/* XXX and also np_rxpoll ? */

	struct netmap_mem_d	*np_mref;	/* use with NMG_LOCK held */
	/* np_refcount is only used on FreeBSD */
	int		np_refcount;	/* use with NMG_LOCK held */

	/* pointers to the selinfo to be used for selrecord.
	 * Either the local or the global one depending on the
	 * number of rings.
	 */
	NM_SELINFO_T *np_rxsi, *np_txsi;
	struct thread	*np_td;		/* kqueue, just debugging */
};
#ifdef WITH_MONITOR

struct netmap_monitor_adapter {
	struct netmap_adapter up;

	struct netmap_priv_d priv;
	uint32_t flags;
};

#endif /* WITH_MONITOR */


#ifdef WITH_GENERIC
/*
 * generic netmap emulation for devices that do not have
 * native netmap support.
 */
int generic_netmap_attach(struct ifnet *ifp);

int netmap_catch_rx(struct netmap_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);

//#define RATE_GENERIC  /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
#else
#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
#endif

/*
 * netmap_mitigation API. This is used by the generic adapter
 * to reduce the number of interrupt requests/selwakeup
 * to clients on incoming packets.
 */
void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
				struct netmap_adapter *na);
void netmap_mitigation_start(struct nm_generic_mit *mit);
void netmap_mitigation_restart(struct nm_generic_mit *mit);
int netmap_mitigation_active(struct nm_generic_mit *mit);
void netmap_mitigation_cleanup(struct nm_generic_mit *mit);
#endif /* WITH_GENERIC */
/* Shared declarations for the VALE switch. */

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/* struct 'virtio_net_hdr' from linux. */
struct nm_vnet_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

#define WORST_CASE_GSO_HEADER	(14+40+60)  /* IPv6 + TCP */

/* Private definitions for IPv4, IPv6, UDP and TCP headers. */

struct nm_iphdr {
	uint8_t		version_ihl;
	uint8_t		tos;
	uint16_t	tot_len;
	uint16_t	id;
	uint16_t	frag_off;
	uint8_t		ttl;
	uint8_t		protocol;
	uint16_t	check;
	uint32_t	saddr;
	uint32_t	daddr;
	/* The options start here. */
};

struct nm_tcphdr {
	uint16_t	source;
	uint16_t	dest;
	uint32_t	seq;
	uint32_t	ack_seq;
	uint8_t		doff;  /* Data offset + Reserved */
	uint8_t		flags;
	uint16_t	window;
	uint16_t	check;
	uint16_t	urg_ptr;
};

struct nm_udphdr {
	uint16_t	source;
	uint16_t	dest;
	uint16_t	len;
	uint16_t	check;
};

struct nm_ipv6hdr {
	uint8_t		priority_version;
	uint8_t		flow_lbl[3];

	uint16_t	payload_len;
	uint8_t		nexthdr;
	uint8_t		hop_limit;

	uint8_t		saddr[16];
	uint8_t		daddr[16];
};

/* Type used to store a checksum (in host byte order) that hasn't been
 * folded yet.
 */
#define rawsum_t uint32_t

rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
		      size_t datalen, uint16_t *check);
void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
		      size_t datalen, uint16_t *check);
uint16_t nm_csum_fold(rawsum_t cur_sum);
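/*
 * Illustrative checksum usage (hypothetical Ethernet+IPv4+TCP frame at
 * "pkt" of total length "len", no IP options): recompute the IPv4
 * header checksum and the TCP checksum in place.
 */
#if 0
	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt + 14);
	struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + 14 + 20);
	size_t tcplen = len - 14 - 20;	/* TCP header + payload */

	iph->check = 0;
	iph->check = nm_csum_ipv4(iph);
	nm_csum_tcpudp_ipv4(iph, tcph, tcplen, &tcph->check);
#endif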
void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
			   struct netmap_vp_adapter *dst_na,
			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
			   u_int *j, u_int lim, u_int *howmany);

/* persistent virtual port routines */
int nm_vi_persist(const char *, struct ifnet **);
void nm_vi_detach(struct ifnet *);
void nm_vi_init_index(void);

#endif /* _NET_NETMAP_KERN_H_ */