/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * The header contains the definitions of constants and function
 * prototypes used only in kernelspace.
 */

#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_

#define WITH_VALE	// comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR

#if defined(__FreeBSD__)

#define likely(x)	__builtin_expect((long)!!(x), 1L)
#define unlikely(x)	__builtin_expect((long)!!(x), 0L)

#define NM_LOCK_T	struct mtx
#define NMG_LOCK_T	struct mtx
#define NMG_LOCK_INIT()	mtx_init(&netmap_global_lock, \
				"netmap global lock", NULL, MTX_DEF)
#define NMG_LOCK_DESTROY()	mtx_destroy(&netmap_global_lock)
#define NMG_LOCK()	mtx_lock(&netmap_global_lock)
#define NMG_UNLOCK()	mtx_unlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	mtx_assert(&netmap_global_lock, MA_OWNED)

#define NM_SELINFO_T	struct selinfo
#define MBUF_LEN(m)	((m)->m_pkthdr.len)
#define MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
#define NM_SEND_UP(ifp, m)	((NA(ifp))->if_input)(ifp, m)

#define NM_ATOMIC_T	volatile int	// XXX ?
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)

#if __FreeBSD_version >= 1100005
struct netmap_adapter *netmap_getna(if_t ifp);
#endif

#if __FreeBSD_version >= 1100027
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	*((m)->m_ext.ext_cnt) = x
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ext_cnt)
#else
#define GET_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x)	*((m)->m_ext.ref_cnt) = x
#define PNT_MBUF_REFCNT(m)	((m)->m_ext.ref_cnt)
#endif

MALLOC_DECLARE(M_NETMAP);

// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
};

#elif defined (linux)

#define NM_LOCK_T	safe_spinlock_t	// see bsd_glue.h
#define NM_SELINFO_T	wait_queue_head_t
#define MBUF_LEN(m)	((m)->len)
#define MBUF_IFP(m)	((m)->dev)
#define NM_SEND_UP(ifp, m)	\
			do { \
			    m->priority = NM_MAGIC_PRIORITY_RX; \
			    netif_rx(m); \
			} while (0)

#define NM_ATOMIC_T	volatile long unsigned int

// XXX a mtx would suffice here too	20130404 gl
#define NMG_LOCK_T		struct semaphore
#define NMG_LOCK_INIT()		sema_init(&netmap_global_lock, 1)
#define NMG_LOCK_DESTROY()
#define NMG_LOCK()		down(&netmap_global_lock)
#define NMG_UNLOCK()		up(&netmap_global_lock)
#define NMG_LOCK_ASSERT()	// XXX to be completed

#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */

#elif defined (__APPLE__)

#warning apple support is incomplete.
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define NM_LOCK_T	IOLock *
#define NM_SELINFO_T	struct selinfo
#define MBUF_LEN(m)	((m)->m_pkthdr.len)
#define NM_SEND_UP(ifp, m)	((ifp)->if_input)(ifp, m)

#else

#error unsupported platform

#endif /* end - platform-specific code */

#define ND(format, ...)
#define D(format, ...)						\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		printf("%03d.%06d [%4d] %-25s " format "\n",	\
		    (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
		    __LINE__, __FUNCTION__, ##__VA_ARGS__);	\
	} while (0)

/* rate limited, lps indicates how many per second */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < lps)				\
			D(format, ##__VA_ARGS__);		\
	} while (0)
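/*
 * Example (illustrative only, not part of the API): typical use of the
 * macros above in a hot path. D() always logs; RD() caps the output,
 * here at 5 lines per second. The surrounding names (my_intr_handler,
 * ifp, ndrop) are hypothetical driver state.
 */
#if 0
static void
my_intr_handler(struct ifnet *ifp, int ndrop)
{
	D("interrupt on %s", ifp->if_xname);	/* unconditional log */
	RD(5, "dropped %d packets", ndrop);	/* at most 5 logs/sec */
}
#endif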
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;

const char *nm_dump_buf(char *p, int len, int lim, char *dst);

#include "netmap_mbq.h"

extern NMG_LOCK_T	netmap_global_lock;

/*
 * private, kernel view of a ring. Keeps track of the status of
 * a ring across system calls.
 *
 *	nr_hwcur	index of the next buffer to refill.
 *			It corresponds to ring->head
 *			at the time the system call returns.
 *
 *	nr_hwtail	index of the first buffer owned by the kernel.
 *			On RX, hwcur->hwtail are receive buffers
 *			not yet released. hwcur is advanced following
 *			ring->head, hwtail is advanced on incoming packets,
 *			and a wakeup is generated when hwtail passes ring->cur
 *			On TX, hwcur->rcur have been filled by the sender
 *			but not sent yet to the NIC; rcur->hwtail are available
 *			for new transmissions, and hwtail->hwcur-1 are pending
 *			transmissions not yet acknowledged.
 *
 * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
 * This is so that, on a reset, buffers owned by userspace are not
 * modified by the kernel. In particular:
 * RX rings: the next empty buffer (hwtail + hwofs) coincides with
 *	the next empty buffer as known by the hardware (next_to_check or so).
 * TX rings: hwcur + hwofs coincides with next_to_send
 *
 * For received packets, slot->flags is set to nkr_slot_flags
 * so we can provide a proper initial value (e.g. set NS_FORWARD
 * when operating in 'transparent' mode).
 *
 * The following fields are used to implement lock-free copy of packets
 * from input to output ports in the VALE switch:
 *	nkr_hwlease	buffer after the last one being copied.
 *			A writer in nm_bdg_flush reserves N buffers
 *			from nr_hwlease, advances it, then does the
 *			copy outside the lock.
 *			In RX rings (used for VALE ports),
 *			nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1
 *			In TX rings (used for NIC or host stack ports)
 *			nkr_hwcur <= nkr_hwlease < nkr_hwtail
 *	nkr_leases	array of nkr_num_slots where writers can report
 *			completion of their block. NR_NOSLOT (~0) indicates
 *			that the writer has not finished yet
 *	nkr_lease_idx	index of next free slot in nr_leases, to be assigned
 *
 * The kring is manipulated by txsync/rxsync and the generic netmap functions.
 *
 * Concurrent rxsync or txsync on the same ring are prevented
 * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
 * for NIC rings, and for TX rings attached to the host stack.
 *
 * RX rings attached to the host stack use an mbq (rx_queue) on both
 * rxsync_from_host() and netmap_transmit(). The mbq is protected
 * by its internal lock.
 *
 * RX rings attached to the VALE switch are accessed by both senders
 * and receiver. They are protected through the q_lock on the RX ring.
 */
struct netmap_kring {
	struct netmap_ring	*ring;

	uint32_t	nr_hwcur;
	uint32_t	nr_hwtail;

	/*
	 * Copies of values in user rings, so we do not need to look
	 * at the ring (which could be modified). These are set in the
	 * *sync_prologue()/finalize() routines.
	 */
	uint32_t	rhead;
	uint32_t	rcur;
	uint32_t	rtail;

	uint32_t	nr_kflags;	/* private driver flags */
#define NKR_PENDINTR	0x1		// Pending interrupt.
	uint32_t	nkr_num_slots;

	/*
	 * On a NIC reset, the NIC ring indexes may be reset but the
	 * indexes in the netmap rings remain the same. nkr_hwofs
	 * keeps track of the offset between the two.
	 */
	int32_t		nkr_hwofs;

	uint16_t	nkr_slot_flags;	/* initial value for flags */

	/* last_reclaim is an opaque marker to help reduce the frequency
	 * of operations such as reclaiming tx buffers. A possible use
	 * is to set it to ticks and do the reclaim only once per tick.
	 */
	uint64_t	last_reclaim;


	NM_SELINFO_T	si;		/* poll/select wait queue */
	NM_LOCK_T	q_lock;		/* protects kring and ring. */
	NM_ATOMIC_T	nr_busy;	/* prevent concurrent syscalls */

	struct netmap_adapter *na;

	/* The following fields are for VALE switch support */
	struct nm_bdg_fwd *nkr_ft;
	uint32_t	*nkr_leases;
#define NR_NOSLOT	((uint32_t)~0)	/* used in nkr_*lease* */
	uint32_t	nkr_hwlease;
	uint32_t	nkr_lease_idx;

	/* while nkr_stopped is set, no new [tr]xsync operations can
	 * be started on this kring.
	 * This is used by netmap_disable_all_rings()
	 * to find a synchronization point where critical data
	 * structures pointed to by the kring can be added or removed
	 */
	volatile int	nkr_stopped;
	/* Support for adapters without native netmap support.
	 * On tx rings we preallocate an array of tx buffers
	 * (same size as the netmap ring), on rx rings we
	 * store incoming mbufs in a queue that is drained by
	 * a rxsync.
	 */
	struct mbuf	**tx_pool;
	// u_int nr_ntc;	/* Emulation of a next-to-clean RX ring pointer. */
	struct mbq	rx_queue;	/* intercepted rx mbufs. */

	uint32_t	ring_id;	/* debugging */
	char		name[64];	/* diagnostic */

	/* [tx]sync callback for this kring.
	 * The default nm_kring_create callback (netmap_krings_create)
	 * sets the nm_sync callback of each hardware tx(rx) kring to
	 * the corresponding nm_txsync(nm_rxsync) taken from the
	 * netmap_adapter; moreover, it sets the sync callback
	 * of the host tx(rx) ring to netmap_txsync_to_host
	 * (netmap_rxsync_from_host).
	 *
	 * Overrides: the above configuration is not changed by
	 * any of the nm_krings_create callbacks.
	 */
	int (*nm_sync)(struct netmap_kring *kring, int flags);

#ifdef WITH_PIPES
	struct netmap_kring *pipe;	/* if this is a pipe ring,
					 * pointer to the other end
					 */
	struct netmap_ring *save_ring;	/* pointer to hidden rings
					 * (see netmap_pipe.c for details)
					 */
#endif /* WITH_PIPES */

#ifdef WITH_MONITOR
	/* pointer to the adapter that is monitoring this kring (if any) */
	struct netmap_monitor_adapter *monitor;
	/*
	 * Monitors work by intercepting the txsync and/or rxsync of the
	 * monitored krings. This is implemented by replacing
	 * the nm_sync pointer above and saving the previous
	 * one in save_sync below.
	 */
	int (*save_sync)(struct netmap_kring *kring, int flags);
#endif
} __attribute__((__aligned__(64)));


/* return the next index, with wraparound */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	return unlikely (i == lim) ? 0 : i + 1;
}


/* return the previous index, with wraparound */
static inline uint32_t
nm_prev(uint32_t i, uint32_t lim)
{
	return unlikely (i == 0) ? lim : i - 1;
}
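/*
 * Example (sketch): walking a ring with nm_next(). 'lim' is always
 * nkr_num_slots - 1, so the index wraps to 0 past the last slot.
 * This is the canonical loop shape used by [tr]xsync routines; the
 * fragment below assumes a kring variable is in scope.
 */
#if 0
	u_int nm_i = kring->nr_hwcur;		/* first slot to process */
	u_int lim = kring->nkr_num_slots - 1;

	while (nm_i != kring->rhead) {		/* up to the user's head */
		struct netmap_slot *slot = &kring->ring->slot[nm_i];
		/* ... process slot ... */
		nm_i = nm_next(nm_i, lim);	/* advance with wraparound */
	}
#endif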
/*
 *
 * Here is the layout for the Rx and Tx rings.

       RxRING                            TxRING

      +-----------------+            +-----------------+
      |                 |            |                 |
      |XXX free slot XXX|            |XXX free slot XXX|
      +-----------------+            +-----------------+
head->| owned by user   |<-hwcur     | not sent to nic |<-hwcur
      |                 |            | yet             |
      +-----------------+            |                 |
 cur->| available to    |            |                 |
      | user, not read  |            +-----------------+
      | yet             |       cur->| (being          |
      |                 |            |  prepared)      |
      |                 |            |                 |
      +-----------------+            +     ------      +
tail->|                 |<-hwtail    |                 |<-hwlease
      | (being          | ...        |                 | ...
      |  prepared)      | ...        |                 | ...
      +-----------------+ ...        |                 | ...
      |                 |<-hwlease   +-----------------+
      |                 |      tail->|                 |<-hwtail
      |                 |            |                 |
      |                 |            |                 |
      |                 |            |                 |
      +-----------------+            +-----------------+

 * The cur/tail (user view) and hwcur/hwtail (kernel view)
 * are used in the normal operation of the card.
 *
 * When a ring is the output of a switch port (Rx ring for
 * a VALE port, Tx ring for the host stack or NIC), slots
 * are reserved in blocks through 'hwlease' which points
 * to the next unused slot.
 * On an Rx ring, hwlease is always after hwtail,
 * and completions cause hwtail to advance.
 * On a Tx ring, hwlease is always between cur and hwtail,
 * and completions cause cur to advance.
 *
 * nm_kr_space() returns the maximum number of slots that
 * can be assigned.
 * nm_kr_lease() reserves the required number of buffers,
 *    advances nkr_hwlease and also returns an entry in
 *    a circular array where completions should be reported.
 */



enum txrx { NR_RX = 0, NR_TX = 1 };

struct netmap_vp_adapter; // forward

/*
 * The "struct netmap_adapter" extends the "struct adapter"
 * (or equivalent) device descriptor.
 * It contains all base fields needed to support netmap operation.
 * There are in fact different types of netmap adapters
 * (native, generic, VALE switch...) so a netmap_adapter is
 * just the first field in the derived type.
 */
struct netmap_adapter {
	/*
	 * On linux we do not have a good way to tell if an interface
	 * is netmap-capable. So we always use the following trick:
	 * NA(ifp) points here, and the first entry (which hopefully
	 * always exists and is at least 32 bits) contains a magic
	 * value which we can use to detect that the interface is good.
	 */
	uint32_t magic;
	uint32_t na_flags;	/* enabled, and other flags */
#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
				 * useful during initialization
				 */
#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
				 * forwarding packets coming from this
				 * interface
				 */
#define NAF_MEM_OWNER	8	/* the adapter is responsible for the
				 * deallocation of the memory allocator
				 */
#define NAF_NATIVE_ON	16	/* the adapter is native and the attached
				 * interface is in netmap mode.
				 * Virtual ports (vale, pipe, monitor...)
				 * should never use this flag.
				 */
#define NAF_NETMAP_ON	32	/* netmap is active (either native or
				 * emulated). Where possible (e.g. FreeBSD)
				 * IFCAP_NETMAP also mirrors this flag.
				 */
#define NAF_HOST_RINGS	64	/* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
#define NAF_BUSY	(1U<<31) /* the adapter is used internally and
				  * cannot be registered from userspace
				  */
	int active_fds; /* number of user-space descriptors using this
			 interface, which is equal to the number of
			 struct netmap_if objs in the mapped region. */

	u_int num_rx_rings; /* number of adapter receive rings */
	u_int num_tx_rings; /* number of adapter transmit rings */

	u_int num_tx_desc; /* number of descriptors in each queue */
	u_int num_rx_desc;

	/* tx_rings and rx_rings are private but allocated
	 * as a contiguous chunk of memory. Each array has
	 * N+1 entries, for the adapter queues and for the host queue.
	 */
	struct netmap_kring *tx_rings; /* array of TX rings. */
	struct netmap_kring *rx_rings; /* array of RX rings. */

	void *tailroom;		/* space below the rings array */
				/* (used for leases) */


	NM_SELINFO_T tx_si, rx_si;	/* global wait queues */

	/* count users of the global wait queues */
	int tx_si_users, rx_si_users;

	void *pdev; /* used to store pci device */

	/* copy of if_qflush and if_transmit pointers, to intercept
	 * packets from the network stack when netmap is active.
	 */
	int	(*if_transmit)(struct ifnet *, struct mbuf *);

	/* copy of if_input for netmap_send_up() */
	void	(*if_input)(struct ifnet *, struct mbuf *);
	/* references to the ifnet and device routines, used by
	 * the generic netmap functions.
	 */
	struct ifnet *ifp; /* adapter is ifp->if_softc */

	/*---- callbacks for this netmap adapter -----*/
	/*
	 * nm_dtor() is the cleanup routine called when destroying
	 *	the adapter.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_register() is called on NIOCREGIF and close() to enter
	 *	or exit netmap mode on the NIC
	 *	Called with NMG_LOCK held.
	 *
	 * nm_txsync() pushes packets to the underlying hw/switch
	 *
	 * nm_rxsync() collects packets from the underlying hw/switch
	 *
	 * nm_config() returns configuration information from the OS
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_create() creates and initializes the tx_rings and
	 *	rx_rings arrays of kring structures. In particular, it
	 *	sets the nm_sync callbacks for each ring.
	 *	There is no need to also allocate the corresponding
	 *	netmap_rings, since netmap_mem_rings_create() will always
	 *	be called to provide the missing ones.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_krings_delete() cleans up and deletes the tx_rings and
	 *	rx_rings arrays
	 *	Called with NMG_LOCK held.
	 *
	 * nm_notify() is used to act after data have become available
	 *	(or the stopped state of the ring has changed)
	 *	For hw devices this is typically a selwakeup(),
	 *	but for NIC/host ports attached to a switch (or vice-versa)
	 *	we also need to invoke the 'txsync' code downstream.
	 */
	void (*nm_dtor)(struct netmap_adapter *);

	int (*nm_register)(struct netmap_adapter *, int onoff);

	int (*nm_txsync)(struct netmap_kring *kring, int flags);
	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ    1
#define NAF_FORCE_RECLAIM 2
	/* return configuration information */
	int (*nm_config)(struct netmap_adapter *,
		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
	int (*nm_krings_create)(struct netmap_adapter *);
	void (*nm_krings_delete)(struct netmap_adapter *);
	int (*nm_notify)(struct netmap_adapter *,
		u_int ring, enum txrx, int flags);
#define NAF_DISABLE_NOTIFY 8	/* notify that the stopped state of the
				 * ring has changed (kring->nkr_stopped)
				 */

#ifdef WITH_VALE
	/*
	 * nm_bdg_attach() initializes the na_vp field to point
	 *	to an adapter that can be attached to a VALE switch. If the
	 *	current adapter is already a VALE port, na_vp is simply a cast;
	 *	otherwise, na_vp points to a netmap_bwrap_adapter.
	 *	If applicable, this callback also initializes na_hostvp,
	 *	that can be used to connect the adapter host rings to the
	 *	switch.
	 *	Called with NMG_LOCK held.
	 *
	 * nm_bdg_ctl() is called on the actual attach/detach
	 *	to/from the switch, to perform adapter-specific
	 *	initializations
	 *	Called with NMG_LOCK held.
	 */
	int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
	int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);

	/* adapter used to attach this adapter to a VALE switch (if any) */
	struct netmap_vp_adapter *na_vp;
	/* adapter used to attach the host rings of this adapter
	 * to a VALE switch (if any) */
	struct netmap_vp_adapter *na_hostvp;
#endif

	/* standard refcount to control the lifetime of the adapter
	 * (it should be equal to the lifetime of the corresponding ifp)
	 */
	int na_refcount;
	/* memory allocator (opaque)
	 * We also cache a pointer to the lut_entry for translating
	 * buffer addresses, and the total number of buffers.
	 */
	struct netmap_mem_d *nm_mem;
	struct lut_entry *na_lut;
	uint32_t na_lut_objtotal;	/* max buffer index */
	uint32_t na_lut_objsize;	/* buffer size */

	/* additional information attached to this adapter
	 * by other netmap subsystems. Currently used by
	 * bwrap and LINUX/v1000.
	 */
	void *na_private;

#ifdef WITH_PIPES
	/* array of pipes that have this adapter as a parent */
	struct netmap_pipe_adapter **na_pipes;
	int na_next_pipe;	/* next free slot in the array */
	int na_max_pipes;	/* size of the array */
#endif /* WITH_PIPES */

	char name[64];
};


/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(na)	((na)->na_flags & NAF_BUSY)
#define NETMAP_OWNED_BY_ANY(na) \
	(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))


/*
 * derived netmap adapters for various types of ports
 */
struct netmap_vp_adapter {	/* VALE software port */
	struct netmap_adapter up;

	/*
	 * Bridge support:
	 *
	 * bdg_port is the port number used in the bridge;
	 * na_bdg points to the bridge this NA is attached to.
	 */
	int bdg_port;
	struct nm_bridge *na_bdg;
	int retry;

	/* Offset of ethernet header for each packet. */
	u_int virt_hdr_len;
	/* Maximum Frame Size, used in bdg_mismatch_datapath() */
	u_int mfs;
};


struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;

	struct net_device_ops nm_ndo;	// XXX linux only
	struct ethtool_ops    nm_eto;	// XXX linux only
	const struct ethtool_ops *save_ethtool;

	int (*nm_hw_register)(struct netmap_adapter *, int onoff);
};

/* Mitigation support. */
struct nm_generic_mit {
	struct hrtimer mit_timer;
	int mit_pending;
	int mit_ring_idx;	/* index of the ring being mitigated */
	struct netmap_adapter *mit_na;	/* backpointer */
};

struct netmap_generic_adapter {	/* emulated device */
	struct netmap_hw_adapter up;

	/* Pointer to a previously used netmap adapter. */
	struct netmap_adapter *prev;

	/* generic netmap adapters support:
	 * a net_device_ops struct overrides ndo_select_queue(),
	 * save_if_input saves the if_input hook (FreeBSD),
	 * mit implements rx interrupt mitigation,
	 */
	struct net_device_ops generic_ndo;
	void (*save_if_input)(struct ifnet *, struct mbuf *);

	struct nm_generic_mit *mit;
#ifdef linux
	netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
};

static __inline int
netmap_real_tx_rings(struct netmap_adapter *na)
{
	return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
}

static __inline int
netmap_real_rx_rings(struct netmap_adapter *na)
{
	return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
}
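/*
 * Example (sketch): since 'up' is the first field of every derived
 * adapter, code that receives a struct netmap_adapter * can recover
 * the derived type with a plain cast, as the native-flags helpers
 * below also do. 'my_hw_register' is a hypothetical callback, not
 * part of this API.
 */
#if 0
static int
my_hw_register(struct netmap_adapter *na, int onoff)
{
	/* valid because 'up' is the first member of the derived struct */
	struct netmap_hw_adapter *hwna = (struct netmap_hw_adapter *)na;

	/* ... use hwna->nm_hw_register, hwna->save_ethtool, ... */
	return 0;
}
#endif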
#ifdef WITH_VALE

/*
 * Bridge wrapper for non-VALE ports attached to a VALE switch.
 *
 * The real device must already have its own netmap adapter (hwna).
 * The bridge wrapper and the hwna adapter share the same set of
 * netmap rings and buffers, but they have two separate sets of
 * krings descriptors, with tx/rx meanings swapped:
 *
 *                                  netmap
 *           bwrap     krings       rings       krings       hwna
 *         +------+   +------+     +-----+     +------+    +------+
 *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
 *         |      |   +------+ \ / +-----+    +------+    |      |
 *         |      |             X                         |      |
 *         |      |            / \                        |      |
 *         |      |   +------+/   \+-----+    +------+    |      |
 *         |rx_rings->|      |     |     |----|      |<-rx_rings|
 *         |      |   +------+     +-----+    +------+    |      |
 *         +------+                                       +------+
 *
 * - packets coming from the bridge go to the bwrap rx rings,
 *   which are also the hwna tx rings.  The bwrap notify callback
 *   will then complete the hwna tx (see netmap_bwrap_notify).
 *
 * - packets coming from the outside go to the hwna rx rings,
 *   which are also the bwrap tx rings.  The (overwritten) hwna
 *   notify method will then complete the bridge tx
 *   (see netmap_bwrap_intr_notify).
 *
 * The bridge wrapper may optionally connect the hwna 'host' rings
 * to the bridge. This is done by using a second port in the
 * bridge and connecting it to the 'host' netmap_vp_adapter
 * contained in the netmap_bwrap_adapter. The bwrap host adapter
 * cross-links the hwna host rings in the same way as shown above.
 *
 * - packets coming from the bridge and directed to the host stack
 *   are handled by the bwrap host notify callback
 *   (see netmap_bwrap_host_notify)
 *
 * - packets coming from the host stack are still handled by the
 *   overwritten hwna notify callback (netmap_bwrap_intr_notify),
 *   but are diverted to the host adapter depending on the ring number.
 *
 */
struct netmap_bwrap_adapter {
	struct netmap_vp_adapter up;
	struct netmap_vp_adapter host;	/* for host rings */
	struct netmap_adapter *hwna;	/* the underlying device */

	/* backup of the hwna notify callback */
	int (*save_notify)(struct netmap_adapter *,
			u_int ring, enum txrx, int flags);
	/* backup of the hwna memory allocator */
	struct netmap_mem_d *save_nmd;

	/*
	 * When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
	 * This is only done when physical interfaces
	 * are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);


#endif /* WITH_VALE */

#ifdef WITH_PIPES

#define NM_MAXPIPES	64	/* max number of pipes per adapter */

struct netmap_pipe_adapter {
	struct netmap_adapter up;

	u_int id;	/* pipe identifier */
	int role;	/* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */

	struct netmap_adapter *parent; /* adapter that owns the memory */
	struct netmap_pipe_adapter *peer; /* the other end of the pipe */
	int peer_ref;	/* 1 iff we are holding a ref to the peer */

	u_int parent_slot; /* index in the parent pipe array */
};

#endif /* WITH_PIPES */


/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
{
	int space = k->nr_hwtail - k->nr_hwcur;
	if (space < 0)
		space += k->nkr_num_slots;
	ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);

	return space;
}
/* True if no space in the tx ring, only valid after txsync_prologue */
static inline int
nm_kr_txempty(struct netmap_kring *kring)
{
	return kring->rcur == kring->nr_hwtail;
}


/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped.
 * We only care for 0 or !=0 as a return code.
 */
#define NM_KR_BUSY	1
#define NM_KR_STOPPED	2


static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}


static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}
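/*
 * Example (sketch): the canonical pattern for callers of
 * nm_kr_tryget()/nm_kr_put(). Anything touching the kring goes
 * between the two calls; a non-zero return means skip this ring.
 * The fragment assumes a kring variable is in scope.
 */
#if 0
	if (nm_kr_tryget(kring))	/* busy or stopped */
		continue;		/* move on to the next ring */
	/* exclusive access to the kring: safe to run the sync callback */
	kring->nm_sync(kring, 0);
	nm_kr_put(kring);		/* release the ring */
#endif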
/*
 * The following functions are used by individual drivers to
 * support netmap operation.
 *
 * netmap_attach() initializes a struct netmap_adapter, allocating the
 *	struct netmap_ring's and the struct selinfo.
 *
 * netmap_detach() frees the memory allocated by netmap_attach().
 *
 * netmap_transmit() replaces the if_transmit routine of the interface,
 *	and is used to intercept packets coming from the stack.
 *
 * netmap_load_map/netmap_reload_map are helper routines to set/reset
 *	the dmamap for a packet buffer
 *
 * netmap_reset() is a helper routine to be called in the hw driver
 *	when reinitializing a ring. It should not be called by
 *	virtual ports (vale, pipes, monitor)
 */
int netmap_attach(struct netmap_adapter *);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
	enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
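/*
 * Example (sketch): how a NIC driver with native netmap support
 * typically registers with netmap at attach time. Names with a
 * 'my_' prefix and the softc layout are hypothetical driver code,
 * not part of this API.
 */
#if 0
static int
my_netmap_attach(struct my_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	na.nm_txsync = my_netmap_txsync;	/* driver callbacks */
	na.nm_rxsync = my_netmap_rxsync;
	na.nm_register = my_netmap_register;
	return netmap_attach(&na);	/* 0 on success */
}
#endif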
/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);


#ifdef WITH_VALE
/* functions used by external modules to interface with VALE */
#define netmap_vp_to_ifp(_vp)		((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp)		(NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp)	(NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp)		((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#else /* !WITH_VALE */
#define netmap_vp_to_ifp(_vp)		NULL
#define netmap_ifp_to_vp(_ifp)		NULL
#define netmap_ifp_to_host_vp(_ifp)	NULL
#define netmap_bdg_idx(_vp)		-1
#define netmap_bdg_name(_vp)		NULL
#endif /* WITH_VALE */

static inline int
nm_native_on(struct netmap_adapter *na)
{
	return na && na->na_flags & NAF_NATIVE_ON;
}

static inline int
nm_netmap_on(struct netmap_adapter *na)
{
	return na && na->na_flags & NAF_NETMAP_ON;
}

/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

	na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable |= IFCAP_NETMAP;
#endif
#ifdef __FreeBSD__
	na->if_transmit = ifp->if_transmit;
	ifp->if_transmit = netmap_transmit;
#else
	na->if_transmit = (void *)ifp->netdev_ops;
	ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
	((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
	ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
}


static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;

#ifdef __FreeBSD__
	ifp->if_transmit = na->if_transmit;
#else
	ifp->netdev_ops = (void *)na->if_transmit;
	ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
	na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
	ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}


/*
 * validates parameters in the ring/kring, returns a value for head
 * If any error, returns ring_size to force a reinit.
 */
uint32_t nm_txsync_prologue(struct netmap_kring *);


/*
 * validates parameters in the ring/kring, returns a value for head,
 * and the 'reserved' value in the argument.
 * If any error, returns ring_size to force a reinit.
 */
uint32_t nm_rxsync_prologue(struct netmap_kring *);


/*
 * update kring and ring at the end of txsync.
 */
static inline void
nm_txsync_finalize(struct netmap_kring *kring)
{
	/* update ring tail to what the kernel knows */
	kring->ring->tail = kring->rtail = kring->nr_hwtail;

	/* note, head/rhead/hwcur might be behind cur/rcur
	 * if no carrier
	 */
	ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
		kring->name, kring->nr_hwcur, kring->nr_hwtail,
		kring->rhead, kring->rcur, kring->rtail);
}


/*
 * update kring and ring at the end of rxsync
 */
static inline void
nm_rxsync_finalize(struct netmap_kring *kring)
{
	/* tell userspace that there might be new packets */
	//struct netmap_ring *ring = kring->ring;
	ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
		kring->nr_hwtail);
	kring->ring->tail = kring->rtail = kring->nr_hwtail;
	/* make a copy of the state for next round */
	kring->rhead = kring->ring->head;
	kring->rcur = kring->ring->cur;
}


/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
	if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) {	\
		RD(5, "bad addr/len ring %d slot %d idx %d len %d",	\
			kring->ring_id, nm_i, slot->buf_idx, len);	\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
	} } while (0)
#else /* no debug version */
#define NM_CHECK_ADDR_LEN(_na, _a, _l)	do {				\
		if (_l > NETMAP_BUF_SIZE(_na))				\
			_l = NETMAP_BUF_SIZE(_na);			\
} while (0)
#endif
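/*
 * Example (sketch): how the core is expected to drive a tx ring,
 * roughly as in the NIOCTXSYNC path: validate the user ring with
 * nm_txsync_prologue(), run the kring's nm_sync callback, then
 * publish the new state with nm_txsync_finalize(). Simplified,
 * locking and error handling omitted.
 */
#if 0
	if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
		netmap_ring_reinit(kring);	/* bogus user ring */
	} else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
		nm_txsync_finalize(kring);	/* expose the new tail */
	}
#endif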
/*---------------------------------------------------------------*/
/*
 * Support routines used by netmap subsystems
 * (native drivers, VALE, generic, pipes, monitors, ...)
 */


/* common routine for all functions that create a netmap adapter. It performs
 * two main tasks:
 * - if the na points to an ifp, mark the ifp as netmap capable
 *   using na as its native adapter;
 * - provide defaults for the setup callbacks and the memory allocator
 */
int netmap_attach_common(struct netmap_adapter *);
/* common actions to be performed on netmap adapter destruction */
void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
 * coming from a struct nmreq
 */
int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
/* update the ring parameters (number and size of tx and rx rings).
 * It calls the nm_config callback, if available.
 */
int netmap_update_config(struct netmap_adapter *na);
/* create and initialize the common fields of the krings array,
 * using the information that must be already available in the na.
 * tailroom can be used to request the allocation of additional
 * tailroom bytes after the krings array. This is used by
 * netmap_vp_adapters (i.e., VALE ports) to make room for
 * leasing-related data structures
 */
int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
/* deletes the kring array of the adapter. The array must have
 * been created using netmap_krings_create
 */
void netmap_krings_delete(struct netmap_adapter *na);

/* set the stopped/enabled status of a ring.
 * When stopping, they also wait for all current activity on the ring to
 * terminate. The status change is then notified using the na nm_notify
 * callback.
 */
void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);

int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);

struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, uint32_t flags, int *err);



u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);


#ifdef WITH_VALE
/*
 * The following bridge-related functions are used by other
 * kernel modules.
 *
 * VALE only supports unicast or broadcast. The lookup
 * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
 * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
 * XXX in practice "unknown" might be handled the same as broadcast.
 */
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
		const struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
	bdg_lookup_fn_t lookup;
	bdg_config_fn_t config;
	bdg_dtor_fn_t	dtor;
};

u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
		const struct netmap_vp_adapter *);

#define NM_BDG_MAXPORTS		254	/* up to 254 */
#define NM_BDG_BROADCAST	NM_BDG_MAXPORTS
#define NM_BDG_NOPORT		(NM_BDG_MAXPORTS+1)

#define NM_NAME			"vale"	/* prefix for bridge port name */

/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_init_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);

#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3)	0
#define netmap_init_bridges(_1)
#define netmap_bdg_ctl(_1, _2)	EINVAL
#endif /* !WITH_VALE */
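/*
 * Example (sketch): an external module can replace the forwarding
 * logic of a VALE switch by installing its own netmap_bdg_ops
 * through netmap_bdg_ctl(). The lookup below simply floods every
 * packet; 'my_lookup', 'my_ops' and the nmreq setup are hypothetical,
 * and the exact nr_cmd to use depends on the netmap version.
 */
#if 0
static u_int
my_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
	const struct netmap_vp_adapter *vpna)
{
	return NM_BDG_BROADCAST;	/* flood to all ports */
}

static struct netmap_bdg_ops my_ops = { my_lookup, NULL, NULL };

	/* nmr->nr_name selects the switch, e.g. "vale0:" */
	error = netmap_bdg_ctl(&nmr, &my_ops);
#endif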
#ifdef WITH_PIPES

/* max number of pipes per device */
#define NM_MAXPIPES	64	/* XXX how many? */
/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);

#else /* !WITH_PIPES */
#define NM_MAXPIPES	0
#define netmap_pipe_alloc(_1, _2)	EOPNOTSUPP
#define netmap_pipe_dealloc(_1)
#define netmap_get_pipe_na(_1, _2, _3)	0
#endif

#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else
#define netmap_get_monitor_na(_1, _2, _3) 0
#endif

/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
int netmap_dtor_locked(struct netmap_priv_d *priv);

int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);

/* netmap_adapter creation/destruction */

// #define NM_DEBUG_PUTGET 1

#ifdef NM_DEBUG_PUTGET

#define NM_DBG(f) __##f

void __netmap_adapter_get(struct netmap_adapter *na);

#define netmap_adapter_get(na)				\
	do {						\
		struct netmap_adapter *__na = na;	\
		D("getting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_get(__na);		\
	} while (0)

int __netmap_adapter_put(struct netmap_adapter *na);

#define netmap_adapter_put(na)				\
	({						\
		struct netmap_adapter *__na = na;	\
		D("putting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount);	\
		__netmap_adapter_put(__na);		\
	})

#else /* !NM_DEBUG_PUTGET */

#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);

#endif /* !NM_DEBUG_PUTGET */


/*
 * module variables
 */
#define NETMAP_BUF_BASE(na)	((na)->na_lut[0].vaddr)
#define NETMAP_BUF_SIZE(na)	((na)->na_lut_objsize)
extern int netmap_mitigate;	// XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose;	// XXX debugging
enum {                                  /* verbose flags */
	NM_VERB_ON = 1,                 /* generic verbose */
	NM_VERB_HOST = 0x2,             /* verbose host stack */
	NM_VERB_RXSYNC = 0x10,          /* verbose on rxsync/txsync */
	NM_VERB_TXSYNC = 0x20,
	NM_VERB_RXINTR = 0x100,         /* verbose on rx/tx intr (driver) */
	NM_VERB_TXINTR = 0x200,
	NM_VERB_NIC_RXSYNC = 0x1000,    /* verbose on rx/tx intr (driver) */
	NM_VERB_NIC_TXSYNC = 0x2000,
};

extern int netmap_txsync_retry;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;

/*
 * NA returns a pointer to the struct netmap_adapter from the ifp,
 * WNA is used to write it.
 */
#ifndef WNA
#define WNA(_ifp)	(_ifp)->if_pspare[0]
#endif
#define NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))
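/*
 * Example (sketch): the usual pattern for the knobs above, gating
 * diagnostics on netmap_verbose and reaching the adapter via NA().
 * The fragment assumes 'ifp' and a ring index 'q' are in scope.
 */
#if 0
	if (netmap_verbose & NM_VERB_RXINTR)
		RD(1, "rx intr on %s ring %d", NA(ifp)->name, q);
#endif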
/*
 * Macros to determine if an interface is netmap capable or netmap enabled.
 * See the magic field in struct netmap_adapter.
 */
#ifdef __FreeBSD__
/*
 * on FreeBSD just use if_capabilities and if_capenable.
 */
#define NETMAP_CAPABLE(ifp)	(NA(ifp) && \
	(ifp)->if_capabilities & IFCAP_NETMAP )

#define NETMAP_SET_CAPABLE(ifp)	\
	(ifp)->if_capabilities |= IFCAP_NETMAP

#else	/* linux */

/*
 * on linux:
 * we check if NA(ifp) is set and its first element has a related
 * magic value. The capenable is within the struct netmap_adapter.
 */
#define NETMAP_MAGIC	0x52697a7a

#define NETMAP_CAPABLE(ifp)	(NA(ifp) && \
	((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )

#define NETMAP_SET_CAPABLE(ifp)	\
	NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC

#endif	/* linux */

#ifdef __FreeBSD__

/* Assigns the device IOMMU domain to an allocator.
 * Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (0)

/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
    __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
{
}

/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
 * XXX can we do it without a callback ?
 */
static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map)
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	if (map)
		bus_dmamap_unload(tag, map);
}

/* update the map when a buffer changes. */
static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		bus_dmamap_unload(tag, map);
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
	}
}
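/*
 * Example (sketch): drivers call netmap_reload_map() when userspace
 * has swapped the buffer attached to a slot, signalled by the
 * NS_BUF_CHANGED slot flag (from net/netmap.h), so the DMA map
 * tracks the new address. 'txr->txtag' and 'txbuf->map' are
 * hypothetical driver state.
 */
#if 0
	if (slot->flags & NS_BUF_CHANGED) {
		/* the buffer has changed, reload the DMA map */
		netmap_reload_map(na, txr->txtag, txbuf->map, addr);
		slot->flags &= ~NS_BUF_CHANGED;
	}
#endif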
#else	/* linux */

int nm_iommu_group_id(bus_dma_tag_t dev);
extern size_t	netmap_mem_get_bufsize(struct netmap_mem_d *);
#include <linux/dma-mapping.h>

static inline void
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		*map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map)
{
	u_int sz = netmap_mem_get_bufsize(na->nm_mem);

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}
}

static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	u_int sz = netmap_mem_get_bufsize(na->nm_mem);

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}

	*map = dma_map_single(na->pdev, buf, sz,
				DMA_BIDIRECTIONAL);
}

/*
 * XXX How do we redefine these functions:
 *
 * on linux we need
 *	dma_map_single(&pdev->dev, virt_addr, len, direction)
 *	dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
 * The len can be implicit (on netmap it is NETMAP_BUF_SIZE)
 * unfortunately the direction is not, so we need to change
 * something to have a cross API
 */

#if 0
	struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
	/* set time_stamp *before* dma to help avoid a possible race */
	buffer_info->time_stamp = jiffies;
	buffer_info->mapped_as_page = false;
	buffer_info->length = len;
	//buffer_info->next_to_watch = l;
	/* reload dma map */
	dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
			NETMAP_BUF_SIZE, DMA_TO_DEVICE);
	buffer_info->dma = dma_map_single(&adapter->pdev->dev,
			addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);

	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
		D("dma mapping error");
		/* goto dma_error; See e1000_put_txbuf() */
		/* XXX reset */
	}
	tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX

#endif

/*
 * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction.
 */
#define bus_dmamap_sync(_a, _b, _c)

#endif /* linux */


/*
 * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
 */
static inline int
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx += kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


static inline int
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx -= kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


/* Entries of the look-up table. */
struct lut_entry {
	void *vaddr;		/* virtual address. */
	vm_paddr_t paddr;	/* physical address. */
};

struct netmap_obj_pool;

/*
 * NMB returns the virtual address of a buffer (buffer 0 on bad index)
 * PNMB also fills the physical address
 */
static inline void *
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
	struct lut_entry *lut = na->na_lut;
	uint32_t i = slot->buf_idx;
	return (unlikely(i >= na->na_lut_objtotal)) ?
		lut[0].vaddr : lut[i].vaddr;
}

static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
	uint32_t i = slot->buf_idx;
	struct lut_entry *lut = na->na_lut;
	void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;

	*pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
	return ret;
}

/* Generic version of NMB, which uses device-specific memory. */



void netmap_txsync_to_host(struct netmap_adapter *na);
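/*
 * Example (sketch): a fragment from a hypothetical driver tx loop,
 * combining the helpers above: translate the kring index to the NIC
 * ring index, fetch the buffer's virtual and bus addresses with
 * PNMB(), and sanity-check them before programming the descriptor.
 * 'kring', 'na', 'nm_i', 'slot' are assumed to be in scope.
 */
#if 0
	u_int nic_i = netmap_idx_k2n(kring, nm_i);	/* NIC ring index */
	u_int len = slot->len;
	uint64_t paddr;
	void *addr = PNMB(na, slot, &paddr);	/* vaddr + bus address */

	NM_CHECK_ADDR_LEN(na, addr, len);	/* clamp bogus addr/len */
	/* ... program NIC descriptor nic_i with (paddr, len) ... */
#endif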
/*
 * Structure associated to each thread which registered an interface.
 *
 * The first 4 fields of this structure are written by NIOCREGIF and
 * read by poll() and NIOC?XSYNC.
 *
 * There is low contention among writers (a correct user program
 * should have none) and among writers and readers, so we use a
 * single global lock to protect the structure initialization;
 * since initialization involves the allocation of memory,
 * we reuse the memory allocator lock.
 *
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
 * If np_nifp is NULL initialization has not been performed,
 * so they should return an error to userspace.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
 * each open("/dev/netmap"). The increment is performed by the first
 * function that calls netmap_get_memory() (currently called by
 * mmap(), NIOCGINFO and NIOCREGIF).
 * If the refcount is incremented, it is then decremented when the
 * private structure is destroyed.
 */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct netmap_adapter	*np_na;
	uint32_t	np_flags;	/* from the ioctl */
	u_int		np_txqfirst, np_txqlast; /* range of tx rings to scan */
	u_int		np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
	uint16_t	np_txpoll;	/* XXX and also np_rxpoll ? */

	struct netmap_mem_d	*np_mref;	/* use with NMG_LOCK held */
	/* np_refcount is only used on FreeBSD */
	int		np_refcount;	/* use with NMG_LOCK held */

	/* pointers to the selinfo to be used for selrecord.
	 * Either the local or the global one depending on the
	 * number of rings.
	 */
	NM_SELINFO_T *np_rxsi, *np_txsi;
	struct thread	*np_td;		/* kqueue, just debugging */
};

#ifdef WITH_MONITOR

struct netmap_monitor_adapter {
	struct netmap_adapter up;

	struct netmap_priv_d priv;
	uint32_t flags;
};

#endif /* WITH_MONITOR */


/*
 * generic netmap emulation for devices that do not have
 * native netmap support.
 */
int generic_netmap_attach(struct ifnet *ifp);

int netmap_catch_rx(struct netmap_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);

//#define RATE_GENERIC  /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
#else
#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
#endif

/*
 * netmap_mitigation API. This is used by the generic adapter
 * to reduce the number of interrupt requests/selwakeup
 * to clients on incoming packets.
 */
void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
                                struct netmap_adapter *na);
void netmap_mitigation_start(struct nm_generic_mit *mit);
void netmap_mitigation_restart(struct nm_generic_mit *mit);
int netmap_mitigation_active(struct nm_generic_mit *mit);
void netmap_mitigation_cleanup(struct nm_generic_mit *mit);


/* Shared declarations for the VALE switch. */

/*
 * Each transmit queue accumulates a batch of packets into
 * a structure before forwarding. Packets to the same
 * destination are put in a list using ft_next as a link field.
 * ft_frags and ft_next are valid only on the first fragment.
 */
struct nm_bdg_fwd {	/* forwarding entry for a bridge */
	void *ft_buf;		/* netmap or indirect buffer */
	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
	uint8_t _ft_port;	/* dst port (unused) */
	uint16_t ft_flags;	/* flags, e.g. indirect */
	uint16_t ft_len;	/* src fragment len */
	uint16_t ft_next;	/* next packet to same destination */
};

/* struct 'virtio_net_hdr' from linux. */
struct nm_vnet_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

#define WORST_CASE_GSO_HEADER	(14+40+60)  /* IPv6 + TCP */

/* Private definitions for IPv4, IPv6, UDP and TCP headers. */

struct nm_iphdr {
	uint8_t		version_ihl;
	uint8_t		tos;
	uint16_t	tot_len;
	uint16_t	id;
	uint16_t	frag_off;
	uint8_t		ttl;
	uint8_t		protocol;
	uint16_t	check;
	uint32_t	saddr;
	uint32_t	daddr;
	/* The options start here. */
};

struct nm_tcphdr {
	uint16_t	source;
	uint16_t	dest;
	uint32_t	seq;
	uint32_t	ack_seq;
	uint8_t		doff;	/* Data offset + Reserved */
	uint8_t		flags;
	uint16_t	window;
	uint16_t	check;
	uint16_t	urg_ptr;
};

struct nm_udphdr {
	uint16_t	source;
	uint16_t	dest;
	uint16_t	len;
	uint16_t	check;
};

struct nm_ipv6hdr {
	uint8_t		priority_version;
	uint8_t		flow_lbl[3];

	uint16_t	payload_len;
	uint8_t		nexthdr;
	uint8_t		hop_limit;

	uint8_t		saddr[16];
	uint8_t		daddr[16];
};

/* Type used to store a checksum (in host byte order) that hasn't been
 * folded yet.
 */
#define rawsum_t uint32_t

rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
		      size_t datalen, uint16_t *check);
void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
		      size_t datalen, uint16_t *check);
uint16_t nm_csum_fold(rawsum_t cur_sum);

void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
			   struct netmap_vp_adapter *dst_na,
			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
			   u_int *j, u_int lim, u_int *howmany);

/* persistent virtual port routines */
int nm_vi_persist(const char *, struct ifnet **);
void nm_vi_detach(struct ifnet *);
void nm_vi_init_index(void);

#endif /* _NET_NETMAP_KERN_H_ */