/*
 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *	we can access ring->head, ring->cur, ring->tail, ring->flags
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, add
 *	#define NETMAP_WITH_LIBS
 * before this header is included in the source file that invokes
 * these functions.
63 */ 64 65 #ifndef _NET_NETMAP_USER_H_ 66 #define _NET_NETMAP_USER_H_ 67 68 #include <stdint.h> 69 #include <net/if.h> /* IFNAMSIZ */ 70 71 #ifndef likely 72 #define likely(x) __builtin_expect(!!(x), 1) 73 #define unlikely(x) __builtin_expect(!!(x), 0) 74 #endif /* likely and unlikely */ 75 76 #include <net/netmap.h> 77 78 /* helper macro */ 79 #define _NETMAP_OFFSET(type, ptr, offset) \ 80 ((type)(void *)((char *)(ptr) + (offset))) 81 82 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 83 84 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 85 nifp, (nifp)->ring_ofs[index] ) 86 87 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 88 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 89 90 #define NETMAP_BUF(ring, index) \ 91 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 92 93 #define NETMAP_BUF_IDX(ring, buf) \ 94 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 95 (ring)->nr_buf_size ) 96 97 98 static inline uint32_t 99 nm_ring_next(struct netmap_ring *r, uint32_t i) 100 { 101 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 102 } 103 104 105 /* 106 * Return 1 if we have pending transmissions in the tx ring. 107 * When everything is complete ring->cur = ring->tail + 1 (modulo ring size) 108 */ 109 static inline int 110 nm_tx_pending(struct netmap_ring *r) 111 { 112 return nm_ring_next(r, r->tail) != r->cur; 113 } 114 115 116 static inline uint32_t 117 nm_ring_space(struct netmap_ring *ring) 118 { 119 int ret = ring->tail - ring->cur; 120 if (ret < 0) 121 ret += ring->num_slots; 122 return ret; 123 } 124 125 126 #ifdef NETMAP_WITH_LIBS 127 /* 128 * Support for simple I/O libraries. 129 * Include other system headers required for compiling this. 
130 */ 131 132 #ifndef HAVE_NETMAP_WITH_LIBS 133 #define HAVE_NETMAP_WITH_LIBS 134 135 #include <sys/time.h> 136 #include <sys/mman.h> 137 #include <string.h> /* memset */ 138 #include <sys/ioctl.h> 139 #include <sys/errno.h> /* EINVAL */ 140 #include <fcntl.h> /* O_RDWR */ 141 #include <unistd.h> /* close() */ 142 #include <signal.h> 143 #include <stdlib.h> 144 145 struct nm_hdr_t { /* same as pcap_pkthdr */ 146 struct timeval ts; 147 uint32_t caplen; 148 uint32_t len; 149 }; 150 151 struct nm_stat_t { // pcap_stat 152 u_int ps_recv; 153 u_int ps_drop; 154 u_int ps_ifdrop; 155 #ifdef WIN32 156 u_int bs_capt; 157 #endif /* WIN32 */ 158 }; 159 160 #define NM_ERRBUF_SIZE 512 161 162 struct nm_desc_t { 163 struct nm_desc_t *self; 164 int fd; 165 void *mem; 166 int memsize; 167 struct netmap_if *nifp; 168 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 169 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 170 struct nmreq req; /* also contains the nr_name = ifname */ 171 struct nm_hdr_t hdr; 172 173 struct netmap_ring *tx, *rx; /* shortcuts to base hw/sw rings */ 174 175 /* parameters from pcap_open_live */ 176 int snaplen; 177 int promisc; 178 int to_ms; 179 char *errbuf; 180 181 /* save flags so we can restore them on close */ 182 uint32_t if_flags; 183 uint32_t if_reqcap; 184 uint32_t if_curcap; 185 186 struct nm_stat_t st; 187 char msg[NM_ERRBUF_SIZE]; 188 }; 189 190 /* 191 * when the descriptor is open correctly, d->self == d 192 * Eventually we should also use some magic number. 193 */ 194 #define P2NMD(p) ((struct nm_desc_t *)(p)) 195 #define IS_NETMAP_DESC(d) (P2NMD(d)->self == P2NMD(d)) 196 #define NETMAP_FD(d) (P2NMD(d)->fd) 197 198 199 /* 200 * this is a slightly optimized copy routine which rounds 201 * to multiple of 64 bytes and is often faster than dealing 202 * with other odd sizes. We assume there is enough room 203 * in the source and destination buffers. 204 * 205 * XXX only for multiples of 64 bytes, non overlapped. 
206 */ 207 static inline void 208 pkt_copy(const void *_src, void *_dst, int l) 209 { 210 const uint64_t *src = (const uint64_t *)_src; 211 uint64_t *dst = (uint64_t *)_dst; 212 213 if (unlikely(l >= 1024)) { 214 memcpy(dst, src, l); 215 return; 216 } 217 for (; likely(l > 0); l-=64) { 218 *dst++ = *src++; 219 *dst++ = *src++; 220 *dst++ = *src++; 221 *dst++ = *src++; 222 *dst++ = *src++; 223 *dst++ = *src++; 224 *dst++ = *src++; 225 *dst++ = *src++; 226 } 227 } 228 229 230 /* 231 * The callback, invoked on each received packet. Same as libpcap 232 */ 233 typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d); 234 235 /* 236 *--- the pcap-like API --- 237 * 238 * nm_open() opens a file descriptor, binds to a port and maps memory. 239 * 240 * ifname (netmap:foo or vale:foo) is the port name 241 * flags can be NETMAP_SW_RING or NETMAP_HW_RING etc. 242 * ring_no only used if NETMAP_HW_RING is specified, is interpreted 243 * as a string or integer indicating the ring number 244 * ring_flags is stored in all ring flags (e.g. for transparent mode) 245 * to open. If successful, t opens the fd and maps the memory. 246 */ 247 248 static struct nm_desc_t *nm_open(const char *ifname, 249 const char *ring_no, int flags, int ring_flags); 250 251 /* 252 * nm_close() closes and restores the port to its previous state 253 */ 254 255 static int nm_close(struct nm_desc_t *); 256 257 /* 258 * nm_inject() is the same as pcap_inject() 259 * nm_dispatch() is the same as pcap_dispatch() 260 * nm_nextpkt() is the same as pcap_next() 261 */ 262 263 static int nm_inject(struct nm_desc_t *, const void *, size_t); 264 static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *); 265 static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *); 266 267 268 /* 269 * Try to open, return descriptor if successful, NULL otherwise. 
270 * An invalid netmap name will return errno = 0; 271 */ 272 static struct nm_desc_t * 273 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags) 274 { 275 struct nm_desc_t *d; 276 u_int n, namelen; 277 char *port = NULL; 278 279 if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { 280 errno = 0; /* name not recognised */ 281 return NULL; 282 } 283 if (ifname[0] == 'n') 284 ifname += 7; 285 port = strchr(ifname, '-'); 286 if (!port) { 287 namelen = strlen(ifname); 288 } else { 289 namelen = port - ifname; 290 flags &= ~(NETMAP_SW_RING | NETMAP_HW_RING | NETMAP_RING_MASK); 291 if (port[1] == 's') 292 flags |= NETMAP_SW_RING; 293 else 294 ring_name = port; 295 } 296 if (namelen >= sizeof(d->req.nr_name)) 297 namelen = sizeof(d->req.nr_name) - 1; 298 299 d = (struct nm_desc_t *)calloc(1, sizeof(*d)); 300 if (d == NULL) { 301 errno = ENOMEM; 302 return NULL; 303 } 304 d->self = d; /* set this early so nm_close() works */ 305 d->fd = open("/dev/netmap", O_RDWR); 306 if (d->fd < 0) 307 goto fail; 308 309 if (flags & NETMAP_SW_RING) { 310 d->req.nr_ringid = NETMAP_SW_RING; 311 } else { 312 u_int r; 313 if (flags & NETMAP_HW_RING) /* interpret ring as int */ 314 r = (uintptr_t)ring_name; 315 else /* interpret ring as numeric string */ 316 r = ring_name ? atoi(ring_name) : ~0; 317 r = (r < NETMAP_RING_MASK) ? 
(r | NETMAP_HW_RING) : 0; 318 d->req.nr_ringid = r; /* set the ring */ 319 } 320 d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK); 321 d->req.nr_version = NETMAP_API; 322 memcpy(d->req.nr_name, ifname, namelen); 323 d->req.nr_name[namelen] = '\0'; 324 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 325 goto fail; 326 } 327 328 d->memsize = d->req.nr_memsize; 329 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 330 d->fd, 0); 331 if (d->mem == NULL) 332 goto fail; 333 d->nifp = NETMAP_IF(d->mem, d->req.nr_offset); 334 if (d->req.nr_ringid & NETMAP_SW_RING) { 335 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 336 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 337 } else if (d->req.nr_ringid & NETMAP_HW_RING) { 338 /* XXX check validity */ 339 d->first_tx_ring = d->last_tx_ring = 340 d->first_rx_ring = d->last_rx_ring = 341 d->req.nr_ringid & NETMAP_RING_MASK; 342 } else { 343 d->first_tx_ring = d->last_rx_ring = 0; 344 d->last_tx_ring = d->req.nr_tx_rings - 1; 345 d->last_rx_ring = d->req.nr_rx_rings - 1; 346 } 347 d->tx = NETMAP_TXRING(d->nifp, 0); 348 d->rx = NETMAP_RXRING(d->nifp, 0); 349 d->cur_tx_ring = d->first_tx_ring; 350 d->cur_rx_ring = d->first_rx_ring; 351 for (n = d->first_tx_ring; n <= d->last_tx_ring; n++) { 352 d->tx[n].flags |= ring_flags; 353 } 354 for (n = d->first_rx_ring; n <= d->last_rx_ring; n++) { 355 d->rx[n].flags |= ring_flags; 356 } 357 return d; 358 359 fail: 360 nm_close(d); 361 errno = EINVAL; 362 return NULL; 363 } 364 365 366 static int 367 nm_close(struct nm_desc_t *d) 368 { 369 /* 370 * ugly trick to avoid unused warnings 371 */ 372 static void *__xxzt[] __attribute__ ((unused)) = 373 { (void *)nm_open, (void *)nm_inject, 374 (void *)nm_dispatch, (void *)nm_nextpkt } ; 375 376 if (d == NULL || d->self != d) 377 return EINVAL; 378 if (d->mem) 379 munmap(d->mem, d->memsize); 380 if (d->fd != -1) 381 close(d->fd); 382 bzero(d, sizeof(*d)); 383 free(d); 384 return 0; 385 } 386 387 388 /* 389 * Same prototype 
as pcap_inject(), only need to cast. 390 */ 391 static int 392 nm_inject(struct nm_desc_t *d, const void *buf, size_t size) 393 { 394 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; 395 396 for (c = 0; c < n ; c++) { 397 /* compute current ring to use */ 398 struct netmap_ring *ring; 399 uint32_t i, idx; 400 uint32_t ri = d->cur_tx_ring + c; 401 402 if (ri > d->last_tx_ring) 403 ri = d->first_tx_ring; 404 ring = NETMAP_TXRING(d->nifp, ri); 405 if (nm_ring_empty(ring)) { 406 continue; 407 } 408 i = ring->cur; 409 idx = ring->slot[i].buf_idx; 410 ring->slot[i].len = size; 411 pkt_copy(buf, NETMAP_BUF(ring, idx), size); 412 d->cur_tx_ring = ri; 413 ring->head = ring->cur = nm_ring_next(ring, i); 414 return size; 415 } 416 return 0; /* fail */ 417 } 418 419 420 /* 421 * Same prototype as pcap_dispatch(), only need to cast. 422 */ 423 static int 424 nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg) 425 { 426 int n = d->last_rx_ring - d->first_rx_ring + 1; 427 int c, got = 0, ri = d->cur_rx_ring; 428 429 if (cnt == 0) 430 cnt = -1; 431 /* cnt == -1 means infinite, but rings have a finite amount 432 * of buffers and the int is large enough that we never wrap, 433 * so we can omit checking for -1 434 */ 435 for (c=0; c < n && cnt != got; c++) { 436 /* compute current ring to use */ 437 struct netmap_ring *ring; 438 439 ri = d->cur_rx_ring + c; 440 if (ri > d->last_rx_ring) 441 ri = d->first_rx_ring; 442 ring = NETMAP_RXRING(d->nifp, ri); 443 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 444 u_int i = ring->cur; 445 u_int idx = ring->slot[i].buf_idx; 446 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 447 448 // __builtin_prefetch(buf); 449 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 450 d->hdr.ts = ring->ts; 451 cb(arg, &d->hdr, buf); 452 ring->head = ring->cur = nm_ring_next(ring, i); 453 } 454 } 455 d->cur_rx_ring = ri; 456 return got; 457 } 458 459 static u_char * 460 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr) 461 { 462 int ri 
= d->cur_rx_ring; 463 464 do { 465 /* compute current ring to use */ 466 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 467 if (!nm_ring_empty(ring)) { 468 u_int i = ring->cur; 469 u_int idx = ring->slot[i].buf_idx; 470 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 471 472 // __builtin_prefetch(buf); 473 hdr->ts = ring->ts; 474 hdr->len = hdr->caplen = ring->slot[i].len; 475 ring->cur = nm_ring_next(ring, i); 476 /* we could postpone advancing head if we want 477 * to hold the buffer. This can be supported in 478 * the future. 479 */ 480 ring->head = ring->cur; 481 d->cur_rx_ring = ri; 482 return buf; 483 } 484 ri++; 485 if (ri > d->last_rx_ring) 486 ri = d->first_rx_ring; 487 } while (ri != d->cur_rx_ring); 488 return NULL; /* nothing found */ 489 } 490 491 #endif /* !HAVE_NETMAP_WITH_LIBS */ 492 493 #endif /* NETMAP_WITH_LIBS */ 494 495 #endif /* _NET_NETMAP_USER_H_ */ 496