1 /* 2 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * 30 * Functions and macros to manipulate netmap structures and packets 31 * in userspace. See netmap(4) for more information. 32 * 33 * The address of the struct netmap_if, say nifp, is computed from the 34 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 35 * ioctl(fd, NIOCREG, &req); 36 * mem = mmap(0, ... 
 *	);
 *	nifp = NETMAP_IF(mem, req.nr_nifp);
 * (so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
63 */ 64 65 #ifndef _NET_NETMAP_USER_H_ 66 #define _NET_NETMAP_USER_H_ 67 68 #include <stdint.h> 69 #include <net/if.h> /* IFNAMSIZ */ 70 71 #ifndef likely 72 #define likely(x) __builtin_expect(!!(x), 1) 73 #define unlikely(x) __builtin_expect(!!(x), 0) 74 #endif /* likely and unlikely */ 75 76 #include <net/netmap.h> 77 78 /* helper macro */ 79 #define _NETMAP_OFFSET(type, ptr, offset) \ 80 ((type)(void *)((char *)(ptr) + (offset))) 81 82 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 83 84 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 85 nifp, (nifp)->ring_ofs[index] ) 86 87 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 88 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 89 90 #define NETMAP_BUF(ring, index) \ 91 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 92 93 #define NETMAP_BUF_IDX(ring, buf) \ 94 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 95 (ring)->nr_buf_size ) 96 97 98 static inline uint32_t 99 nm_ring_next(struct netmap_ring *r, uint32_t i) 100 { 101 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 102 } 103 104 105 /* 106 * Return 1 if we have pending transmissions in the tx ring. 107 * When everything is complete ring->cur = ring->tail + 1 (modulo ring size) 108 */ 109 static inline int 110 nm_tx_pending(struct netmap_ring *r) 111 { 112 return nm_ring_next(r, r->tail) != r->cur; 113 } 114 115 116 static inline uint32_t 117 nm_ring_space(struct netmap_ring *ring) 118 { 119 int ret = ring->tail - ring->cur; 120 if (ret < 0) 121 ret += ring->num_slots; 122 return ret; 123 } 124 125 126 #ifdef NETMAP_WITH_LIBS 127 /* 128 * Support for simple I/O libraries. 129 * Include other system headers required for compiling this. 
 */

#ifndef HAVE_NETMAP_WITH_LIBS
#define HAVE_NETMAP_WITH_LIBS

#include <sys/time.h>
#include <sys/mman.h>
#include <string.h>	/* memset */
#include <sys/ioctl.h>
#include <sys/errno.h>	/* EINVAL */
#include <fcntl.h>	/* O_RDWR */
#include <unistd.h>	/* close() */
#ifdef __FreeBSD__
#include <stdlib.h>
#else
#include <malloc.h>	/* on FreeBSD it is stdlib.h */
#endif

/* per-packet header, same layout as struct pcap_pkthdr */
struct nm_hdr_t {
	struct timeval ts;	/* capture timestamp (copied from the ring) */
	uint32_t caplen;	/* captured length; always equal to len here */
	uint32_t len;		/* length of the packet in the slot */
};

/* descriptor for an open netmap port, the pcap_t equivalent */
struct nm_desc_t {
	struct nm_desc_t *self;	/* points back to itself when valid, see IS_NETMAP_DESC() */
	int fd;			/* file descriptor for /dev/netmap */
	void *mem;		/* mmap()ed shared memory region */
	int memsize;		/* size of the region, from req.nr_memsize */
	struct netmap_if *nifp;	/* interface descriptor inside the region */
	uint16_t first_ring, last_ring, cur_ring;	/* range of rings bound, and scan cursor */
	struct nmreq req;	/* the request used for NIOCREGIF */
	struct nm_hdr_t hdr;	/* scratch header reused by nm_dispatch() */
};

/*
 * when the descriptor is open correctly, d->self == d
 */
#define P2NMD(p)		((struct nm_desc_t *)(p))
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = _src;
	uint64_t *dst = _dst;
	/* for large packets plain memcpy is at least as fast */
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	/* 8 x 64-bit words (64 bytes) per iteration; may copy up to
	 * 63 bytes past l, hence the "enough room" requirement above */
	for (; likely(l > 0); l-=64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}


/*
 * The callback, invoked on each received packet.
 * Same as libpcap.
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name to open
 * flags	can be NETMAP_SW_RING or NETMAP_HW_RING etc.
 * ring_no	only used if NETMAP_HW_RING is specified, is interpreted
 *		as a string or integer indicating the ring number
 * ring_flags	is stored in all ring flags (e.g. for transparent mode)
 * If successful, it opens the fd and maps the memory.
 */

static struct nm_desc_t *nm_open(const char *ifname,
	 const char *ring_no, int flags, int ring_flags);

/*
 * nm_close() closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc_t *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc_t *, const void *, size_t);
static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);


/*
 * Try to open, return descriptor if successful, NULL otherwise.
243 * An invalid netmap name will return errno = 0; 244 */ 245 static struct nm_desc_t * 246 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags) 247 { 248 struct nm_desc_t *d; 249 u_int n; 250 251 if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { 252 errno = 0; /* name not recognised */ 253 return NULL; 254 } 255 if (ifname[0] == 'n') 256 ifname += 7; 257 d = (struct nm_desc_t *)calloc(1, sizeof(*d)); 258 if (d == NULL) { 259 errno = ENOMEM; 260 return NULL; 261 } 262 d->self = d; /* set this early so nm_close() works */ 263 d->fd = open("/dev/netmap", O_RDWR); 264 if (d->fd < 0) 265 goto fail; 266 267 if (flags & NETMAP_SW_RING) { 268 d->req.nr_ringid = NETMAP_SW_RING; 269 } else { 270 u_int r; 271 if (flags & NETMAP_HW_RING) /* interpret ring as int */ 272 r = (uintptr_t)ring_name; 273 else /* interpret ring as numeric string */ 274 r = ring_name ? atoi(ring_name) : ~0; 275 r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0; 276 d->req.nr_ringid = r; /* set the ring */ 277 } 278 d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK); 279 d->req.nr_version = NETMAP_API; 280 strncpy(d->req.nr_name, ifname, sizeof(d->req.nr_name)); 281 if (ioctl(d->fd, NIOCREGIF, &d->req)) 282 goto fail; 283 284 d->memsize = d->req.nr_memsize; 285 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 286 d->fd, 0); 287 if (d->mem == NULL) 288 goto fail; 289 d->nifp = NETMAP_IF(d->mem, d->req.nr_offset); 290 if (d->req.nr_ringid & NETMAP_SW_RING) { 291 d->first_ring = d->last_ring = d->req.nr_rx_rings; 292 } else if (d->req.nr_ringid & NETMAP_HW_RING) { 293 d->first_ring = d->last_ring = 294 d->req.nr_ringid & NETMAP_RING_MASK; 295 } else { 296 d->first_ring = 0; 297 d->last_ring = d->req.nr_rx_rings - 1; 298 } 299 d->cur_ring = d->first_ring; 300 for (n = d->first_ring; n <= d->last_ring; n++) { 301 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, n); 302 ring->flags |= ring_flags; 303 } 304 return d; 305 306 fail: 307 nm_close(d); 
308 errno = EINVAL; 309 return NULL; 310 } 311 312 313 static int 314 nm_close(struct nm_desc_t *d) 315 { 316 /* 317 * ugly trick to avoid unused warnings 318 */ 319 static void *__xxzt[] __attribute__ ((unused)) = 320 { nm_open, nm_inject, nm_dispatch, nm_nextpkt } ; 321 322 if (d == NULL || d->self != d) 323 return EINVAL; 324 if (d->mem) 325 munmap(d->mem, d->memsize); 326 if (d->fd != -1) 327 close(d->fd); 328 bzero(d, sizeof(*d)); 329 free(d); 330 return 0; 331 } 332 333 334 /* 335 * Same prototype as pcap_inject(), only need to cast. 336 */ 337 static int 338 nm_inject(struct nm_desc_t *d, const void *buf, size_t size) 339 { 340 u_int c, n = d->last_ring - d->first_ring + 1; 341 342 if (0) fprintf(stderr, "%s rings %d %d %d\n", __FUNCTION__, 343 d->first_ring, d->cur_ring, d->last_ring); 344 for (c = 0; c < n ; c++) { 345 /* compute current ring to use */ 346 struct netmap_ring *ring; 347 uint32_t i, idx; 348 uint32_t ri = d->cur_ring + c; 349 350 if (ri > d->last_ring) 351 ri = d->first_ring; 352 ring = NETMAP_TXRING(d->nifp, ri); 353 if (nm_ring_empty(ring)) { 354 if (0) fprintf(stderr, "%s ring %d cur %d tail %d\n", 355 __FUNCTION__, 356 ri, ring->cur, ring->tail); 357 continue; 358 } 359 i = ring->cur; 360 idx = ring->slot[i].buf_idx; 361 ring->slot[i].len = size; 362 pkt_copy(buf, NETMAP_BUF(ring, idx), size); 363 d->cur_ring = ri; 364 ring->head = ring->cur = nm_ring_next(ring, i); 365 return size; 366 } 367 return 0; /* fail */ 368 } 369 370 371 /* 372 * Same prototype as pcap_dispatch(), only need to cast. 
373 */ 374 static int 375 nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg) 376 { 377 int n = d->last_ring - d->first_ring + 1; 378 int c, got = 0, ri = d->cur_ring; 379 380 if (cnt == 0) 381 cnt = -1; 382 /* cnt == -1 means infinite, but rings have a finite amount 383 * of buffers and the int is large enough that we never wrap, 384 * so we can omit checking for -1 385 */ 386 for (c=0; c < n && cnt != got; c++) { 387 /* compute current ring to use */ 388 struct netmap_ring *ring; 389 390 ri = d->cur_ring + c; 391 if (ri > d->last_ring) 392 ri = d->first_ring; 393 ring = NETMAP_RXRING(d->nifp, ri); 394 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 395 u_int i = ring->cur; 396 u_int idx = ring->slot[i].buf_idx; 397 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 398 // XXX should check valid buf 399 // prefetch(buf); 400 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 401 d->hdr.ts = ring->ts; 402 cb(arg, &d->hdr, buf); 403 ring->head = ring->cur = nm_ring_next(ring, i); 404 } 405 } 406 d->cur_ring = ri; 407 return got; 408 } 409 410 static u_char * 411 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr) 412 { 413 int ri = d->cur_ring; 414 415 do { 416 /* compute current ring to use */ 417 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 418 if (!nm_ring_empty(ring)) { 419 u_int i = ring->cur; 420 u_int idx = ring->slot[i].buf_idx; 421 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 422 // XXX should check valid buf 423 // prefetch(buf); 424 hdr->ts = ring->ts; 425 hdr->len = hdr->caplen = ring->slot[i].len; 426 ring->cur = nm_ring_next(ring, i); 427 /* we could postpone advancing head if we want 428 * to hold the buffer. This can be supported in 429 * the future. 
430 */ 431 ring->head = ring->cur; 432 d->cur_ring = ri; 433 return buf; 434 } 435 ri++; 436 if (ri > d->last_ring) 437 ri = d->first_ring; 438 } while (ri != d->cur_ring); 439 return NULL; /* nothing found */ 440 } 441 442 #endif /* !HAVE_NETMAP_WITH_LIBS */ 443 444 #endif /* NETMAP_WITH_LIBS */ 445 446 #endif /* _NET_NETMAP_USER_H_ */ 447