1 /* 2 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD$ 29 * 30 * Functions and macros to manipulate netmap structures and packets 31 * in userspace. See netmap(4) for more information. 32 * 33 * The address of the struct netmap_if, say nifp, is computed from the 34 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 35 * ioctl(fd, NIOCREG, &req); 36 * mem = mmap(0, ... 
 *	);
 *	nifp = NETMAP_IF(mem, req.nr_nifp);
 * (so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
 *
 * ring->slot[i] gives us the i-th slot (we can access
 * directly len, flags, buf_idx)
 *
 * char *buf = NETMAP_BUF(ring, x) returns a pointer to
 * the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
 */

#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_

#include <stdint.h>
#include <net/if.h>	/* IFNAMSIZ */

#ifndef likely
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#endif /* likely and unlikely */

#include <net/netmap.h>

/* helper macro: resolve an offset (in bytes) from ptr into a typed pointer */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* locate the struct netmap_if at _ofs bytes into the mmap()ed region _base */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* i-th TX ring of the interface */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/*
 * i-th RX ring: the ring_ofs[] array holds all TX ring offsets first
 * (plus one extra slot for the host TX ring), then the RX ring offsets.
 */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* pointer to the start of buffer number 'index' */
#define NETMAP_BUF(ring, index) \
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: recover the buffer index from a buffer pointer */
#define NETMAP_BUF_IDX(ring, buf) \
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )


/* next slot index after i, wrapping to 0 at the end of the ring */
static inline uint32_t
nm_ring_next(struct netmap_ring *r, uint32_t i)
{
	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
}


/*
 * Return 1 if we have pending transmissions in the tx ring.
 * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
 */
static inline int
nm_tx_pending(struct netmap_ring *r)
{
	return nm_ring_next(r, r->tail) != r->cur;
}


/* number of slots available between cur and tail (modulo ring size) */
static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
	int ret = ring->tail - ring->cur;
	if (ret < 0)
		ret += ring->num_slots;
	return ret;
}


#ifdef NETMAP_WITH_LIBS
/*
 * Support for simple I/O libraries.
 * Include other system headers required for compiling this.
 */

#ifndef HAVE_NETMAP_WITH_LIBS
#define HAVE_NETMAP_WITH_LIBS

#include <sys/time.h>
#include <sys/mman.h>
#include <string.h>	/* memset */
#include <sys/ioctl.h>
#include <sys/errno.h>	/* EINVAL */
#include <fcntl.h>	/* O_RDWR */
#include <unistd.h>	/* close() */
#ifdef __FreeBSD__
#include <stdlib.h>
#else
#include <malloc.h>	/* on FreeBSD it is stdlib.h */
#endif

/* per-packet header handed to callbacks; same layout as pcap_pkthdr */
struct nm_hdr_t {	/* same as pcap_pkthdr */
	struct timeval ts;	/* timestamp copied from the ring */
	uint32_t caplen;	/* captured length (== len here, no snaplen) */
	uint32_t len;		/* packet length */
};

/* state for one open netmap port, returned by nm_open() */
struct nm_desc_t {
	struct nm_desc_t *self;	/* points to this struct when valid */
	int fd;			/* fd on /dev/netmap */
	void *mem;		/* mmap()ed shared memory region */
	int memsize;		/* size of the mmap()ed region */
	struct netmap_if *nifp;	/* interface descriptor inside 'mem' */
	uint16_t first_ring, last_ring, cur_ring;	/* ring range in use */
	struct nmreq req;	/* request used for NIOCREGIF */
	struct nm_hdr_t hdr;	/* scratch header reused by nm_dispatch() */
};

/*
 * when the descriptor is open correctly, d->self == d
 * Eventually we should also use some magic number.
 */
#define P2NMD(p)		((struct nm_desc_t *)(p))
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = (const uint64_t *)_src;
	uint64_t *dst = (uint64_t *)_dst;

	/* large copies: let the libc memcpy do the work */
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	/* copy 64 bytes (8 x uint64_t) per iteration; may overrun the
	 * tail up to the next 64-byte boundary, which the caller's
	 * buffers are assumed to accommodate (see comment above). */
	for (; likely(l > 0); l-=64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}


/*
 * The callback, invoked on each received packet.
 * Same as libpcap's pcap_handler.
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 * flags	can be NETMAP_SW_RING or NETMAP_HW_RING etc.
 * ring_no	only used if NETMAP_HW_RING is specified, is interpreted
 *		as a string or integer indicating the ring number
 * ring_flags	is stored in all ring flags (e.g. for transparent mode)
 * to open. If successful, it opens the fd and maps the memory.
 */

static struct nm_desc_t *nm_open(const char *ifname,
	 const char *ring_no, int flags, int ring_flags);

/*
 * nm_close() closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc_t *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc_t *, const void *, size_t);
static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);


/*
 * Try to open, return descriptor if successful, NULL otherwise.
245 * An invalid netmap name will return errno = 0; 246 */ 247 static struct nm_desc_t * 248 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags) 249 { 250 struct nm_desc_t *d; 251 u_int n; 252 253 if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { 254 errno = 0; /* name not recognised */ 255 return NULL; 256 } 257 if (ifname[0] == 'n') 258 ifname += 7; 259 d = (struct nm_desc_t *)calloc(1, sizeof(*d)); 260 if (d == NULL) { 261 errno = ENOMEM; 262 return NULL; 263 } 264 d->self = d; /* set this early so nm_close() works */ 265 d->fd = open("/dev/netmap", O_RDWR); 266 if (d->fd < 0) 267 goto fail; 268 269 if (flags & NETMAP_SW_RING) { 270 d->req.nr_ringid = NETMAP_SW_RING; 271 } else { 272 u_int r; 273 if (flags & NETMAP_HW_RING) /* interpret ring as int */ 274 r = (uintptr_t)ring_name; 275 else /* interpret ring as numeric string */ 276 r = ring_name ? atoi(ring_name) : ~0; 277 r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0; 278 d->req.nr_ringid = r; /* set the ring */ 279 } 280 d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK); 281 d->req.nr_version = NETMAP_API; 282 strncpy(d->req.nr_name, ifname, sizeof(d->req.nr_name)); 283 if (ioctl(d->fd, NIOCREGIF, &d->req)) 284 goto fail; 285 286 d->memsize = d->req.nr_memsize; 287 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 288 d->fd, 0); 289 if (d->mem == NULL) 290 goto fail; 291 d->nifp = NETMAP_IF(d->mem, d->req.nr_offset); 292 if (d->req.nr_ringid & NETMAP_SW_RING) { 293 d->first_ring = d->last_ring = d->req.nr_rx_rings; 294 } else if (d->req.nr_ringid & NETMAP_HW_RING) { 295 d->first_ring = d->last_ring = 296 d->req.nr_ringid & NETMAP_RING_MASK; 297 } else { 298 d->first_ring = 0; 299 d->last_ring = d->req.nr_rx_rings - 1; 300 } 301 d->cur_ring = d->first_ring; 302 for (n = d->first_ring; n <= d->last_ring; n++) { 303 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, n); 304 ring->flags |= ring_flags; 305 } 306 return d; 307 308 fail: 309 nm_close(d); 
310 errno = EINVAL; 311 return NULL; 312 } 313 314 315 static int 316 nm_close(struct nm_desc_t *d) 317 { 318 /* 319 * ugly trick to avoid unused warnings 320 */ 321 static void *__xxzt[] __attribute__ ((unused)) = 322 { (void *)nm_open, (void *)nm_inject, 323 (void *)nm_dispatch, (void *)nm_nextpkt } ; 324 325 if (d == NULL || d->self != d) 326 return EINVAL; 327 if (d->mem) 328 munmap(d->mem, d->memsize); 329 if (d->fd != -1) 330 close(d->fd); 331 bzero(d, sizeof(*d)); 332 free(d); 333 return 0; 334 } 335 336 337 /* 338 * Same prototype as pcap_inject(), only need to cast. 339 */ 340 static int 341 nm_inject(struct nm_desc_t *d, const void *buf, size_t size) 342 { 343 u_int c, n = d->last_ring - d->first_ring + 1; 344 345 if (0) fprintf(stderr, "%s rings %d %d %d\n", __FUNCTION__, 346 d->first_ring, d->cur_ring, d->last_ring); 347 for (c = 0; c < n ; c++) { 348 /* compute current ring to use */ 349 struct netmap_ring *ring; 350 uint32_t i, idx; 351 uint32_t ri = d->cur_ring + c; 352 353 if (ri > d->last_ring) 354 ri = d->first_ring; 355 ring = NETMAP_TXRING(d->nifp, ri); 356 if (nm_ring_empty(ring)) { 357 if (0) fprintf(stderr, "%s ring %d cur %d tail %d\n", 358 __FUNCTION__, 359 ri, ring->cur, ring->tail); 360 continue; 361 } 362 i = ring->cur; 363 idx = ring->slot[i].buf_idx; 364 ring->slot[i].len = size; 365 pkt_copy(buf, NETMAP_BUF(ring, idx), size); 366 d->cur_ring = ri; 367 ring->head = ring->cur = nm_ring_next(ring, i); 368 return size; 369 } 370 return 0; /* fail */ 371 } 372 373 374 /* 375 * Same prototype as pcap_dispatch(), only need to cast. 
 * Process up to 'cnt' packets (0 means "as many as available"),
 * invoking cb(arg, hdr, buf) on each; returns the number processed.
 * Scans the RX rings starting from cur_ring, resuming there next call.
 */
static int
nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_ring - d->first_ring + 1;
	int c, got = 0, ri = d->cur_ring;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c=0; c < n && cnt != got; c++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		ri = d->cur_ring + c;
		if (ri > d->last_ring)
			ri = d->first_ring;	/* wrap around */
		ring = NETMAP_RXRING(d->nifp, ri);
		/* drain this ring until empty or the quota is reached */
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
			// XXX should check valid buf
			// prefetch(buf);
			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
			d->hdr.ts = ring->ts;
			cb(arg, &d->hdr, buf);
			/* release the slot back to the kernel */
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	d->cur_ring = ri;	/* resume from this ring next time */
	return got;
}

/*
 * Same as pcap_next(): return a pointer to the next packet and fill
 * in *hdr, or NULL if no packet is available on any RX ring.
 * The returned buffer is valid until the slot is reused by the kernel.
 */
static u_char *
nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
{
	int ri = d->cur_ring;

	do {
		/* compute current ring to use */
		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
		if (!nm_ring_empty(ring)) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
			// XXX should check valid buf
			// prefetch(buf);
			hdr->ts = ring->ts;
			hdr->len = hdr->caplen = ring->slot[i].len;
			ring->cur = nm_ring_next(ring, i);
			/* we could postpone advancing head if we want
			 * to hold the buffer. This can be supported in
			 * the future.
			 */
			ring->head = ring->cur;
			d->cur_ring = ri;	/* resume here next call */
			return buf;
		}
		ri++;
		if (ri > d->last_ring)
			ri = d->first_ring;	/* wrap around */
	} while (ri != d->cur_ring);
	return NULL; /* nothing found */
}

#endif /* !HAVE_NETMAP_WITH_LIBS */

#endif /* NETMAP_WITH_LIBS */

#endif /* _NET_NETMAP_USER_H_ */