1 /* 2 * Copyright (C) 2011-2016 Universita` di Pisa 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * $FreeBSD$ 30 * 31 * Functions and macros to manipulate netmap structures and packets 32 * in userspace. See netmap(4) for more information. 33 * 34 * The address of the struct netmap_if, say nifp, is computed from the 35 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 36 * ioctl(fd, NIOCREG, &req); 37 * mem = mmap(0, ... ); 38 * nifp = NETMAP_IF(mem, req.nr_nifp); 39 * (so simple, we could just do it manually) 40 * 41 * From there: 42 * struct netmap_ring *NETMAP_TXRING(nifp, index) 43 * struct netmap_ring *NETMAP_RXRING(nifp, index) 44 * we can access ring->cur, ring->head, ring->tail, etc. 45 * 46 * ring->slot[i] gives us the i-th slot (we can access 47 * directly len, flags, buf_idx) 48 * 49 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 50 * the buffer numbered x 51 * 52 * All ring indexes (head, cur, tail) should always move forward. 53 * To compute the next index in a circular ring you can use 54 * i = nm_ring_next(ring, i); 55 * 56 * To ease porting apps from pcap to netmap we supply a few fuctions 57 * that can be called to open, close, read and write on netmap in a way 58 * similar to libpcap. Note that the read/write function depend on 59 * an ioctl()/select()/poll() being issued to refill rings or push 60 * packets out. 61 * 62 * In order to use these, include #define NETMAP_WITH_LIBS 63 * in the source file that invokes these functions. 64 */ 65 66 #ifndef _NET_NETMAP_USER_H_ 67 #define _NET_NETMAP_USER_H_ 68 69 #define NETMAP_DEVICE_NAME "/dev/netmap" 70 71 #ifdef __CYGWIN__ 72 /* 73 * we can compile userspace apps with either cygwin or msvc, 74 * and we use _WIN32 to identify windows specific code 75 */ 76 #ifndef _WIN32 77 #define _WIN32 78 #endif /* _WIN32 */ 79 80 #endif /* __CYGWIN__ */ 81 82 #ifdef _WIN32 83 #undef NETMAP_DEVICE_NAME 84 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 85 #include <windows.h> 86 #include <WinDef.h> 87 #include <sys/cygwin.h> 88 #endif /* _WIN32 */ 89 90 #include <stdint.h> 91 #include <sys/socket.h> /* apple needs sockaddr */ 92 #include <net/if.h> /* IFNAMSIZ */ 93 #include <ctype.h> 94 95 #ifndef likely 96 #define likely(x) __builtin_expect(!!(x), 1) 97 #define unlikely(x) __builtin_expect(!!(x), 0) 98 #endif /* likely and unlikely */ 99 100 #include <net/netmap.h> 101 102 /* helper macro */ 103 #define _NETMAP_OFFSET(type, ptr, offset) \ 104 ((type)(void *)((char *)(ptr) + (offset))) 105 106 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 107 108 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 109 nifp, (nifp)->ring_ofs[index] ) 110 111 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 113 114 #define NETMAP_BUF(ring, index) \ 115 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 116 117 #define NETMAP_BUF_IDX(ring, buf) \ 118 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 119 (ring)->nr_buf_size ) 120 121 122 static inline uint32_t 123 nm_ring_next(struct netmap_ring *r, uint32_t i) 124 { 125 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 126 } 127 128 129 /* 130 * Return 1 if we have pending transmissions in the tx ring. 131 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 132 */ 133 static inline int 134 nm_tx_pending(struct netmap_ring *r) 135 { 136 return nm_ring_next(r, r->tail) != r->head; 137 } 138 139 140 static inline uint32_t 141 nm_ring_space(struct netmap_ring *ring) 142 { 143 int ret = ring->tail - ring->cur; 144 if (ret < 0) 145 ret += ring->num_slots; 146 return ret; 147 } 148 149 150 #ifdef NETMAP_WITH_LIBS 151 /* 152 * Support for simple I/O libraries. 153 * Include other system headers required for compiling this. 154 */ 155 156 #ifndef HAVE_NETMAP_WITH_LIBS 157 #define HAVE_NETMAP_WITH_LIBS 158 159 #include <stdio.h> 160 #include <sys/time.h> 161 #include <sys/mman.h> 162 #include <string.h> /* memset */ 163 #include <sys/ioctl.h> 164 #include <sys/errno.h> /* EINVAL */ 165 #include <fcntl.h> /* O_RDWR */ 166 #include <unistd.h> /* close() */ 167 #include <signal.h> 168 #include <stdlib.h> 169 170 #ifndef ND /* debug macros */ 171 /* debug support */ 172 #define ND(_fmt, ...) do {} while(0) 173 #define D(_fmt, ...) \ 174 do { \ 175 struct timeval _t0; \ 176 gettimeofday(&_t0, NULL); \ 177 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 178 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 179 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 180 } while (0) 181 182 /* Rate limited version of "D", lps indicates how many per second */ 183 #define RD(lps, format, ...) \ 184 do { \ 185 static int __t0, __cnt; \ 186 struct timeval __xxts; \ 187 gettimeofday(&__xxts, NULL); \ 188 if (__t0 != __xxts.tv_sec) { \ 189 __t0 = __xxts.tv_sec; \ 190 __cnt = 0; \ 191 } \ 192 if (__cnt++ < lps) { \ 193 D(format, ##__VA_ARGS__); \ 194 } \ 195 } while (0) 196 #endif 197 198 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 199 struct timeval ts; 200 uint32_t caplen; 201 uint32_t len; 202 203 uint64_t flags; /* NM_MORE_PKTS etc */ 204 #define NM_MORE_PKTS 1 205 struct nm_desc *d; 206 struct netmap_slot *slot; 207 uint8_t *buf; 208 }; 209 210 struct nm_stat { /* same as pcap_stat */ 211 u_int ps_recv; 212 u_int ps_drop; 213 u_int ps_ifdrop; 214 #ifdef WIN32 /* XXX or _WIN32 ? */ 215 u_int bs_capt; 216 #endif /* WIN32 */ 217 }; 218 219 #define NM_ERRBUF_SIZE 512 220 221 struct nm_desc { 222 struct nm_desc *self; /* point to self if netmap. */ 223 int fd; 224 void *mem; 225 uint32_t memsize; 226 int done_mmap; /* set if mem is the result of mmap */ 227 struct netmap_if * const nifp; 228 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 229 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 230 struct nmreq req; /* also contains the nr_name = ifname */ 231 struct nm_pkthdr hdr; 232 233 /* 234 * The memory contains netmap_if, rings and then buffers. 235 * Given a pointer (e.g. to nm_inject) we can compare with 236 * mem/buf_start/buf_end to tell if it is a buffer or 237 * some other descriptor in our region. 238 * We also store a pointer to some ring as it helps in the 239 * translation from buffer indexes to addresses. 240 */ 241 struct netmap_ring * const some_ring; 242 void * const buf_start; 243 void * const buf_end; 244 /* parameters from pcap_open_live */ 245 int snaplen; 246 int promisc; 247 int to_ms; 248 char *errbuf; 249 250 /* save flags so we can restore them on close */ 251 uint32_t if_flags; 252 uint32_t if_reqcap; 253 uint32_t if_curcap; 254 255 struct nm_stat st; 256 char msg[NM_ERRBUF_SIZE]; 257 }; 258 259 /* 260 * when the descriptor is open correctly, d->self == d 261 * Eventually we should also use some magic number. 262 */ 263 #define P2NMD(p) ((struct nm_desc *)(p)) 264 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 265 #define NETMAP_FD(d) (P2NMD(d)->fd) 266 267 268 /* 269 * this is a slightly optimized copy routine which rounds 270 * to multiple of 64 bytes and is often faster than dealing 271 * with other odd sizes. We assume there is enough room 272 * in the source and destination buffers. 273 * 274 * XXX only for multiples of 64 bytes, non overlapped. 275 */ 276 static inline void 277 nm_pkt_copy(const void *_src, void *_dst, int l) 278 { 279 const uint64_t *src = (const uint64_t *)_src; 280 uint64_t *dst = (uint64_t *)_dst; 281 282 if (unlikely(l >= 1024)) { 283 memcpy(dst, src, l); 284 return; 285 } 286 for (; likely(l > 0); l-=64) { 287 *dst++ = *src++; 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 } 296 } 297 298 299 /* 300 * The callback, invoked on each received packet. Same as libpcap 301 */ 302 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 303 304 /* 305 *--- the pcap-like API --- 306 * 307 * nm_open() opens a file descriptor, binds to a port and maps memory. 308 * 309 * ifname (netmap:foo or vale:foo) is the port name 310 * a suffix can indicate the follwing: 311 * ^ bind the host (sw) ring pair 312 * * bind host and NIC ring pairs 313 * -NN bind individual NIC ring pair 314 * {NN bind master side of pipe NN 315 * }NN bind slave side of pipe NN 316 * a suffix starting with / and the following flags, 317 * in any order: 318 * x exclusive access 319 * z zero copy monitor (both tx and rx) 320 * t monitor tx side (copy monitor) 321 * r monitor rx side (copy monitor) 322 * R bind only RX ring(s) 323 * T bind only TX ring(s) 324 * 325 * req provides the initial values of nmreq before parsing ifname. 326 * Remember that the ifname parsing will override the ring 327 * number in nm_ringid, and part of nm_flags; 328 * flags special functions, normally 0 329 * indicates which fields of *arg are significant 330 * arg special functions, normally NULL 331 * if passed a netmap_desc with mem != NULL, 332 * use that memory instead of mmap. 333 */ 334 335 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 336 uint64_t flags, const struct nm_desc *arg); 337 338 /* 339 * nm_open can import some fields from the parent descriptor. 340 * These flags control which ones. 341 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 342 * which set the initial value for these flags. 343 * Note that the 16 low bits of the flags are reserved for data 344 * that may go into the nmreq. 345 */ 346 enum { 347 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 348 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 349 NM_OPEN_ARG1 = 0x100000, 350 NM_OPEN_ARG2 = 0x200000, 351 NM_OPEN_ARG3 = 0x400000, 352 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 353 }; 354 355 356 /* 357 * nm_close() closes and restores the port to its previous state 358 */ 359 360 static int nm_close(struct nm_desc *); 361 362 /* 363 * nm_mmap() do mmap or inherit from parent if the nr_arg2 364 * (memory block) matches. 365 */ 366 367 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 368 369 /* 370 * nm_inject() is the same as pcap_inject() 371 * nm_dispatch() is the same as pcap_dispatch() 372 * nm_nextpkt() is the same as pcap_next() 373 */ 374 375 static int nm_inject(struct nm_desc *, const void *, size_t); 376 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 377 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 378 379 #ifdef _WIN32 380 381 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 382 383 /* 384 * In windows we do not have yet native poll support, so we keep track 385 * of file descriptors associated to netmap ports to emulate poll on 386 * them and fall back on regular poll on other file descriptors. 387 */ 388 struct win_netmap_fd_list { 389 struct win_netmap_fd_list *next; 390 int win_netmap_fd; 391 HANDLE win_netmap_handle; 392 }; 393 394 /* 395 * list head containing all the netmap opened fd and their 396 * windows HANDLE counterparts 397 */ 398 static struct win_netmap_fd_list *win_netmap_fd_list_head; 399 400 static void 401 win_insert_fd_record(int fd) 402 { 403 struct win_netmap_fd_list *curr; 404 405 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 406 if (fd == curr->win_netmap_fd) { 407 return; 408 } 409 } 410 curr = calloc(1, sizeof(*curr)); 411 curr->next = win_netmap_fd_list_head; 412 curr->win_netmap_fd = fd; 413 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 414 win_netmap_fd_list_head = curr; 415 } 416 417 void 418 win_remove_fd_record(int fd) 419 { 420 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 421 struct win_netmap_fd_list *prev = NULL; 422 for (; curr ; prev = curr, curr = curr->next) { 423 if (fd != curr->win_netmap_fd) 424 continue; 425 /* found the entry */ 426 if (prev == NULL) { /* we are freeing the first entry */ 427 win_netmap_fd_list_head = curr->next; 428 } else { 429 prev->next = curr->next; 430 } 431 free(curr); 432 break; 433 } 434 } 435 436 437 HANDLE 438 win_get_netmap_handle(int fd) 439 { 440 struct win_netmap_fd_list *curr; 441 442 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 443 if (fd == curr->win_netmap_fd) { 444 return curr->win_netmap_handle; 445 } 446 } 447 return NULL; 448 } 449 450 /* 451 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 452 */ 453 454 /* 455 * use this function only from netmap_user.h internal functions 456 * same as ioctl, returns 0 on success and -1 on error 457 */ 458 static int 459 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 460 { 461 DWORD bReturn = 0, szIn, szOut; 462 BOOL ioctlReturnStatus; 463 void *inParam = arg, *outParam = arg; 464 465 switch (ctlCode) { 466 case NETMAP_POLL: 467 szIn = sizeof(POLL_REQUEST_DATA); 468 szOut = sizeof(POLL_REQUEST_DATA); 469 break; 470 case NETMAP_MMAP: 471 szIn = 0; 472 szOut = sizeof(void*); 473 inParam = NULL; /* nothing on input */ 474 break; 475 case NIOCTXSYNC: 476 case NIOCRXSYNC: 477 szIn = 0; 478 szOut = 0; 479 break; 480 case NIOCREGIF: 481 szIn = sizeof(struct nmreq); 482 szOut = sizeof(struct nmreq); 483 break; 484 case NIOCCONFIG: 485 D("unsupported NIOCCONFIG!"); 486 return -1; 487 488 default: /* a regular ioctl */ 489 D("invalid ioctl %x on netmap fd", ctlCode); 490 return -1; 491 } 492 493 ioctlReturnStatus = DeviceIoControl(h, 494 ctlCode, inParam, szIn, 495 outParam, szOut, 496 &bReturn, NULL); 497 // XXX note windows returns 0 on error or async call, 1 on success 498 // we could call GetLastError() to figure out what happened 499 return ioctlReturnStatus ? 0 : -1; 500 } 501 502 /* 503 * this function is what must be called from user-space programs 504 * same as ioctl, returns 0 on success and -1 on error 505 */ 506 static int 507 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 508 { 509 HANDLE h = win_get_netmap_handle(fd); 510 511 if (h == NULL) { 512 return ioctl(fd, ctlCode, arg); 513 } else { 514 return win_nm_ioctl_internal(h, ctlCode, arg); 515 } 516 } 517 518 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 519 520 /* 521 * We cannot use the native mmap on windows 522 * The only parameter used is "fd", the other ones are just declared to 523 * make this signature comparable to the FreeBSD/Linux one 524 */ 525 static void * 526 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 527 { 528 HANDLE h = win_get_netmap_handle(fd); 529 530 if (h == NULL) { 531 return mmap(addr, length, prot, flags, fd, offset); 532 } else { 533 MEMORY_ENTRY ret; 534 535 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 536 NULL : ret.pUsermodeVirtualAddress; 537 } 538 } 539 540 #define mmap win32_mmap_emulated 541 542 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 543 544 static int 545 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 546 { 547 HANDLE h; 548 549 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 550 return poll(fds, nfds, timeout); 551 } else { 552 POLL_REQUEST_DATA prd; 553 554 prd.timeout = timeout; 555 prd.events = fds->events; 556 557 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 558 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 559 return -1; 560 } 561 return 1; 562 } 563 } 564 565 #define poll win_nm_poll 566 567 static int 568 win_nm_open(char* pathname, int flags) 569 { 570 571 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 572 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 573 if (fd < 0) { 574 return -1; 575 } 576 577 win_insert_fd_record(fd); 578 return fd; 579 } else { 580 return open(pathname, flags); 581 } 582 } 583 584 #define open win_nm_open 585 586 static int 587 win_nm_close(int fd) 588 { 589 if (fd != -1) { 590 close(fd); 591 if (win_get_netmap_handle(fd) != NULL) { 592 win_remove_fd_record(fd); 593 } 594 } 595 return 0; 596 } 597 598 #define close win_nm_close 599 600 #endif /* _WIN32 */ 601 602 static int 603 nm_is_identifier(const char *s, const char *e) 604 { 605 for (; s != e; s++) { 606 if (!isalnum(*s) && *s != '_') { 607 return 0; 608 } 609 } 610 611 return 1; 612 } 613 614 /* 615 * Try to open, return descriptor if successful, NULL otherwise. 616 * An invalid netmap name will return errno = 0; 617 * You can pass a pointer to a pre-filled nm_desc to add special 618 * parameters. Flags is used as follows 619 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 620 * if the nr_arg2 (memory block) matches. 621 * NM_OPEN_ARG1 use req.nr_arg1 from arg 622 * NM_OPEN_ARG2 use req.nr_arg2 from arg 623 * NM_OPEN_RING_CFG user ring config from arg 624 */ 625 static struct nm_desc * 626 nm_open(const char *ifname, const struct nmreq *req, 627 uint64_t new_flags, const struct nm_desc *arg) 628 { 629 struct nm_desc *d = NULL; 630 const struct nm_desc *parent = arg; 631 u_int namelen; 632 uint32_t nr_ringid = 0, nr_flags, nr_reg; 633 const char *port = NULL; 634 const char *vpname = NULL; 635 #define MAXERRMSG 80 636 char errmsg[MAXERRMSG] = ""; 637 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 638 int is_vale; 639 long num; 640 uint16_t nr_arg2 = 0; 641 642 if (strncmp(ifname, "netmap:", 7) && 643 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 644 errno = 0; /* name not recognised, not an error */ 645 return NULL; 646 } 647 648 is_vale = (ifname[0] == 'v'); 649 if (is_vale) { 650 port = index(ifname, ':'); 651 if (port == NULL) { 652 snprintf(errmsg, MAXERRMSG, 653 "missing ':' in vale name"); 654 goto fail; 655 } 656 657 if (!nm_is_identifier(ifname + 4, port)) { 658 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 659 goto fail; 660 } 661 662 vpname = ++port; 663 } else { 664 ifname += 7; 665 port = ifname; 666 } 667 668 /* scan for a separator */ 669 for (; *port && !index("-*^{}/@", *port); port++) 670 ; 671 672 if (is_vale && !nm_is_identifier(vpname, port)) { 673 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 674 goto fail; 675 } 676 677 namelen = port - ifname; 678 if (namelen >= sizeof(d->req.nr_name)) { 679 snprintf(errmsg, MAXERRMSG, "name too long"); 680 goto fail; 681 } 682 p_state = P_START; 683 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 684 while (*port) { 685 switch (p_state) { 686 case P_START: 687 switch (*port) { 688 case '^': /* only SW ring */ 689 nr_flags = NR_REG_SW; 690 p_state = P_RNGSFXOK; 691 break; 692 case '*': /* NIC and SW */ 693 nr_flags = NR_REG_NIC_SW; 694 p_state = P_RNGSFXOK; 695 break; 696 case '-': /* one NIC ring pair */ 697 nr_flags = NR_REG_ONE_NIC; 698 p_state = P_GETNUM; 699 break; 700 case '{': /* pipe (master endpoint) */ 701 nr_flags = NR_REG_PIPE_MASTER; 702 p_state = P_GETNUM; 703 break; 704 case '}': /* pipe (slave endoint) */ 705 nr_flags = NR_REG_PIPE_SLAVE; 706 p_state = P_GETNUM; 707 break; 708 case '/': /* start of flags */ 709 p_state = P_FLAGS; 710 break; 711 case '@': /* start of memid */ 712 p_state = P_MEMID; 713 break; 714 default: 715 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 716 goto fail; 717 } 718 port++; 719 break; 720 case P_RNGSFXOK: 721 switch (*port) { 722 case '/': 723 p_state = P_FLAGS; 724 break; 725 case '@': 726 p_state = P_MEMID; 727 break; 728 default: 729 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 730 goto fail; 731 } 732 port++; 733 break; 734 case P_GETNUM: 735 num = strtol(port, (char **)&port, 10); 736 if (num < 0 || num >= NETMAP_RING_MASK) { 737 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 738 num, NETMAP_RING_MASK); 739 goto fail; 740 } 741 nr_ringid = num & NETMAP_RING_MASK; 742 p_state = P_RNGSFXOK; 743 break; 744 case P_FLAGS: 745 case P_FLAGSOK: 746 if (*port == '@') { 747 port++; 748 p_state = P_MEMID; 749 break; 750 } 751 switch (*port) { 752 case 'x': 753 nr_flags |= NR_EXCLUSIVE; 754 break; 755 case 'z': 756 nr_flags |= NR_ZCOPY_MON; 757 break; 758 case 't': 759 nr_flags |= NR_MONITOR_TX; 760 break; 761 case 'r': 762 nr_flags |= NR_MONITOR_RX; 763 break; 764 case 'R': 765 nr_flags |= NR_RX_RINGS_ONLY; 766 break; 767 case 'T': 768 nr_flags |= NR_TX_RINGS_ONLY; 769 break; 770 default: 771 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 772 goto fail; 773 } 774 port++; 775 p_state = P_FLAGSOK; 776 break; 777 case P_MEMID: 778 if (nr_arg2 != 0) { 779 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 780 goto fail; 781 } 782 num = strtol(port, (char **)&port, 10); 783 if (num <= 0) { 784 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 785 goto fail; 786 } 787 nr_arg2 = num; 788 p_state = P_RNGSFXOK; 789 break; 790 } 791 } 792 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 793 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 794 goto fail; 795 } 796 ND("flags: %s %s %s %s", 797 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 798 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 799 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 800 (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); 801 d = (struct nm_desc *)calloc(1, sizeof(*d)); 802 if (d == NULL) { 803 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 804 errno = ENOMEM; 805 return NULL; 806 } 807 d->self = d; /* set this early so nm_close() works */ 808 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 809 if (d->fd < 0) { 810 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 811 goto fail; 812 } 813 814 if (req) 815 d->req = *req; 816 d->req.nr_version = NETMAP_API; 817 d->req.nr_ringid &= ~NETMAP_RING_MASK; 818 819 /* these fields are overridden by ifname and flags processing */ 820 d->req.nr_ringid |= nr_ringid; 821 d->req.nr_flags |= nr_flags; 822 if (nr_arg2) 823 d->req.nr_arg2 = nr_arg2; 824 memcpy(d->req.nr_name, ifname, namelen); 825 d->req.nr_name[namelen] = '\0'; 826 /* optionally import info from parent */ 827 if (IS_NETMAP_DESC(parent) && new_flags) { 828 if (new_flags & NM_OPEN_ARG1) 829 D("overriding ARG1 %d", parent->req.nr_arg1); 830 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 831 parent->req.nr_arg1 : 4; 832 if (new_flags & NM_OPEN_ARG2) 833 D("overriding ARG2 %d", parent->req.nr_arg2); 834 d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? 835 parent->req.nr_arg2 : 0; 836 if (new_flags & NM_OPEN_ARG3) 837 D("overriding ARG3 %d", parent->req.nr_arg3); 838 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 839 parent->req.nr_arg3 : 0; 840 if (new_flags & NM_OPEN_RING_CFG) { 841 D("overriding RING_CFG"); 842 d->req.nr_tx_slots = parent->req.nr_tx_slots; 843 d->req.nr_rx_slots = parent->req.nr_rx_slots; 844 d->req.nr_tx_rings = parent->req.nr_tx_rings; 845 d->req.nr_rx_rings = parent->req.nr_rx_rings; 846 } 847 if (new_flags & NM_OPEN_IFNAME) { 848 D("overriding ifname %s ringid 0x%x flags 0x%x", 849 parent->req.nr_name, parent->req.nr_ringid, 850 parent->req.nr_flags); 851 memcpy(d->req.nr_name, parent->req.nr_name, 852 sizeof(d->req.nr_name)); 853 d->req.nr_ringid = parent->req.nr_ringid; 854 d->req.nr_flags = parent->req.nr_flags; 855 } 856 } 857 /* add the *XPOLL flags */ 858 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 859 860 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 861 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 862 goto fail; 863 } 864 865 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 866 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 867 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 868 goto fail; 869 } 870 871 nr_reg = d->req.nr_flags & NR_REG_MASK; 872 873 if (nr_reg == NR_REG_SW) { /* host stack */ 874 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 875 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 876 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 877 d->first_tx_ring = 0; 878 d->first_rx_ring = 0; 879 d->last_tx_ring = d->req.nr_tx_rings - 1; 880 d->last_rx_ring = d->req.nr_rx_rings - 1; 881 } else if (nr_reg == NR_REG_NIC_SW) { 882 d->first_tx_ring = 0; 883 d->first_rx_ring = 0; 884 d->last_tx_ring = d->req.nr_tx_rings; 885 d->last_rx_ring = d->req.nr_rx_rings; 886 } else if (nr_reg == NR_REG_ONE_NIC) { 887 /* XXX check validity */ 888 d->first_tx_ring = d->last_tx_ring = 889 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 890 } else { /* pipes */ 891 d->first_tx_ring = d->last_tx_ring = 0; 892 d->first_rx_ring = d->last_rx_ring = 0; 893 } 894 895 #ifdef DEBUG_NETMAP_USER 896 { /* debugging code */ 897 int i; 898 899 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 900 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 901 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 902 for (i = 0; i <= d->req.nr_tx_rings; i++) { 903 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 904 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 905 } 906 for (i = 0; i <= d->req.nr_rx_rings; i++) { 907 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 908 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 909 } 910 } 911 #endif /* debugging */ 912 913 d->cur_tx_ring = d->first_tx_ring; 914 d->cur_rx_ring = d->first_rx_ring; 915 return d; 916 917 fail: 918 nm_close(d); 919 if (errmsg[0]) 920 D("%s %s", errmsg, ifname); 921 if (errno == 0) 922 errno = EINVAL; 923 return NULL; 924 } 925 926 927 static int 928 nm_close(struct nm_desc *d) 929 { 930 /* 931 * ugly trick to avoid unused warnings 932 */ 933 static void *__xxzt[] __attribute__ ((unused)) = 934 { (void *)nm_open, (void *)nm_inject, 935 (void *)nm_dispatch, (void *)nm_nextpkt } ; 936 937 if (d == NULL || d->self != d) 938 return EINVAL; 939 if (d->done_mmap && d->mem) 940 munmap(d->mem, d->memsize); 941 if (d->fd != -1) { 942 close(d->fd); 943 } 944 945 bzero(d, sizeof(*d)); 946 free(d); 947 return 0; 948 } 949 950 951 static int 952 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 953 { 954 //XXX TODO: check if mmap is already done 955 956 if (IS_NETMAP_DESC(parent) && parent->mem && 957 parent->req.nr_arg2 == d->req.nr_arg2) { 958 /* do not mmap, inherit from parent */ 959 D("do not mmap, inherit from parent"); 960 d->memsize = parent->memsize; 961 d->mem = parent->mem; 962 } else { 963 /* XXX TODO: check if memsize is too large (or there is overflow) */ 964 d->memsize = d->req.nr_memsize; 965 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 966 d->fd, 0); 967 if (d->mem == MAP_FAILED) { 968 goto fail; 969 } 970 d->done_mmap = 1; 971 } 972 { 973 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 974 struct netmap_ring *r = NETMAP_RXRING(nifp, ); 975 976 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 977 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 978 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 979 *(void **)(uintptr_t)&d->buf_end = 980 (char *)d->mem + d->memsize; 981 } 982 983 return 0; 984 985 fail: 986 return EINVAL; 987 } 988 989 /* 990 * Same prototype as pcap_inject(), only need to cast. 991 */ 992 static int 993 nm_inject(struct nm_desc *d, const void *buf, size_t size) 994 { 995 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; 996 997 for (c = 0; c < n ; c++) { 998 /* compute current ring to use */ 999 struct netmap_ring *ring; 1000 uint32_t i, idx; 1001 uint32_t ri = d->cur_tx_ring + c; 1002 1003 if (ri > d->last_tx_ring) 1004 ri = d->first_tx_ring; 1005 ring = NETMAP_TXRING(d->nifp, ri); 1006 if (nm_ring_empty(ring)) { 1007 continue; 1008 } 1009 i = ring->cur; 1010 idx = ring->slot[i].buf_idx; 1011 ring->slot[i].len = size; 1012 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 1013 d->cur_tx_ring = ri; 1014 ring->head = ring->cur = nm_ring_next(ring, i); 1015 return size; 1016 } 1017 return 0; /* fail */ 1018 } 1019 1020 1021 /* 1022 * Same prototype as pcap_dispatch(), only need to cast. 1023 */ 1024 static int 1025 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1026 { 1027 int n = d->last_rx_ring - d->first_rx_ring + 1; 1028 int c, got = 0, ri = d->cur_rx_ring; 1029 d->hdr.buf = NULL; 1030 d->hdr.flags = NM_MORE_PKTS; 1031 d->hdr.d = d; 1032 1033 if (cnt == 0) 1034 cnt = -1; 1035 /* cnt == -1 means infinite, but rings have a finite amount 1036 * of buffers and the int is large enough that we never wrap, 1037 * so we can omit checking for -1 1038 */ 1039 for (c=0; c < n && cnt != got; c++) { 1040 /* compute current ring to use */ 1041 struct netmap_ring *ring; 1042 1043 ri = d->cur_rx_ring + c; 1044 if (ri > d->last_rx_ring) 1045 ri = d->first_rx_ring; 1046 ring = NETMAP_RXRING(d->nifp, ri); 1047 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1048 u_int idx, i; 1049 if (d->hdr.buf) { /* from previous round */ 1050 cb(arg, &d->hdr, d->hdr.buf); 1051 } 1052 i = ring->cur; 1053 idx = ring->slot[i].buf_idx; 1054 d->hdr.slot = &ring->slot[i]; 1055 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1056 // __builtin_prefetch(buf); 1057 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1058 d->hdr.ts = ring->ts; 1059 ring->head = ring->cur = nm_ring_next(ring, i); 1060 } 1061 } 1062 if (d->hdr.buf) { /* from previous round */ 1063 d->hdr.flags = 0; 1064 cb(arg, &d->hdr, d->hdr.buf); 1065 } 1066 d->cur_rx_ring = ri; 1067 return got; 1068 } 1069 1070 static u_char * 1071 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1072 { 1073 int ri = d->cur_rx_ring; 1074 1075 do { 1076 /* compute current ring to use */ 1077 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1078 if (!nm_ring_empty(ring)) { 1079 u_int i = ring->cur; 1080 u_int idx = ring->slot[i].buf_idx; 1081 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1082 1083 // __builtin_prefetch(buf); 1084 hdr->ts = ring->ts; 1085 hdr->len = hdr->caplen = ring->slot[i].len; 1086 ring->cur = nm_ring_next(ring, i); 1087 /* we could postpone advancing head if we want 1088 * to hold the buffer. This can be supported in 1089 * the future. 1090 */ 1091 ring->head = ring->cur; 1092 d->cur_rx_ring = ri; 1093 return buf; 1094 } 1095 ri++; 1096 if (ri > d->last_rx_ring) 1097 ri = d->first_rx_ring; 1098 } while (ri != d->cur_rx_ring); 1099 return NULL; /* nothing found */ 1100 } 1101 1102 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1103 1104 #endif /* NETMAP_WITH_LIBS */ 1105 1106 #endif /* _NET_NETMAP_USER_H_ */ 1107