1 /* 2 * Copyright (C) 2011-2016 Universita` di Pisa 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * $FreeBSD$ 30 * 31 * Functions and macros to manipulate netmap structures and packets 32 * in userspace. See netmap(4) for more information. 33 * 34 * The address of the struct netmap_if, say nifp, is computed from the 35 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 36 * ioctl(fd, NIOCREG, &req); 37 * mem = mmap(0, ... ); 38 * nifp = NETMAP_IF(mem, req.nr_nifp); 39 * (so simple, we could just do it manually) 40 * 41 * From there: 42 * struct netmap_ring *NETMAP_TXRING(nifp, index) 43 * struct netmap_ring *NETMAP_RXRING(nifp, index) 44 * we can access ring->cur, ring->head, ring->tail, etc. 45 * 46 * ring->slot[i] gives us the i-th slot (we can access 47 * directly len, flags, buf_idx) 48 * 49 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 50 * the buffer numbered x 51 * 52 * All ring indexes (head, cur, tail) should always move forward. 53 * To compute the next index in a circular ring you can use 54 * i = nm_ring_next(ring, i); 55 * 56 * To ease porting apps from pcap to netmap we supply a few fuctions 57 * that can be called to open, close, read and write on netmap in a way 58 * similar to libpcap. Note that the read/write function depend on 59 * an ioctl()/select()/poll() being issued to refill rings or push 60 * packets out. 61 * 62 * In order to use these, include #define NETMAP_WITH_LIBS 63 * in the source file that invokes these functions. 64 */ 65 66 #ifndef _NET_NETMAP_USER_H_ 67 #define _NET_NETMAP_USER_H_ 68 69 #define NETMAP_DEVICE_NAME "/dev/netmap" 70 71 #ifdef __CYGWIN__ 72 /* 73 * we can compile userspace apps with either cygwin or msvc, 74 * and we use _WIN32 to identify windows specific code 75 */ 76 #ifndef _WIN32 77 #define _WIN32 78 #endif /* _WIN32 */ 79 80 #endif /* __CYGWIN__ */ 81 82 #ifdef _WIN32 83 #undef NETMAP_DEVICE_NAME 84 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 85 #include <windows.h> 86 #include <WinDef.h> 87 #include <sys/cygwin.h> 88 #endif /* _WIN32 */ 89 90 #include <stdint.h> 91 #include <sys/socket.h> /* apple needs sockaddr */ 92 #include <net/if.h> /* IFNAMSIZ */ 93 #include <ctype.h> 94 95 #ifndef likely 96 #define likely(x) __builtin_expect(!!(x), 1) 97 #define unlikely(x) __builtin_expect(!!(x), 0) 98 #endif /* likely and unlikely */ 99 100 #include <net/netmap.h> 101 102 /* helper macro */ 103 #define _NETMAP_OFFSET(type, ptr, offset) \ 104 ((type)(void *)((char *)(ptr) + (offset))) 105 106 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 107 108 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 109 nifp, (nifp)->ring_ofs[index] ) 110 111 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 113 114 #define NETMAP_BUF(ring, index) \ 115 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 116 117 #define NETMAP_BUF_IDX(ring, buf) \ 118 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 119 (ring)->nr_buf_size ) 120 121 122 static inline uint32_t 123 nm_ring_next(struct netmap_ring *r, uint32_t i) 124 { 125 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 126 } 127 128 129 /* 130 * Return 1 if we have pending transmissions in the tx ring. 131 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 132 */ 133 static inline int 134 nm_tx_pending(struct netmap_ring *r) 135 { 136 return nm_ring_next(r, r->tail) != r->head; 137 } 138 139 140 static inline uint32_t 141 nm_ring_space(struct netmap_ring *ring) 142 { 143 int ret = ring->tail - ring->cur; 144 if (ret < 0) 145 ret += ring->num_slots; 146 return ret; 147 } 148 149 150 #ifdef NETMAP_WITH_LIBS 151 /* 152 * Support for simple I/O libraries. 153 * Include other system headers required for compiling this. 154 */ 155 156 #ifndef HAVE_NETMAP_WITH_LIBS 157 #define HAVE_NETMAP_WITH_LIBS 158 159 #include <stdio.h> 160 #include <sys/time.h> 161 #include <sys/mman.h> 162 #include <string.h> /* memset */ 163 #include <sys/ioctl.h> 164 #include <sys/errno.h> /* EINVAL */ 165 #include <fcntl.h> /* O_RDWR */ 166 #include <unistd.h> /* close() */ 167 #include <signal.h> 168 #include <stdlib.h> 169 170 #ifndef ND /* debug macros */ 171 /* debug support */ 172 #define ND(_fmt, ...) do {} while(0) 173 #define D(_fmt, ...) \ 174 do { \ 175 struct timeval _t0; \ 176 gettimeofday(&_t0, NULL); \ 177 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 178 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 179 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 180 } while (0) 181 182 /* Rate limited version of "D", lps indicates how many per second */ 183 #define RD(lps, format, ...) \ 184 do { \ 185 static int __t0, __cnt; \ 186 struct timeval __xxts; \ 187 gettimeofday(&__xxts, NULL); \ 188 if (__t0 != __xxts.tv_sec) { \ 189 __t0 = __xxts.tv_sec; \ 190 __cnt = 0; \ 191 } \ 192 if (__cnt++ < lps) { \ 193 D(format, ##__VA_ARGS__); \ 194 } \ 195 } while (0) 196 #endif 197 198 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 199 struct timeval ts; 200 uint32_t caplen; 201 uint32_t len; 202 203 uint64_t flags; /* NM_MORE_PKTS etc */ 204 #define NM_MORE_PKTS 1 205 struct nm_desc *d; 206 struct netmap_slot *slot; 207 uint8_t *buf; 208 }; 209 210 struct nm_stat { /* same as pcap_stat */ 211 u_int ps_recv; 212 u_int ps_drop; 213 u_int ps_ifdrop; 214 #ifdef WIN32 /* XXX or _WIN32 ? */ 215 u_int bs_capt; 216 #endif /* WIN32 */ 217 }; 218 219 #define NM_ERRBUF_SIZE 512 220 221 struct nm_desc { 222 struct nm_desc *self; /* point to self if netmap. */ 223 int fd; 224 void *mem; 225 uint32_t memsize; 226 int done_mmap; /* set if mem is the result of mmap */ 227 struct netmap_if * const nifp; 228 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 229 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 230 struct nmreq req; /* also contains the nr_name = ifname */ 231 struct nm_pkthdr hdr; 232 233 /* 234 * The memory contains netmap_if, rings and then buffers. 235 * Given a pointer (e.g. to nm_inject) we can compare with 236 * mem/buf_start/buf_end to tell if it is a buffer or 237 * some other descriptor in our region. 238 * We also store a pointer to some ring as it helps in the 239 * translation from buffer indexes to addresses. 240 */ 241 struct netmap_ring * const some_ring; 242 void * const buf_start; 243 void * const buf_end; 244 /* parameters from pcap_open_live */ 245 int snaplen; 246 int promisc; 247 int to_ms; 248 char *errbuf; 249 250 /* save flags so we can restore them on close */ 251 uint32_t if_flags; 252 uint32_t if_reqcap; 253 uint32_t if_curcap; 254 255 struct nm_stat st; 256 char msg[NM_ERRBUF_SIZE]; 257 }; 258 259 /* 260 * when the descriptor is open correctly, d->self == d 261 * Eventually we should also use some magic number. 262 */ 263 #define P2NMD(p) ((struct nm_desc *)(p)) 264 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 265 #define NETMAP_FD(d) (P2NMD(d)->fd) 266 267 268 /* 269 * this is a slightly optimized copy routine which rounds 270 * to multiple of 64 bytes and is often faster than dealing 271 * with other odd sizes. We assume there is enough room 272 * in the source and destination buffers. 273 * 274 * XXX only for multiples of 64 bytes, non overlapped. 275 */ 276 static inline void 277 nm_pkt_copy(const void *_src, void *_dst, int l) 278 { 279 const uint64_t *src = (const uint64_t *)_src; 280 uint64_t *dst = (uint64_t *)_dst; 281 282 if (unlikely(l >= 1024)) { 283 memcpy(dst, src, l); 284 return; 285 } 286 for (; likely(l > 0); l-=64) { 287 *dst++ = *src++; 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 } 296 } 297 298 299 /* 300 * The callback, invoked on each received packet. Same as libpcap 301 */ 302 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 303 304 /* 305 *--- the pcap-like API --- 306 * 307 * nm_open() opens a file descriptor, binds to a port and maps memory. 308 * 309 * ifname (netmap:foo or vale:foo) is the port name 310 * a suffix can indicate the follwing: 311 * ^ bind the host (sw) ring pair 312 * * bind host and NIC ring pairs (transparent) 313 * -NN bind individual NIC ring pair 314 * {NN bind master side of pipe NN 315 * }NN bind slave side of pipe NN 316 * a suffix starting with / and the following flags, 317 * in any order: 318 * x exclusive access 319 * z zero copy monitor 320 * t monitor tx side 321 * r monitor rx side 322 * R bind only RX ring(s) 323 * T bind only TX ring(s) 324 * 325 * req provides the initial values of nmreq before parsing ifname. 326 * Remember that the ifname parsing will override the ring 327 * number in nm_ringid, and part of nm_flags; 328 * flags special functions, normally 0 329 * indicates which fields of *arg are significant 330 * arg special functions, normally NULL 331 * if passed a netmap_desc with mem != NULL, 332 * use that memory instead of mmap. 333 */ 334 335 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 336 uint64_t flags, const struct nm_desc *arg); 337 338 /* 339 * nm_open can import some fields from the parent descriptor. 340 * These flags control which ones. 341 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 342 * which set the initial value for these flags. 343 * Note that the 16 low bits of the flags are reserved for data 344 * that may go into the nmreq. 345 */ 346 enum { 347 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 348 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 349 NM_OPEN_ARG1 = 0x100000, 350 NM_OPEN_ARG2 = 0x200000, 351 NM_OPEN_ARG3 = 0x400000, 352 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 353 }; 354 355 356 /* 357 * nm_close() closes and restores the port to its previous state 358 */ 359 360 static int nm_close(struct nm_desc *); 361 362 /* 363 * nm_mmap() do mmap or inherit from parent if the nr_arg2 364 * (memory block) matches. 365 */ 366 367 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 368 369 /* 370 * nm_inject() is the same as pcap_inject() 371 * nm_dispatch() is the same as pcap_dispatch() 372 * nm_nextpkt() is the same as pcap_next() 373 */ 374 375 static int nm_inject(struct nm_desc *, const void *, size_t); 376 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 377 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 378 379 #ifdef _WIN32 380 381 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 382 383 /* 384 * In windows we do not have yet native poll support, so we keep track 385 * of file descriptors associated to netmap ports to emulate poll on 386 * them and fall back on regular poll on other file descriptors. 387 */ 388 struct win_netmap_fd_list { 389 struct win_netmap_fd_list *next; 390 int win_netmap_fd; 391 HANDLE win_netmap_handle; 392 }; 393 394 /* 395 * list head containing all the netmap opened fd and their 396 * windows HANDLE counterparts 397 */ 398 static struct win_netmap_fd_list *win_netmap_fd_list_head; 399 400 static void 401 win_insert_fd_record(int fd) 402 { 403 struct win_netmap_fd_list *curr; 404 405 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 406 if (fd == curr->win_netmap_fd) { 407 return; 408 } 409 } 410 curr = calloc(1, sizeof(*curr)); 411 curr->next = win_netmap_fd_list_head; 412 curr->win_netmap_fd = fd; 413 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 414 win_netmap_fd_list_head = curr; 415 } 416 417 void 418 win_remove_fd_record(int fd) 419 { 420 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 421 struct win_netmap_fd_list *prev = NULL; 422 for (; curr ; prev = curr, curr = curr->next) { 423 if (fd != curr->win_netmap_fd) 424 continue; 425 /* found the entry */ 426 if (prev == NULL) { /* we are freeing the first entry */ 427 win_netmap_fd_list_head = curr->next; 428 } else { 429 prev->next = curr->next; 430 } 431 free(curr); 432 break; 433 } 434 } 435 436 437 HANDLE 438 win_get_netmap_handle(int fd) 439 { 440 struct win_netmap_fd_list *curr; 441 442 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 443 if (fd == curr->win_netmap_fd) { 444 return curr->win_netmap_handle; 445 } 446 } 447 return NULL; 448 } 449 450 /* 451 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 452 */ 453 454 /* 455 * use this function only from netmap_user.h internal functions 456 * same as ioctl, returns 0 on success and -1 on error 457 */ 458 static int 459 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 460 { 461 DWORD bReturn = 0, szIn, szOut; 462 BOOL ioctlReturnStatus; 463 void *inParam = arg, *outParam = arg; 464 465 switch (ctlCode) { 466 case NETMAP_POLL: 467 szIn = sizeof(POLL_REQUEST_DATA); 468 szOut = sizeof(POLL_REQUEST_DATA); 469 break; 470 case NETMAP_MMAP: 471 szIn = 0; 472 szOut = sizeof(void*); 473 inParam = NULL; /* nothing on input */ 474 break; 475 case NIOCTXSYNC: 476 case NIOCRXSYNC: 477 szIn = 0; 478 szOut = 0; 479 break; 480 case NIOCREGIF: 481 szIn = sizeof(struct nmreq); 482 szOut = sizeof(struct nmreq); 483 break; 484 case NIOCCONFIG: 485 D("unsupported NIOCCONFIG!"); 486 return -1; 487 488 default: /* a regular ioctl */ 489 D("invalid ioctl %x on netmap fd", ctlCode); 490 return -1; 491 } 492 493 ioctlReturnStatus = DeviceIoControl(h, 494 ctlCode, inParam, szIn, 495 outParam, szOut, 496 &bReturn, NULL); 497 // XXX note windows returns 0 on error or async call, 1 on success 498 // we could call GetLastError() to figure out what happened 499 return ioctlReturnStatus ? 0 : -1; 500 } 501 502 /* 503 * this function is what must be called from user-space programs 504 * same as ioctl, returns 0 on success and -1 on error 505 */ 506 static int 507 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 508 { 509 HANDLE h = win_get_netmap_handle(fd); 510 511 if (h == NULL) { 512 return ioctl(fd, ctlCode, arg); 513 } else { 514 return win_nm_ioctl_internal(h, ctlCode, arg); 515 } 516 } 517 518 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 519 520 /* 521 * We cannot use the native mmap on windows 522 * The only parameter used is "fd", the other ones are just declared to 523 * make this signature comparable to the FreeBSD/Linux one 524 */ 525 static void * 526 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 527 { 528 HANDLE h = win_get_netmap_handle(fd); 529 530 if (h == NULL) { 531 return mmap(addr, length, prot, flags, fd, offset); 532 } else { 533 MEMORY_ENTRY ret; 534 535 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 536 NULL : ret.pUsermodeVirtualAddress; 537 } 538 } 539 540 #define mmap win32_mmap_emulated 541 542 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 543 544 static int 545 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 546 { 547 HANDLE h; 548 549 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 550 return poll(fds, nfds, timeout); 551 } else { 552 POLL_REQUEST_DATA prd; 553 554 prd.timeout = timeout; 555 prd.events = fds->events; 556 557 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 558 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 559 return -1; 560 } 561 return 1; 562 } 563 } 564 565 #define poll win_nm_poll 566 567 static int 568 win_nm_open(char* pathname, int flags) 569 { 570 571 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 572 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 573 if (fd < 0) { 574 return -1; 575 } 576 577 win_insert_fd_record(fd); 578 return fd; 579 } else { 580 return open(pathname, flags); 581 } 582 } 583 584 #define open win_nm_open 585 586 static int 587 win_nm_close(int fd) 588 { 589 if (fd != -1) { 590 close(fd); 591 if (win_get_netmap_handle(fd) != NULL) { 592 win_remove_fd_record(fd); 593 } 594 } 595 return 0; 596 } 597 598 #define close win_nm_close 599 600 #endif /* _WIN32 */ 601 602 static int 603 nm_is_identifier(const char *s, const char *e) 604 { 605 for (; s != e; s++) { 606 if (!isalnum(*s) && *s != '_') { 607 return 0; 608 } 609 } 610 611 return 1; 612 } 613 614 /* 615 * Try to open, return descriptor if successful, NULL otherwise. 616 * An invalid netmap name will return errno = 0; 617 * You can pass a pointer to a pre-filled nm_desc to add special 618 * parameters. Flags is used as follows 619 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 620 * if the nr_arg2 (memory block) matches. 621 * NM_OPEN_ARG1 use req.nr_arg1 from arg 622 * NM_OPEN_ARG2 use req.nr_arg2 from arg 623 * NM_OPEN_RING_CFG user ring config from arg 624 */ 625 static struct nm_desc * 626 nm_open(const char *ifname, const struct nmreq *req, 627 uint64_t new_flags, const struct nm_desc *arg) 628 { 629 struct nm_desc *d = NULL; 630 const struct nm_desc *parent = arg; 631 u_int namelen; 632 uint32_t nr_ringid = 0, nr_flags, nr_reg; 633 const char *port = NULL; 634 const char *vpname = NULL; 635 #define MAXERRMSG 80 636 char errmsg[MAXERRMSG] = ""; 637 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state; 638 int is_vale; 639 long num; 640 641 if (strncmp(ifname, "netmap:", 7) && 642 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 643 errno = 0; /* name not recognised, not an error */ 644 return NULL; 645 } 646 647 is_vale = (ifname[0] == 'v'); 648 if (is_vale) { 649 port = index(ifname, ':'); 650 if (port == NULL) { 651 snprintf(errmsg, MAXERRMSG, 652 "missing ':' in vale name"); 653 goto fail; 654 } 655 656 if (!nm_is_identifier(ifname + 4, port)) { 657 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 658 goto fail; 659 } 660 661 vpname = ++port; 662 } else { 663 ifname += 7; 664 port = ifname; 665 } 666 667 /* scan for a separator */ 668 for (; *port && !index("-*^{}/", *port); port++) 669 ; 670 671 if (is_vale && !nm_is_identifier(vpname, port)) { 672 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 673 goto fail; 674 } 675 676 namelen = port - ifname; 677 if (namelen >= sizeof(d->req.nr_name)) { 678 snprintf(errmsg, MAXERRMSG, "name too long"); 679 goto fail; 680 } 681 p_state = P_START; 682 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 683 while (*port) { 684 switch (p_state) { 685 case P_START: 686 switch (*port) { 687 case '^': /* only SW ring */ 688 nr_flags = NR_REG_SW; 689 p_state = P_RNGSFXOK; 690 break; 691 case '*': /* NIC and SW */ 692 nr_flags = NR_REG_NIC_SW; 693 p_state = P_RNGSFXOK; 694 break; 695 case '-': /* one NIC ring pair */ 696 nr_flags = NR_REG_ONE_NIC; 697 p_state = P_GETNUM; 698 break; 699 case '{': /* pipe (master endpoint) */ 700 nr_flags = NR_REG_PIPE_MASTER; 701 p_state = P_GETNUM; 702 break; 703 case '}': /* pipe (slave endoint) */ 704 nr_flags = NR_REG_PIPE_SLAVE; 705 p_state = P_GETNUM; 706 break; 707 case '/': /* start of flags */ 708 p_state = P_FLAGS; 709 break; 710 default: 711 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 712 goto fail; 713 } 714 port++; 715 break; 716 case P_RNGSFXOK: 717 switch (*port) { 718 case '/': 719 p_state = P_FLAGS; 720 break; 721 default: 722 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 723 goto fail; 724 } 725 port++; 726 break; 727 case P_GETNUM: 728 num = strtol(port, (char **)&port, 10); 729 if (num < 0 || num >= NETMAP_RING_MASK) { 730 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 731 num, NETMAP_RING_MASK); 732 goto fail; 733 } 734 nr_ringid = num & NETMAP_RING_MASK; 735 p_state = P_RNGSFXOK; 736 break; 737 case P_FLAGS: 738 case P_FLAGSOK: 739 switch (*port) { 740 case 'x': 741 nr_flags |= NR_EXCLUSIVE; 742 break; 743 case 'z': 744 nr_flags |= NR_ZCOPY_MON; 745 break; 746 case 't': 747 nr_flags |= NR_MONITOR_TX; 748 break; 749 case 'r': 750 nr_flags |= NR_MONITOR_RX; 751 break; 752 case 'R': 753 nr_flags |= NR_RX_RINGS_ONLY; 754 break; 755 case 'T': 756 nr_flags |= NR_TX_RINGS_ONLY; 757 break; 758 default: 759 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 760 goto fail; 761 } 762 port++; 763 p_state = P_FLAGSOK; 764 break; 765 } 766 } 767 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 768 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 769 goto fail; 770 } 771 if ((nr_flags & NR_ZCOPY_MON) && 772 !(nr_flags & (NR_MONITOR_TX|NR_MONITOR_RX))) { 773 snprintf(errmsg, MAXERRMSG, "'z' used but neither 'r', nor 't' found"); 774 goto fail; 775 } 776 ND("flags: %s %s %s %s", 777 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 778 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 779 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 780 (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); 781 d = (struct nm_desc *)calloc(1, sizeof(*d)); 782 if (d == NULL) { 783 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 784 errno = ENOMEM; 785 return NULL; 786 } 787 d->self = d; /* set this early so nm_close() works */ 788 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 789 if (d->fd < 0) { 790 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 791 goto fail; 792 } 793 794 if (req) 795 d->req = *req; 796 d->req.nr_version = NETMAP_API; 797 d->req.nr_ringid &= ~NETMAP_RING_MASK; 798 799 /* these fields are overridden by ifname and flags processing */ 800 d->req.nr_ringid |= nr_ringid; 801 d->req.nr_flags |= nr_flags; 802 memcpy(d->req.nr_name, ifname, namelen); 803 d->req.nr_name[namelen] = '\0'; 804 /* optionally import info from parent */ 805 if (IS_NETMAP_DESC(parent) && new_flags) { 806 if (new_flags & NM_OPEN_ARG1) 807 D("overriding ARG1 %d", parent->req.nr_arg1); 808 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 809 parent->req.nr_arg1 : 4; 810 if (new_flags & NM_OPEN_ARG2) 811 D("overriding ARG2 %d", parent->req.nr_arg2); 812 d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? 813 parent->req.nr_arg2 : 0; 814 if (new_flags & NM_OPEN_ARG3) 815 D("overriding ARG3 %d", parent->req.nr_arg3); 816 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 817 parent->req.nr_arg3 : 0; 818 if (new_flags & NM_OPEN_RING_CFG) { 819 D("overriding RING_CFG"); 820 d->req.nr_tx_slots = parent->req.nr_tx_slots; 821 d->req.nr_rx_slots = parent->req.nr_rx_slots; 822 d->req.nr_tx_rings = parent->req.nr_tx_rings; 823 d->req.nr_rx_rings = parent->req.nr_rx_rings; 824 } 825 if (new_flags & NM_OPEN_IFNAME) { 826 D("overriding ifname %s ringid 0x%x flags 0x%x", 827 parent->req.nr_name, parent->req.nr_ringid, 828 parent->req.nr_flags); 829 memcpy(d->req.nr_name, parent->req.nr_name, 830 sizeof(d->req.nr_name)); 831 d->req.nr_ringid = parent->req.nr_ringid; 832 d->req.nr_flags = parent->req.nr_flags; 833 } 834 } 835 /* add the *XPOLL flags */ 836 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 837 838 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 839 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 840 goto fail; 841 } 842 843 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 844 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 845 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 846 goto fail; 847 } 848 849 nr_reg = d->req.nr_flags & NR_REG_MASK; 850 851 if (nr_reg == NR_REG_SW) { /* host stack */ 852 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 853 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 854 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 855 d->first_tx_ring = 0; 856 d->first_rx_ring = 0; 857 d->last_tx_ring = d->req.nr_tx_rings - 1; 858 d->last_rx_ring = d->req.nr_rx_rings - 1; 859 } else if (nr_reg == NR_REG_NIC_SW) { 860 d->first_tx_ring = 0; 861 d->first_rx_ring = 0; 862 d->last_tx_ring = d->req.nr_tx_rings; 863 d->last_rx_ring = d->req.nr_rx_rings; 864 } else if (nr_reg == NR_REG_ONE_NIC) { 865 /* XXX check validity */ 866 d->first_tx_ring = d->last_tx_ring = 867 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 868 } else { /* pipes */ 869 d->first_tx_ring = d->last_tx_ring = 0; 870 d->first_rx_ring = d->last_rx_ring = 0; 871 } 872 873 #ifdef DEBUG_NETMAP_USER 874 { /* debugging code */ 875 int i; 876 877 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 878 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 879 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 880 for (i = 0; i <= d->req.nr_tx_rings; i++) { 881 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 882 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 883 } 884 for (i = 0; i <= d->req.nr_rx_rings; i++) { 885 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 886 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 887 } 888 } 889 #endif /* debugging */ 890 891 d->cur_tx_ring = d->first_tx_ring; 892 d->cur_rx_ring = d->first_rx_ring; 893 return d; 894 895 fail: 896 nm_close(d); 897 if (errmsg[0]) 898 D("%s %s", errmsg, ifname); 899 if (errno == 0) 900 errno = EINVAL; 901 return NULL; 902 } 903 904 905 static int 906 nm_close(struct nm_desc *d) 907 { 908 /* 909 * ugly trick to avoid unused warnings 910 */ 911 static void *__xxzt[] __attribute__ ((unused)) = 912 { (void *)nm_open, (void *)nm_inject, 913 (void *)nm_dispatch, (void *)nm_nextpkt } ; 914 915 if (d == NULL || d->self != d) 916 return EINVAL; 917 if (d->done_mmap && d->mem) 918 munmap(d->mem, d->memsize); 919 if (d->fd != -1) { 920 close(d->fd); 921 } 922 923 bzero(d, sizeof(*d)); 924 free(d); 925 return 0; 926 } 927 928 929 static int 930 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 931 { 932 //XXX TODO: check if mmap is already done 933 934 if (IS_NETMAP_DESC(parent) && parent->mem && 935 parent->req.nr_arg2 == d->req.nr_arg2) { 936 /* do not mmap, inherit from parent */ 937 D("do not mmap, inherit from parent"); 938 d->memsize = parent->memsize; 939 d->mem = parent->mem; 940 } else { 941 /* XXX TODO: check if memsize is too large (or there is overflow) */ 942 d->memsize = d->req.nr_memsize; 943 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 944 d->fd, 0); 945 if (d->mem == MAP_FAILED) { 946 goto fail; 947 } 948 d->done_mmap = 1; 949 } 950 { 951 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 952 struct netmap_ring *r = NETMAP_RXRING(nifp, ); 953 954 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 955 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 956 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 957 *(void **)(uintptr_t)&d->buf_end = 958 (char *)d->mem + d->memsize; 959 } 960 961 return 0; 962 963 fail: 964 return EINVAL; 965 } 966 967 /* 968 * Same prototype as pcap_inject(), only need to cast. 969 */ 970 static int 971 nm_inject(struct nm_desc *d, const void *buf, size_t size) 972 { 973 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; 974 975 for (c = 0; c < n ; c++) { 976 /* compute current ring to use */ 977 struct netmap_ring *ring; 978 uint32_t i, idx; 979 uint32_t ri = d->cur_tx_ring + c; 980 981 if (ri > d->last_tx_ring) 982 ri = d->first_tx_ring; 983 ring = NETMAP_TXRING(d->nifp, ri); 984 if (nm_ring_empty(ring)) { 985 continue; 986 } 987 i = ring->cur; 988 idx = ring->slot[i].buf_idx; 989 ring->slot[i].len = size; 990 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 991 d->cur_tx_ring = ri; 992 ring->head = ring->cur = nm_ring_next(ring, i); 993 return size; 994 } 995 return 0; /* fail */ 996 } 997 998 999 /* 1000 * Same prototype as pcap_dispatch(), only need to cast. 1001 */ 1002 static int 1003 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1004 { 1005 int n = d->last_rx_ring - d->first_rx_ring + 1; 1006 int c, got = 0, ri = d->cur_rx_ring; 1007 d->hdr.buf = NULL; 1008 d->hdr.flags = NM_MORE_PKTS; 1009 d->hdr.d = d; 1010 1011 if (cnt == 0) 1012 cnt = -1; 1013 /* cnt == -1 means infinite, but rings have a finite amount 1014 * of buffers and the int is large enough that we never wrap, 1015 * so we can omit checking for -1 1016 */ 1017 for (c=0; c < n && cnt != got; c++) { 1018 /* compute current ring to use */ 1019 struct netmap_ring *ring; 1020 1021 ri = d->cur_rx_ring + c; 1022 if (ri > d->last_rx_ring) 1023 ri = d->first_rx_ring; 1024 ring = NETMAP_RXRING(d->nifp, ri); 1025 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1026 u_int idx, i; 1027 if (d->hdr.buf) { /* from previous round */ 1028 cb(arg, &d->hdr, d->hdr.buf); 1029 } 1030 i = ring->cur; 1031 idx = ring->slot[i].buf_idx; 1032 d->hdr.slot = &ring->slot[i]; 1033 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1034 // __builtin_prefetch(buf); 1035 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1036 d->hdr.ts = ring->ts; 1037 ring->head = ring->cur = nm_ring_next(ring, i); 1038 } 1039 } 1040 if (d->hdr.buf) { /* from previous round */ 1041 d->hdr.flags = 0; 1042 cb(arg, &d->hdr, d->hdr.buf); 1043 } 1044 d->cur_rx_ring = ri; 1045 return got; 1046 } 1047 1048 static u_char * 1049 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1050 { 1051 int ri = d->cur_rx_ring; 1052 1053 do { 1054 /* compute current ring to use */ 1055 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1056 if (!nm_ring_empty(ring)) { 1057 u_int i = ring->cur; 1058 u_int idx = ring->slot[i].buf_idx; 1059 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1060 1061 // __builtin_prefetch(buf); 1062 hdr->ts = ring->ts; 1063 hdr->len = hdr->caplen = ring->slot[i].len; 1064 ring->cur = nm_ring_next(ring, i); 1065 /* we could postpone advancing head if we want 1066 * to hold the buffer. This can be supported in 1067 * the future. 1068 */ 1069 ring->head = ring->cur; 1070 d->cur_rx_ring = ri; 1071 return buf; 1072 } 1073 ri++; 1074 if (ri > d->last_rx_ring) 1075 ri = d->first_rx_ring; 1076 } while (ri != d->cur_rx_ring); 1077 return NULL; /* nothing found */ 1078 } 1079 1080 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1081 1082 #endif /* NETMAP_WITH_LIBS */ 1083 1084 #endif /* _NET_NETMAP_USER_H_ */ 1085