1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * $FreeBSD$ 32 * 33 * Functions and macros to manipulate netmap structures and packets 34 * in userspace. See netmap(4) for more information. 35 * 36 * The address of the struct netmap_if, say nifp, is computed from the 37 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region: 38 * ioctl(fd, NIOCREGIF, &req); 39 * mem = mmap(0, ... 
); 40 * nifp = NETMAP_IF(mem, req.nr_offset); 41 * (so simple, we could just do it manually) 42 * 43 * From there: 44 * struct netmap_ring *NETMAP_TXRING(nifp, index) 45 * struct netmap_ring *NETMAP_RXRING(nifp, index) 46 * we can access ring->cur, ring->head, ring->tail, etc. 47 * 48 * ring->slot[i] gives us the i-th slot (we can access 49 * directly len, flags, buf_idx) 50 * 51 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 52 * the buffer numbered x 53 * 54 * All ring indexes (head, cur, tail) should always move forward. 55 * To compute the next index in a circular ring you can use 56 * i = nm_ring_next(ring, i); 57 * 58 * To ease porting apps from pcap to netmap we supply a few functions 59 * that can be called to open, close, read and write on netmap in a way 60 * similar to libpcap. Note that the read/write functions depend on 61 * an ioctl()/select()/poll() being issued to refill rings or push 62 * packets out. 63 * 64 * In order to use these, include #define NETMAP_WITH_LIBS 65 * in the source file that invokes these functions. 
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 97 #ifndef likely 98 #define likely(x) __builtin_expect(!!(x), 1) 99 #define unlikely(x) __builtin_expect(!!(x), 0) 100 #endif /* likely and unlikely */ 101 102 #include <net/netmap.h> 103 104 /* helper macro */ 105 #define _NETMAP_OFFSET(type, ptr, offset) \ 106 ((type)(void *)((char *)(ptr) + (offset))) 107 108 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 109 110 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 111 nifp, (nifp)->ring_ofs[index] ) 112 113 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 114 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 115 116 #define NETMAP_BUF(ring, index) \ 117 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 118 119 #define NETMAP_BUF_IDX(ring, buf) \ 120 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 121 (ring)->nr_buf_size ) 122 123 124 static inline uint32_t 125 nm_ring_next(struct netmap_ring *r, uint32_t i) 126 { 127 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 128 } 129 130 131 /* 132 * Return 1 if we have pending transmissions in the tx ring. 
133 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 134 */ 135 static inline int 136 nm_tx_pending(struct netmap_ring *r) 137 { 138 return nm_ring_next(r, r->tail) != r->head; 139 } 140 141 142 static inline uint32_t 143 nm_ring_space(struct netmap_ring *ring) 144 { 145 int ret = ring->tail - ring->cur; 146 if (ret < 0) 147 ret += ring->num_slots; 148 return ret; 149 } 150 151 152 #ifdef NETMAP_WITH_LIBS 153 /* 154 * Support for simple I/O libraries. 155 * Include other system headers required for compiling this. 156 */ 157 158 #ifndef HAVE_NETMAP_WITH_LIBS 159 #define HAVE_NETMAP_WITH_LIBS 160 161 #include <stdio.h> 162 #include <sys/time.h> 163 #include <sys/mman.h> 164 #include <string.h> /* memset */ 165 #include <sys/ioctl.h> 166 #include <sys/errno.h> /* EINVAL */ 167 #include <fcntl.h> /* O_RDWR */ 168 #include <unistd.h> /* close() */ 169 #include <signal.h> 170 #include <stdlib.h> 171 172 #ifndef ND /* debug macros */ 173 /* debug support */ 174 #define ND(_fmt, ...) do {} while(0) 175 #define D(_fmt, ...) \ 176 do { \ 177 struct timeval _t0; \ 178 gettimeofday(&_t0, NULL); \ 179 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 180 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 181 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 182 } while (0) 183 184 /* Rate limited version of "D", lps indicates how many per second */ 185 #define RD(lps, format, ...) 
\ 186 do { \ 187 static int __t0, __cnt; \ 188 struct timeval __xxts; \ 189 gettimeofday(&__xxts, NULL); \ 190 if (__t0 != __xxts.tv_sec) { \ 191 __t0 = __xxts.tv_sec; \ 192 __cnt = 0; \ 193 } \ 194 if (__cnt++ < lps) { \ 195 D(format, ##__VA_ARGS__); \ 196 } \ 197 } while (0) 198 #endif 199 200 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 201 struct timeval ts; 202 uint32_t caplen; 203 uint32_t len; 204 205 uint64_t flags; /* NM_MORE_PKTS etc */ 206 #define NM_MORE_PKTS 1 207 struct nm_desc *d; 208 struct netmap_slot *slot; 209 uint8_t *buf; 210 }; 211 212 struct nm_stat { /* same as pcap_stat */ 213 u_int ps_recv; 214 u_int ps_drop; 215 u_int ps_ifdrop; 216 #ifdef WIN32 /* XXX or _WIN32 ? */ 217 u_int bs_capt; 218 #endif /* WIN32 */ 219 }; 220 221 #define NM_ERRBUF_SIZE 512 222 223 struct nm_desc { 224 struct nm_desc *self; /* point to self if netmap. */ 225 int fd; 226 void *mem; 227 uint32_t memsize; 228 int done_mmap; /* set if mem is the result of mmap */ 229 struct netmap_if * const nifp; 230 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 231 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 232 struct nmreq req; /* also contains the nr_name = ifname */ 233 struct nm_pkthdr hdr; 234 235 /* 236 * The memory contains netmap_if, rings and then buffers. 237 * Given a pointer (e.g. to nm_inject) we can compare with 238 * mem/buf_start/buf_end to tell if it is a buffer or 239 * some other descriptor in our region. 240 * We also store a pointer to some ring as it helps in the 241 * translation from buffer indexes to addresses. 
242 */ 243 struct netmap_ring * const some_ring; 244 void * const buf_start; 245 void * const buf_end; 246 /* parameters from pcap_open_live */ 247 int snaplen; 248 int promisc; 249 int to_ms; 250 char *errbuf; 251 252 /* save flags so we can restore them on close */ 253 uint32_t if_flags; 254 uint32_t if_reqcap; 255 uint32_t if_curcap; 256 257 struct nm_stat st; 258 char msg[NM_ERRBUF_SIZE]; 259 }; 260 261 /* 262 * when the descriptor is open correctly, d->self == d 263 * Eventually we should also use some magic number. 264 */ 265 #define P2NMD(p) ((struct nm_desc *)(p)) 266 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 267 #define NETMAP_FD(d) (P2NMD(d)->fd) 268 269 270 /* 271 * this is a slightly optimized copy routine which rounds 272 * to multiple of 64 bytes and is often faster than dealing 273 * with other odd sizes. We assume there is enough room 274 * in the source and destination buffers. 275 */ 276 static inline void 277 nm_pkt_copy(const void *_src, void *_dst, int l) 278 { 279 const uint64_t *src = (const uint64_t *)_src; 280 uint64_t *dst = (uint64_t *)_dst; 281 282 if (unlikely(l >= 1024 || l % 64)) { 283 memcpy(dst, src, l); 284 return; 285 } 286 for (; likely(l > 0); l-=64) { 287 *dst++ = *src++; 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 } 296 } 297 298 299 /* 300 * The callback, invoked on each received packet. Same as libpcap 301 */ 302 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 303 304 /* 305 *--- the pcap-like API --- 306 * 307 * nm_open() opens a file descriptor, binds to a port and maps memory. 
308 * 309 * ifname (netmap:foo or vale:foo) is the port name 310 * a suffix can indicate the following: 311 * ^ bind the host (sw) ring pair 312 * * bind host and NIC ring pairs 313 * -NN bind individual NIC ring pair 314 * {NN bind master side of pipe NN 315 * }NN bind slave side of pipe NN 316 * a suffix starting with / and the following flags, 317 * in any order: 318 * x exclusive access 319 * z zero copy monitor (both tx and rx) 320 * t monitor tx side (copy monitor) 321 * r monitor rx side (copy monitor) 322 * R bind only RX ring(s) 323 * T bind only TX ring(s) 324 * 325 * req provides the initial values of nmreq before parsing ifname. 326 * Remember that the ifname parsing will override the ring 327 * number in nr_ringid, and part of nr_flags; 328 * flags special functions, normally 0 329 * indicates which fields of *arg are significant 330 * arg special functions, normally NULL 331 * if passed a nm_desc with mem != NULL, 332 * use that memory instead of mmap. 333 */ 334 335 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 336 uint64_t flags, const struct nm_desc *arg); 337 338 /* 339 * nm_open can import some fields from the parent descriptor. 340 * These flags control which ones. 341 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 342 * which set the initial value for these flags. 343 * Note that the 16 low bits of the flags are reserved for data 344 * that may go into the nmreq. 
345 */ 346 enum { 347 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 348 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 349 NM_OPEN_ARG1 = 0x100000, 350 NM_OPEN_ARG2 = 0x200000, 351 NM_OPEN_ARG3 = 0x400000, 352 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 353 }; 354 355 356 /* 357 * nm_close() closes and restores the port to its previous state 358 */ 359 360 static int nm_close(struct nm_desc *); 361 362 /* 363 * nm_mmap() do mmap or inherit from parent if the nr_arg2 364 * (memory block) matches. 365 */ 366 367 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 368 369 /* 370 * nm_inject() is the same as pcap_inject() 371 * nm_dispatch() is the same as pcap_dispatch() 372 * nm_nextpkt() is the same as pcap_next() 373 */ 374 375 static int nm_inject(struct nm_desc *, const void *, size_t); 376 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 377 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 378 379 #ifdef _WIN32 380 381 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 382 383 /* 384 * In windows we do not have yet native poll support, so we keep track 385 * of file descriptors associated to netmap ports to emulate poll on 386 * them and fall back on regular poll on other file descriptors. 
387 */ 388 struct win_netmap_fd_list { 389 struct win_netmap_fd_list *next; 390 int win_netmap_fd; 391 HANDLE win_netmap_handle; 392 }; 393 394 /* 395 * list head containing all the netmap opened fd and their 396 * windows HANDLE counterparts 397 */ 398 static struct win_netmap_fd_list *win_netmap_fd_list_head; 399 400 static void 401 win_insert_fd_record(int fd) 402 { 403 struct win_netmap_fd_list *curr; 404 405 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 406 if (fd == curr->win_netmap_fd) { 407 return; 408 } 409 } 410 curr = calloc(1, sizeof(*curr)); 411 curr->next = win_netmap_fd_list_head; 412 curr->win_netmap_fd = fd; 413 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 414 win_netmap_fd_list_head = curr; 415 } 416 417 void 418 win_remove_fd_record(int fd) 419 { 420 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 421 struct win_netmap_fd_list *prev = NULL; 422 for (; curr ; prev = curr, curr = curr->next) { 423 if (fd != curr->win_netmap_fd) 424 continue; 425 /* found the entry */ 426 if (prev == NULL) { /* we are freeing the first entry */ 427 win_netmap_fd_list_head = curr->next; 428 } else { 429 prev->next = curr->next; 430 } 431 free(curr); 432 break; 433 } 434 } 435 436 437 HANDLE 438 win_get_netmap_handle(int fd) 439 { 440 struct win_netmap_fd_list *curr; 441 442 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 443 if (fd == curr->win_netmap_fd) { 444 return curr->win_netmap_handle; 445 } 446 } 447 return NULL; 448 } 449 450 /* 451 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 452 */ 453 454 /* 455 * use this function only from netmap_user.h internal functions 456 * same as ioctl, returns 0 on success and -1 on error 457 */ 458 static int 459 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 460 { 461 DWORD bReturn = 0, szIn, szOut; 462 BOOL ioctlReturnStatus; 463 void *inParam = arg, *outParam = arg; 464 465 switch (ctlCode) { 466 case NETMAP_POLL: 467 szIn = 
sizeof(POLL_REQUEST_DATA); 468 szOut = sizeof(POLL_REQUEST_DATA); 469 break; 470 case NETMAP_MMAP: 471 szIn = 0; 472 szOut = sizeof(void*); 473 inParam = NULL; /* nothing on input */ 474 break; 475 case NIOCTXSYNC: 476 case NIOCRXSYNC: 477 szIn = 0; 478 szOut = 0; 479 break; 480 case NIOCREGIF: 481 szIn = sizeof(struct nmreq); 482 szOut = sizeof(struct nmreq); 483 break; 484 case NIOCCONFIG: 485 D("unsupported NIOCCONFIG!"); 486 return -1; 487 488 default: /* a regular ioctl */ 489 D("invalid ioctl %x on netmap fd", ctlCode); 490 return -1; 491 } 492 493 ioctlReturnStatus = DeviceIoControl(h, 494 ctlCode, inParam, szIn, 495 outParam, szOut, 496 &bReturn, NULL); 497 // XXX note windows returns 0 on error or async call, 1 on success 498 // we could call GetLastError() to figure out what happened 499 return ioctlReturnStatus ? 0 : -1; 500 } 501 502 /* 503 * this function is what must be called from user-space programs 504 * same as ioctl, returns 0 on success and -1 on error 505 */ 506 static int 507 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 508 { 509 HANDLE h = win_get_netmap_handle(fd); 510 511 if (h == NULL) { 512 return ioctl(fd, ctlCode, arg); 513 } else { 514 return win_nm_ioctl_internal(h, ctlCode, arg); 515 } 516 } 517 518 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 519 520 /* 521 * We cannot use the native mmap on windows 522 * The only parameter used is "fd", the other ones are just declared to 523 * make this signature comparable to the FreeBSD/Linux one 524 */ 525 static void * 526 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 527 { 528 HANDLE h = win_get_netmap_handle(fd); 529 530 if (h == NULL) { 531 return mmap(addr, length, prot, flags, fd, offset); 532 } else { 533 MEMORY_ENTRY ret; 534 535 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
536 NULL : ret.pUsermodeVirtualAddress; 537 } 538 } 539 540 #define mmap win32_mmap_emulated 541 542 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 543 544 static int 545 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 546 { 547 HANDLE h; 548 549 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 550 return poll(fds, nfds, timeout); 551 } else { 552 POLL_REQUEST_DATA prd; 553 554 prd.timeout = timeout; 555 prd.events = fds->events; 556 557 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 558 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 559 return -1; 560 } 561 return 1; 562 } 563 } 564 565 #define poll win_nm_poll 566 567 static int 568 win_nm_open(char* pathname, int flags) 569 { 570 571 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 572 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 573 if (fd < 0) { 574 return -1; 575 } 576 577 win_insert_fd_record(fd); 578 return fd; 579 } else { 580 return open(pathname, flags); 581 } 582 } 583 584 #define open win_nm_open 585 586 static int 587 win_nm_close(int fd) 588 { 589 if (fd != -1) { 590 close(fd); 591 if (win_get_netmap_handle(fd) != NULL) { 592 win_remove_fd_record(fd); 593 } 594 } 595 return 0; 596 } 597 598 #define close win_nm_close 599 600 #endif /* _WIN32 */ 601 602 static int 603 nm_is_identifier(const char *s, const char *e) 604 { 605 for (; s != e; s++) { 606 if (!isalnum(*s) && *s != '_') { 607 return 0; 608 } 609 } 610 611 return 1; 612 } 613 614 #define MAXERRMSG 80 615 static int 616 nm_parse(const char *ifname, struct nm_desc *d, char *err) 617 { 618 int is_vale; 619 const char *port = NULL; 620 const char *vpname = NULL; 621 u_int namelen; 622 uint32_t nr_ringid = 0, nr_flags; 623 char errmsg[MAXERRMSG] = ""; 624 long num; 625 uint16_t nr_arg2 = 0; 626 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 627 628 errno = 0; 629 630 is_vale = (ifname[0] == 'v'); 631 if (is_vale) { 632 port = index(ifname, 
':'); 633 if (port == NULL) { 634 snprintf(errmsg, MAXERRMSG, 635 "missing ':' in vale name"); 636 goto fail; 637 } 638 639 if (!nm_is_identifier(ifname + 4, port)) { 640 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 641 goto fail; 642 } 643 644 vpname = ++port; 645 } else { 646 ifname += 7; 647 port = ifname; 648 } 649 650 /* scan for a separator */ 651 for (; *port && !index("-*^{}/@", *port); port++) 652 ; 653 654 if (is_vale && !nm_is_identifier(vpname, port)) { 655 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 656 goto fail; 657 } 658 659 namelen = port - ifname; 660 if (namelen >= sizeof(d->req.nr_name)) { 661 snprintf(errmsg, MAXERRMSG, "name too long"); 662 goto fail; 663 } 664 memcpy(d->req.nr_name, ifname, namelen); 665 d->req.nr_name[namelen] = '\0'; 666 667 p_state = P_START; 668 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 669 while (*port) { 670 switch (p_state) { 671 case P_START: 672 switch (*port) { 673 case '^': /* only SW ring */ 674 nr_flags = NR_REG_SW; 675 p_state = P_RNGSFXOK; 676 break; 677 case '*': /* NIC and SW */ 678 nr_flags = NR_REG_NIC_SW; 679 p_state = P_RNGSFXOK; 680 break; 681 case '-': /* one NIC ring pair */ 682 nr_flags = NR_REG_ONE_NIC; 683 p_state = P_GETNUM; 684 break; 685 case '{': /* pipe (master endpoint) */ 686 nr_flags = NR_REG_PIPE_MASTER; 687 p_state = P_GETNUM; 688 break; 689 case '}': /* pipe (slave endoint) */ 690 nr_flags = NR_REG_PIPE_SLAVE; 691 p_state = P_GETNUM; 692 break; 693 case '/': /* start of flags */ 694 p_state = P_FLAGS; 695 break; 696 case '@': /* start of memid */ 697 p_state = P_MEMID; 698 break; 699 default: 700 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 701 goto fail; 702 } 703 port++; 704 break; 705 case P_RNGSFXOK: 706 switch (*port) { 707 case '/': 708 p_state = P_FLAGS; 709 break; 710 case '@': 711 p_state = P_MEMID; 712 break; 713 default: 714 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 715 goto fail; 716 } 717 port++; 718 
break; 719 case P_GETNUM: 720 num = strtol(port, (char **)&port, 10); 721 if (num < 0 || num >= NETMAP_RING_MASK) { 722 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 723 num, NETMAP_RING_MASK); 724 goto fail; 725 } 726 nr_ringid = num & NETMAP_RING_MASK; 727 p_state = P_RNGSFXOK; 728 break; 729 case P_FLAGS: 730 case P_FLAGSOK: 731 if (*port == '@') { 732 port++; 733 p_state = P_MEMID; 734 break; 735 } 736 switch (*port) { 737 case 'x': 738 nr_flags |= NR_EXCLUSIVE; 739 break; 740 case 'z': 741 nr_flags |= NR_ZCOPY_MON; 742 break; 743 case 't': 744 nr_flags |= NR_MONITOR_TX; 745 break; 746 case 'r': 747 nr_flags |= NR_MONITOR_RX; 748 break; 749 case 'R': 750 nr_flags |= NR_RX_RINGS_ONLY; 751 break; 752 case 'T': 753 nr_flags |= NR_TX_RINGS_ONLY; 754 break; 755 default: 756 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 757 goto fail; 758 } 759 port++; 760 p_state = P_FLAGSOK; 761 break; 762 case P_MEMID: 763 if (nr_arg2 != 0) { 764 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 765 goto fail; 766 } 767 num = strtol(port, (char **)&port, 10); 768 if (num <= 0) { 769 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 770 goto fail; 771 } 772 nr_arg2 = num; 773 p_state = P_RNGSFXOK; 774 break; 775 } 776 } 777 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 778 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 779 goto fail; 780 } 781 ND("flags: %s %s %s %s", 782 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 783 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 784 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 785 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 786 787 d->req.nr_flags |= nr_flags; 788 d->req.nr_ringid |= nr_ringid; 789 d->req.nr_arg2 = nr_arg2; 790 791 d->self = d; 792 793 return 0; 794 fail: 795 if (!errno) 796 errno = EINVAL; 797 if (err) 798 strncpy(err, errmsg, MAXERRMSG); 799 return -1; 800 } 801 802 /* 803 * Try to open, return descriptor if successful, NULL otherwise. 804 * An invalid netmap name will return errno = 0; 805 * You can pass a pointer to a pre-filled nm_desc to add special 806 * parameters. Flags is used as follows 807 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 808 * if the nr_arg2 (memory block) matches. 809 * NM_OPEN_ARG1 use req.nr_arg1 from arg 810 * NM_OPEN_ARG2 use req.nr_arg2 from arg 811 * NM_OPEN_RING_CFG user ring config from arg 812 */ 813 static struct nm_desc * 814 nm_open(const char *ifname, const struct nmreq *req, 815 uint64_t new_flags, const struct nm_desc *arg) 816 { 817 struct nm_desc *d = NULL; 818 const struct nm_desc *parent = arg; 819 char errmsg[MAXERRMSG] = ""; 820 uint32_t nr_reg; 821 822 if (strncmp(ifname, "netmap:", 7) && 823 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 824 errno = 0; /* name not recognised, not an error */ 825 return NULL; 826 } 827 828 d = (struct nm_desc *)calloc(1, sizeof(*d)); 829 if (d == NULL) { 830 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 831 errno = ENOMEM; 832 return NULL; 833 } 834 d->self = d; /* set this early so nm_close() works */ 835 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 836 if (d->fd < 0) { 837 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 838 goto fail; 839 } 840 841 if (req) 842 d->req = *req; 843 844 if (!(new_flags & NM_OPEN_IFNAME)) { 845 if (nm_parse(ifname, d, errmsg) < 0) 846 goto fail; 847 } 848 849 d->req.nr_version = NETMAP_API; 850 d->req.nr_ringid &= NETMAP_RING_MASK; 851 852 /* optionally import info from parent */ 853 if (IS_NETMAP_DESC(parent) && new_flags) { 854 if (new_flags & NM_OPEN_ARG1) 855 D("overriding 
ARG1 %d", parent->req.nr_arg1); 856 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 857 parent->req.nr_arg1 : 4; 858 if (new_flags & NM_OPEN_ARG2) { 859 D("overriding ARG2 %d", parent->req.nr_arg2); 860 d->req.nr_arg2 = parent->req.nr_arg2; 861 } 862 if (new_flags & NM_OPEN_ARG3) 863 D("overriding ARG3 %d", parent->req.nr_arg3); 864 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 865 parent->req.nr_arg3 : 0; 866 if (new_flags & NM_OPEN_RING_CFG) { 867 D("overriding RING_CFG"); 868 d->req.nr_tx_slots = parent->req.nr_tx_slots; 869 d->req.nr_rx_slots = parent->req.nr_rx_slots; 870 d->req.nr_tx_rings = parent->req.nr_tx_rings; 871 d->req.nr_rx_rings = parent->req.nr_rx_rings; 872 } 873 if (new_flags & NM_OPEN_IFNAME) { 874 D("overriding ifname %s ringid 0x%x flags 0x%x", 875 parent->req.nr_name, parent->req.nr_ringid, 876 parent->req.nr_flags); 877 memcpy(d->req.nr_name, parent->req.nr_name, 878 sizeof(d->req.nr_name)); 879 d->req.nr_ringid = parent->req.nr_ringid; 880 d->req.nr_flags = parent->req.nr_flags; 881 } 882 } 883 /* add the *XPOLL flags */ 884 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 885 886 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 887 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 888 goto fail; 889 } 890 891 nr_reg = d->req.nr_flags & NR_REG_MASK; 892 893 if (nr_reg == NR_REG_SW) { /* host stack */ 894 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 895 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 896 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 897 d->first_tx_ring = 0; 898 d->first_rx_ring = 0; 899 d->last_tx_ring = d->req.nr_tx_rings - 1; 900 d->last_rx_ring = d->req.nr_rx_rings - 1; 901 } else if (nr_reg == NR_REG_NIC_SW) { 902 d->first_tx_ring = 0; 903 d->first_rx_ring = 0; 904 d->last_tx_ring = d->req.nr_tx_rings; 905 d->last_rx_ring = d->req.nr_rx_rings; 906 } else if (nr_reg == NR_REG_ONE_NIC) { 907 /* XXX check validity */ 908 d->first_tx_ring = d->last_tx_ring = 909 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 910 } else { /* pipes */ 911 d->first_tx_ring = d->last_tx_ring = 0; 912 d->first_rx_ring = d->last_rx_ring = 0; 913 } 914 915 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 916 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 917 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 918 goto fail; 919 } 920 921 922 #ifdef DEBUG_NETMAP_USER 923 { /* debugging code */ 924 int i; 925 926 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 927 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 928 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 929 for (i = 0; i <= d->req.nr_tx_rings; i++) { 930 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 931 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 932 } 933 for (i = 0; i <= d->req.nr_rx_rings; i++) { 934 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 935 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 936 } 937 } 938 #endif /* debugging */ 939 940 d->cur_tx_ring = d->first_tx_ring; 941 d->cur_rx_ring = d->first_rx_ring; 942 return d; 943 944 fail: 945 nm_close(d); 946 if (errmsg[0]) 947 D("%s %s", errmsg, ifname); 948 if (errno == 0) 949 errno = EINVAL; 950 return NULL; 951 } 952 953 954 static int 955 nm_close(struct nm_desc *d) 956 { 957 /* 958 * ugly trick to avoid unused warnings 959 */ 960 static void *__xxzt[] __attribute__ ((unused)) = 961 { (void *)nm_open, (void *)nm_inject, 962 (void *)nm_dispatch, (void *)nm_nextpkt } ; 963 964 if (d == NULL || d->self != d) 965 return EINVAL; 966 if (d->done_mmap && d->mem) 967 munmap(d->mem, d->memsize); 968 if (d->fd != -1) { 969 close(d->fd); 970 } 971 972 bzero(d, sizeof(*d)); 973 free(d); 974 return 0; 975 } 976 977 978 static int 979 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 980 { 981 //XXX TODO: check if mmap is already done 982 983 if (IS_NETMAP_DESC(parent) && parent->mem && 984 
parent->req.nr_arg2 == d->req.nr_arg2) { 985 /* do not mmap, inherit from parent */ 986 D("do not mmap, inherit from parent"); 987 d->memsize = parent->memsize; 988 d->mem = parent->mem; 989 } else { 990 /* XXX TODO: check if memsize is too large (or there is overflow) */ 991 d->memsize = d->req.nr_memsize; 992 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 993 d->fd, 0); 994 if (d->mem == MAP_FAILED) { 995 goto fail; 996 } 997 d->done_mmap = 1; 998 } 999 { 1000 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 1001 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1002 if ((void *)r == (void *)nifp) { 1003 /* the descriptor is open for TX only */ 1004 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1005 } 1006 1007 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1008 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1009 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1010 *(void **)(uintptr_t)&d->buf_end = 1011 (char *)d->mem + d->memsize; 1012 } 1013 1014 return 0; 1015 1016 fail: 1017 return EINVAL; 1018 } 1019 1020 /* 1021 * Same prototype as pcap_inject(), only need to cast. 1022 */ 1023 static int 1024 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1025 { 1026 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1027 ri = d->cur_tx_ring; 1028 1029 for (c = 0; c < n ; c++, ri++) { 1030 /* compute current ring to use */ 1031 struct netmap_ring *ring; 1032 uint32_t i, idx; 1033 1034 if (ri > d->last_tx_ring) 1035 ri = d->first_tx_ring; 1036 ring = NETMAP_TXRING(d->nifp, ri); 1037 if (nm_ring_empty(ring)) { 1038 continue; 1039 } 1040 i = ring->cur; 1041 idx = ring->slot[i].buf_idx; 1042 ring->slot[i].len = size; 1043 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 1044 d->cur_tx_ring = ri; 1045 ring->head = ring->cur = nm_ring_next(ring, i); 1046 return size; 1047 } 1048 return 0; /* fail */ 1049 } 1050 1051 1052 /* 1053 * Same prototype as pcap_dispatch(), only need to cast. 
1054 */ 1055 static int 1056 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1057 { 1058 int n = d->last_rx_ring - d->first_rx_ring + 1; 1059 int c, got = 0, ri = d->cur_rx_ring; 1060 d->hdr.buf = NULL; 1061 d->hdr.flags = NM_MORE_PKTS; 1062 d->hdr.d = d; 1063 1064 if (cnt == 0) 1065 cnt = -1; 1066 /* cnt == -1 means infinite, but rings have a finite amount 1067 * of buffers and the int is large enough that we never wrap, 1068 * so we can omit checking for -1 1069 */ 1070 for (c=0; c < n && cnt != got; c++, ri++) { 1071 /* compute current ring to use */ 1072 struct netmap_ring *ring; 1073 1074 if (ri > d->last_rx_ring) 1075 ri = d->first_rx_ring; 1076 ring = NETMAP_RXRING(d->nifp, ri); 1077 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1078 u_int idx, i; 1079 if (d->hdr.buf) { /* from previous round */ 1080 cb(arg, &d->hdr, d->hdr.buf); 1081 } 1082 i = ring->cur; 1083 idx = ring->slot[i].buf_idx; 1084 /* d->cur_rx_ring doesn't change inside this loop, but 1085 * set it here, so it reflects d->hdr.buf's ring */ 1086 d->cur_rx_ring = ri; 1087 d->hdr.slot = &ring->slot[i]; 1088 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1089 // __builtin_prefetch(buf); 1090 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1091 d->hdr.ts = ring->ts; 1092 ring->head = ring->cur = nm_ring_next(ring, i); 1093 } 1094 } 1095 if (d->hdr.buf) { /* from previous round */ 1096 d->hdr.flags = 0; 1097 cb(arg, &d->hdr, d->hdr.buf); 1098 } 1099 return got; 1100 } 1101 1102 static u_char * 1103 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1104 { 1105 int ri = d->cur_rx_ring; 1106 1107 do { 1108 /* compute current ring to use */ 1109 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1110 if (!nm_ring_empty(ring)) { 1111 u_int i = ring->cur; 1112 u_int idx = ring->slot[i].buf_idx; 1113 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1114 1115 // __builtin_prefetch(buf); 1116 hdr->ts = ring->ts; 1117 hdr->len = hdr->caplen = ring->slot[i].len; 1118 ring->cur = 
nm_ring_next(ring, i); 1119 /* we could postpone advancing head if we want 1120 * to hold the buffer. This can be supported in 1121 * the future. 1122 */ 1123 ring->head = ring->cur; 1124 d->cur_rx_ring = ri; 1125 return buf; 1126 } 1127 ri++; 1128 if (ri > d->last_rx_ring) 1129 ri = d->first_rx_ring; 1130 } while (ri != d->cur_rx_ring); 1131 return NULL; /* nothing found */ 1132 } 1133 1134 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1135 1136 #endif /* NETMAP_WITH_LIBS */ 1137 1138 #endif /* _NET_NETMAP_USER_H_ */ 1139