/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2011-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *	we can access ring->cur, ring->head, ring->tail, etc.
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 #include <string.h> /* memset */ 97 #include <sys/time.h> /* gettimeofday */ 98 99 #ifndef likely 100 #define likely(x) __builtin_expect(!!(x), 1) 101 #define unlikely(x) __builtin_expect(!!(x), 0) 102 #endif /* likely and unlikely */ 103 104 #include <net/netmap.h> 105 106 /* helper macro */ 107 #define _NETMAP_OFFSET(type, ptr, offset) \ 108 ((type)(void *)((char *)(ptr) + (offset))) 109 110 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 111 112 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 113 nifp, (nifp)->ring_ofs[index] ) 114 115 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 116 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + \ 117 (nifp)->ni_host_tx_rings] ) 118 119 #define NETMAP_BUF(ring, index) \ 120 ((char *)(ring) + (ring)->buf_ofs + ((size_t)(index)*(ring)->nr_buf_size)) 121 122 #define NETMAP_BUF_IDX(ring, buf) \ 123 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 124 (ring)->nr_buf_size ) 125 126 127 static inline uint32_t 128 nm_ring_next(struct netmap_ring *r, uint32_t i) 129 { 130 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 131 } 132 133 134 /* 135 * Return 1 if we have pending transmissions in the tx ring. 
136 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 137 */ 138 static inline int 139 nm_tx_pending(struct netmap_ring *r) 140 { 141 return nm_ring_next(r, r->tail) != r->head; 142 } 143 144 /* Compute the number of slots available in the netmap ring. We use 145 * ring->head as explained in the comment above nm_ring_empty(). */ 146 static inline uint32_t 147 nm_ring_space(struct netmap_ring *ring) 148 { 149 int ret = ring->tail - ring->head; 150 if (ret < 0) 151 ret += ring->num_slots; 152 return ret; 153 } 154 155 #ifndef ND /* debug macros */ 156 /* debug support */ 157 #define ND(_fmt, ...) do {} while(0) 158 #define D(_fmt, ...) \ 159 do { \ 160 struct timeval _t0; \ 161 gettimeofday(&_t0, NULL); \ 162 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 163 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 164 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 165 } while (0) 166 167 /* Rate limited version of "D", lps indicates how many per second */ 168 #define RD(lps, format, ...) \ 169 do { \ 170 static int __t0, __cnt; \ 171 struct timeval __xxts; \ 172 gettimeofday(&__xxts, NULL); \ 173 if (__t0 != __xxts.tv_sec) { \ 174 __t0 = __xxts.tv_sec; \ 175 __cnt = 0; \ 176 } \ 177 if (__cnt++ < lps) { \ 178 D(format, ##__VA_ARGS__); \ 179 } \ 180 } while (0) 181 #endif 182 183 /* 184 * this is a slightly optimized copy routine which rounds 185 * to multiple of 64 bytes and is often faster than dealing 186 * with other odd sizes. We assume there is enough room 187 * in the source and destination buffers. 
188 */ 189 static inline void 190 nm_pkt_copy(const void *_src, void *_dst, int l) 191 { 192 const uint64_t *src = (const uint64_t *)_src; 193 uint64_t *dst = (uint64_t *)_dst; 194 195 if (unlikely(l >= 1024 || l % 64)) { 196 memcpy(dst, src, l); 197 return; 198 } 199 for (; likely(l > 0); l-=64) { 200 *dst++ = *src++; 201 *dst++ = *src++; 202 *dst++ = *src++; 203 *dst++ = *src++; 204 *dst++ = *src++; 205 *dst++ = *src++; 206 *dst++ = *src++; 207 *dst++ = *src++; 208 } 209 } 210 211 #ifdef NETMAP_WITH_LIBS 212 /* 213 * Support for simple I/O libraries. 214 * Include other system headers required for compiling this. 215 */ 216 217 #ifndef HAVE_NETMAP_WITH_LIBS 218 #define HAVE_NETMAP_WITH_LIBS 219 220 #include <stdio.h> 221 #include <sys/time.h> 222 #include <sys/mman.h> 223 #include <sys/ioctl.h> 224 #include <sys/errno.h> /* EINVAL */ 225 #include <fcntl.h> /* O_RDWR */ 226 #include <unistd.h> /* close() */ 227 #include <signal.h> 228 #include <stdlib.h> 229 230 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 231 struct timeval ts; 232 uint32_t caplen; 233 uint32_t len; 234 235 uint64_t flags; /* NM_MORE_PKTS etc */ 236 #define NM_MORE_PKTS 1 237 struct nm_desc *d; 238 struct netmap_slot *slot; 239 uint8_t *buf; 240 }; 241 242 struct nm_stat { /* same as pcap_stat */ 243 u_int ps_recv; 244 u_int ps_drop; 245 u_int ps_ifdrop; 246 #ifdef WIN32 /* XXX or _WIN32 ? */ 247 u_int bs_capt; 248 #endif /* WIN32 */ 249 }; 250 251 #define NM_ERRBUF_SIZE 512 252 253 struct nm_desc { 254 struct nm_desc *self; /* point to self if netmap. */ 255 int fd; 256 void *mem; 257 size_t memsize; 258 int done_mmap; /* set if mem is the result of mmap */ 259 struct netmap_if * const nifp; 260 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 261 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 262 struct nmreq req; /* also contains the nr_name = ifname */ 263 struct nm_pkthdr hdr; 264 265 /* 266 * The memory contains netmap_if, rings and then buffers. 
267 * Given a pointer (e.g. to nm_inject) we can compare with 268 * mem/buf_start/buf_end to tell if it is a buffer or 269 * some other descriptor in our region. 270 * We also store a pointer to some ring as it helps in the 271 * translation from buffer indexes to addresses. 272 */ 273 struct netmap_ring * const some_ring; 274 void * const buf_start; 275 void * const buf_end; 276 /* parameters from pcap_open_live */ 277 int snaplen; 278 int promisc; 279 int to_ms; 280 char *errbuf; 281 282 /* save flags so we can restore them on close */ 283 uint32_t if_flags; 284 uint32_t if_reqcap; 285 uint32_t if_curcap; 286 287 struct nm_stat st; 288 char msg[NM_ERRBUF_SIZE]; 289 }; 290 291 /* 292 * when the descriptor is open correctly, d->self == d 293 * Eventually we should also use some magic number. 294 */ 295 #define P2NMD(p) ((struct nm_desc *)(p)) 296 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 297 #define NETMAP_FD(d) (P2NMD(d)->fd) 298 299 300 301 302 /* 303 * The callback, invoked on each received packet. Same as libpcap 304 */ 305 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 306 307 /* 308 *--- the pcap-like API --- 309 * 310 * nm_open() opens a file descriptor, binds to a port and maps memory. 311 * 312 * ifname (netmap:foo or vale:foo) is the port name 313 * a suffix can indicate the follwing: 314 * ^ bind the host (sw) ring pair 315 * * bind host and NIC ring pairs 316 * -NN bind individual NIC ring pair 317 * {NN bind master side of pipe NN 318 * }NN bind slave side of pipe NN 319 * a suffix starting with / and the following flags, 320 * in any order: 321 * x exclusive access 322 * z zero copy monitor (both tx and rx) 323 * t monitor tx side (copy monitor) 324 * r monitor rx side (copy monitor) 325 * R bind only RX ring(s) 326 * T bind only TX ring(s) 327 * 328 * req provides the initial values of nmreq before parsing ifname. 
329 * Remember that the ifname parsing will override the ring 330 * number in nm_ringid, and part of nm_flags; 331 * flags special functions, normally 0 332 * indicates which fields of *arg are significant 333 * arg special functions, normally NULL 334 * if passed a netmap_desc with mem != NULL, 335 * use that memory instead of mmap. 336 */ 337 338 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 339 uint64_t flags, const struct nm_desc *arg); 340 341 /* 342 * nm_open can import some fields from the parent descriptor. 343 * These flags control which ones. 344 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 345 * which set the initial value for these flags. 346 * Note that the 16 low bits of the flags are reserved for data 347 * that may go into the nmreq. 348 */ 349 enum { 350 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 351 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 352 NM_OPEN_ARG1 = 0x100000, 353 NM_OPEN_ARG2 = 0x200000, 354 NM_OPEN_ARG3 = 0x400000, 355 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 356 }; 357 358 359 /* 360 * nm_close() closes and restores the port to its previous state 361 */ 362 363 static int nm_close(struct nm_desc *); 364 365 /* 366 * nm_mmap() do mmap or inherit from parent if the nr_arg2 367 * (memory block) matches. 
368 */ 369 370 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 371 372 /* 373 * nm_inject() is the same as pcap_inject() 374 * nm_dispatch() is the same as pcap_dispatch() 375 * nm_nextpkt() is the same as pcap_next() 376 */ 377 378 static int nm_inject(struct nm_desc *, const void *, size_t); 379 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 380 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 381 382 #ifdef _WIN32 383 384 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 385 386 /* 387 * In windows we do not have yet native poll support, so we keep track 388 * of file descriptors associated to netmap ports to emulate poll on 389 * them and fall back on regular poll on other file descriptors. 390 */ 391 struct win_netmap_fd_list { 392 struct win_netmap_fd_list *next; 393 int win_netmap_fd; 394 HANDLE win_netmap_handle; 395 }; 396 397 /* 398 * list head containing all the netmap opened fd and their 399 * windows HANDLE counterparts 400 */ 401 static struct win_netmap_fd_list *win_netmap_fd_list_head; 402 403 static void 404 win_insert_fd_record(int fd) 405 { 406 struct win_netmap_fd_list *curr; 407 408 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 409 if (fd == curr->win_netmap_fd) { 410 return; 411 } 412 } 413 curr = calloc(1, sizeof(*curr)); 414 curr->next = win_netmap_fd_list_head; 415 curr->win_netmap_fd = fd; 416 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 417 win_netmap_fd_list_head = curr; 418 } 419 420 void 421 win_remove_fd_record(int fd) 422 { 423 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 424 struct win_netmap_fd_list *prev = NULL; 425 for (; curr ; prev = curr, curr = curr->next) { 426 if (fd != curr->win_netmap_fd) 427 continue; 428 /* found the entry */ 429 if (prev == NULL) { /* we are freeing the first entry */ 430 win_netmap_fd_list_head = curr->next; 431 } else { 432 prev->next = curr->next; 433 } 434 free(curr); 435 break; 436 } 437 } 438 
439 440 HANDLE 441 win_get_netmap_handle(int fd) 442 { 443 struct win_netmap_fd_list *curr; 444 445 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 446 if (fd == curr->win_netmap_fd) { 447 return curr->win_netmap_handle; 448 } 449 } 450 return NULL; 451 } 452 453 /* 454 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 455 */ 456 457 /* 458 * use this function only from netmap_user.h internal functions 459 * same as ioctl, returns 0 on success and -1 on error 460 */ 461 static int 462 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 463 { 464 DWORD bReturn = 0, szIn, szOut; 465 BOOL ioctlReturnStatus; 466 void *inParam = arg, *outParam = arg; 467 468 switch (ctlCode) { 469 case NETMAP_POLL: 470 szIn = sizeof(POLL_REQUEST_DATA); 471 szOut = sizeof(POLL_REQUEST_DATA); 472 break; 473 case NETMAP_MMAP: 474 szIn = 0; 475 szOut = sizeof(void*); 476 inParam = NULL; /* nothing on input */ 477 break; 478 case NIOCTXSYNC: 479 case NIOCRXSYNC: 480 szIn = 0; 481 szOut = 0; 482 break; 483 case NIOCREGIF: 484 szIn = sizeof(struct nmreq); 485 szOut = sizeof(struct nmreq); 486 break; 487 case NIOCCONFIG: 488 D("unsupported NIOCCONFIG!"); 489 return -1; 490 491 default: /* a regular ioctl */ 492 D("invalid ioctl %x on netmap fd", ctlCode); 493 return -1; 494 } 495 496 ioctlReturnStatus = DeviceIoControl(h, 497 ctlCode, inParam, szIn, 498 outParam, szOut, 499 &bReturn, NULL); 500 // XXX note windows returns 0 on error or async call, 1 on success 501 // we could call GetLastError() to figure out what happened 502 return ioctlReturnStatus ? 
0 : -1; 503 } 504 505 /* 506 * this function is what must be called from user-space programs 507 * same as ioctl, returns 0 on success and -1 on error 508 */ 509 static int 510 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 511 { 512 HANDLE h = win_get_netmap_handle(fd); 513 514 if (h == NULL) { 515 return ioctl(fd, ctlCode, arg); 516 } else { 517 return win_nm_ioctl_internal(h, ctlCode, arg); 518 } 519 } 520 521 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 522 523 /* 524 * We cannot use the native mmap on windows 525 * The only parameter used is "fd", the other ones are just declared to 526 * make this signature comparable to the FreeBSD/Linux one 527 */ 528 static void * 529 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 530 { 531 HANDLE h = win_get_netmap_handle(fd); 532 533 if (h == NULL) { 534 return mmap(addr, length, prot, flags, fd, offset); 535 } else { 536 MEMORY_ENTRY ret; 537 538 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
539 NULL : ret.pUsermodeVirtualAddress; 540 } 541 } 542 543 #define mmap win32_mmap_emulated 544 545 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 546 547 static int 548 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 549 { 550 HANDLE h; 551 552 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 553 return poll(fds, nfds, timeout); 554 } else { 555 POLL_REQUEST_DATA prd; 556 557 prd.timeout = timeout; 558 prd.events = fds->events; 559 560 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 561 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 562 return -1; 563 } 564 return 1; 565 } 566 } 567 568 #define poll win_nm_poll 569 570 static int 571 win_nm_open(char* pathname, int flags) 572 { 573 574 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 575 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 576 if (fd < 0) { 577 return -1; 578 } 579 580 win_insert_fd_record(fd); 581 return fd; 582 } else { 583 return open(pathname, flags); 584 } 585 } 586 587 #define open win_nm_open 588 589 static int 590 win_nm_close(int fd) 591 { 592 if (fd != -1) { 593 close(fd); 594 if (win_get_netmap_handle(fd) != NULL) { 595 win_remove_fd_record(fd); 596 } 597 } 598 return 0; 599 } 600 601 #define close win_nm_close 602 603 #endif /* _WIN32 */ 604 605 static int 606 nm_is_identifier(const char *s, const char *e) 607 { 608 for (; s != e; s++) { 609 if (!isalnum(*s) && *s != '_') { 610 return 0; 611 } 612 } 613 614 return 1; 615 } 616 617 #define MAXERRMSG 80 618 static int 619 nm_parse(const char *ifname, struct nm_desc *d, char *err) 620 { 621 int is_vale; 622 const char *port = NULL; 623 const char *vpname = NULL; 624 u_int namelen; 625 uint32_t nr_ringid = 0, nr_flags; 626 char errmsg[MAXERRMSG] = ""; 627 long num; 628 uint16_t nr_arg2 = 0; 629 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 630 631 errno = 0; 632 633 is_vale = (ifname[0] == 'v'); 634 if (is_vale) { 635 port = index(ifname, 
':'); 636 if (port == NULL) { 637 snprintf(errmsg, MAXERRMSG, 638 "missing ':' in vale name"); 639 goto fail; 640 } 641 642 if (!nm_is_identifier(ifname + 4, port)) { 643 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 644 goto fail; 645 } 646 647 vpname = ++port; 648 } else { 649 ifname += 7; 650 port = ifname; 651 } 652 653 /* scan for a separator */ 654 for (; *port && !index("-*^{}/@", *port); port++) 655 ; 656 657 if (is_vale && !nm_is_identifier(vpname, port)) { 658 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 659 goto fail; 660 } 661 662 namelen = port - ifname; 663 if (namelen >= sizeof(d->req.nr_name)) { 664 snprintf(errmsg, MAXERRMSG, "name too long"); 665 goto fail; 666 } 667 memcpy(d->req.nr_name, ifname, namelen); 668 d->req.nr_name[namelen] = '\0'; 669 670 p_state = P_START; 671 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 672 while (*port) { 673 switch (p_state) { 674 case P_START: 675 switch (*port) { 676 case '^': /* only SW ring */ 677 nr_flags = NR_REG_SW; 678 p_state = P_RNGSFXOK; 679 break; 680 case '*': /* NIC and SW */ 681 nr_flags = NR_REG_NIC_SW; 682 p_state = P_RNGSFXOK; 683 break; 684 case '-': /* one NIC ring pair */ 685 nr_flags = NR_REG_ONE_NIC; 686 p_state = P_GETNUM; 687 break; 688 case '{': /* pipe (master endpoint) */ 689 nr_flags = NR_REG_PIPE_MASTER; 690 p_state = P_GETNUM; 691 break; 692 case '}': /* pipe (slave endoint) */ 693 nr_flags = NR_REG_PIPE_SLAVE; 694 p_state = P_GETNUM; 695 break; 696 case '/': /* start of flags */ 697 p_state = P_FLAGS; 698 break; 699 case '@': /* start of memid */ 700 p_state = P_MEMID; 701 break; 702 default: 703 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 704 goto fail; 705 } 706 port++; 707 break; 708 case P_RNGSFXOK: 709 switch (*port) { 710 case '/': 711 p_state = P_FLAGS; 712 break; 713 case '@': 714 p_state = P_MEMID; 715 break; 716 default: 717 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 718 goto fail; 719 } 720 port++; 721 
break; 722 case P_GETNUM: 723 num = strtol(port, (char **)&port, 10); 724 if (num < 0 || num >= NETMAP_RING_MASK) { 725 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 726 num, NETMAP_RING_MASK); 727 goto fail; 728 } 729 nr_ringid = num & NETMAP_RING_MASK; 730 p_state = P_RNGSFXOK; 731 break; 732 case P_FLAGS: 733 case P_FLAGSOK: 734 if (*port == '@') { 735 port++; 736 p_state = P_MEMID; 737 break; 738 } 739 switch (*port) { 740 case 'x': 741 nr_flags |= NR_EXCLUSIVE; 742 break; 743 case 'z': 744 nr_flags |= NR_ZCOPY_MON; 745 break; 746 case 't': 747 nr_flags |= NR_MONITOR_TX; 748 break; 749 case 'r': 750 nr_flags |= NR_MONITOR_RX; 751 break; 752 case 'R': 753 nr_flags |= NR_RX_RINGS_ONLY; 754 break; 755 case 'T': 756 nr_flags |= NR_TX_RINGS_ONLY; 757 break; 758 default: 759 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 760 goto fail; 761 } 762 port++; 763 p_state = P_FLAGSOK; 764 break; 765 case P_MEMID: 766 if (nr_arg2 != 0) { 767 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 768 goto fail; 769 } 770 num = strtol(port, (char **)&port, 10); 771 if (num <= 0) { 772 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 773 goto fail; 774 } 775 nr_arg2 = num; 776 p_state = P_RNGSFXOK; 777 break; 778 } 779 } 780 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 781 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 782 goto fail; 783 } 784 ND("flags: %s %s %s %s", 785 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 786 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 787 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 788 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 789 790 d->req.nr_flags |= nr_flags; 791 d->req.nr_ringid |= nr_ringid; 792 d->req.nr_arg2 = nr_arg2; 793 794 d->self = d; 795 796 return 0; 797 fail: 798 if (!errno) 799 errno = EINVAL; 800 if (err) 801 strncpy(err, errmsg, MAXERRMSG); 802 return -1; 803 } 804 805 /* 806 * Try to open, return descriptor if successful, NULL otherwise. 807 * An invalid netmap name will return errno = 0; 808 * You can pass a pointer to a pre-filled nm_desc to add special 809 * parameters. Flags is used as follows 810 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 811 * if the nr_arg2 (memory block) matches. 812 * NM_OPEN_ARG1 use req.nr_arg1 from arg 813 * NM_OPEN_ARG2 use req.nr_arg2 from arg 814 * NM_OPEN_RING_CFG user ring config from arg 815 */ 816 static struct nm_desc * 817 nm_open(const char *ifname, const struct nmreq *req, 818 uint64_t new_flags, const struct nm_desc *arg) 819 { 820 struct nm_desc *d = NULL; 821 const struct nm_desc *parent = arg; 822 char errmsg[MAXERRMSG] = ""; 823 uint32_t nr_reg; 824 825 if (strncmp(ifname, "netmap:", 7) && 826 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 827 errno = 0; /* name not recognised, not an error */ 828 return NULL; 829 } 830 831 d = (struct nm_desc *)calloc(1, sizeof(*d)); 832 if (d == NULL) { 833 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 834 errno = ENOMEM; 835 return NULL; 836 } 837 d->self = d; /* set this early so nm_close() works */ 838 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 839 if (d->fd < 0) { 840 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 841 goto fail; 842 } 843 844 if (req) 845 d->req = *req; 846 847 if (!(new_flags & NM_OPEN_IFNAME)) { 848 if (nm_parse(ifname, d, errmsg) < 0) 849 goto fail; 850 } 851 852 d->req.nr_version = NETMAP_API; 853 d->req.nr_ringid &= NETMAP_RING_MASK; 854 855 /* optionally import info from parent */ 856 if (IS_NETMAP_DESC(parent) && new_flags) { 857 if (new_flags & NM_OPEN_ARG1) 858 D("overriding 
ARG1 %d", parent->req.nr_arg1); 859 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 860 parent->req.nr_arg1 : 4; 861 if (new_flags & NM_OPEN_ARG2) { 862 D("overriding ARG2 %d", parent->req.nr_arg2); 863 d->req.nr_arg2 = parent->req.nr_arg2; 864 } 865 if (new_flags & NM_OPEN_ARG3) 866 D("overriding ARG3 %d", parent->req.nr_arg3); 867 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 868 parent->req.nr_arg3 : 0; 869 if (new_flags & NM_OPEN_RING_CFG) { 870 D("overriding RING_CFG"); 871 d->req.nr_tx_slots = parent->req.nr_tx_slots; 872 d->req.nr_rx_slots = parent->req.nr_rx_slots; 873 d->req.nr_tx_rings = parent->req.nr_tx_rings; 874 d->req.nr_rx_rings = parent->req.nr_rx_rings; 875 } 876 if (new_flags & NM_OPEN_IFNAME) { 877 D("overriding ifname %s ringid 0x%x flags 0x%x", 878 parent->req.nr_name, parent->req.nr_ringid, 879 parent->req.nr_flags); 880 memcpy(d->req.nr_name, parent->req.nr_name, 881 sizeof(d->req.nr_name)); 882 d->req.nr_ringid = parent->req.nr_ringid; 883 d->req.nr_flags = parent->req.nr_flags; 884 } 885 } 886 /* add the *XPOLL flags */ 887 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 888 889 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 890 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 891 goto fail; 892 } 893 894 nr_reg = d->req.nr_flags & NR_REG_MASK; 895 896 if (nr_reg == NR_REG_SW) { /* host stack */ 897 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 898 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 899 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 900 d->first_tx_ring = 0; 901 d->first_rx_ring = 0; 902 d->last_tx_ring = d->req.nr_tx_rings - 1; 903 d->last_rx_ring = d->req.nr_rx_rings - 1; 904 } else if (nr_reg == NR_REG_NIC_SW) { 905 d->first_tx_ring = 0; 906 d->first_rx_ring = 0; 907 d->last_tx_ring = d->req.nr_tx_rings; 908 d->last_rx_ring = d->req.nr_rx_rings; 909 } else if (nr_reg == NR_REG_ONE_NIC) { 910 /* XXX check validity */ 911 d->first_tx_ring = d->last_tx_ring = 912 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 913 } else { /* pipes */ 914 d->first_tx_ring = d->last_tx_ring = 0; 915 d->first_rx_ring = d->last_rx_ring = 0; 916 } 917 918 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 919 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 920 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 921 goto fail; 922 } 923 924 925 #ifdef DEBUG_NETMAP_USER 926 { /* debugging code */ 927 int i; 928 929 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 930 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 931 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 932 for (i = 0; i <= d->req.nr_tx_rings; i++) { 933 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 934 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 935 } 936 for (i = 0; i <= d->req.nr_rx_rings; i++) { 937 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 938 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 939 } 940 } 941 #endif /* debugging */ 942 943 d->cur_tx_ring = d->first_tx_ring; 944 d->cur_rx_ring = d->first_rx_ring; 945 return d; 946 947 fail: 948 nm_close(d); 949 if (errmsg[0]) 950 D("%s %s", errmsg, ifname); 951 if (errno == 0) 952 errno = EINVAL; 953 return NULL; 954 } 955 956 957 static int 958 nm_close(struct nm_desc *d) 959 { 960 /* 961 * ugly trick to avoid unused warnings 962 */ 963 static void *__xxzt[] __attribute__ ((unused)) = 964 { (void *)nm_open, (void *)nm_inject, 965 (void *)nm_dispatch, (void *)nm_nextpkt } ; 966 967 if (d == NULL || d->self != d) 968 return EINVAL; 969 if (d->done_mmap && d->mem) 970 munmap(d->mem, d->memsize); 971 if (d->fd != -1) { 972 close(d->fd); 973 } 974 975 bzero(d, sizeof(*d)); 976 free(d); 977 return 0; 978 } 979 980 981 static int 982 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 983 { 984 //XXX TODO: check if mmap is already done 985 986 if (IS_NETMAP_DESC(parent) && parent->mem && 987 
parent->req.nr_arg2 == d->req.nr_arg2) { 988 /* do not mmap, inherit from parent */ 989 D("do not mmap, inherit from parent"); 990 d->memsize = parent->memsize; 991 d->mem = parent->mem; 992 } else { 993 /* XXX TODO: check if memsize is too large (or there is overflow) */ 994 d->memsize = d->req.nr_memsize; 995 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 996 d->fd, 0); 997 if (d->mem == MAP_FAILED) { 998 goto fail; 999 } 1000 d->done_mmap = 1; 1001 } 1002 { 1003 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 1004 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1005 if ((void *)r == (void *)nifp) { 1006 /* the descriptor is open for TX only */ 1007 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1008 } 1009 1010 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1011 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1012 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1013 *(void **)(uintptr_t)&d->buf_end = 1014 (char *)d->mem + d->memsize; 1015 } 1016 1017 return 0; 1018 1019 fail: 1020 return EINVAL; 1021 } 1022 1023 /* 1024 * Same prototype as pcap_inject(), only need to cast. 
1025 */ 1026 static int 1027 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1028 { 1029 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1030 ri = d->cur_tx_ring; 1031 1032 for (c = 0; c < n ; c++, ri++) { 1033 /* compute current ring to use */ 1034 struct netmap_ring *ring; 1035 uint32_t i, j, idx; 1036 size_t rem; 1037 1038 if (ri > d->last_tx_ring) 1039 ri = d->first_tx_ring; 1040 ring = NETMAP_TXRING(d->nifp, ri); 1041 rem = size; 1042 j = ring->cur; 1043 while (rem > ring->nr_buf_size && j != ring->tail) { 1044 rem -= ring->nr_buf_size; 1045 j = nm_ring_next(ring, j); 1046 } 1047 if (j == ring->tail && rem > 0) 1048 continue; 1049 i = ring->cur; 1050 while (i != j) { 1051 idx = ring->slot[i].buf_idx; 1052 ring->slot[i].len = ring->nr_buf_size; 1053 ring->slot[i].flags = NS_MOREFRAG; 1054 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1055 i = nm_ring_next(ring, i); 1056 buf = (char *)buf + ring->nr_buf_size; 1057 } 1058 idx = ring->slot[i].buf_idx; 1059 ring->slot[i].len = rem; 1060 ring->slot[i].flags = 0; 1061 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1062 ring->head = ring->cur = nm_ring_next(ring, i); 1063 d->cur_tx_ring = ri; 1064 return size; 1065 } 1066 return 0; /* fail */ 1067 } 1068 1069 1070 /* 1071 * Same prototype as pcap_dispatch(), only need to cast. 
1072 */ 1073 static int 1074 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1075 { 1076 int n = d->last_rx_ring - d->first_rx_ring + 1; 1077 int c, got = 0, ri = d->cur_rx_ring; 1078 d->hdr.buf = NULL; 1079 d->hdr.flags = NM_MORE_PKTS; 1080 d->hdr.d = d; 1081 1082 if (cnt == 0) 1083 cnt = -1; 1084 /* cnt == -1 means infinite, but rings have a finite amount 1085 * of buffers and the int is large enough that we never wrap, 1086 * so we can omit checking for -1 1087 */ 1088 for (c=0; c < n && cnt != got; c++, ri++) { 1089 /* compute current ring to use */ 1090 struct netmap_ring *ring; 1091 1092 if (ri > d->last_rx_ring) 1093 ri = d->first_rx_ring; 1094 ring = NETMAP_RXRING(d->nifp, ri); 1095 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1096 u_int idx, i; 1097 u_char *oldbuf; 1098 struct netmap_slot *slot; 1099 if (d->hdr.buf) { /* from previous round */ 1100 cb(arg, &d->hdr, d->hdr.buf); 1101 } 1102 i = ring->cur; 1103 slot = &ring->slot[i]; 1104 idx = slot->buf_idx; 1105 /* d->cur_rx_ring doesn't change inside this loop, but 1106 * set it here, so it reflects d->hdr.buf's ring */ 1107 d->cur_rx_ring = ri; 1108 d->hdr.slot = slot; 1109 oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1110 // __builtin_prefetch(buf); 1111 d->hdr.len = d->hdr.caplen = slot->len; 1112 while (slot->flags & NS_MOREFRAG) { 1113 u_char *nbuf; 1114 u_int oldlen = slot->len; 1115 i = nm_ring_next(ring, i); 1116 slot = &ring->slot[i]; 1117 d->hdr.len += slot->len; 1118 nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); 1119 if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && 1120 oldlen == ring->nr_buf_size) { 1121 d->hdr.caplen += slot->len; 1122 oldbuf = nbuf; 1123 } else { 1124 oldbuf = NULL; 1125 } 1126 } 1127 d->hdr.ts = ring->ts; 1128 ring->head = ring->cur = nm_ring_next(ring, i); 1129 } 1130 } 1131 if (d->hdr.buf) { /* from previous round */ 1132 d->hdr.flags = 0; 1133 cb(arg, &d->hdr, d->hdr.buf); 1134 } 1135 return got; 1136 } 1137 1138 static 
u_char * 1139 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1140 { 1141 int ri = d->cur_rx_ring; 1142 1143 do { 1144 /* compute current ring to use */ 1145 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1146 if (!nm_ring_empty(ring)) { 1147 u_int i = ring->cur; 1148 u_int idx = ring->slot[i].buf_idx; 1149 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1150 1151 // __builtin_prefetch(buf); 1152 hdr->ts = ring->ts; 1153 hdr->len = hdr->caplen = ring->slot[i].len; 1154 ring->cur = nm_ring_next(ring, i); 1155 /* we could postpone advancing head if we want 1156 * to hold the buffer. This can be supported in 1157 * the future. 1158 */ 1159 ring->head = ring->cur; 1160 d->cur_rx_ring = ri; 1161 return buf; 1162 } 1163 ri++; 1164 if (ri > d->last_rx_ring) 1165 ri = d->first_rx_ring; 1166 } while (ri != d->cur_rx_ring); 1167 return NULL; /* nothing found */ 1168 } 1169 1170 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1171 1172 #endif /* NETMAP_WITH_LIBS */ 1173 1174 #endif /* _NET_NETMAP_USER_H_ */ 1175