/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2011-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *	we can access ring->cur, ring->head, ring->tail, etc.
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 97 #ifndef likely 98 #define likely(x) __builtin_expect(!!(x), 1) 99 #define unlikely(x) __builtin_expect(!!(x), 0) 100 #endif /* likely and unlikely */ 101 102 #include <net/netmap.h> 103 104 /* helper macro */ 105 #define _NETMAP_OFFSET(type, ptr, offset) \ 106 ((type)(void *)((char *)(ptr) + (offset))) 107 108 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 109 110 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 111 nifp, (nifp)->ring_ofs[index] ) 112 113 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 114 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 115 116 #define NETMAP_BUF(ring, index) \ 117 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 118 119 #define NETMAP_BUF_IDX(ring, buf) \ 120 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 121 (ring)->nr_buf_size ) 122 123 124 static inline uint32_t 125 nm_ring_next(struct netmap_ring *r, uint32_t i) 126 { 127 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 128 } 129 130 131 /* 132 * Return 1 if we have pending transmissions in the tx ring. 
133 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 134 */ 135 static inline int 136 nm_tx_pending(struct netmap_ring *r) 137 { 138 return nm_ring_next(r, r->tail) != r->head; 139 } 140 141 /* Compute the number of slots available in the netmap ring. We use 142 * ring->head as explained in the comment above nm_ring_empty(). */ 143 static inline uint32_t 144 nm_ring_space(struct netmap_ring *ring) 145 { 146 int ret = ring->tail - ring->head; 147 if (ret < 0) 148 ret += ring->num_slots; 149 return ret; 150 } 151 152 153 #ifdef NETMAP_WITH_LIBS 154 /* 155 * Support for simple I/O libraries. 156 * Include other system headers required for compiling this. 157 */ 158 159 #ifndef HAVE_NETMAP_WITH_LIBS 160 #define HAVE_NETMAP_WITH_LIBS 161 162 #include <stdio.h> 163 #include <sys/time.h> 164 #include <sys/mman.h> 165 #include <string.h> /* memset */ 166 #include <sys/ioctl.h> 167 #include <sys/errno.h> /* EINVAL */ 168 #include <fcntl.h> /* O_RDWR */ 169 #include <unistd.h> /* close() */ 170 #include <signal.h> 171 #include <stdlib.h> 172 173 #ifndef ND /* debug macros */ 174 /* debug support */ 175 #define ND(_fmt, ...) do {} while(0) 176 #define D(_fmt, ...) \ 177 do { \ 178 struct timeval _t0; \ 179 gettimeofday(&_t0, NULL); \ 180 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 181 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 182 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 183 } while (0) 184 185 /* Rate limited version of "D", lps indicates how many per second */ 186 #define RD(lps, format, ...) 
\ 187 do { \ 188 static int __t0, __cnt; \ 189 struct timeval __xxts; \ 190 gettimeofday(&__xxts, NULL); \ 191 if (__t0 != __xxts.tv_sec) { \ 192 __t0 = __xxts.tv_sec; \ 193 __cnt = 0; \ 194 } \ 195 if (__cnt++ < lps) { \ 196 D(format, ##__VA_ARGS__); \ 197 } \ 198 } while (0) 199 #endif 200 201 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 202 struct timeval ts; 203 uint32_t caplen; 204 uint32_t len; 205 206 uint64_t flags; /* NM_MORE_PKTS etc */ 207 #define NM_MORE_PKTS 1 208 struct nm_desc *d; 209 struct netmap_slot *slot; 210 uint8_t *buf; 211 }; 212 213 struct nm_stat { /* same as pcap_stat */ 214 u_int ps_recv; 215 u_int ps_drop; 216 u_int ps_ifdrop; 217 #ifdef WIN32 /* XXX or _WIN32 ? */ 218 u_int bs_capt; 219 #endif /* WIN32 */ 220 }; 221 222 #define NM_ERRBUF_SIZE 512 223 224 struct nm_desc { 225 struct nm_desc *self; /* point to self if netmap. */ 226 int fd; 227 void *mem; 228 uint32_t memsize; 229 int done_mmap; /* set if mem is the result of mmap */ 230 struct netmap_if * const nifp; 231 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 232 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 233 struct nmreq req; /* also contains the nr_name = ifname */ 234 struct nm_pkthdr hdr; 235 236 /* 237 * The memory contains netmap_if, rings and then buffers. 238 * Given a pointer (e.g. to nm_inject) we can compare with 239 * mem/buf_start/buf_end to tell if it is a buffer or 240 * some other descriptor in our region. 241 * We also store a pointer to some ring as it helps in the 242 * translation from buffer indexes to addresses. 
243 */ 244 struct netmap_ring * const some_ring; 245 void * const buf_start; 246 void * const buf_end; 247 /* parameters from pcap_open_live */ 248 int snaplen; 249 int promisc; 250 int to_ms; 251 char *errbuf; 252 253 /* save flags so we can restore them on close */ 254 uint32_t if_flags; 255 uint32_t if_reqcap; 256 uint32_t if_curcap; 257 258 struct nm_stat st; 259 char msg[NM_ERRBUF_SIZE]; 260 }; 261 262 /* 263 * when the descriptor is open correctly, d->self == d 264 * Eventually we should also use some magic number. 265 */ 266 #define P2NMD(p) ((struct nm_desc *)(p)) 267 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 268 #define NETMAP_FD(d) (P2NMD(d)->fd) 269 270 271 /* 272 * this is a slightly optimized copy routine which rounds 273 * to multiple of 64 bytes and is often faster than dealing 274 * with other odd sizes. We assume there is enough room 275 * in the source and destination buffers. 276 */ 277 static inline void 278 nm_pkt_copy(const void *_src, void *_dst, int l) 279 { 280 const uint64_t *src = (const uint64_t *)_src; 281 uint64_t *dst = (uint64_t *)_dst; 282 283 if (unlikely(l >= 1024 || l % 64)) { 284 memcpy(dst, src, l); 285 return; 286 } 287 for (; likely(l > 0); l-=64) { 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 *dst++ = *src++; 296 } 297 } 298 299 300 /* 301 * The callback, invoked on each received packet. Same as libpcap 302 */ 303 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 304 305 /* 306 *--- the pcap-like API --- 307 * 308 * nm_open() opens a file descriptor, binds to a port and maps memory. 
309 * 310 * ifname (netmap:foo or vale:foo) is the port name 311 * a suffix can indicate the follwing: 312 * ^ bind the host (sw) ring pair 313 * * bind host and NIC ring pairs 314 * -NN bind individual NIC ring pair 315 * {NN bind master side of pipe NN 316 * }NN bind slave side of pipe NN 317 * a suffix starting with / and the following flags, 318 * in any order: 319 * x exclusive access 320 * z zero copy monitor (both tx and rx) 321 * t monitor tx side (copy monitor) 322 * r monitor rx side (copy monitor) 323 * R bind only RX ring(s) 324 * T bind only TX ring(s) 325 * 326 * req provides the initial values of nmreq before parsing ifname. 327 * Remember that the ifname parsing will override the ring 328 * number in nm_ringid, and part of nm_flags; 329 * flags special functions, normally 0 330 * indicates which fields of *arg are significant 331 * arg special functions, normally NULL 332 * if passed a netmap_desc with mem != NULL, 333 * use that memory instead of mmap. 334 */ 335 336 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 337 uint64_t flags, const struct nm_desc *arg); 338 339 /* 340 * nm_open can import some fields from the parent descriptor. 341 * These flags control which ones. 342 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 343 * which set the initial value for these flags. 344 * Note that the 16 low bits of the flags are reserved for data 345 * that may go into the nmreq. 
346 */ 347 enum { 348 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 349 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 350 NM_OPEN_ARG1 = 0x100000, 351 NM_OPEN_ARG2 = 0x200000, 352 NM_OPEN_ARG3 = 0x400000, 353 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 354 }; 355 356 357 /* 358 * nm_close() closes and restores the port to its previous state 359 */ 360 361 static int nm_close(struct nm_desc *); 362 363 /* 364 * nm_mmap() do mmap or inherit from parent if the nr_arg2 365 * (memory block) matches. 366 */ 367 368 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 369 370 /* 371 * nm_inject() is the same as pcap_inject() 372 * nm_dispatch() is the same as pcap_dispatch() 373 * nm_nextpkt() is the same as pcap_next() 374 */ 375 376 static int nm_inject(struct nm_desc *, const void *, size_t); 377 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 378 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 379 380 #ifdef _WIN32 381 382 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 383 384 /* 385 * In windows we do not have yet native poll support, so we keep track 386 * of file descriptors associated to netmap ports to emulate poll on 387 * them and fall back on regular poll on other file descriptors. 
388 */ 389 struct win_netmap_fd_list { 390 struct win_netmap_fd_list *next; 391 int win_netmap_fd; 392 HANDLE win_netmap_handle; 393 }; 394 395 /* 396 * list head containing all the netmap opened fd and their 397 * windows HANDLE counterparts 398 */ 399 static struct win_netmap_fd_list *win_netmap_fd_list_head; 400 401 static void 402 win_insert_fd_record(int fd) 403 { 404 struct win_netmap_fd_list *curr; 405 406 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 407 if (fd == curr->win_netmap_fd) { 408 return; 409 } 410 } 411 curr = calloc(1, sizeof(*curr)); 412 curr->next = win_netmap_fd_list_head; 413 curr->win_netmap_fd = fd; 414 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 415 win_netmap_fd_list_head = curr; 416 } 417 418 void 419 win_remove_fd_record(int fd) 420 { 421 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 422 struct win_netmap_fd_list *prev = NULL; 423 for (; curr ; prev = curr, curr = curr->next) { 424 if (fd != curr->win_netmap_fd) 425 continue; 426 /* found the entry */ 427 if (prev == NULL) { /* we are freeing the first entry */ 428 win_netmap_fd_list_head = curr->next; 429 } else { 430 prev->next = curr->next; 431 } 432 free(curr); 433 break; 434 } 435 } 436 437 438 HANDLE 439 win_get_netmap_handle(int fd) 440 { 441 struct win_netmap_fd_list *curr; 442 443 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 444 if (fd == curr->win_netmap_fd) { 445 return curr->win_netmap_handle; 446 } 447 } 448 return NULL; 449 } 450 451 /* 452 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 453 */ 454 455 /* 456 * use this function only from netmap_user.h internal functions 457 * same as ioctl, returns 0 on success and -1 on error 458 */ 459 static int 460 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 461 { 462 DWORD bReturn = 0, szIn, szOut; 463 BOOL ioctlReturnStatus; 464 void *inParam = arg, *outParam = arg; 465 466 switch (ctlCode) { 467 case NETMAP_POLL: 468 szIn = 
sizeof(POLL_REQUEST_DATA); 469 szOut = sizeof(POLL_REQUEST_DATA); 470 break; 471 case NETMAP_MMAP: 472 szIn = 0; 473 szOut = sizeof(void*); 474 inParam = NULL; /* nothing on input */ 475 break; 476 case NIOCTXSYNC: 477 case NIOCRXSYNC: 478 szIn = 0; 479 szOut = 0; 480 break; 481 case NIOCREGIF: 482 szIn = sizeof(struct nmreq); 483 szOut = sizeof(struct nmreq); 484 break; 485 case NIOCCONFIG: 486 D("unsupported NIOCCONFIG!"); 487 return -1; 488 489 default: /* a regular ioctl */ 490 D("invalid ioctl %x on netmap fd", ctlCode); 491 return -1; 492 } 493 494 ioctlReturnStatus = DeviceIoControl(h, 495 ctlCode, inParam, szIn, 496 outParam, szOut, 497 &bReturn, NULL); 498 // XXX note windows returns 0 on error or async call, 1 on success 499 // we could call GetLastError() to figure out what happened 500 return ioctlReturnStatus ? 0 : -1; 501 } 502 503 /* 504 * this function is what must be called from user-space programs 505 * same as ioctl, returns 0 on success and -1 on error 506 */ 507 static int 508 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 509 { 510 HANDLE h = win_get_netmap_handle(fd); 511 512 if (h == NULL) { 513 return ioctl(fd, ctlCode, arg); 514 } else { 515 return win_nm_ioctl_internal(h, ctlCode, arg); 516 } 517 } 518 519 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 520 521 /* 522 * We cannot use the native mmap on windows 523 * The only parameter used is "fd", the other ones are just declared to 524 * make this signature comparable to the FreeBSD/Linux one 525 */ 526 static void * 527 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 528 { 529 HANDLE h = win_get_netmap_handle(fd); 530 531 if (h == NULL) { 532 return mmap(addr, length, prot, flags, fd, offset); 533 } else { 534 MEMORY_ENTRY ret; 535 536 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
537 NULL : ret.pUsermodeVirtualAddress; 538 } 539 } 540 541 #define mmap win32_mmap_emulated 542 543 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 544 545 static int 546 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 547 { 548 HANDLE h; 549 550 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 551 return poll(fds, nfds, timeout); 552 } else { 553 POLL_REQUEST_DATA prd; 554 555 prd.timeout = timeout; 556 prd.events = fds->events; 557 558 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 559 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 560 return -1; 561 } 562 return 1; 563 } 564 } 565 566 #define poll win_nm_poll 567 568 static int 569 win_nm_open(char* pathname, int flags) 570 { 571 572 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 573 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 574 if (fd < 0) { 575 return -1; 576 } 577 578 win_insert_fd_record(fd); 579 return fd; 580 } else { 581 return open(pathname, flags); 582 } 583 } 584 585 #define open win_nm_open 586 587 static int 588 win_nm_close(int fd) 589 { 590 if (fd != -1) { 591 close(fd); 592 if (win_get_netmap_handle(fd) != NULL) { 593 win_remove_fd_record(fd); 594 } 595 } 596 return 0; 597 } 598 599 #define close win_nm_close 600 601 #endif /* _WIN32 */ 602 603 static int 604 nm_is_identifier(const char *s, const char *e) 605 { 606 for (; s != e; s++) { 607 if (!isalnum(*s) && *s != '_') { 608 return 0; 609 } 610 } 611 612 return 1; 613 } 614 615 #define MAXERRMSG 80 616 static int 617 nm_parse(const char *ifname, struct nm_desc *d, char *err) 618 { 619 int is_vale; 620 const char *port = NULL; 621 const char *vpname = NULL; 622 u_int namelen; 623 uint32_t nr_ringid = 0, nr_flags; 624 char errmsg[MAXERRMSG] = ""; 625 long num; 626 uint16_t nr_arg2 = 0; 627 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 628 629 errno = 0; 630 631 is_vale = (ifname[0] == 'v'); 632 if (is_vale) { 633 port = index(ifname, 
':'); 634 if (port == NULL) { 635 snprintf(errmsg, MAXERRMSG, 636 "missing ':' in vale name"); 637 goto fail; 638 } 639 640 if (!nm_is_identifier(ifname + 4, port)) { 641 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 642 goto fail; 643 } 644 645 vpname = ++port; 646 } else { 647 ifname += 7; 648 port = ifname; 649 } 650 651 /* scan for a separator */ 652 for (; *port && !index("-*^{}/@", *port); port++) 653 ; 654 655 if (is_vale && !nm_is_identifier(vpname, port)) { 656 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 657 goto fail; 658 } 659 660 namelen = port - ifname; 661 if (namelen >= sizeof(d->req.nr_name)) { 662 snprintf(errmsg, MAXERRMSG, "name too long"); 663 goto fail; 664 } 665 memcpy(d->req.nr_name, ifname, namelen); 666 d->req.nr_name[namelen] = '\0'; 667 668 p_state = P_START; 669 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 670 while (*port) { 671 switch (p_state) { 672 case P_START: 673 switch (*port) { 674 case '^': /* only SW ring */ 675 nr_flags = NR_REG_SW; 676 p_state = P_RNGSFXOK; 677 break; 678 case '*': /* NIC and SW */ 679 nr_flags = NR_REG_NIC_SW; 680 p_state = P_RNGSFXOK; 681 break; 682 case '-': /* one NIC ring pair */ 683 nr_flags = NR_REG_ONE_NIC; 684 p_state = P_GETNUM; 685 break; 686 case '{': /* pipe (master endpoint) */ 687 nr_flags = NR_REG_PIPE_MASTER; 688 p_state = P_GETNUM; 689 break; 690 case '}': /* pipe (slave endoint) */ 691 nr_flags = NR_REG_PIPE_SLAVE; 692 p_state = P_GETNUM; 693 break; 694 case '/': /* start of flags */ 695 p_state = P_FLAGS; 696 break; 697 case '@': /* start of memid */ 698 p_state = P_MEMID; 699 break; 700 default: 701 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 702 goto fail; 703 } 704 port++; 705 break; 706 case P_RNGSFXOK: 707 switch (*port) { 708 case '/': 709 p_state = P_FLAGS; 710 break; 711 case '@': 712 p_state = P_MEMID; 713 break; 714 default: 715 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 716 goto fail; 717 } 718 port++; 719 
break; 720 case P_GETNUM: 721 num = strtol(port, (char **)&port, 10); 722 if (num < 0 || num >= NETMAP_RING_MASK) { 723 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 724 num, NETMAP_RING_MASK); 725 goto fail; 726 } 727 nr_ringid = num & NETMAP_RING_MASK; 728 p_state = P_RNGSFXOK; 729 break; 730 case P_FLAGS: 731 case P_FLAGSOK: 732 if (*port == '@') { 733 port++; 734 p_state = P_MEMID; 735 break; 736 } 737 switch (*port) { 738 case 'x': 739 nr_flags |= NR_EXCLUSIVE; 740 break; 741 case 'z': 742 nr_flags |= NR_ZCOPY_MON; 743 break; 744 case 't': 745 nr_flags |= NR_MONITOR_TX; 746 break; 747 case 'r': 748 nr_flags |= NR_MONITOR_RX; 749 break; 750 case 'R': 751 nr_flags |= NR_RX_RINGS_ONLY; 752 break; 753 case 'T': 754 nr_flags |= NR_TX_RINGS_ONLY; 755 break; 756 default: 757 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 758 goto fail; 759 } 760 port++; 761 p_state = P_FLAGSOK; 762 break; 763 case P_MEMID: 764 if (nr_arg2 != 0) { 765 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 766 goto fail; 767 } 768 num = strtol(port, (char **)&port, 10); 769 if (num <= 0) { 770 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 771 goto fail; 772 } 773 nr_arg2 = num; 774 p_state = P_RNGSFXOK; 775 break; 776 } 777 } 778 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 779 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 780 goto fail; 781 } 782 ND("flags: %s %s %s %s", 783 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 784 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 785 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 786 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 787 788 d->req.nr_flags |= nr_flags; 789 d->req.nr_ringid |= nr_ringid; 790 d->req.nr_arg2 = nr_arg2; 791 792 d->self = d; 793 794 return 0; 795 fail: 796 if (!errno) 797 errno = EINVAL; 798 if (err) 799 strncpy(err, errmsg, MAXERRMSG); 800 return -1; 801 } 802 803 /* 804 * Try to open, return descriptor if successful, NULL otherwise. 805 * An invalid netmap name will return errno = 0; 806 * You can pass a pointer to a pre-filled nm_desc to add special 807 * parameters. Flags is used as follows 808 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 809 * if the nr_arg2 (memory block) matches. 810 * NM_OPEN_ARG1 use req.nr_arg1 from arg 811 * NM_OPEN_ARG2 use req.nr_arg2 from arg 812 * NM_OPEN_RING_CFG user ring config from arg 813 */ 814 static struct nm_desc * 815 nm_open(const char *ifname, const struct nmreq *req, 816 uint64_t new_flags, const struct nm_desc *arg) 817 { 818 struct nm_desc *d = NULL; 819 const struct nm_desc *parent = arg; 820 char errmsg[MAXERRMSG] = ""; 821 uint32_t nr_reg; 822 823 if (strncmp(ifname, "netmap:", 7) && 824 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 825 errno = 0; /* name not recognised, not an error */ 826 return NULL; 827 } 828 829 d = (struct nm_desc *)calloc(1, sizeof(*d)); 830 if (d == NULL) { 831 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 832 errno = ENOMEM; 833 return NULL; 834 } 835 d->self = d; /* set this early so nm_close() works */ 836 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 837 if (d->fd < 0) { 838 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 839 goto fail; 840 } 841 842 if (req) 843 d->req = *req; 844 845 if (!(new_flags & NM_OPEN_IFNAME)) { 846 if (nm_parse(ifname, d, errmsg) < 0) 847 goto fail; 848 } 849 850 d->req.nr_version = NETMAP_API; 851 d->req.nr_ringid &= NETMAP_RING_MASK; 852 853 /* optionally import info from parent */ 854 if (IS_NETMAP_DESC(parent) && new_flags) { 855 if (new_flags & NM_OPEN_ARG1) 856 D("overriding 
ARG1 %d", parent->req.nr_arg1); 857 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 858 parent->req.nr_arg1 : 4; 859 if (new_flags & NM_OPEN_ARG2) { 860 D("overriding ARG2 %d", parent->req.nr_arg2); 861 d->req.nr_arg2 = parent->req.nr_arg2; 862 } 863 if (new_flags & NM_OPEN_ARG3) 864 D("overriding ARG3 %d", parent->req.nr_arg3); 865 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 866 parent->req.nr_arg3 : 0; 867 if (new_flags & NM_OPEN_RING_CFG) { 868 D("overriding RING_CFG"); 869 d->req.nr_tx_slots = parent->req.nr_tx_slots; 870 d->req.nr_rx_slots = parent->req.nr_rx_slots; 871 d->req.nr_tx_rings = parent->req.nr_tx_rings; 872 d->req.nr_rx_rings = parent->req.nr_rx_rings; 873 } 874 if (new_flags & NM_OPEN_IFNAME) { 875 D("overriding ifname %s ringid 0x%x flags 0x%x", 876 parent->req.nr_name, parent->req.nr_ringid, 877 parent->req.nr_flags); 878 memcpy(d->req.nr_name, parent->req.nr_name, 879 sizeof(d->req.nr_name)); 880 d->req.nr_ringid = parent->req.nr_ringid; 881 d->req.nr_flags = parent->req.nr_flags; 882 } 883 } 884 /* add the *XPOLL flags */ 885 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 886 887 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 888 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 889 goto fail; 890 } 891 892 nr_reg = d->req.nr_flags & NR_REG_MASK; 893 894 if (nr_reg == NR_REG_SW) { /* host stack */ 895 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 896 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 897 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 898 d->first_tx_ring = 0; 899 d->first_rx_ring = 0; 900 d->last_tx_ring = d->req.nr_tx_rings - 1; 901 d->last_rx_ring = d->req.nr_rx_rings - 1; 902 } else if (nr_reg == NR_REG_NIC_SW) { 903 d->first_tx_ring = 0; 904 d->first_rx_ring = 0; 905 d->last_tx_ring = d->req.nr_tx_rings; 906 d->last_rx_ring = d->req.nr_rx_rings; 907 } else if (nr_reg == NR_REG_ONE_NIC) { 908 /* XXX check validity */ 909 d->first_tx_ring = d->last_tx_ring = 910 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 911 } else { /* pipes */ 912 d->first_tx_ring = d->last_tx_ring = 0; 913 d->first_rx_ring = d->last_rx_ring = 0; 914 } 915 916 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 917 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 918 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 919 goto fail; 920 } 921 922 923 #ifdef DEBUG_NETMAP_USER 924 { /* debugging code */ 925 int i; 926 927 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 928 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 929 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 930 for (i = 0; i <= d->req.nr_tx_rings; i++) { 931 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 932 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 933 } 934 for (i = 0; i <= d->req.nr_rx_rings; i++) { 935 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 936 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 937 } 938 } 939 #endif /* debugging */ 940 941 d->cur_tx_ring = d->first_tx_ring; 942 d->cur_rx_ring = d->first_rx_ring; 943 return d; 944 945 fail: 946 nm_close(d); 947 if (errmsg[0]) 948 D("%s %s", errmsg, ifname); 949 if (errno == 0) 950 errno = EINVAL; 951 return NULL; 952 } 953 954 955 static int 956 nm_close(struct nm_desc *d) 957 { 958 /* 959 * ugly trick to avoid unused warnings 960 */ 961 static void *__xxzt[] __attribute__ ((unused)) = 962 { (void *)nm_open, (void *)nm_inject, 963 (void *)nm_dispatch, (void *)nm_nextpkt } ; 964 965 if (d == NULL || d->self != d) 966 return EINVAL; 967 if (d->done_mmap && d->mem) 968 munmap(d->mem, d->memsize); 969 if (d->fd != -1) { 970 close(d->fd); 971 } 972 973 bzero(d, sizeof(*d)); 974 free(d); 975 return 0; 976 } 977 978 979 static int 980 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 981 { 982 //XXX TODO: check if mmap is already done 983 984 if (IS_NETMAP_DESC(parent) && parent->mem && 985 
parent->req.nr_arg2 == d->req.nr_arg2) { 986 /* do not mmap, inherit from parent */ 987 D("do not mmap, inherit from parent"); 988 d->memsize = parent->memsize; 989 d->mem = parent->mem; 990 } else { 991 /* XXX TODO: check if memsize is too large (or there is overflow) */ 992 d->memsize = d->req.nr_memsize; 993 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 994 d->fd, 0); 995 if (d->mem == MAP_FAILED) { 996 goto fail; 997 } 998 d->done_mmap = 1; 999 } 1000 { 1001 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 1002 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1003 if ((void *)r == (void *)nifp) { 1004 /* the descriptor is open for TX only */ 1005 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1006 } 1007 1008 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1009 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1010 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1011 *(void **)(uintptr_t)&d->buf_end = 1012 (char *)d->mem + d->memsize; 1013 } 1014 1015 return 0; 1016 1017 fail: 1018 return EINVAL; 1019 } 1020 1021 /* 1022 * Same prototype as pcap_inject(), only need to cast. 
1023 */ 1024 static int 1025 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1026 { 1027 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1028 ri = d->cur_tx_ring; 1029 1030 for (c = 0; c < n ; c++, ri++) { 1031 /* compute current ring to use */ 1032 struct netmap_ring *ring; 1033 uint32_t i, j, idx; 1034 size_t rem; 1035 1036 if (ri > d->last_tx_ring) 1037 ri = d->first_tx_ring; 1038 ring = NETMAP_TXRING(d->nifp, ri); 1039 rem = size; 1040 j = ring->cur; 1041 while (rem > ring->nr_buf_size && j != ring->tail) { 1042 rem -= ring->nr_buf_size; 1043 j = nm_ring_next(ring, j); 1044 } 1045 if (j == ring->tail && rem > 0) 1046 continue; 1047 i = ring->cur; 1048 while (i != j) { 1049 idx = ring->slot[i].buf_idx; 1050 ring->slot[i].len = ring->nr_buf_size; 1051 ring->slot[i].flags = NS_MOREFRAG; 1052 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1053 i = nm_ring_next(ring, i); 1054 buf = (char *)buf + ring->nr_buf_size; 1055 } 1056 idx = ring->slot[i].buf_idx; 1057 ring->slot[i].len = rem; 1058 ring->slot[i].flags = 0; 1059 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1060 ring->head = ring->cur = nm_ring_next(ring, i); 1061 d->cur_tx_ring = ri; 1062 return size; 1063 } 1064 return 0; /* fail */ 1065 } 1066 1067 1068 /* 1069 * Same prototype as pcap_dispatch(), only need to cast. 
1070 */ 1071 static int 1072 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1073 { 1074 int n = d->last_rx_ring - d->first_rx_ring + 1; 1075 int c, got = 0, ri = d->cur_rx_ring; 1076 d->hdr.buf = NULL; 1077 d->hdr.flags = NM_MORE_PKTS; 1078 d->hdr.d = d; 1079 1080 if (cnt == 0) 1081 cnt = -1; 1082 /* cnt == -1 means infinite, but rings have a finite amount 1083 * of buffers and the int is large enough that we never wrap, 1084 * so we can omit checking for -1 1085 */ 1086 for (c=0; c < n && cnt != got; c++, ri++) { 1087 /* compute current ring to use */ 1088 struct netmap_ring *ring; 1089 1090 if (ri > d->last_rx_ring) 1091 ri = d->first_rx_ring; 1092 ring = NETMAP_RXRING(d->nifp, ri); 1093 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1094 u_int idx, i; 1095 u_char *oldbuf; 1096 struct netmap_slot *slot; 1097 if (d->hdr.buf) { /* from previous round */ 1098 cb(arg, &d->hdr, d->hdr.buf); 1099 } 1100 i = ring->cur; 1101 slot = &ring->slot[i]; 1102 idx = slot->buf_idx; 1103 /* d->cur_rx_ring doesn't change inside this loop, but 1104 * set it here, so it reflects d->hdr.buf's ring */ 1105 d->cur_rx_ring = ri; 1106 d->hdr.slot = slot; 1107 oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1108 // __builtin_prefetch(buf); 1109 d->hdr.len = d->hdr.caplen = slot->len; 1110 while (slot->flags & NS_MOREFRAG) { 1111 u_char *nbuf; 1112 u_int oldlen = slot->len; 1113 i = nm_ring_next(ring, i); 1114 slot = &ring->slot[i]; 1115 d->hdr.len += slot->len; 1116 nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); 1117 if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && 1118 oldlen == ring->nr_buf_size) { 1119 d->hdr.caplen += slot->len; 1120 oldbuf = nbuf; 1121 } else { 1122 oldbuf = NULL; 1123 } 1124 } 1125 d->hdr.ts = ring->ts; 1126 ring->head = ring->cur = nm_ring_next(ring, i); 1127 } 1128 } 1129 if (d->hdr.buf) { /* from previous round */ 1130 d->hdr.flags = 0; 1131 cb(arg, &d->hdr, d->hdr.buf); 1132 } 1133 return got; 1134 } 1135 1136 static 
u_char * 1137 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1138 { 1139 int ri = d->cur_rx_ring; 1140 1141 do { 1142 /* compute current ring to use */ 1143 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1144 if (!nm_ring_empty(ring)) { 1145 u_int i = ring->cur; 1146 u_int idx = ring->slot[i].buf_idx; 1147 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1148 1149 // __builtin_prefetch(buf); 1150 hdr->ts = ring->ts; 1151 hdr->len = hdr->caplen = ring->slot[i].len; 1152 ring->cur = nm_ring_next(ring, i); 1153 /* we could postpone advancing head if we want 1154 * to hold the buffer. This can be supported in 1155 * the future. 1156 */ 1157 ring->head = ring->cur; 1158 d->cur_rx_ring = ri; 1159 return buf; 1160 } 1161 ri++; 1162 if (ri > d->last_rx_ring) 1163 ri = d->first_rx_ring; 1164 } while (ri != d->cur_rx_ring); 1165 return NULL; /* nothing found */ 1166 } 1167 1168 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1169 1170 #endif /* NETMAP_WITH_LIBS */ 1171 1172 #endif /* _NET_NETMAP_USER_H_ */ 1173