1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * $FreeBSD$ 32 * 33 * Functions and macros to manipulate netmap structures and packets 34 * in userspace. See netmap(4) for more information. 35 * 36 * The address of the struct netmap_if, say nifp, is computed from the 37 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 38 * ioctl(fd, NIOCREG, &req); 39 * mem = mmap(0, ... ); 40 * nifp = NETMAP_IF(mem, req.nr_nifp); 41 * (so simple, we could just do it manually) 42 * 43 * From there: 44 * struct netmap_ring *NETMAP_TXRING(nifp, index) 45 * struct netmap_ring *NETMAP_RXRING(nifp, index) 46 * we can access ring->cur, ring->head, ring->tail, etc. 47 * 48 * ring->slot[i] gives us the i-th slot (we can access 49 * directly len, flags, buf_idx) 50 * 51 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 52 * the buffer numbered x 53 * 54 * All ring indexes (head, cur, tail) should always move forward. 55 * To compute the next index in a circular ring you can use 56 * i = nm_ring_next(ring, i); 57 * 58 * To ease porting apps from pcap to netmap we supply a few fuctions 59 * that can be called to open, close, read and write on netmap in a way 60 * similar to libpcap. Note that the read/write function depend on 61 * an ioctl()/select()/poll() being issued to refill rings or push 62 * packets out. 63 * 64 * In order to use these, include #define NETMAP_WITH_LIBS 65 * in the source file that invokes these functions. 66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 97 #ifndef likely 98 #define likely(x) __builtin_expect(!!(x), 1) 99 #define unlikely(x) __builtin_expect(!!(x), 0) 100 #endif /* likely and unlikely */ 101 102 #include <net/netmap.h> 103 #include <net/netmap_virt.h> /* nmreq_pointer_get() */ 104 105 /* helper macro */ 106 #define _NETMAP_OFFSET(type, ptr, offset) \ 107 ((type)(void *)((char *)(ptr) + (offset))) 108 109 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 110 111 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index] ) 113 114 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 115 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 116 117 #define NETMAP_BUF(ring, index) \ 118 ((char *)(ring) + (ring)->buf_ofs + ((long)(index)*(ring)->nr_buf_size)) 119 120 #define NETMAP_BUF_IDX(ring, buf) \ 121 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 122 (ring)->nr_buf_size ) 123 124 125 static inline uint32_t 126 nm_ring_next(struct netmap_ring *r, uint32_t i) 127 { 128 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 129 } 130 131 132 /* 133 * Return 1 if we have pending transmissions in the tx ring. 134 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 135 */ 136 static inline int 137 nm_tx_pending(struct netmap_ring *r) 138 { 139 return nm_ring_next(r, r->tail) != r->head; 140 } 141 142 143 static inline uint32_t 144 nm_ring_space(struct netmap_ring *ring) 145 { 146 int ret = ring->tail - ring->cur; 147 if (ret < 0) 148 ret += ring->num_slots; 149 return ret; 150 } 151 152 153 #ifdef NETMAP_WITH_LIBS 154 /* 155 * Support for simple I/O libraries. 156 * Include other system headers required for compiling this. 157 */ 158 159 #ifndef HAVE_NETMAP_WITH_LIBS 160 #define HAVE_NETMAP_WITH_LIBS 161 162 #include <stdio.h> 163 #include <sys/time.h> 164 #include <sys/mman.h> 165 #include <string.h> /* memset */ 166 #include <sys/ioctl.h> 167 #include <sys/errno.h> /* EINVAL */ 168 #include <fcntl.h> /* O_RDWR */ 169 #include <unistd.h> /* close() */ 170 #include <signal.h> 171 #include <stdlib.h> 172 173 #ifndef ND /* debug macros */ 174 /* debug support */ 175 #define ND(_fmt, ...) do {} while(0) 176 #define D(_fmt, ...) \ 177 do { \ 178 struct timeval _t0; \ 179 gettimeofday(&_t0, NULL); \ 180 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 181 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 182 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 183 } while (0) 184 185 /* Rate limited version of "D", lps indicates how many per second */ 186 #define RD(lps, format, ...) \ 187 do { \ 188 static int __t0, __cnt; \ 189 struct timeval __xxts; \ 190 gettimeofday(&__xxts, NULL); \ 191 if (__t0 != __xxts.tv_sec) { \ 192 __t0 = __xxts.tv_sec; \ 193 __cnt = 0; \ 194 } \ 195 if (__cnt++ < lps) { \ 196 D(format, ##__VA_ARGS__); \ 197 } \ 198 } while (0) 199 #endif 200 201 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 202 struct timeval ts; 203 uint32_t caplen; 204 uint32_t len; 205 206 uint64_t flags; /* NM_MORE_PKTS etc */ 207 #define NM_MORE_PKTS 1 208 struct nm_desc *d; 209 struct netmap_slot *slot; 210 uint8_t *buf; 211 }; 212 213 struct nm_stat { /* same as pcap_stat */ 214 u_int ps_recv; 215 u_int ps_drop; 216 u_int ps_ifdrop; 217 #ifdef WIN32 /* XXX or _WIN32 ? */ 218 u_int bs_capt; 219 #endif /* WIN32 */ 220 }; 221 222 #define NM_ERRBUF_SIZE 512 223 224 struct nm_desc { 225 struct nm_desc *self; /* point to self if netmap. */ 226 int fd; 227 void *mem; 228 uint64_t memsize; 229 int done_mmap; /* set if mem is the result of mmap */ 230 struct netmap_if * const nifp; 231 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 232 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 233 struct nmreq req; /* also contains the nr_name = ifname */ 234 struct nm_pkthdr hdr; 235 236 /* 237 * The memory contains netmap_if, rings and then buffers. 238 * Given a pointer (e.g. to nm_inject) we can compare with 239 * mem/buf_start/buf_end to tell if it is a buffer or 240 * some other descriptor in our region. 241 * We also store a pointer to some ring as it helps in the 242 * translation from buffer indexes to addresses. 243 */ 244 struct netmap_ring * const some_ring; 245 void * const buf_start; 246 void * const buf_end; 247 /* parameters from pcap_open_live */ 248 int snaplen; 249 int promisc; 250 int to_ms; 251 char *errbuf; 252 253 /* save flags so we can restore them on close */ 254 uint32_t if_flags; 255 uint32_t if_reqcap; 256 uint32_t if_curcap; 257 258 struct nm_stat st; 259 char msg[NM_ERRBUF_SIZE]; 260 }; 261 262 /* 263 * when the descriptor is open correctly, d->self == d 264 * Eventually we should also use some magic number. 265 */ 266 #define P2NMD(p) ((struct nm_desc *)(p)) 267 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 268 #define NETMAP_FD(d) (P2NMD(d)->fd) 269 270 271 /* 272 * this is a slightly optimized copy routine which rounds 273 * to multiple of 64 bytes and is often faster than dealing 274 * with other odd sizes. We assume there is enough room 275 * in the source and destination buffers. 276 */ 277 static inline void 278 nm_pkt_copy(const void *_src, void *_dst, int l) 279 { 280 const uint64_t *src = (const uint64_t *)_src; 281 uint64_t *dst = (uint64_t *)_dst; 282 283 if (unlikely(l >= 1024 || l % 64)) { 284 memcpy(dst, src, l); 285 return; 286 } 287 for (; likely(l > 0); l-=64) { 288 *dst++ = *src++; 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 *dst++ = *src++; 296 } 297 } 298 299 300 /* 301 * The callback, invoked on each received packet. Same as libpcap 302 */ 303 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 304 305 /* 306 *--- the pcap-like API --- 307 * 308 * nm_open() opens a file descriptor, binds to a port and maps memory. 309 * 310 * ifname (netmap:foo or vale:foo) is the port name 311 * a suffix can indicate the follwing: 312 * ^ bind the host (sw) ring pair 313 * * bind host and NIC ring pairs 314 * -NN bind individual NIC ring pair 315 * {NN bind master side of pipe NN 316 * }NN bind slave side of pipe NN 317 * a suffix starting with / and the following flags, 318 * in any order: 319 * x exclusive access 320 * z zero copy monitor (both tx and rx) 321 * t monitor tx side (copy monitor) 322 * r monitor rx side (copy monitor) 323 * R bind only RX ring(s) 324 * T bind only TX ring(s) 325 * 326 * req provides the initial values of nmreq before parsing ifname. 327 * Remember that the ifname parsing will override the ring 328 * number in nm_ringid, and part of nm_flags; 329 * flags special functions, normally 0 330 * indicates which fields of *arg are significant 331 * arg special functions, normally NULL 332 * if passed a netmap_desc with mem != NULL, 333 * use that memory instead of mmap. 334 */ 335 336 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 337 uint64_t flags, const struct nm_desc *arg); 338 339 /* 340 * nm_open can import some fields from the parent descriptor. 341 * These flags control which ones. 342 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 343 * which set the initial value for these flags. 344 * Note that the 16 low bits of the flags are reserved for data 345 * that may go into the nmreq. 346 */ 347 enum { 348 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 349 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 350 NM_OPEN_ARG1 = 0x100000, 351 NM_OPEN_ARG2 = 0x200000, 352 NM_OPEN_ARG3 = 0x400000, 353 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 354 NM_OPEN_EXTMEM = 0x1000000, 355 }; 356 357 358 /* 359 * nm_close() closes and restores the port to its previous state 360 */ 361 362 static int nm_close(struct nm_desc *); 363 364 /* 365 * nm_mmap() do mmap or inherit from parent if the nr_arg2 366 * (memory block) matches. 367 */ 368 369 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 370 371 /* 372 * nm_inject() is the same as pcap_inject() 373 * nm_dispatch() is the same as pcap_dispatch() 374 * nm_nextpkt() is the same as pcap_next() 375 */ 376 377 static int nm_inject(struct nm_desc *, const void *, size_t); 378 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 379 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 380 381 #ifdef _WIN32 382 383 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 384 385 /* 386 * In windows we do not have yet native poll support, so we keep track 387 * of file descriptors associated to netmap ports to emulate poll on 388 * them and fall back on regular poll on other file descriptors. 389 */ 390 struct win_netmap_fd_list { 391 struct win_netmap_fd_list *next; 392 int win_netmap_fd; 393 HANDLE win_netmap_handle; 394 }; 395 396 /* 397 * list head containing all the netmap opened fd and their 398 * windows HANDLE counterparts 399 */ 400 static struct win_netmap_fd_list *win_netmap_fd_list_head; 401 402 static void 403 win_insert_fd_record(int fd) 404 { 405 struct win_netmap_fd_list *curr; 406 407 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 408 if (fd == curr->win_netmap_fd) { 409 return; 410 } 411 } 412 curr = calloc(1, sizeof(*curr)); 413 curr->next = win_netmap_fd_list_head; 414 curr->win_netmap_fd = fd; 415 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 416 win_netmap_fd_list_head = curr; 417 } 418 419 void 420 win_remove_fd_record(int fd) 421 { 422 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 423 struct win_netmap_fd_list *prev = NULL; 424 for (; curr ; prev = curr, curr = curr->next) { 425 if (fd != curr->win_netmap_fd) 426 continue; 427 /* found the entry */ 428 if (prev == NULL) { /* we are freeing the first entry */ 429 win_netmap_fd_list_head = curr->next; 430 } else { 431 prev->next = curr->next; 432 } 433 free(curr); 434 break; 435 } 436 } 437 438 439 HANDLE 440 win_get_netmap_handle(int fd) 441 { 442 struct win_netmap_fd_list *curr; 443 444 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 445 if (fd == curr->win_netmap_fd) { 446 return curr->win_netmap_handle; 447 } 448 } 449 return NULL; 450 } 451 452 /* 453 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 454 */ 455 456 /* 457 * use this function only from netmap_user.h internal functions 458 * same as ioctl, returns 0 on success and -1 on error 459 */ 460 static int 461 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 462 { 463 DWORD bReturn = 0, szIn, szOut; 464 BOOL ioctlReturnStatus; 465 void *inParam = arg, *outParam = arg; 466 467 switch (ctlCode) { 468 case NETMAP_POLL: 469 szIn = sizeof(POLL_REQUEST_DATA); 470 szOut = sizeof(POLL_REQUEST_DATA); 471 break; 472 case NETMAP_MMAP: 473 szIn = 0; 474 szOut = sizeof(void*); 475 inParam = NULL; /* nothing on input */ 476 break; 477 case NIOCTXSYNC: 478 case NIOCRXSYNC: 479 szIn = 0; 480 szOut = 0; 481 break; 482 case NIOCREGIF: 483 szIn = sizeof(struct nmreq); 484 szOut = sizeof(struct nmreq); 485 break; 486 case NIOCCONFIG: 487 D("unsupported NIOCCONFIG!"); 488 return -1; 489 490 default: /* a regular ioctl */ 491 D("invalid ioctl %x on netmap fd", ctlCode); 492 return -1; 493 } 494 495 ioctlReturnStatus = DeviceIoControl(h, 496 ctlCode, inParam, szIn, 497 outParam, szOut, 498 &bReturn, NULL); 499 // XXX note windows returns 0 on error or async call, 1 on success 500 // we could call GetLastError() to figure out what happened 501 return ioctlReturnStatus ? 0 : -1; 502 } 503 504 /* 505 * this function is what must be called from user-space programs 506 * same as ioctl, returns 0 on success and -1 on error 507 */ 508 static int 509 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 510 { 511 HANDLE h = win_get_netmap_handle(fd); 512 513 if (h == NULL) { 514 return ioctl(fd, ctlCode, arg); 515 } else { 516 return win_nm_ioctl_internal(h, ctlCode, arg); 517 } 518 } 519 520 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 521 522 /* 523 * We cannot use the native mmap on windows 524 * The only parameter used is "fd", the other ones are just declared to 525 * make this signature comparable to the FreeBSD/Linux one 526 */ 527 static void * 528 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 529 { 530 HANDLE h = win_get_netmap_handle(fd); 531 532 if (h == NULL) { 533 return mmap(addr, length, prot, flags, fd, offset); 534 } else { 535 MEMORY_ENTRY ret; 536 537 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 538 NULL : ret.pUsermodeVirtualAddress; 539 } 540 } 541 542 #define mmap win32_mmap_emulated 543 544 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 545 546 static int 547 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 548 { 549 HANDLE h; 550 551 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 552 return poll(fds, nfds, timeout); 553 } else { 554 POLL_REQUEST_DATA prd; 555 556 prd.timeout = timeout; 557 prd.events = fds->events; 558 559 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 560 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 561 return -1; 562 } 563 return 1; 564 } 565 } 566 567 #define poll win_nm_poll 568 569 static int 570 win_nm_open(char* pathname, int flags) 571 { 572 573 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 574 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 575 if (fd < 0) { 576 return -1; 577 } 578 579 win_insert_fd_record(fd); 580 return fd; 581 } else { 582 return open(pathname, flags); 583 } 584 } 585 586 #define open win_nm_open 587 588 static int 589 win_nm_close(int fd) 590 { 591 if (fd != -1) { 592 close(fd); 593 if (win_get_netmap_handle(fd) != NULL) { 594 win_remove_fd_record(fd); 595 } 596 } 597 return 0; 598 } 599 600 #define close win_nm_close 601 602 #endif /* _WIN32 */ 603 604 static int 605 nm_is_identifier(const char *s, const char *e) 606 { 607 for (; s != e; s++) { 608 if (!isalnum(*s) && *s != '_') { 609 return 0; 610 } 611 } 612 613 return 1; 614 } 615 616 static void 617 nm_init_offsets(struct nm_desc *d) 618 { 619 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 620 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 621 if ((void *)r == (void *)nifp) { 622 /* the descriptor is open for TX only */ 623 r = NETMAP_TXRING(nifp, d->first_tx_ring); 624 } 625 626 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 627 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 628 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 629 *(void **)(uintptr_t)&d->buf_end = 630 (char *)d->mem + d->memsize; 631 } 632 633 #define MAXERRMSG 80 634 #define NM_PARSE_OK 0 635 #define NM_PARSE_MEMID 1 636 static int 637 nm_parse_one(const char *ifname, struct nmreq *d, char **out, int memid_allowed) 638 { 639 int is_vale; 640 const char *port = NULL; 641 const char *vpname = NULL; 642 u_int namelen; 643 uint32_t nr_ringid = 0, nr_flags; 644 char errmsg[MAXERRMSG] = ""; 645 long num; 646 uint16_t nr_arg2 = 0; 647 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 648 649 errno = 0; 650 651 if (strncmp(ifname, "netmap:", 7) && 652 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 653 snprintf(errmsg, MAXERRMSG, "invalid port name: %s", ifname); 654 errno = EINVAL; 655 goto fail; 656 } 657 658 is_vale = (ifname[0] == 'v'); 659 if (is_vale) { 660 port = index(ifname, ':'); 661 if (port == NULL) { 662 snprintf(errmsg, MAXERRMSG, 663 "missing ':' in vale name"); 664 goto fail; 665 } 666 667 if (!nm_is_identifier(ifname + 4, port)) { 668 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 669 goto fail; 670 } 671 672 vpname = ++port; 673 } else { 674 ifname += 7; 675 port = ifname; 676 } 677 678 /* scan for a separator */ 679 for (; *port && !index("-*^{}/@", *port); port++) 680 ; 681 682 if (is_vale && !nm_is_identifier(vpname, port)) { 683 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 684 goto fail; 685 } 686 687 namelen = port - ifname; 688 if (namelen >= sizeof(d->nr_name)) { 689 snprintf(errmsg, MAXERRMSG, "name too long"); 690 goto fail; 691 } 692 memcpy(d->nr_name, ifname, namelen); 693 d->nr_name[namelen] = '\0'; 694 D("name %s", d->nr_name); 695 696 p_state = P_START; 697 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 698 while (*port) { 699 switch (p_state) { 700 case P_START: 701 switch (*port) { 702 case '^': /* only SW ring */ 703 nr_flags = NR_REG_SW; 704 p_state = P_RNGSFXOK; 705 break; 706 case '*': /* NIC and SW */ 707 nr_flags = NR_REG_NIC_SW; 708 p_state = P_RNGSFXOK; 709 break; 710 case '-': /* one NIC ring pair */ 711 nr_flags = NR_REG_ONE_NIC; 712 p_state = P_GETNUM; 713 break; 714 case '{': /* pipe (master endpoint) */ 715 nr_flags = NR_REG_PIPE_MASTER; 716 p_state = P_GETNUM; 717 break; 718 case '}': /* pipe (slave endoint) */ 719 nr_flags = NR_REG_PIPE_SLAVE; 720 p_state = P_GETNUM; 721 break; 722 case '/': /* start of flags */ 723 p_state = P_FLAGS; 724 break; 725 case '@': /* start of memid */ 726 p_state = P_MEMID; 727 break; 728 default: 729 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 730 goto fail; 731 } 732 port++; 733 break; 734 case P_RNGSFXOK: 735 switch (*port) { 736 case '/': 737 p_state = P_FLAGS; 738 break; 739 case '@': 740 p_state = P_MEMID; 741 break; 742 default: 743 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 744 goto fail; 745 } 746 port++; 747 break; 748 case P_GETNUM: 749 num = strtol(port, (char **)&port, 10); 750 if (num < 0 || num >= NETMAP_RING_MASK) { 751 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 752 num, NETMAP_RING_MASK); 753 goto fail; 754 } 755 nr_ringid = num & NETMAP_RING_MASK; 756 p_state = P_RNGSFXOK; 757 break; 758 case P_FLAGS: 759 case P_FLAGSOK: 760 if (*port == '@') { 761 port++; 762 p_state = P_MEMID; 763 break; 764 } 765 switch (*port) { 766 case 'x': 767 nr_flags |= NR_EXCLUSIVE; 768 break; 769 case 'z': 770 nr_flags |= NR_ZCOPY_MON; 771 break; 772 case 't': 773 nr_flags |= NR_MONITOR_TX; 774 break; 775 case 'r': 776 nr_flags |= NR_MONITOR_RX; 777 break; 778 case 'R': 779 nr_flags |= NR_RX_RINGS_ONLY; 780 break; 781 case 'T': 782 nr_flags |= NR_TX_RINGS_ONLY; 783 break; 784 default: 785 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 786 goto fail; 787 } 788 port++; 789 p_state = P_FLAGSOK; 790 break; 791 case P_MEMID: 792 if (!memid_allowed) { 793 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 794 goto fail; 795 } 796 num = strtol(port, (char **)&port, 10); 797 if (num <= 0) { 798 ND("non-numeric memid %s (out = %p)", port, out); 799 if (out == NULL) 800 goto fail; 801 *out = (char *)port; 802 while (*port) 803 port++; 804 } else { 805 nr_arg2 = num; 806 memid_allowed = 0; 807 p_state = P_RNGSFXOK; 808 } 809 break; 810 } 811 } 812 if (p_state != P_START && p_state != P_RNGSFXOK && 813 p_state != P_FLAGSOK && p_state != P_MEMID) { 814 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 815 goto fail; 816 } 817 ND("flags: %s %s %s %s", 818 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 819 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 820 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 821 (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); 822 823 d->nr_flags |= nr_flags; 824 d->nr_ringid |= nr_ringid; 825 d->nr_arg2 = nr_arg2; 826 827 return (p_state == P_MEMID) ? NM_PARSE_MEMID : NM_PARSE_OK; 828 fail: 829 if (!errno) 830 errno = EINVAL; 831 if (out) 832 *out = strdup(errmsg); 833 return -1; 834 } 835 836 static int 837 nm_interp_memid(const char *memid, struct nmreq *req, char **err) 838 { 839 int fd = -1; 840 char errmsg[MAXERRMSG] = ""; 841 struct nmreq greq; 842 off_t mapsize; 843 struct netmap_pools_info *pi; 844 845 /* first, try to look for a netmap port with this name */ 846 fd = open("/dev/netmap", O_RDONLY); 847 if (fd < 0) { 848 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 849 goto fail; 850 } 851 memset(&greq, 0, sizeof(greq)); 852 if (nm_parse_one(memid, &greq, err, 0) == NM_PARSE_OK) { 853 greq.nr_version = NETMAP_API; 854 if (ioctl(fd, NIOCGINFO, &greq) < 0) { 855 if (errno == ENOENT || errno == ENXIO) 856 goto try_external; 857 snprintf(errmsg, MAXERRMSG, "cannot getinfo for %s: %s", memid, strerror(errno)); 858 goto fail; 859 } 860 req->nr_arg2 = greq.nr_arg2; 861 close(fd); 862 return 0; 863 } 864 try_external: 865 D("trying with external memory"); 866 close(fd); 867 fd = open(memid, O_RDWR); 868 if (fd < 0) { 869 snprintf(errmsg, MAXERRMSG, "cannot open %s: %s", memid, strerror(errno)); 870 goto fail; 871 } 872 mapsize = lseek(fd, 0, SEEK_END); 873 if (mapsize < 0) { 874 snprintf(errmsg, MAXERRMSG, "failed to obtain filesize of %s: %s", memid, strerror(errno)); 875 goto fail; 876 } 877 pi = mmap(0, mapsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 878 if (pi == MAP_FAILED) { 879 snprintf(errmsg, MAXERRMSG, "cannot map %s: %s", memid, strerror(errno)); 880 goto fail; 881 } 882 req->nr_cmd = NETMAP_POOLS_CREATE; 883 pi->memsize = mapsize; 884 nmreq_pointer_put(req, pi); 885 D("mapped %zu bytes at %p from file %s", mapsize, pi, memid); 886 return 0; 887 888 fail: 889 D("%s", errmsg); 890 close(fd); 891 if (err && !*err) 892 *err = strdup(errmsg); 893 return errno; 894 } 895 896 static int 897 nm_parse(const char *ifname, struct nm_desc *d, char *errmsg) 898 { 899 char *err; 900 switch (nm_parse_one(ifname, &d->req, &err, 1)) { 901 case NM_PARSE_OK: 902 D("parse OK"); 903 break; 904 case NM_PARSE_MEMID: 905 D("memid: %s", err); 906 errno = nm_interp_memid(err, &d->req, &err); 907 D("errno = %d", errno); 908 if (!errno) 909 break; 910 /* fallthrough */ 911 default: 912 D("error"); 913 strncpy(errmsg, err, MAXERRMSG); 914 errmsg[MAXERRMSG-1] = '\0'; 915 free(err); 916 return -1; 917 } 918 D("parsed name: %s", d->req.nr_name); 919 d->self = d; 920 return 0; 921 } 922 923 /* 924 * Try to open, return descriptor if successful, NULL otherwise. 925 * An invalid netmap name will return errno = 0; 926 * You can pass a pointer to a pre-filled nm_desc to add special 927 * parameters. Flags is used as follows 928 * NM_OPEN_NO_MMAP use the memory from arg, only 929 * if the nr_arg2 (memory block) matches. 930 * Special case: if arg is NULL, skip the 931 * mmap entirely (maybe because you are going 932 * to do it by yourself, or you plan to call 933 * nm_mmap() only later) 934 * NM_OPEN_ARG1 use req.nr_arg1 from arg 935 * NM_OPEN_ARG2 use req.nr_arg2 from arg 936 * NM_OPEN_RING_CFG user ring config from arg 937 */ 938 static struct nm_desc * 939 nm_open(const char *ifname, const struct nmreq *req, 940 uint64_t new_flags, const struct nm_desc *arg) 941 { 942 struct nm_desc *d = NULL; 943 const struct nm_desc *parent = arg; 944 char errmsg[MAXERRMSG] = ""; 945 uint32_t nr_reg; 946 struct netmap_pools_info *pi = NULL; 947 948 if (strncmp(ifname, "netmap:", 7) && 949 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 950 errno = 0; /* name not recognised, not an error */ 951 return NULL; 952 } 953 954 d = (struct nm_desc *)calloc(1, sizeof(*d)); 955 if (d == NULL) { 956 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 957 errno = ENOMEM; 958 return NULL; 959 } 960 d->self = d; /* set this early so nm_close() works */ 961 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 962 if (d->fd < 0) { 963 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 964 goto fail; 965 } 966 967 if (req) { 968 d->req = *req; 969 } else { 970 d->req.nr_arg1 = 4; 971 d->req.nr_arg2 = 0; 972 d->req.nr_arg3 = 0; 973 } 974 975 if (!(new_flags & NM_OPEN_IFNAME)) { 976 char *err; 977 switch (nm_parse_one(ifname, &d->req, &err, 1)) { 978 case NM_PARSE_OK: 979 break; 980 case NM_PARSE_MEMID: 981 if ((new_flags & NM_OPEN_NO_MMAP) && 982 IS_NETMAP_DESC(parent)) { 983 /* ignore the memid setting, since we are 984 * going to use the parent's one 985 */ 986 break; 987 } 988 errno = nm_interp_memid(err, &d->req, &err); 989 if (!errno) 990 break; 991 /* fallthrough */ 992 default: 993 strncpy(errmsg, err, MAXERRMSG); 994 errmsg[MAXERRMSG-1] = '\0'; 995 free(err); 996 goto fail; 997 } 998 d->self = d; 999 } 1000 1001 /* compatibility checks for POOL_SCREATE and NM_OPEN flags 1002 * the first check may be dropped once we have a larger nreq 1003 */ 1004 if (d->req.nr_cmd == NETMAP_POOLS_CREATE) { 1005 if (IS_NETMAP_DESC(parent)) { 1006 if (new_flags & (NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3)) { 1007 snprintf(errmsg, MAXERRMSG, 1008 "POOLS_CREATE is incompatibile " 1009 "with NM_OPEN_ARG? flags"); 1010 errno = EINVAL; 1011 goto fail; 1012 } 1013 if (new_flags & NM_OPEN_NO_MMAP) { 1014 snprintf(errmsg, MAXERRMSG, 1015 "POOLS_CREATE is incompatible " 1016 "with NM_OPEN_NO_MMAP flag"); 1017 errno = EINVAL; 1018 goto fail; 1019 } 1020 } 1021 } 1022 1023 d->req.nr_version = NETMAP_API; 1024 d->req.nr_ringid &= NETMAP_RING_MASK; 1025 1026 /* optionally import info from parent */ 1027 if (IS_NETMAP_DESC(parent) && new_flags) { 1028 if (new_flags & NM_OPEN_EXTMEM) { 1029 if (parent->req.nr_cmd == NETMAP_POOLS_CREATE) { 1030 d->req.nr_cmd = NETMAP_POOLS_CREATE; 1031 nmreq_pointer_put(&d->req, nmreq_pointer_get(&parent->req)); 1032 D("Warning: not overriding arg[1-3] since external memory is being used"); 1033 new_flags &= ~(NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3); 1034 } 1035 } 1036 if (new_flags & NM_OPEN_ARG1) { 1037 D("overriding ARG1 %d", parent->req.nr_arg1); 1038 d->req.nr_arg1 = parent->req.nr_arg1; 1039 } 1040 if (new_flags & (NM_OPEN_ARG2 | NM_OPEN_NO_MMAP)) { 1041 D("overriding ARG2 %d", parent->req.nr_arg2); 1042 d->req.nr_arg2 = parent->req.nr_arg2; 1043 } 1044 if (new_flags & NM_OPEN_ARG3) { 1045 D("overriding ARG3 %d", parent->req.nr_arg3); 1046 d->req.nr_arg3 = parent->req.nr_arg3; 1047 } 1048 if (new_flags & NM_OPEN_RING_CFG) { 1049 D("overriding RING_CFG"); 1050 d->req.nr_tx_slots = parent->req.nr_tx_slots; 1051 d->req.nr_rx_slots = parent->req.nr_rx_slots; 1052 d->req.nr_tx_rings = parent->req.nr_tx_rings; 1053 d->req.nr_rx_rings = parent->req.nr_rx_rings; 1054 } 1055 if (new_flags & NM_OPEN_IFNAME) { 1056 D("overriding ifname %s ringid 0x%x flags 0x%x", 1057 parent->req.nr_name, parent->req.nr_ringid, 1058 parent->req.nr_flags); 1059 memcpy(d->req.nr_name, parent->req.nr_name, 1060 sizeof(d->req.nr_name)); 1061 d->req.nr_ringid = parent->req.nr_ringid; 1062 d->req.nr_flags = parent->req.nr_flags; 1063 } 1064 } 1065 /* add the *XPOLL flags */ 1066 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 1067 1068 if (d->req.nr_cmd == NETMAP_POOLS_CREATE) { 1069 pi = nmreq_pointer_get(&d->req); 1070 } 1071 1072 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 1073 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 1074 goto fail; 1075 } 1076 1077 if (pi != NULL) { 1078 d->mem = pi; 1079 d->memsize = pi->memsize; 1080 nm_init_offsets(d); 1081 } else if ((!(new_flags & NM_OPEN_NO_MMAP) || parent)) { 1082 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 1083 errno = nm_mmap(d, parent); 1084 if (errno) { 1085 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 1086 goto fail; 1087 } 1088 } 1089 1090 nr_reg = d->req.nr_flags & NR_REG_MASK; 1091 1092 if (nr_reg == NR_REG_SW) { /* host stack */ 1093 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 1094 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 1095 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 1096 d->first_tx_ring = 0; 1097 d->first_rx_ring = 0; 1098 d->last_tx_ring = d->req.nr_tx_rings - 1; 1099 d->last_rx_ring = d->req.nr_rx_rings - 1; 1100 } else if (nr_reg == NR_REG_NIC_SW) { 1101 d->first_tx_ring = 0; 1102 d->first_rx_ring = 0; 1103 d->last_tx_ring = d->req.nr_tx_rings; 1104 d->last_rx_ring = d->req.nr_rx_rings; 1105 } else if (nr_reg == NR_REG_ONE_NIC) { 1106 /* XXX check validity */ 1107 d->first_tx_ring = d->last_tx_ring = 1108 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 1109 } else { /* pipes */ 1110 d->first_tx_ring = d->last_tx_ring = 0; 1111 d->first_rx_ring = d->last_rx_ring = 0; 1112 } 1113 1114 #ifdef DEBUG_NETMAP_USER 1115 { /* debugging code */ 1116 int i; 1117 1118 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 1119 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 1120 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 1121 for (i = 0; i <= d->req.nr_tx_rings; i++) { 1122 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 1123 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 1124 } 1125 for (i = 0; i <= d->req.nr_rx_rings; i++) { 1126 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 1127 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 1128 } 1129 } 1130 #endif /* debugging */ 1131 1132 d->cur_tx_ring = d->first_tx_ring; 1133 d->cur_rx_ring = d->first_rx_ring; 1134 return d; 1135 1136 fail: 1137 nm_close(d); 1138 if (errmsg[0]) 1139 D("%s %s", errmsg, ifname); 1140 if (errno == 0) 1141 errno = EINVAL; 1142 return NULL; 1143 } 1144 1145 1146 static int 1147 nm_close(struct nm_desc *d) 1148 { 1149 /* 1150 * ugly trick to avoid unused warnings 1151 */ 1152 static void *__xxzt[] __attribute__ ((unused)) = 1153 { (void *)nm_open, (void *)nm_inject, 1154 (void *)nm_dispatch, (void *)nm_nextpkt, 1155 (void *)nm_parse } ; 1156 1157 if (d == NULL || d->self != d) 1158 return EINVAL; 1159 if (d->done_mmap && d->mem) 1160 munmap(d->mem, d->memsize); 1161 if (d->fd != -1) { 1162 close(d->fd); 1163 } 1164 1165 bzero(d, sizeof(*d)); 1166 free(d); 1167 return 0; 1168 } 1169 1170 1171 static int 1172 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 1173 { 1174 //XXX TODO: check if mmap is already done 1175 1176 if (IS_NETMAP_DESC(parent) && parent->mem && 1177 parent->req.nr_arg2 == d->req.nr_arg2) { 1178 /* do not mmap, inherit from parent */ 1179 D("do not mmap, inherit from parent"); 1180 d->memsize = parent->memsize; 1181 d->mem = parent->mem; 1182 } else { 1183 /* XXX TODO: check if memsize is too large (or there is overflow) */ 1184 d->memsize = d->req.nr_memsize; 1185 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 1186 d->fd, 0); 1187 if (d->mem == MAP_FAILED) { 1188 goto fail; 1189 } 1190 d->done_mmap = 1; 1191 } 1192 1193 nm_init_offsets(d); 1194 return 0; 1195 1196 fail: 1197 return EINVAL; 1198 } 1199 1200 /* 1201 * Same prototype as pcap_inject(), only need to cast. 1202 */ 1203 static int 1204 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1205 { 1206 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1207 ri = d->cur_tx_ring; 1208 1209 for (c = 0; c < n ; c++, ri++) { 1210 /* compute current ring to use */ 1211 struct netmap_ring *ring; 1212 uint32_t i, idx; 1213 1214 if (ri > d->last_tx_ring) 1215 ri = d->first_tx_ring; 1216 ring = NETMAP_TXRING(d->nifp, ri); 1217 if (nm_ring_empty(ring)) { 1218 continue; 1219 } 1220 i = ring->cur; 1221 idx = ring->slot[i].buf_idx; 1222 ring->slot[i].len = size; 1223 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 1224 d->cur_tx_ring = ri; 1225 ring->head = ring->cur = nm_ring_next(ring, i); 1226 return size; 1227 } 1228 return 0; /* fail */ 1229 } 1230 1231 1232 /* 1233 * Same prototype as pcap_dispatch(), only need to cast. 1234 */ 1235 static int 1236 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1237 { 1238 int n = d->last_rx_ring - d->first_rx_ring + 1; 1239 int c, got = 0, ri = d->cur_rx_ring; 1240 d->hdr.buf = NULL; 1241 d->hdr.flags = NM_MORE_PKTS; 1242 d->hdr.d = d; 1243 1244 if (cnt == 0) 1245 cnt = -1; 1246 /* cnt == -1 means infinite, but rings have a finite amount 1247 * of buffers and the int is large enough that we never wrap, 1248 * so we can omit checking for -1 1249 */ 1250 for (c=0; c < n && cnt != got; c++, ri++) { 1251 /* compute current ring to use */ 1252 struct netmap_ring *ring; 1253 1254 if (ri > d->last_rx_ring) 1255 ri = d->first_rx_ring; 1256 ring = NETMAP_RXRING(d->nifp, ri); 1257 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1258 u_int idx, i; 1259 if (d->hdr.buf) { /* from previous round */ 1260 cb(arg, &d->hdr, d->hdr.buf); 1261 } 1262 i = ring->cur; 1263 idx = ring->slot[i].buf_idx; 1264 /* d->cur_rx_ring doesn't change inside this loop, but 1265 * set it here, so it reflects d->hdr.buf's ring */ 1266 d->cur_rx_ring = ri; 1267 d->hdr.slot = &ring->slot[i]; 1268 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1269 // __builtin_prefetch(buf); 1270 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1271 d->hdr.ts = ring->ts; 1272 ring->head = ring->cur = nm_ring_next(ring, i); 1273 } 1274 } 1275 if (d->hdr.buf) { /* from previous round */ 1276 d->hdr.flags = 0; 1277 cb(arg, &d->hdr, d->hdr.buf); 1278 } 1279 return got; 1280 } 1281 1282 static u_char * 1283 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1284 { 1285 int ri = d->cur_rx_ring; 1286 1287 do { 1288 /* compute current ring to use */ 1289 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1290 if (!nm_ring_empty(ring)) { 1291 u_int i = ring->cur; 1292 u_int idx = ring->slot[i].buf_idx; 1293 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1294 1295 // __builtin_prefetch(buf); 1296 hdr->ts = ring->ts; 1297 hdr->len = hdr->caplen = ring->slot[i].len; 1298 ring->cur = nm_ring_next(ring, i); 1299 /* we could postpone advancing head if we want 1300 * to hold the buffer. This can be supported in 1301 * the future. 1302 */ 1303 ring->head = ring->cur; 1304 d->cur_rx_ring = ri; 1305 return buf; 1306 } 1307 ri++; 1308 if (ri > d->last_rx_ring) 1309 ri = d->first_rx_ring; 1310 } while (ri != d->cur_rx_ring); 1311 return NULL; /* nothing found */ 1312 } 1313 1314 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1315 1316 #endif /* NETMAP_WITH_LIBS */ 1317 1318 #endif /* _NET_NETMAP_USER_H_ */ 1319