1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * $FreeBSD$ 32 * 33 * Functions and macros to manipulate netmap structures and packets 34 * in userspace. See netmap(4) for more information. 35 * 36 * The address of the struct netmap_if, say nifp, is computed from the 37 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 38 * ioctl(fd, NIOCREG, &req); 39 * mem = mmap(0, ... 
); 40 * nifp = NETMAP_IF(mem, req.nr_offset); 41 * (so simple, we could just do it manually) 42 * 43 * From there: 44 * struct netmap_ring *NETMAP_TXRING(nifp, index) 45 * struct netmap_ring *NETMAP_RXRING(nifp, index) 46 * we can access ring->cur, ring->head, ring->tail, etc. 47 * 48 * ring->slot[i] gives us the i-th slot (we can access 49 * directly len, flags, buf_idx) 50 * 51 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 52 * the buffer numbered x 53 * 54 * All ring indexes (head, cur, tail) should always move forward. 55 * To compute the next index in a circular ring you can use 56 * i = nm_ring_next(ring, i); 57 * 58 * To ease porting apps from pcap to netmap we supply a few functions 59 * that can be called to open, close, read and write on netmap in a way 60 * similar to libpcap. Note that the read/write functions depend on 61 * an ioctl()/select()/poll() being issued to refill rings or push 62 * packets out. 63 * 64 * In order to use these, include #define NETMAP_WITH_LIBS 65 * in the source file that invokes these functions. 
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 #include <string.h> /* memset */ 97 #include <sys/time.h> /* gettimeofday */ 98 99 #ifndef likely 100 #define likely(x) __builtin_expect(!!(x), 1) 101 #define unlikely(x) __builtin_expect(!!(x), 0) 102 #endif /* likely and unlikely */ 103 104 #include <net/netmap.h> 105 106 /* helper macro */ 107 #define _NETMAP_OFFSET(type, ptr, offset) \ 108 ((type)(void *)((char *)(ptr) + (offset))) 109 110 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 111 112 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 113 nifp, (nifp)->ring_ofs[index] ) 114 115 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 116 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + \ 117 (nifp)->ni_host_tx_rings] ) 118 119 #define NETMAP_BUF(ring, index) \ 120 ((char *)(ring) + (ring)->buf_ofs + ((size_t)(index)*(ring)->nr_buf_size)) 121 122 #define NETMAP_BUF_IDX(ring, buf) \ 123 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 124 (ring)->nr_buf_size ) 125 126 static inline uint32_t 127 nm_ring_next(struct netmap_ring *r, uint32_t i) 128 { 129 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 130 } 131 132 /* 133 * Return 1 if we have pending transmissions in the tx ring. 
134 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 135 */ 136 static inline int 137 nm_tx_pending(struct netmap_ring *r) 138 { 139 return nm_ring_next(r, r->tail) != r->head; 140 } 141 142 /* Compute the number of slots available in the netmap ring. We use 143 * ring->head as explained in the comment above nm_ring_empty(). */ 144 static inline uint32_t 145 nm_ring_space(struct netmap_ring *ring) 146 { 147 int ret = ring->tail - ring->head; 148 if (ret < 0) 149 ret += ring->num_slots; 150 return ret; 151 } 152 153 #ifndef ND /* debug macros */ 154 /* debug support */ 155 #define ND(_fmt, ...) do {} while(0) 156 #define D(_fmt, ...) \ 157 do { \ 158 struct timeval _t0; \ 159 gettimeofday(&_t0, NULL); \ 160 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 161 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 162 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 163 } while (0) 164 165 /* Rate limited version of "D", lps indicates how many per second */ 166 #define RD(lps, format, ...) \ 167 do { \ 168 static int __t0, __cnt; \ 169 struct timeval __xxts; \ 170 gettimeofday(&__xxts, NULL); \ 171 if (__t0 != __xxts.tv_sec) { \ 172 __t0 = __xxts.tv_sec; \ 173 __cnt = 0; \ 174 } \ 175 if (__cnt++ < lps) { \ 176 D(format, ##__VA_ARGS__); \ 177 } \ 178 } while (0) 179 #endif 180 181 /* 182 * this is a slightly optimized copy routine which rounds 183 * to multiple of 64 bytes and is often faster than dealing 184 * with other odd sizes. We assume there is enough room 185 * in the source and destination buffers. 
186 */ 187 static inline void 188 nm_pkt_copy(const void *_src, void *_dst, int l) 189 { 190 const uint64_t *src = (const uint64_t *)_src; 191 uint64_t *dst = (uint64_t *)_dst; 192 193 if (unlikely(l >= 1024 || l % 64)) { 194 memcpy(dst, src, l); 195 return; 196 } 197 for (; likely(l > 0); l-=64) { 198 *dst++ = *src++; 199 *dst++ = *src++; 200 *dst++ = *src++; 201 *dst++ = *src++; 202 *dst++ = *src++; 203 *dst++ = *src++; 204 *dst++ = *src++; 205 *dst++ = *src++; 206 } 207 } 208 209 #ifdef NETMAP_WITH_LIBS 210 /* 211 * Support for simple I/O libraries. 212 * Include other system headers required for compiling this. 213 */ 214 215 #ifndef HAVE_NETMAP_WITH_LIBS 216 #define HAVE_NETMAP_WITH_LIBS 217 218 #include <stdio.h> 219 #include <sys/time.h> 220 #include <sys/mman.h> 221 #include <sys/ioctl.h> 222 #include <sys/errno.h> /* EINVAL */ 223 #include <fcntl.h> /* O_RDWR */ 224 #include <unistd.h> /* close() */ 225 #include <signal.h> 226 #include <stdlib.h> 227 228 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 229 struct timeval ts; 230 uint32_t caplen; 231 uint32_t len; 232 233 uint64_t flags; /* NM_MORE_PKTS etc */ 234 #define NM_MORE_PKTS 1 235 struct nm_desc *d; 236 struct netmap_slot *slot; 237 uint8_t *buf; 238 }; 239 240 struct nm_stat { /* same as pcap_stat */ 241 u_int ps_recv; 242 u_int ps_drop; 243 u_int ps_ifdrop; 244 #ifdef WIN32 /* XXX or _WIN32 ? */ 245 u_int bs_capt; 246 #endif /* WIN32 */ 247 }; 248 249 #define NM_ERRBUF_SIZE 512 250 251 struct nm_desc { 252 struct nm_desc *self; /* point to self if netmap. */ 253 int fd; 254 void *mem; 255 size_t memsize; 256 int done_mmap; /* set if mem is the result of mmap */ 257 struct netmap_if * const nifp; 258 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 259 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 260 struct nmreq req; /* also contains the nr_name = ifname */ 261 struct nm_pkthdr hdr; 262 263 /* 264 * The memory contains netmap_if, rings and then buffers. 
265 * Given a pointer (e.g. to nm_inject) we can compare with 266 * mem/buf_start/buf_end to tell if it is a buffer or 267 * some other descriptor in our region. 268 * We also store a pointer to some ring as it helps in the 269 * translation from buffer indexes to addresses. 270 */ 271 struct netmap_ring * const some_ring; 272 void * const buf_start; 273 void * const buf_end; 274 /* parameters from pcap_open_live */ 275 int snaplen; 276 int promisc; 277 int to_ms; 278 char *errbuf; 279 280 /* save flags so we can restore them on close */ 281 uint32_t if_flags; 282 uint32_t if_reqcap; 283 uint32_t if_curcap; 284 285 struct nm_stat st; 286 char msg[NM_ERRBUF_SIZE]; 287 }; 288 289 /* 290 * when the descriptor is open correctly, d->self == d 291 * Eventually we should also use some magic number. 292 */ 293 #define P2NMD(p) ((struct nm_desc *)(p)) 294 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 295 #define NETMAP_FD(d) (P2NMD(d)->fd) 296 297 /* 298 * The callback, invoked on each received packet. Same as libpcap 299 */ 300 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 301 302 /* 303 *--- the pcap-like API --- 304 * 305 * nm_open() opens a file descriptor, binds to a port and maps memory. 306 * 307 * ifname (netmap:foo or vale:foo) is the port name 308 * a suffix can indicate the follwing: 309 * ^ bind the host (sw) ring pair 310 * * bind host and NIC ring pairs 311 * -NN bind individual NIC ring pair 312 * {NN bind master side of pipe NN 313 * }NN bind slave side of pipe NN 314 * a suffix starting with / and the following flags, 315 * in any order: 316 * x exclusive access 317 * z zero copy monitor (both tx and rx) 318 * t monitor tx side (copy monitor) 319 * r monitor rx side (copy monitor) 320 * R bind only RX ring(s) 321 * T bind only TX ring(s) 322 * 323 * req provides the initial values of nmreq before parsing ifname. 
 *		Remember that the ring number and the flags parsed from
 *		ifname are OR-ed into nr_ringid and nr_flags, and that
 *		nr_arg2 is overwritten with the parsed memory id;
 * flags	special functions, normally 0
 *		indicates which fields of *arg are significant
 * arg		special functions, normally NULL
 *		if passed a netmap_desc with mem != NULL,
 *		use that memory instead of mmap.
 *
 * Returns the new descriptor, or NULL on failure.
 */

static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
	uint64_t flags, const struct nm_desc *arg);

/*
 * nm_open can import some fields from the parent descriptor.
 * These flags control which ones.
 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
 * which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	0x040000, /* reuse mmap from parent */
	NM_OPEN_IFNAME =	0x080000, /* nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		0x100000, /* nr_arg1 */
	NM_OPEN_ARG2 =		0x200000, /* nr_arg2 */
	NM_OPEN_ARG3 =		0x400000, /* nr_arg3 */
	NM_OPEN_RING_CFG =	0x800000, /* tx|rx rings|slots */
};

/*
 * nm_close()	closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc *);

/*
 * nm_mmap()    do mmap or inherit from parent if the nr_arg2
 *		(memory block) matches.
362 */ 363 364 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 365 366 /* 367 * nm_inject() is the same as pcap_inject() 368 * nm_dispatch() is the same as pcap_dispatch() 369 * nm_nextpkt() is the same as pcap_next() 370 */ 371 372 static int nm_inject(struct nm_desc *, const void *, size_t); 373 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 374 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 375 376 #ifdef _WIN32 377 378 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 379 380 /* 381 * In windows we do not have yet native poll support, so we keep track 382 * of file descriptors associated to netmap ports to emulate poll on 383 * them and fall back on regular poll on other file descriptors. 384 */ 385 struct win_netmap_fd_list { 386 struct win_netmap_fd_list *next; 387 int win_netmap_fd; 388 HANDLE win_netmap_handle; 389 }; 390 391 /* 392 * list head containing all the netmap opened fd and their 393 * windows HANDLE counterparts 394 */ 395 static struct win_netmap_fd_list *win_netmap_fd_list_head; 396 397 static void 398 win_insert_fd_record(int fd) 399 { 400 struct win_netmap_fd_list *curr; 401 402 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 403 if (fd == curr->win_netmap_fd) { 404 return; 405 } 406 } 407 curr = calloc(1, sizeof(*curr)); 408 curr->next = win_netmap_fd_list_head; 409 curr->win_netmap_fd = fd; 410 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 411 win_netmap_fd_list_head = curr; 412 } 413 414 void 415 win_remove_fd_record(int fd) 416 { 417 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 418 struct win_netmap_fd_list *prev = NULL; 419 for (; curr ; prev = curr, curr = curr->next) { 420 if (fd != curr->win_netmap_fd) 421 continue; 422 /* found the entry */ 423 if (prev == NULL) { /* we are freeing the first entry */ 424 win_netmap_fd_list_head = curr->next; 425 } else { 426 prev->next = curr->next; 427 } 428 free(curr); 429 break; 430 } 431 } 432 
433 HANDLE 434 win_get_netmap_handle(int fd) 435 { 436 struct win_netmap_fd_list *curr; 437 438 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 439 if (fd == curr->win_netmap_fd) { 440 return curr->win_netmap_handle; 441 } 442 } 443 return NULL; 444 } 445 446 /* 447 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 448 */ 449 450 /* 451 * use this function only from netmap_user.h internal functions 452 * same as ioctl, returns 0 on success and -1 on error 453 */ 454 static int 455 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 456 { 457 DWORD bReturn = 0, szIn, szOut; 458 BOOL ioctlReturnStatus; 459 void *inParam = arg, *outParam = arg; 460 461 switch (ctlCode) { 462 case NETMAP_POLL: 463 szIn = sizeof(POLL_REQUEST_DATA); 464 szOut = sizeof(POLL_REQUEST_DATA); 465 break; 466 case NETMAP_MMAP: 467 szIn = 0; 468 szOut = sizeof(void*); 469 inParam = NULL; /* nothing on input */ 470 break; 471 case NIOCTXSYNC: 472 case NIOCRXSYNC: 473 szIn = 0; 474 szOut = 0; 475 break; 476 case NIOCREGIF: 477 szIn = sizeof(struct nmreq); 478 szOut = sizeof(struct nmreq); 479 break; 480 case NIOCCONFIG: 481 D("unsupported NIOCCONFIG!"); 482 return -1; 483 484 default: /* a regular ioctl */ 485 D("invalid ioctl %x on netmap fd", ctlCode); 486 return -1; 487 } 488 489 ioctlReturnStatus = DeviceIoControl(h, 490 ctlCode, inParam, szIn, 491 outParam, szOut, 492 &bReturn, NULL); 493 // XXX note windows returns 0 on error or async call, 1 on success 494 // we could call GetLastError() to figure out what happened 495 return ioctlReturnStatus ? 
0 : -1; 496 } 497 498 /* 499 * this function is what must be called from user-space programs 500 * same as ioctl, returns 0 on success and -1 on error 501 */ 502 static int 503 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 504 { 505 HANDLE h = win_get_netmap_handle(fd); 506 507 if (h == NULL) { 508 return ioctl(fd, ctlCode, arg); 509 } else { 510 return win_nm_ioctl_internal(h, ctlCode, arg); 511 } 512 } 513 514 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 515 516 /* 517 * We cannot use the native mmap on windows 518 * The only parameter used is "fd", the other ones are just declared to 519 * make this signature comparable to the FreeBSD/Linux one 520 */ 521 static void * 522 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 523 { 524 HANDLE h = win_get_netmap_handle(fd); 525 526 if (h == NULL) { 527 return mmap(addr, length, prot, flags, fd, offset); 528 } else { 529 MEMORY_ENTRY ret; 530 531 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
532 NULL : ret.pUsermodeVirtualAddress; 533 } 534 } 535 536 #define mmap win32_mmap_emulated 537 538 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 539 540 static int 541 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 542 { 543 HANDLE h; 544 545 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 546 return poll(fds, nfds, timeout); 547 } else { 548 POLL_REQUEST_DATA prd; 549 550 prd.timeout = timeout; 551 prd.events = fds->events; 552 553 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 554 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 555 return -1; 556 } 557 return 1; 558 } 559 } 560 561 #define poll win_nm_poll 562 563 static int 564 win_nm_open(char* pathname, int flags) 565 { 566 567 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 568 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 569 if (fd < 0) { 570 return -1; 571 } 572 573 win_insert_fd_record(fd); 574 return fd; 575 } else { 576 return open(pathname, flags); 577 } 578 } 579 580 #define open win_nm_open 581 582 static int 583 win_nm_close(int fd) 584 { 585 if (fd != -1) { 586 close(fd); 587 if (win_get_netmap_handle(fd) != NULL) { 588 win_remove_fd_record(fd); 589 } 590 } 591 return 0; 592 } 593 594 #define close win_nm_close 595 596 #endif /* _WIN32 */ 597 598 static int 599 nm_is_identifier(const char *s, const char *e) 600 { 601 for (; s != e; s++) { 602 if (!isalnum(*s) && *s != '_') { 603 return 0; 604 } 605 } 606 607 return 1; 608 } 609 610 #define MAXERRMSG 80 611 static int 612 nm_parse(const char *ifname, struct nm_desc *d, char *err) 613 { 614 int is_vale; 615 const char *port = NULL; 616 const char *vpname = NULL; 617 u_int namelen; 618 uint32_t nr_ringid = 0, nr_flags; 619 char errmsg[MAXERRMSG] = ""; 620 long num; 621 uint16_t nr_arg2 = 0; 622 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 623 624 errno = 0; 625 626 is_vale = (ifname[0] == 'v'); 627 if (is_vale) { 628 port = index(ifname, 
':'); 629 if (port == NULL) { 630 snprintf(errmsg, MAXERRMSG, 631 "missing ':' in vale name"); 632 goto fail; 633 } 634 635 if (!nm_is_identifier(ifname + 4, port)) { 636 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 637 goto fail; 638 } 639 640 vpname = ++port; 641 } else { 642 ifname += 7; 643 port = ifname; 644 } 645 646 /* scan for a separator */ 647 for (; *port && !index("-*^{}/@", *port); port++) 648 ; 649 650 if (is_vale && !nm_is_identifier(vpname, port)) { 651 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 652 goto fail; 653 } 654 655 namelen = port - ifname; 656 if (namelen >= sizeof(d->req.nr_name)) { 657 snprintf(errmsg, MAXERRMSG, "name too long"); 658 goto fail; 659 } 660 memcpy(d->req.nr_name, ifname, namelen); 661 d->req.nr_name[namelen] = '\0'; 662 663 p_state = P_START; 664 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 665 while (*port) { 666 switch (p_state) { 667 case P_START: 668 switch (*port) { 669 case '^': /* only SW ring */ 670 nr_flags = NR_REG_SW; 671 p_state = P_RNGSFXOK; 672 break; 673 case '*': /* NIC and SW */ 674 nr_flags = NR_REG_NIC_SW; 675 p_state = P_RNGSFXOK; 676 break; 677 case '-': /* one NIC ring pair */ 678 nr_flags = NR_REG_ONE_NIC; 679 p_state = P_GETNUM; 680 break; 681 case '{': /* pipe (master endpoint) */ 682 nr_flags = NR_REG_PIPE_MASTER; 683 p_state = P_GETNUM; 684 break; 685 case '}': /* pipe (slave endoint) */ 686 nr_flags = NR_REG_PIPE_SLAVE; 687 p_state = P_GETNUM; 688 break; 689 case '/': /* start of flags */ 690 p_state = P_FLAGS; 691 break; 692 case '@': /* start of memid */ 693 p_state = P_MEMID; 694 break; 695 default: 696 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 697 goto fail; 698 } 699 port++; 700 break; 701 case P_RNGSFXOK: 702 switch (*port) { 703 case '/': 704 p_state = P_FLAGS; 705 break; 706 case '@': 707 p_state = P_MEMID; 708 break; 709 default: 710 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 711 goto fail; 712 } 713 port++; 714 
break; 715 case P_GETNUM: 716 num = strtol(port, (char **)&port, 10); 717 if (num < 0 || num >= NETMAP_RING_MASK) { 718 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 719 num, NETMAP_RING_MASK); 720 goto fail; 721 } 722 nr_ringid = num & NETMAP_RING_MASK; 723 p_state = P_RNGSFXOK; 724 break; 725 case P_FLAGS: 726 case P_FLAGSOK: 727 if (*port == '@') { 728 port++; 729 p_state = P_MEMID; 730 break; 731 } 732 switch (*port) { 733 case 'x': 734 nr_flags |= NR_EXCLUSIVE; 735 break; 736 case 'z': 737 nr_flags |= NR_ZCOPY_MON; 738 break; 739 case 't': 740 nr_flags |= NR_MONITOR_TX; 741 break; 742 case 'r': 743 nr_flags |= NR_MONITOR_RX; 744 break; 745 case 'R': 746 nr_flags |= NR_RX_RINGS_ONLY; 747 break; 748 case 'T': 749 nr_flags |= NR_TX_RINGS_ONLY; 750 break; 751 default: 752 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 753 goto fail; 754 } 755 port++; 756 p_state = P_FLAGSOK; 757 break; 758 case P_MEMID: 759 if (nr_arg2 != 0) { 760 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 761 goto fail; 762 } 763 num = strtol(port, (char **)&port, 10); 764 if (num <= 0) { 765 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 766 goto fail; 767 } 768 nr_arg2 = num; 769 p_state = P_RNGSFXOK; 770 break; 771 } 772 } 773 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 774 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 775 goto fail; 776 } 777 ND("flags: %s %s %s %s", 778 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 779 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 780 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 781 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 782 783 d->req.nr_flags |= nr_flags; 784 d->req.nr_ringid |= nr_ringid; 785 d->req.nr_arg2 = nr_arg2; 786 787 d->self = d; 788 789 return 0; 790 fail: 791 if (!errno) 792 errno = EINVAL; 793 if (err) 794 strncpy(err, errmsg, MAXERRMSG); 795 return -1; 796 } 797 798 /* 799 * Try to open, return descriptor if successful, NULL otherwise. 800 * An invalid netmap name will return errno = 0; 801 * You can pass a pointer to a pre-filled nm_desc to add special 802 * parameters. Flags is used as follows 803 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 804 * if the nr_arg2 (memory block) matches. 805 * NM_OPEN_ARG1 use req.nr_arg1 from arg 806 * NM_OPEN_ARG2 use req.nr_arg2 from arg 807 * NM_OPEN_RING_CFG user ring config from arg 808 */ 809 static struct nm_desc * 810 nm_open(const char *ifname, const struct nmreq *req, 811 uint64_t new_flags, const struct nm_desc *arg) 812 { 813 struct nm_desc *d = NULL; 814 const struct nm_desc *parent = arg; 815 char errmsg[MAXERRMSG] = ""; 816 uint32_t nr_reg; 817 818 if (strncmp(ifname, "netmap:", 7) && 819 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 820 errno = 0; /* name not recognised, not an error */ 821 return NULL; 822 } 823 824 d = (struct nm_desc *)calloc(1, sizeof(*d)); 825 if (d == NULL) { 826 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 827 errno = ENOMEM; 828 return NULL; 829 } 830 d->self = d; /* set this early so nm_close() works */ 831 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 832 if (d->fd < 0) { 833 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 834 goto fail; 835 } 836 837 if (req) 838 d->req = *req; 839 840 if (!(new_flags & NM_OPEN_IFNAME)) { 841 if (nm_parse(ifname, d, errmsg) < 0) 842 goto fail; 843 } 844 845 d->req.nr_version = NETMAP_API; 846 d->req.nr_ringid &= NETMAP_RING_MASK; 847 848 /* optionally import info from parent */ 849 if (IS_NETMAP_DESC(parent) && new_flags) { 850 if (new_flags & NM_OPEN_ARG1) 851 D("overriding 
ARG1 %d", parent->req.nr_arg1); 852 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 853 parent->req.nr_arg1 : 4; 854 if (new_flags & NM_OPEN_ARG2) { 855 D("overriding ARG2 %d", parent->req.nr_arg2); 856 d->req.nr_arg2 = parent->req.nr_arg2; 857 } 858 if (new_flags & NM_OPEN_ARG3) 859 D("overriding ARG3 %d", parent->req.nr_arg3); 860 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 861 parent->req.nr_arg3 : 0; 862 if (new_flags & NM_OPEN_RING_CFG) { 863 D("overriding RING_CFG"); 864 d->req.nr_tx_slots = parent->req.nr_tx_slots; 865 d->req.nr_rx_slots = parent->req.nr_rx_slots; 866 d->req.nr_tx_rings = parent->req.nr_tx_rings; 867 d->req.nr_rx_rings = parent->req.nr_rx_rings; 868 } 869 if (new_flags & NM_OPEN_IFNAME) { 870 D("overriding ifname %s ringid 0x%x flags 0x%x", 871 parent->req.nr_name, parent->req.nr_ringid, 872 parent->req.nr_flags); 873 memcpy(d->req.nr_name, parent->req.nr_name, 874 sizeof(d->req.nr_name)); 875 d->req.nr_ringid = parent->req.nr_ringid; 876 d->req.nr_flags = parent->req.nr_flags; 877 } 878 } 879 /* add the *XPOLL flags */ 880 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 881 882 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 883 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 884 goto fail; 885 } 886 887 nr_reg = d->req.nr_flags & NR_REG_MASK; 888 889 if (nr_reg == NR_REG_SW) { /* host stack */ 890 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 891 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 892 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 893 d->first_tx_ring = 0; 894 d->first_rx_ring = 0; 895 d->last_tx_ring = d->req.nr_tx_rings - 1; 896 d->last_rx_ring = d->req.nr_rx_rings - 1; 897 } else if (nr_reg == NR_REG_NIC_SW) { 898 d->first_tx_ring = 0; 899 d->first_rx_ring = 0; 900 d->last_tx_ring = d->req.nr_tx_rings; 901 d->last_rx_ring = d->req.nr_rx_rings; 902 } else if (nr_reg == NR_REG_ONE_NIC) { 903 /* XXX check validity */ 904 d->first_tx_ring = d->last_tx_ring = 905 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 906 } else { /* pipes */ 907 d->first_tx_ring = d->last_tx_ring = 0; 908 d->first_rx_ring = d->last_rx_ring = 0; 909 } 910 911 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 912 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 913 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 914 goto fail; 915 } 916 917 #ifdef DEBUG_NETMAP_USER 918 { /* debugging code */ 919 int i; 920 921 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 922 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 923 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 924 for (i = 0; i <= d->req.nr_tx_rings; i++) { 925 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 926 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 927 } 928 for (i = 0; i <= d->req.nr_rx_rings; i++) { 929 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 930 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 931 } 932 } 933 #endif /* debugging */ 934 935 d->cur_tx_ring = d->first_tx_ring; 936 d->cur_rx_ring = d->first_rx_ring; 937 return d; 938 939 fail: 940 nm_close(d); 941 if (errmsg[0]) 942 D("%s %s", errmsg, ifname); 943 if (errno == 0) 944 errno = EINVAL; 945 return NULL; 946 } 947 948 static int 949 nm_close(struct nm_desc *d) 950 { 951 /* 952 * ugly trick to avoid unused warnings 953 */ 954 static void *__xxzt[] __attribute__ ((unused)) = 955 { (void *)nm_open, (void *)nm_inject, 956 (void *)nm_dispatch, (void *)nm_nextpkt } ; 957 958 if (d == NULL || d->self != d) 959 return EINVAL; 960 if (d->done_mmap && d->mem) 961 munmap(d->mem, d->memsize); 962 if (d->fd != -1) { 963 close(d->fd); 964 } 965 966 bzero(d, sizeof(*d)); 967 free(d); 968 return 0; 969 } 970 971 static int 972 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 973 { 974 if (d->done_mmap) 975 return 0; 976 977 if (IS_NETMAP_DESC(parent) && parent->mem && 978 parent->req.nr_arg2 == 
d->req.nr_arg2) { 979 /* do not mmap, inherit from parent */ 980 D("do not mmap, inherit from parent"); 981 d->memsize = parent->memsize; 982 d->mem = parent->mem; 983 } else { 984 /* XXX TODO: check if memsize is too large (or there is overflow) */ 985 d->memsize = d->req.nr_memsize; 986 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 987 d->fd, 0); 988 if (d->mem == MAP_FAILED) { 989 goto fail; 990 } 991 d->done_mmap = 1; 992 } 993 { 994 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 995 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 996 if ((void *)r == (void *)nifp) { 997 /* the descriptor is open for TX only */ 998 r = NETMAP_TXRING(nifp, d->first_tx_ring); 999 } 1000 1001 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1002 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1003 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1004 *(void **)(uintptr_t)&d->buf_end = 1005 (char *)d->mem + d->memsize; 1006 } 1007 1008 return 0; 1009 1010 fail: 1011 return EINVAL; 1012 } 1013 1014 /* 1015 * Same prototype as pcap_inject(), only need to cast. 
1016 */ 1017 static int 1018 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1019 { 1020 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1021 ri = d->cur_tx_ring; 1022 1023 for (c = 0; c < n ; c++, ri++) { 1024 /* compute current ring to use */ 1025 struct netmap_ring *ring; 1026 uint32_t i, j, idx; 1027 size_t rem; 1028 1029 if (ri > d->last_tx_ring) 1030 ri = d->first_tx_ring; 1031 ring = NETMAP_TXRING(d->nifp, ri); 1032 rem = size; 1033 j = ring->cur; 1034 while (rem > ring->nr_buf_size && j != ring->tail) { 1035 rem -= ring->nr_buf_size; 1036 j = nm_ring_next(ring, j); 1037 } 1038 if (j == ring->tail && rem > 0) 1039 continue; 1040 i = ring->cur; 1041 while (i != j) { 1042 idx = ring->slot[i].buf_idx; 1043 ring->slot[i].len = ring->nr_buf_size; 1044 ring->slot[i].flags = NS_MOREFRAG; 1045 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1046 i = nm_ring_next(ring, i); 1047 buf = (char *)buf + ring->nr_buf_size; 1048 } 1049 idx = ring->slot[i].buf_idx; 1050 ring->slot[i].len = rem; 1051 ring->slot[i].flags = 0; 1052 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1053 ring->head = ring->cur = nm_ring_next(ring, i); 1054 d->cur_tx_ring = ri; 1055 return size; 1056 } 1057 return 0; /* fail */ 1058 } 1059 1060 /* 1061 * Same prototype as pcap_dispatch(), only need to cast. 
 *
 * Scan the RX rings of d, starting from d->cur_rx_ring and wrapping
 * from last_rx_ring to first_rx_ring, and invoke cb(arg, &d->hdr, buf)
 * for each packet found, up to cnt packets (cnt == 0 is treated like
 * -1, i.e. no limit other than what the rings contain).
 *
 * The callback runs one packet behind the scan: the header of a packet
 * is prepared in d->hdr, and cb is called for it only when the next
 * packet is found or when the scan ends. This is how the last packet
 * of the batch is delivered with hdr.flags == 0 while all the previous
 * ones carry NM_MORE_PKTS.
 *
 * Returns the number of packets consumed from the rings.
 */
static int
nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_rx_ring - d->first_rx_ring + 1;
	int c, got = 0, ri = d->cur_rx_ring;
	d->hdr.buf = NULL;
	d->hdr.flags = NM_MORE_PKTS;
	d->hdr.d = d;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c=0; c < n && cnt != got; c++, ri++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		if (ri > d->last_rx_ring)
			ri = d->first_rx_ring;
		ring = NETMAP_RXRING(d->nifp, ri);
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int idx, i;
			u_char *oldbuf;
			struct netmap_slot *slot;
			if (d->hdr.buf) { /* from previous round */
				cb(arg, &d->hdr, d->hdr.buf);
			}
			i = ring->cur;
			slot = &ring->slot[i];
			idx = slot->buf_idx;
			/* d->cur_rx_ring doesn't change inside this loop, but
			 * set it here, so it reflects d->hdr.buf's ring */
			d->cur_rx_ring = ri;
			d->hdr.slot = slot;
			oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
			// __builtin_prefetch(buf);
			d->hdr.len = d->hdr.caplen = slot->len;
			/*
			 * Multi-slot packet: len is the sum of all the
			 * fragments, whereas caplen only grows as long as
			 * each new buffer starts exactly nr_buf_size bytes
			 * after the previous one and the previous fragment
			 * filled its buffer, i.e. while the data is
			 * contiguous starting from hdr.buf. Once that
			 * breaks (oldbuf == NULL) caplen stops growing.
			 */
			while (slot->flags & NS_MOREFRAG) {
				u_char *nbuf;
				u_int oldlen = slot->len;
				i = nm_ring_next(ring, i);
				slot = &ring->slot[i];
				d->hdr.len += slot->len;
				nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
				if (oldbuf != NULL && nbuf - oldbuf == (int)ring->nr_buf_size &&
						oldlen == ring->nr_buf_size) {
					d->hdr.caplen += slot->len;
					oldbuf = nbuf;
				} else {
					oldbuf = NULL;
				}
			}
			d->hdr.ts = ring->ts;
			/* move head and cur past the last slot of the packet */
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	if (d->hdr.buf) { /* from previous round */
		/* last packet of this batch: clear NM_MORE_PKTS */
		d->hdr.flags = 0;
		cb(arg, &d->hdr, d->hdr.buf);
	}
	return got;
}

static u_char * 1129 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1130 { 1131 int ri = d->cur_rx_ring; 1132 1133 do { 1134 /* compute current ring to use */ 1135 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1136 if (!nm_ring_empty(ring)) { 1137 u_int i = ring->cur; 1138 u_int idx = ring->slot[i].buf_idx; 1139 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1140 1141 // __builtin_prefetch(buf); 1142 hdr->ts = ring->ts; 1143 hdr->len = hdr->caplen = ring->slot[i].len; 1144 ring->cur = nm_ring_next(ring, i); 1145 /* we could postpone advancing head if we want 1146 * to hold the buffer. This can be supported in 1147 * the future. 1148 */ 1149 ring->head = ring->cur; 1150 d->cur_rx_ring = ri; 1151 return buf; 1152 } 1153 ri++; 1154 if (ri > d->last_rx_ring) 1155 ri = d->first_rx_ring; 1156 } while (ri != d->cur_rx_ring); 1157 return NULL; /* nothing found */ 1158 } 1159 1160 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1161 1162 #endif /* NETMAP_WITH_LIBS */ 1163 1164 #endif /* _NET_NETMAP_USER_H_ */ 1165