/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2011-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *	we can access ring->cur, ring->head, ring->tail, etc.
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, put #define NETMAP_WITH_LIBS in the source
 * file that invokes these functions, before including this header.
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 #include <string.h> /* memset */ 97 #include <sys/time.h> /* gettimeofday */ 98 99 #ifndef likely 100 #define likely(x) __builtin_expect(!!(x), 1) 101 #define unlikely(x) __builtin_expect(!!(x), 0) 102 #endif /* likely and unlikely */ 103 104 #include <net/netmap.h> 105 106 /* helper macro */ 107 #define _NETMAP_OFFSET(type, ptr, offset) \ 108 ((type)(void *)((char *)(ptr) + (offset))) 109 110 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 111 112 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 113 nifp, (nifp)->ring_ofs[index] ) 114 115 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 116 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + \ 117 (nifp)->ni_host_tx_rings] ) 118 119 #define NETMAP_BUF(ring, index) \ 120 ((char *)(ring) + (ring)->buf_ofs + ((size_t)(index)*(ring)->nr_buf_size)) 121 122 #define NETMAP_BUF_IDX(ring, buf) \ 123 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 124 (ring)->nr_buf_size ) 125 126 static inline uint32_t 127 nm_ring_next(struct netmap_ring *r, uint32_t i) 128 { 129 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 130 } 131 132 /* 133 * Return 1 if we have pending transmissions in the tx ring. 
134 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 135 */ 136 static inline int 137 nm_tx_pending(struct netmap_ring *r) 138 { 139 return nm_ring_next(r, r->tail) != r->head; 140 } 141 142 /* Compute the number of slots available in the netmap ring. We use 143 * ring->head as explained in the comment above nm_ring_empty(). */ 144 static inline uint32_t 145 nm_ring_space(struct netmap_ring *ring) 146 { 147 int ret = ring->tail - ring->head; 148 if (ret < 0) 149 ret += ring->num_slots; 150 return ret; 151 } 152 153 #ifndef ND /* debug macros */ 154 /* debug support */ 155 #define ND(_fmt, ...) do {} while(0) 156 #define D(_fmt, ...) \ 157 do { \ 158 struct timeval _t0; \ 159 gettimeofday(&_t0, NULL); \ 160 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 161 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 162 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 163 } while (0) 164 165 /* Rate limited version of "D", lps indicates how many per second */ 166 #define RD(lps, format, ...) \ 167 do { \ 168 static int __t0, __cnt; \ 169 struct timeval __xxts; \ 170 gettimeofday(&__xxts, NULL); \ 171 if (__t0 != __xxts.tv_sec) { \ 172 __t0 = __xxts.tv_sec; \ 173 __cnt = 0; \ 174 } \ 175 if (__cnt++ < lps) { \ 176 D(format, ##__VA_ARGS__); \ 177 } \ 178 } while (0) 179 #endif 180 181 /* 182 * this is a slightly optimized copy routine which rounds 183 * to multiple of 64 bytes and is often faster than dealing 184 * with other odd sizes. We assume there is enough room 185 * in the source and destination buffers. 
186 */ 187 static inline void 188 nm_pkt_copy(const void *_src, void *_dst, int l) 189 { 190 const uint64_t *src = (const uint64_t *)_src; 191 uint64_t *dst = (uint64_t *)_dst; 192 193 if (unlikely(l >= 1024 || l % 64)) { 194 memcpy(dst, src, l); 195 return; 196 } 197 for (; likely(l > 0); l-=64) { 198 *dst++ = *src++; 199 *dst++ = *src++; 200 *dst++ = *src++; 201 *dst++ = *src++; 202 *dst++ = *src++; 203 *dst++ = *src++; 204 *dst++ = *src++; 205 *dst++ = *src++; 206 } 207 } 208 209 #ifdef NETMAP_WITH_LIBS 210 /* 211 * Support for simple I/O libraries. 212 * Include other system headers required for compiling this. 213 */ 214 215 #ifndef HAVE_NETMAP_WITH_LIBS 216 #define HAVE_NETMAP_WITH_LIBS 217 218 #include <stdio.h> 219 #include <sys/time.h> 220 #include <sys/mman.h> 221 #include <sys/ioctl.h> 222 #include <sys/errno.h> /* EINVAL */ 223 #include <fcntl.h> /* O_RDWR */ 224 #include <unistd.h> /* close() */ 225 #include <signal.h> 226 #include <stdlib.h> 227 228 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 229 struct timeval ts; 230 uint32_t caplen; 231 uint32_t len; 232 233 uint64_t flags; /* NM_MORE_PKTS etc */ 234 #define NM_MORE_PKTS 1 235 struct nm_desc *d; 236 struct netmap_slot *slot; 237 uint8_t *buf; 238 }; 239 240 struct nm_stat { /* same as pcap_stat */ 241 u_int ps_recv; 242 u_int ps_drop; 243 u_int ps_ifdrop; 244 #ifdef WIN32 /* XXX or _WIN32 ? */ 245 u_int bs_capt; 246 #endif /* WIN32 */ 247 }; 248 249 #define NM_ERRBUF_SIZE 512 250 251 struct nm_desc { 252 struct nm_desc *self; /* point to self if netmap. */ 253 int fd; 254 void *mem; 255 size_t memsize; 256 int done_mmap; /* set if mem is the result of mmap */ 257 struct netmap_if * const nifp; 258 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 259 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 260 struct nmreq req; /* also contains the nr_name = ifname */ 261 struct nm_pkthdr hdr; 262 263 /* 264 * The memory contains netmap_if, rings and then buffers. 
265 * Given a pointer (e.g. to nm_inject) we can compare with 266 * mem/buf_start/buf_end to tell if it is a buffer or 267 * some other descriptor in our region. 268 * We also store a pointer to some ring as it helps in the 269 * translation from buffer indexes to addresses. 270 */ 271 struct netmap_ring * const some_ring; 272 void * const buf_start; 273 void * const buf_end; 274 /* parameters from pcap_open_live */ 275 int snaplen; 276 int promisc; 277 int to_ms; 278 char *errbuf; 279 280 /* save flags so we can restore them on close */ 281 uint32_t if_flags; 282 uint32_t if_reqcap; 283 uint32_t if_curcap; 284 285 struct nm_stat st; 286 char msg[NM_ERRBUF_SIZE]; 287 }; 288 289 /* 290 * when the descriptor is open correctly, d->self == d 291 * Eventually we should also use some magic number. 292 */ 293 #define P2NMD(p) ((const struct nm_desc *)(p)) 294 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 295 #define NETMAP_FD(d) (P2NMD(d)->fd) 296 297 /* 298 * The callback, invoked on each received packet. Same as libpcap 299 */ 300 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 301 302 /* 303 *--- the pcap-like API --- 304 * 305 * nm_open() opens a file descriptor, binds to a port and maps memory. 306 * 307 * ifname (netmap:foo or vale:foo) is the port name 308 * a suffix can indicate the follwing: 309 * ^ bind the host (sw) ring pair 310 * * bind host and NIC ring pairs 311 * -NN bind individual NIC ring pair 312 * {NN bind master side of pipe NN 313 * }NN bind slave side of pipe NN 314 * a suffix starting with / and the following flags, 315 * in any order: 316 * x exclusive access 317 * z zero copy monitor (both tx and rx) 318 * t monitor tx side (copy monitor) 319 * r monitor rx side (copy monitor) 320 * R bind only RX ring(s) 321 * T bind only TX ring(s) 322 * 323 * req provides the initial values of nmreq before parsing ifname. 
324 * Remember that the ifname parsing will override the ring 325 * number in nm_ringid, and part of nm_flags; 326 * flags special functions, normally 0 327 * indicates which fields of *arg are significant 328 * arg special functions, normally NULL 329 * if passed a netmap_desc with mem != NULL, 330 * use that memory instead of mmap. 331 */ 332 333 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 334 uint64_t flags, const struct nm_desc *arg); 335 336 /* 337 * nm_open can import some fields from the parent descriptor. 338 * These flags control which ones. 339 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 340 * which set the initial value for these flags. 341 * Note that the 16 low bits of the flags are reserved for data 342 * that may go into the nmreq. 343 */ 344 enum { 345 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 346 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 347 NM_OPEN_ARG1 = 0x100000, 348 NM_OPEN_ARG2 = 0x200000, 349 NM_OPEN_ARG3 = 0x400000, 350 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 351 }; 352 353 /* 354 * nm_close() closes and restores the port to its previous state 355 */ 356 357 static int nm_close(struct nm_desc *); 358 359 /* 360 * nm_mmap() do mmap or inherit from parent if the nr_arg2 361 * (memory block) matches. 
362 */ 363 364 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 365 366 /* 367 * nm_inject() is the same as pcap_inject() 368 * nm_dispatch() is the same as pcap_dispatch() 369 * nm_nextpkt() is the same as pcap_next() 370 */ 371 372 static int nm_inject(struct nm_desc *, const void *, size_t); 373 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 374 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 375 376 #ifdef _WIN32 377 378 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 379 380 /* 381 * In windows we do not have yet native poll support, so we keep track 382 * of file descriptors associated to netmap ports to emulate poll on 383 * them and fall back on regular poll on other file descriptors. 384 */ 385 struct win_netmap_fd_list { 386 struct win_netmap_fd_list *next; 387 int win_netmap_fd; 388 HANDLE win_netmap_handle; 389 }; 390 391 /* 392 * list head containing all the netmap opened fd and their 393 * windows HANDLE counterparts 394 */ 395 static struct win_netmap_fd_list *win_netmap_fd_list_head; 396 397 static void 398 win_insert_fd_record(int fd) 399 { 400 struct win_netmap_fd_list *curr; 401 402 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 403 if (fd == curr->win_netmap_fd) { 404 return; 405 } 406 } 407 curr = calloc(1, sizeof(*curr)); 408 curr->next = win_netmap_fd_list_head; 409 curr->win_netmap_fd = fd; 410 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 411 win_netmap_fd_list_head = curr; 412 } 413 414 void 415 win_remove_fd_record(int fd) 416 { 417 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 418 struct win_netmap_fd_list *prev = NULL; 419 for (; curr ; prev = curr, curr = curr->next) { 420 if (fd != curr->win_netmap_fd) 421 continue; 422 /* found the entry */ 423 if (prev == NULL) { /* we are freeing the first entry */ 424 win_netmap_fd_list_head = curr->next; 425 } else { 426 prev->next = curr->next; 427 } 428 free(curr); 429 break; 430 } 431 } 432 
433 HANDLE 434 win_get_netmap_handle(int fd) 435 { 436 struct win_netmap_fd_list *curr; 437 438 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 439 if (fd == curr->win_netmap_fd) { 440 return curr->win_netmap_handle; 441 } 442 } 443 return NULL; 444 } 445 446 /* 447 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 448 */ 449 450 /* 451 * use this function only from netmap_user.h internal functions 452 * same as ioctl, returns 0 on success and -1 on error 453 */ 454 static int 455 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 456 { 457 DWORD bReturn = 0, szIn, szOut; 458 BOOL ioctlReturnStatus; 459 void *inParam = arg, *outParam = arg; 460 461 switch (ctlCode) { 462 case NETMAP_POLL: 463 szIn = sizeof(POLL_REQUEST_DATA); 464 szOut = sizeof(POLL_REQUEST_DATA); 465 break; 466 case NETMAP_MMAP: 467 szIn = 0; 468 szOut = sizeof(void*); 469 inParam = NULL; /* nothing on input */ 470 break; 471 case NIOCTXSYNC: 472 case NIOCRXSYNC: 473 szIn = 0; 474 szOut = 0; 475 break; 476 case NIOCREGIF: 477 szIn = sizeof(struct nmreq); 478 szOut = sizeof(struct nmreq); 479 break; 480 case NIOCCONFIG: 481 D("unsupported NIOCCONFIG!"); 482 return -1; 483 484 default: /* a regular ioctl */ 485 D("invalid ioctl %x on netmap fd", ctlCode); 486 return -1; 487 } 488 489 ioctlReturnStatus = DeviceIoControl(h, 490 ctlCode, inParam, szIn, 491 outParam, szOut, 492 &bReturn, NULL); 493 // XXX note windows returns 0 on error or async call, 1 on success 494 // we could call GetLastError() to figure out what happened 495 return ioctlReturnStatus ? 
0 : -1; 496 } 497 498 /* 499 * this function is what must be called from user-space programs 500 * same as ioctl, returns 0 on success and -1 on error 501 */ 502 static int 503 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 504 { 505 HANDLE h = win_get_netmap_handle(fd); 506 507 if (h == NULL) { 508 return ioctl(fd, ctlCode, arg); 509 } else { 510 return win_nm_ioctl_internal(h, ctlCode, arg); 511 } 512 } 513 514 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 515 516 /* 517 * We cannot use the native mmap on windows 518 * The only parameter used is "fd", the other ones are just declared to 519 * make this signature comparable to the FreeBSD/Linux one 520 */ 521 static void * 522 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 523 { 524 HANDLE h = win_get_netmap_handle(fd); 525 526 if (h == NULL) { 527 return mmap(addr, length, prot, flags, fd, offset); 528 } else { 529 MEMORY_ENTRY ret; 530 531 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
532 NULL : ret.pUsermodeVirtualAddress; 533 } 534 } 535 536 #define mmap win32_mmap_emulated 537 538 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 539 540 static int 541 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 542 { 543 HANDLE h; 544 545 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 546 return poll(fds, nfds, timeout); 547 } else { 548 POLL_REQUEST_DATA prd; 549 550 prd.timeout = timeout; 551 prd.events = fds->events; 552 553 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 554 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 555 return -1; 556 } 557 return 1; 558 } 559 } 560 561 #define poll win_nm_poll 562 563 static int 564 win_nm_open(char* pathname, int flags) 565 { 566 567 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 568 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 569 if (fd < 0) { 570 return -1; 571 } 572 573 win_insert_fd_record(fd); 574 return fd; 575 } else { 576 return open(pathname, flags); 577 } 578 } 579 580 #define open win_nm_open 581 582 static int 583 win_nm_close(int fd) 584 { 585 if (fd != -1) { 586 close(fd); 587 if (win_get_netmap_handle(fd) != NULL) { 588 win_remove_fd_record(fd); 589 } 590 } 591 return 0; 592 } 593 594 #define close win_nm_close 595 596 #endif /* _WIN32 */ 597 598 static int 599 nm_is_identifier(const char *s, const char *e) 600 { 601 for (; s != e; s++) { 602 if (!isalnum(*s) && *s != '_') { 603 return 0; 604 } 605 } 606 607 return 1; 608 } 609 610 #define MAXERRMSG 80 611 static int 612 nm_parse(const char *ifname, struct nm_desc *d, char *err) 613 { 614 int is_vale; 615 const char *port = NULL; 616 const char *vpname = NULL; 617 u_int namelen; 618 uint32_t nr_ringid = 0, nr_flags; 619 char errmsg[MAXERRMSG] = "", *tmp; 620 long num; 621 uint16_t nr_arg2 = 0; 622 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 623 624 errno = 0; 625 626 is_vale = (ifname[0] == 'v'); 627 if (is_vale) { 628 port = 
index(ifname, ':'); 629 if (port == NULL) { 630 snprintf(errmsg, MAXERRMSG, 631 "missing ':' in vale name"); 632 goto fail; 633 } 634 635 if (!nm_is_identifier(ifname + 4, port)) { 636 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 637 goto fail; 638 } 639 640 vpname = ++port; 641 } else { 642 ifname += 7; 643 port = ifname; 644 } 645 646 /* scan for a separator */ 647 for (; *port && !index("-*^{}/@", *port); port++) 648 ; 649 650 if (is_vale && !nm_is_identifier(vpname, port)) { 651 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 652 goto fail; 653 } 654 655 namelen = port - ifname; 656 if (namelen >= sizeof(d->req.nr_name)) { 657 snprintf(errmsg, MAXERRMSG, "name too long"); 658 goto fail; 659 } 660 memcpy(d->req.nr_name, ifname, namelen); 661 d->req.nr_name[namelen] = '\0'; 662 663 p_state = P_START; 664 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 665 while (*port) { 666 switch (p_state) { 667 case P_START: 668 switch (*port) { 669 case '^': /* only SW ring */ 670 nr_flags = NR_REG_SW; 671 p_state = P_RNGSFXOK; 672 break; 673 case '*': /* NIC and SW */ 674 nr_flags = NR_REG_NIC_SW; 675 p_state = P_RNGSFXOK; 676 break; 677 case '-': /* one NIC ring pair */ 678 nr_flags = NR_REG_ONE_NIC; 679 p_state = P_GETNUM; 680 break; 681 case '{': /* pipe (master endpoint) */ 682 nr_flags = NR_REG_PIPE_MASTER; 683 p_state = P_GETNUM; 684 break; 685 case '}': /* pipe (slave endoint) */ 686 nr_flags = NR_REG_PIPE_SLAVE; 687 p_state = P_GETNUM; 688 break; 689 case '/': /* start of flags */ 690 p_state = P_FLAGS; 691 break; 692 case '@': /* start of memid */ 693 p_state = P_MEMID; 694 break; 695 default: 696 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 697 goto fail; 698 } 699 port++; 700 break; 701 case P_RNGSFXOK: 702 switch (*port) { 703 case '/': 704 p_state = P_FLAGS; 705 break; 706 case '@': 707 p_state = P_MEMID; 708 break; 709 default: 710 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 711 goto fail; 712 } 713 
port++; 714 break; 715 case P_GETNUM: 716 num = strtol(port, &tmp, 10); 717 if (num < 0 || num >= NETMAP_RING_MASK) { 718 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 719 num, NETMAP_RING_MASK); 720 goto fail; 721 } 722 port = tmp; 723 nr_ringid = num & NETMAP_RING_MASK; 724 p_state = P_RNGSFXOK; 725 break; 726 case P_FLAGS: 727 case P_FLAGSOK: 728 if (*port == '@') { 729 port++; 730 p_state = P_MEMID; 731 break; 732 } 733 switch (*port) { 734 case 'x': 735 nr_flags |= NR_EXCLUSIVE; 736 break; 737 case 'z': 738 nr_flags |= NR_ZCOPY_MON; 739 break; 740 case 't': 741 nr_flags |= NR_MONITOR_TX; 742 break; 743 case 'r': 744 nr_flags |= NR_MONITOR_RX; 745 break; 746 case 'R': 747 nr_flags |= NR_RX_RINGS_ONLY; 748 break; 749 case 'T': 750 nr_flags |= NR_TX_RINGS_ONLY; 751 break; 752 default: 753 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 754 goto fail; 755 } 756 port++; 757 p_state = P_FLAGSOK; 758 break; 759 case P_MEMID: 760 if (nr_arg2 != 0) { 761 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 762 goto fail; 763 } 764 num = strtol(port, &tmp, 10); 765 if (num <= 0) { 766 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 767 goto fail; 768 } 769 port = tmp; 770 nr_arg2 = num; 771 p_state = P_RNGSFXOK; 772 break; 773 } 774 } 775 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 776 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 777 goto fail; 778 } 779 ND("flags: %s %s %s %s", 780 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 781 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 782 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 783 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 784 785 d->req.nr_flags |= nr_flags; 786 d->req.nr_ringid |= nr_ringid; 787 d->req.nr_arg2 = nr_arg2; 788 789 d->self = d; 790 791 return 0; 792 fail: 793 if (!errno) 794 errno = EINVAL; 795 if (err) 796 strncpy(err, errmsg, MAXERRMSG); 797 return -1; 798 } 799 800 /* 801 * Try to open, return descriptor if successful, NULL otherwise. 802 * An invalid netmap name will return errno = 0; 803 * You can pass a pointer to a pre-filled nm_desc to add special 804 * parameters. Flags is used as follows 805 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 806 * if the nr_arg2 (memory block) matches. 807 * NM_OPEN_ARG1 use req.nr_arg1 from arg 808 * NM_OPEN_ARG2 use req.nr_arg2 from arg 809 * NM_OPEN_RING_CFG user ring config from arg 810 */ 811 static struct nm_desc * 812 nm_open(const char *ifname, const struct nmreq *req, 813 uint64_t new_flags, const struct nm_desc *arg) 814 { 815 struct nm_desc *d = NULL; 816 const struct nm_desc *parent = arg; 817 char errmsg[MAXERRMSG] = ""; 818 uint32_t nr_reg; 819 820 if (strncmp(ifname, "netmap:", 7) && 821 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 822 errno = 0; /* name not recognised, not an error */ 823 return NULL; 824 } 825 826 d = (struct nm_desc *)calloc(1, sizeof(*d)); 827 if (d == NULL) { 828 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 829 errno = ENOMEM; 830 return NULL; 831 } 832 d->self = d; /* set this early so nm_close() works */ 833 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 834 if (d->fd < 0) { 835 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 836 goto fail; 837 } 838 839 if (req) 840 d->req = *req; 841 842 if (!(new_flags & NM_OPEN_IFNAME)) { 843 if (nm_parse(ifname, d, errmsg) < 0) 844 goto fail; 845 } 846 847 d->req.nr_version = NETMAP_API; 848 d->req.nr_ringid &= NETMAP_RING_MASK; 849 850 /* optionally import info from parent */ 851 if (IS_NETMAP_DESC(parent) && new_flags) { 852 if (new_flags & NM_OPEN_ARG1) 853 D("overriding 
ARG1 %d", parent->req.nr_arg1); 854 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 855 parent->req.nr_arg1 : 4; 856 if (new_flags & NM_OPEN_ARG2) { 857 D("overriding ARG2 %d", parent->req.nr_arg2); 858 d->req.nr_arg2 = parent->req.nr_arg2; 859 } 860 if (new_flags & NM_OPEN_ARG3) 861 D("overriding ARG3 %d", parent->req.nr_arg3); 862 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 863 parent->req.nr_arg3 : 0; 864 if (new_flags & NM_OPEN_RING_CFG) { 865 D("overriding RING_CFG"); 866 d->req.nr_tx_slots = parent->req.nr_tx_slots; 867 d->req.nr_rx_slots = parent->req.nr_rx_slots; 868 d->req.nr_tx_rings = parent->req.nr_tx_rings; 869 d->req.nr_rx_rings = parent->req.nr_rx_rings; 870 } 871 if (new_flags & NM_OPEN_IFNAME) { 872 D("overriding ifname %s ringid 0x%x flags 0x%x", 873 parent->req.nr_name, parent->req.nr_ringid, 874 parent->req.nr_flags); 875 memcpy(d->req.nr_name, parent->req.nr_name, 876 sizeof(d->req.nr_name)); 877 d->req.nr_ringid = parent->req.nr_ringid; 878 d->req.nr_flags = parent->req.nr_flags; 879 } 880 } 881 /* add the *XPOLL flags */ 882 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 883 884 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 885 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 886 goto fail; 887 } 888 889 nr_reg = d->req.nr_flags & NR_REG_MASK; 890 891 if (nr_reg == NR_REG_SW) { /* host stack */ 892 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 893 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 894 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 895 d->first_tx_ring = 0; 896 d->first_rx_ring = 0; 897 d->last_tx_ring = d->req.nr_tx_rings - 1; 898 d->last_rx_ring = d->req.nr_rx_rings - 1; 899 } else if (nr_reg == NR_REG_NIC_SW) { 900 d->first_tx_ring = 0; 901 d->first_rx_ring = 0; 902 d->last_tx_ring = d->req.nr_tx_rings; 903 d->last_rx_ring = d->req.nr_rx_rings; 904 } else if (nr_reg == NR_REG_ONE_NIC) { 905 /* XXX check validity */ 906 d->first_tx_ring = d->last_tx_ring = 907 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 908 } else { /* pipes */ 909 d->first_tx_ring = d->last_tx_ring = 0; 910 d->first_rx_ring = d->last_rx_ring = 0; 911 } 912 913 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 914 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 915 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 916 goto fail; 917 } 918 919 #ifdef DEBUG_NETMAP_USER 920 { /* debugging code */ 921 int i; 922 923 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 924 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 925 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 926 for (i = 0; i <= d->req.nr_tx_rings; i++) { 927 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 928 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 929 } 930 for (i = 0; i <= d->req.nr_rx_rings; i++) { 931 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 932 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 933 } 934 } 935 #endif /* debugging */ 936 937 d->cur_tx_ring = d->first_tx_ring; 938 d->cur_rx_ring = d->first_rx_ring; 939 return d; 940 941 fail: 942 nm_close(d); 943 if (errmsg[0]) 944 D("%s %s", errmsg, ifname); 945 if (errno == 0) 946 errno = EINVAL; 947 return NULL; 948 } 949 950 static int 951 nm_close(struct nm_desc *d) 952 { 953 /* 954 * ugly trick to avoid unused warnings 955 */ 956 static void *__xxzt[] __attribute__ ((unused)) = 957 { (void *)nm_open, (void *)nm_inject, 958 (void *)nm_dispatch, (void *)nm_nextpkt } ; 959 960 if (d == NULL || d->self != d) 961 return EINVAL; 962 if (d->done_mmap && d->mem) 963 munmap(d->mem, d->memsize); 964 if (d->fd != -1) { 965 close(d->fd); 966 } 967 968 bzero(d, sizeof(*d)); 969 free(d); 970 return 0; 971 } 972 973 static int 974 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 975 { 976 if (d->done_mmap) 977 return 0; 978 979 if (IS_NETMAP_DESC(parent) && parent->mem && 980 parent->req.nr_arg2 == 
d->req.nr_arg2) { 981 /* do not mmap, inherit from parent */ 982 D("do not mmap, inherit from parent"); 983 d->memsize = parent->memsize; 984 d->mem = parent->mem; 985 } else { 986 /* XXX TODO: check if memsize is too large (or there is overflow) */ 987 d->memsize = d->req.nr_memsize; 988 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 989 d->fd, 0); 990 if (d->mem == MAP_FAILED) { 991 goto fail; 992 } 993 d->done_mmap = 1; 994 } 995 { 996 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 997 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 998 if ((void *)r == (void *)nifp) { 999 /* the descriptor is open for TX only */ 1000 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1001 } 1002 1003 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1004 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1005 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1006 *(void **)(uintptr_t)&d->buf_end = 1007 (char *)d->mem + d->memsize; 1008 } 1009 1010 return 0; 1011 1012 fail: 1013 return EINVAL; 1014 } 1015 1016 /* 1017 * Same prototype as pcap_inject(), only need to cast. 
1018 */ 1019 static int 1020 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1021 { 1022 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1023 ri = d->cur_tx_ring; 1024 1025 for (c = 0; c < n ; c++, ri++) { 1026 /* compute current ring to use */ 1027 struct netmap_ring *ring; 1028 uint32_t i, j, idx; 1029 size_t rem; 1030 1031 if (ri > d->last_tx_ring) 1032 ri = d->first_tx_ring; 1033 ring = NETMAP_TXRING(d->nifp, ri); 1034 rem = size; 1035 j = ring->cur; 1036 while (rem > ring->nr_buf_size && j != ring->tail) { 1037 rem -= ring->nr_buf_size; 1038 j = nm_ring_next(ring, j); 1039 } 1040 if (j == ring->tail && rem > 0) 1041 continue; 1042 i = ring->cur; 1043 while (i != j) { 1044 idx = ring->slot[i].buf_idx; 1045 ring->slot[i].len = ring->nr_buf_size; 1046 ring->slot[i].flags = NS_MOREFRAG; 1047 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1048 i = nm_ring_next(ring, i); 1049 buf = (const char *)buf + ring->nr_buf_size; 1050 } 1051 idx = ring->slot[i].buf_idx; 1052 ring->slot[i].len = rem; 1053 ring->slot[i].flags = 0; 1054 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1055 ring->head = ring->cur = nm_ring_next(ring, i); 1056 d->cur_tx_ring = ri; 1057 return size; 1058 } 1059 return 0; /* fail */ 1060 } 1061 1062 /* 1063 * Same prototype as pcap_dispatch(), only need to cast. 
1064 */ 1065 static int 1066 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1067 { 1068 int n = d->last_rx_ring - d->first_rx_ring + 1; 1069 int c, got = 0, ri = d->cur_rx_ring; 1070 d->hdr.buf = NULL; 1071 d->hdr.flags = NM_MORE_PKTS; 1072 d->hdr.d = d; 1073 1074 if (cnt == 0) 1075 cnt = -1; 1076 /* cnt == -1 means infinite, but rings have a finite amount 1077 * of buffers and the int is large enough that we never wrap, 1078 * so we can omit checking for -1 1079 */ 1080 for (c=0; c < n && cnt != got; c++, ri++) { 1081 /* compute current ring to use */ 1082 struct netmap_ring *ring; 1083 1084 if (ri > d->last_rx_ring) 1085 ri = d->first_rx_ring; 1086 ring = NETMAP_RXRING(d->nifp, ri); 1087 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1088 u_int idx, i; 1089 u_char *oldbuf; 1090 struct netmap_slot *slot; 1091 if (d->hdr.buf) { /* from previous round */ 1092 cb(arg, &d->hdr, d->hdr.buf); 1093 } 1094 i = ring->cur; 1095 slot = &ring->slot[i]; 1096 idx = slot->buf_idx; 1097 /* d->cur_rx_ring doesn't change inside this loop, but 1098 * set it here, so it reflects d->hdr.buf's ring */ 1099 d->cur_rx_ring = ri; 1100 d->hdr.slot = slot; 1101 oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1102 // __builtin_prefetch(buf); 1103 d->hdr.len = d->hdr.caplen = slot->len; 1104 while (slot->flags & NS_MOREFRAG) { 1105 u_char *nbuf; 1106 u_int oldlen = slot->len; 1107 i = nm_ring_next(ring, i); 1108 slot = &ring->slot[i]; 1109 d->hdr.len += slot->len; 1110 nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); 1111 if (oldbuf != NULL && nbuf - oldbuf == (int)ring->nr_buf_size && 1112 oldlen == ring->nr_buf_size) { 1113 d->hdr.caplen += slot->len; 1114 oldbuf = nbuf; 1115 } else { 1116 oldbuf = NULL; 1117 } 1118 } 1119 d->hdr.ts = ring->ts; 1120 ring->head = ring->cur = nm_ring_next(ring, i); 1121 } 1122 } 1123 if (d->hdr.buf) { /* from previous round */ 1124 d->hdr.flags = 0; 1125 cb(arg, &d->hdr, d->hdr.buf); 1126 } 1127 return got; 1128 } 1129 1130 
static u_char * 1131 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1132 { 1133 int ri = d->cur_rx_ring; 1134 1135 do { 1136 /* compute current ring to use */ 1137 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1138 if (!nm_ring_empty(ring)) { 1139 u_int i = ring->cur; 1140 u_int idx = ring->slot[i].buf_idx; 1141 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1142 1143 // __builtin_prefetch(buf); 1144 hdr->ts = ring->ts; 1145 hdr->len = hdr->caplen = ring->slot[i].len; 1146 ring->cur = nm_ring_next(ring, i); 1147 /* we could postpone advancing head if we want 1148 * to hold the buffer. This can be supported in 1149 * the future. 1150 */ 1151 ring->head = ring->cur; 1152 d->cur_rx_ring = ri; 1153 return buf; 1154 } 1155 ri++; 1156 if (ri > d->last_rx_ring) 1157 ri = d->first_rx_ring; 1158 } while (ri != d->cur_rx_ring); 1159 return NULL; /* nothing found */ 1160 } 1161 1162 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1163 1164 #endif /* NETMAP_WITH_LIBS */ 1165 1166 #endif /* _NET_NETMAP_USER_H_ */ 1167