1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * $FreeBSD$ 32 * 33 * Functions and macros to manipulate netmap structures and packets 34 * in userspace. See netmap(4) for more information. 35 * 36 * The address of the struct netmap_if, say nifp, is computed from the 37 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 38 * ioctl(fd, NIOCREG, &req); 39 * mem = mmap(0, ... ); 40 * nifp = NETMAP_IF(mem, req.nr_nifp); 41 * (so simple, we could just do it manually) 42 * 43 * From there: 44 * struct netmap_ring *NETMAP_TXRING(nifp, index) 45 * struct netmap_ring *NETMAP_RXRING(nifp, index) 46 * we can access ring->cur, ring->head, ring->tail, etc. 47 * 48 * ring->slot[i] gives us the i-th slot (we can access 49 * directly len, flags, buf_idx) 50 * 51 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 52 * the buffer numbered x 53 * 54 * All ring indexes (head, cur, tail) should always move forward. 55 * To compute the next index in a circular ring you can use 56 * i = nm_ring_next(ring, i); 57 * 58 * To ease porting apps from pcap to netmap we supply a few fuctions 59 * that can be called to open, close, read and write on netmap in a way 60 * similar to libpcap. Note that the read/write function depend on 61 * an ioctl()/select()/poll() being issued to refill rings or push 62 * packets out. 63 * 64 * In order to use these, include #define NETMAP_WITH_LIBS 65 * in the source file that invokes these functions. 66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 97 #ifndef likely 98 #define likely(x) __builtin_expect(!!(x), 1) 99 #define unlikely(x) __builtin_expect(!!(x), 0) 100 #endif /* likely and unlikely */ 101 102 #include <net/netmap.h> 103 104 /* helper macro */ 105 #define _NETMAP_OFFSET(type, ptr, offset) \ 106 ((type)(void *)((char *)(ptr) + (offset))) 107 108 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 109 110 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 111 nifp, (nifp)->ring_ofs[index] ) 112 113 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 114 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 115 116 #define NETMAP_BUF(ring, index) \ 117 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 118 119 #define NETMAP_BUF_IDX(ring, buf) \ 120 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 121 (ring)->nr_buf_size ) 122 123 124 static inline uint32_t 125 nm_ring_next(struct netmap_ring *r, uint32_t i) 126 { 127 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 128 } 129 130 131 /* 132 * Return 1 if we have pending transmissions in the tx ring. 133 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 134 */ 135 static inline int 136 nm_tx_pending(struct netmap_ring *r) 137 { 138 return nm_ring_next(r, r->tail) != r->head; 139 } 140 141 142 static inline uint32_t 143 nm_ring_space(struct netmap_ring *ring) 144 { 145 int ret = ring->tail - ring->cur; 146 if (ret < 0) 147 ret += ring->num_slots; 148 return ret; 149 } 150 151 152 #ifdef NETMAP_WITH_LIBS 153 /* 154 * Support for simple I/O libraries. 155 * Include other system headers required for compiling this. 156 */ 157 158 #ifndef HAVE_NETMAP_WITH_LIBS 159 #define HAVE_NETMAP_WITH_LIBS 160 161 #include <stdio.h> 162 #include <sys/time.h> 163 #include <sys/mman.h> 164 #include <string.h> /* memset */ 165 #include <sys/ioctl.h> 166 #include <sys/errno.h> /* EINVAL */ 167 #include <fcntl.h> /* O_RDWR */ 168 #include <unistd.h> /* close() */ 169 #include <signal.h> 170 #include <stdlib.h> 171 172 #ifndef ND /* debug macros */ 173 /* debug support */ 174 #define ND(_fmt, ...) do {} while(0) 175 #define D(_fmt, ...) \ 176 do { \ 177 struct timeval _t0; \ 178 gettimeofday(&_t0, NULL); \ 179 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 180 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 181 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 182 } while (0) 183 184 /* Rate limited version of "D", lps indicates how many per second */ 185 #define RD(lps, format, ...) \ 186 do { \ 187 static int __t0, __cnt; \ 188 struct timeval __xxts; \ 189 gettimeofday(&__xxts, NULL); \ 190 if (__t0 != __xxts.tv_sec) { \ 191 __t0 = __xxts.tv_sec; \ 192 __cnt = 0; \ 193 } \ 194 if (__cnt++ < lps) { \ 195 D(format, ##__VA_ARGS__); \ 196 } \ 197 } while (0) 198 #endif 199 200 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 201 struct timeval ts; 202 uint32_t caplen; 203 uint32_t len; 204 205 uint64_t flags; /* NM_MORE_PKTS etc */ 206 #define NM_MORE_PKTS 1 207 struct nm_desc *d; 208 struct netmap_slot *slot; 209 uint8_t *buf; 210 }; 211 212 struct nm_stat { /* same as pcap_stat */ 213 u_int ps_recv; 214 u_int ps_drop; 215 u_int ps_ifdrop; 216 #ifdef WIN32 /* XXX or _WIN32 ? */ 217 u_int bs_capt; 218 #endif /* WIN32 */ 219 }; 220 221 #define NM_ERRBUF_SIZE 512 222 223 struct nm_desc { 224 struct nm_desc *self; /* point to self if netmap. */ 225 int fd; 226 void *mem; 227 uint32_t memsize; 228 int done_mmap; /* set if mem is the result of mmap */ 229 struct netmap_if * const nifp; 230 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 231 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 232 struct nmreq req; /* also contains the nr_name = ifname */ 233 struct nm_pkthdr hdr; 234 235 /* 236 * The memory contains netmap_if, rings and then buffers. 237 * Given a pointer (e.g. to nm_inject) we can compare with 238 * mem/buf_start/buf_end to tell if it is a buffer or 239 * some other descriptor in our region. 240 * We also store a pointer to some ring as it helps in the 241 * translation from buffer indexes to addresses. 242 */ 243 struct netmap_ring * const some_ring; 244 void * const buf_start; 245 void * const buf_end; 246 /* parameters from pcap_open_live */ 247 int snaplen; 248 int promisc; 249 int to_ms; 250 char *errbuf; 251 252 /* save flags so we can restore them on close */ 253 uint32_t if_flags; 254 uint32_t if_reqcap; 255 uint32_t if_curcap; 256 257 struct nm_stat st; 258 char msg[NM_ERRBUF_SIZE]; 259 }; 260 261 /* 262 * when the descriptor is open correctly, d->self == d 263 * Eventually we should also use some magic number. 264 */ 265 #define P2NMD(p) ((struct nm_desc *)(p)) 266 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 267 #define NETMAP_FD(d) (P2NMD(d)->fd) 268 269 270 /* 271 * this is a slightly optimized copy routine which rounds 272 * to multiple of 64 bytes and is often faster than dealing 273 * with other odd sizes. We assume there is enough room 274 * in the source and destination buffers. 275 * 276 * XXX only for multiples of 64 bytes, non overlapped. 277 */ 278 static inline void 279 nm_pkt_copy(const void *_src, void *_dst, int l) 280 { 281 const uint64_t *src = (const uint64_t *)_src; 282 uint64_t *dst = (uint64_t *)_dst; 283 284 if (unlikely(l >= 1024)) { 285 memcpy(dst, src, l); 286 return; 287 } 288 for (; likely(l > 0); l-=64) { 289 *dst++ = *src++; 290 *dst++ = *src++; 291 *dst++ = *src++; 292 *dst++ = *src++; 293 *dst++ = *src++; 294 *dst++ = *src++; 295 *dst++ = *src++; 296 *dst++ = *src++; 297 } 298 } 299 300 301 /* 302 * The callback, invoked on each received packet. Same as libpcap 303 */ 304 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 305 306 /* 307 *--- the pcap-like API --- 308 * 309 * nm_open() opens a file descriptor, binds to a port and maps memory. 310 * 311 * ifname (netmap:foo or vale:foo) is the port name 312 * a suffix can indicate the follwing: 313 * ^ bind the host (sw) ring pair 314 * * bind host and NIC ring pairs 315 * -NN bind individual NIC ring pair 316 * {NN bind master side of pipe NN 317 * }NN bind slave side of pipe NN 318 * a suffix starting with / and the following flags, 319 * in any order: 320 * x exclusive access 321 * z zero copy monitor (both tx and rx) 322 * t monitor tx side (copy monitor) 323 * r monitor rx side (copy monitor) 324 * R bind only RX ring(s) 325 * T bind only TX ring(s) 326 * 327 * req provides the initial values of nmreq before parsing ifname. 328 * Remember that the ifname parsing will override the ring 329 * number in nm_ringid, and part of nm_flags; 330 * flags special functions, normally 0 331 * indicates which fields of *arg are significant 332 * arg special functions, normally NULL 333 * if passed a netmap_desc with mem != NULL, 334 * use that memory instead of mmap. 335 */ 336 337 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 338 uint64_t flags, const struct nm_desc *arg); 339 340 /* 341 * nm_open can import some fields from the parent descriptor. 342 * These flags control which ones. 343 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 344 * which set the initial value for these flags. 345 * Note that the 16 low bits of the flags are reserved for data 346 * that may go into the nmreq. 347 */ 348 enum { 349 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 350 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 351 NM_OPEN_ARG1 = 0x100000, 352 NM_OPEN_ARG2 = 0x200000, 353 NM_OPEN_ARG3 = 0x400000, 354 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 355 }; 356 357 358 /* 359 * nm_close() closes and restores the port to its previous state 360 */ 361 362 static int nm_close(struct nm_desc *); 363 364 /* 365 * nm_mmap() do mmap or inherit from parent if the nr_arg2 366 * (memory block) matches. 367 */ 368 369 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 370 371 /* 372 * nm_inject() is the same as pcap_inject() 373 * nm_dispatch() is the same as pcap_dispatch() 374 * nm_nextpkt() is the same as pcap_next() 375 */ 376 377 static int nm_inject(struct nm_desc *, const void *, size_t); 378 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 379 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 380 381 #ifdef _WIN32 382 383 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 384 385 /* 386 * In windows we do not have yet native poll support, so we keep track 387 * of file descriptors associated to netmap ports to emulate poll on 388 * them and fall back on regular poll on other file descriptors. 389 */ 390 struct win_netmap_fd_list { 391 struct win_netmap_fd_list *next; 392 int win_netmap_fd; 393 HANDLE win_netmap_handle; 394 }; 395 396 /* 397 * list head containing all the netmap opened fd and their 398 * windows HANDLE counterparts 399 */ 400 static struct win_netmap_fd_list *win_netmap_fd_list_head; 401 402 static void 403 win_insert_fd_record(int fd) 404 { 405 struct win_netmap_fd_list *curr; 406 407 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 408 if (fd == curr->win_netmap_fd) { 409 return; 410 } 411 } 412 curr = calloc(1, sizeof(*curr)); 413 curr->next = win_netmap_fd_list_head; 414 curr->win_netmap_fd = fd; 415 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 416 win_netmap_fd_list_head = curr; 417 } 418 419 void 420 win_remove_fd_record(int fd) 421 { 422 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 423 struct win_netmap_fd_list *prev = NULL; 424 for (; curr ; prev = curr, curr = curr->next) { 425 if (fd != curr->win_netmap_fd) 426 continue; 427 /* found the entry */ 428 if (prev == NULL) { /* we are freeing the first entry */ 429 win_netmap_fd_list_head = curr->next; 430 } else { 431 prev->next = curr->next; 432 } 433 free(curr); 434 break; 435 } 436 } 437 438 439 HANDLE 440 win_get_netmap_handle(int fd) 441 { 442 struct win_netmap_fd_list *curr; 443 444 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 445 if (fd == curr->win_netmap_fd) { 446 return curr->win_netmap_handle; 447 } 448 } 449 return NULL; 450 } 451 452 /* 453 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 454 */ 455 456 /* 457 * use this function only from netmap_user.h internal functions 458 * same as ioctl, returns 0 on success and -1 on error 459 */ 460 static int 461 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 462 { 463 DWORD bReturn = 0, szIn, szOut; 464 BOOL ioctlReturnStatus; 465 void *inParam = arg, *outParam = arg; 466 467 switch (ctlCode) { 468 case NETMAP_POLL: 469 szIn = sizeof(POLL_REQUEST_DATA); 470 szOut = sizeof(POLL_REQUEST_DATA); 471 break; 472 case NETMAP_MMAP: 473 szIn = 0; 474 szOut = sizeof(void*); 475 inParam = NULL; /* nothing on input */ 476 break; 477 case NIOCTXSYNC: 478 case NIOCRXSYNC: 479 szIn = 0; 480 szOut = 0; 481 break; 482 case NIOCREGIF: 483 szIn = sizeof(struct nmreq); 484 szOut = sizeof(struct nmreq); 485 break; 486 case NIOCCONFIG: 487 D("unsupported NIOCCONFIG!"); 488 return -1; 489 490 default: /* a regular ioctl */ 491 D("invalid ioctl %x on netmap fd", ctlCode); 492 return -1; 493 } 494 495 ioctlReturnStatus = DeviceIoControl(h, 496 ctlCode, inParam, szIn, 497 outParam, szOut, 498 &bReturn, NULL); 499 // XXX note windows returns 0 on error or async call, 1 on success 500 // we could call GetLastError() to figure out what happened 501 return ioctlReturnStatus ? 0 : -1; 502 } 503 504 /* 505 * this function is what must be called from user-space programs 506 * same as ioctl, returns 0 on success and -1 on error 507 */ 508 static int 509 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 510 { 511 HANDLE h = win_get_netmap_handle(fd); 512 513 if (h == NULL) { 514 return ioctl(fd, ctlCode, arg); 515 } else { 516 return win_nm_ioctl_internal(h, ctlCode, arg); 517 } 518 } 519 520 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 521 522 /* 523 * We cannot use the native mmap on windows 524 * The only parameter used is "fd", the other ones are just declared to 525 * make this signature comparable to the FreeBSD/Linux one 526 */ 527 static void * 528 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 529 { 530 HANDLE h = win_get_netmap_handle(fd); 531 532 if (h == NULL) { 533 return mmap(addr, length, prot, flags, fd, offset); 534 } else { 535 MEMORY_ENTRY ret; 536 537 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 538 NULL : ret.pUsermodeVirtualAddress; 539 } 540 } 541 542 #define mmap win32_mmap_emulated 543 544 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 545 546 static int 547 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 548 { 549 HANDLE h; 550 551 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 552 return poll(fds, nfds, timeout); 553 } else { 554 POLL_REQUEST_DATA prd; 555 556 prd.timeout = timeout; 557 prd.events = fds->events; 558 559 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 560 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 561 return -1; 562 } 563 return 1; 564 } 565 } 566 567 #define poll win_nm_poll 568 569 static int 570 win_nm_open(char* pathname, int flags) 571 { 572 573 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 574 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 575 if (fd < 0) { 576 return -1; 577 } 578 579 win_insert_fd_record(fd); 580 return fd; 581 } else { 582 return open(pathname, flags); 583 } 584 } 585 586 #define open win_nm_open 587 588 static int 589 win_nm_close(int fd) 590 { 591 if (fd != -1) { 592 close(fd); 593 if (win_get_netmap_handle(fd) != NULL) { 594 win_remove_fd_record(fd); 595 } 596 } 597 return 0; 598 } 599 600 #define close win_nm_close 601 602 #endif /* _WIN32 */ 603 604 static int 605 nm_is_identifier(const char *s, const char *e) 606 { 607 for (; s != e; s++) { 608 if (!isalnum(*s) && *s != '_') { 609 return 0; 610 } 611 } 612 613 return 1; 614 } 615 616 /* 617 * Try to open, return descriptor if successful, NULL otherwise. 618 * An invalid netmap name will return errno = 0; 619 * You can pass a pointer to a pre-filled nm_desc to add special 620 * parameters. Flags is used as follows 621 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 622 * if the nr_arg2 (memory block) matches. 623 * NM_OPEN_ARG1 use req.nr_arg1 from arg 624 * NM_OPEN_ARG2 use req.nr_arg2 from arg 625 * NM_OPEN_RING_CFG user ring config from arg 626 */ 627 static struct nm_desc * 628 nm_open(const char *ifname, const struct nmreq *req, 629 uint64_t new_flags, const struct nm_desc *arg) 630 { 631 struct nm_desc *d = NULL; 632 const struct nm_desc *parent = arg; 633 u_int namelen; 634 uint32_t nr_ringid = 0, nr_flags, nr_reg; 635 const char *port = NULL; 636 const char *vpname = NULL; 637 #define MAXERRMSG 80 638 char errmsg[MAXERRMSG] = ""; 639 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 640 int is_vale; 641 long num; 642 uint16_t nr_arg2 = 0; 643 644 if (strncmp(ifname, "netmap:", 7) && 645 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 646 errno = 0; /* name not recognised, not an error */ 647 return NULL; 648 } 649 650 is_vale = (ifname[0] == 'v'); 651 if (is_vale) { 652 port = index(ifname, ':'); 653 if (port == NULL) { 654 snprintf(errmsg, MAXERRMSG, 655 "missing ':' in vale name"); 656 goto fail; 657 } 658 659 if (!nm_is_identifier(ifname + 4, port)) { 660 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 661 goto fail; 662 } 663 664 vpname = ++port; 665 } else { 666 ifname += 7; 667 port = ifname; 668 } 669 670 /* scan for a separator */ 671 for (; *port && !index("-*^{}/@", *port); port++) 672 ; 673 674 if (is_vale && !nm_is_identifier(vpname, port)) { 675 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 676 goto fail; 677 } 678 679 namelen = port - ifname; 680 if (namelen >= sizeof(d->req.nr_name)) { 681 snprintf(errmsg, MAXERRMSG, "name too long"); 682 goto fail; 683 } 684 p_state = P_START; 685 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 686 while (*port) { 687 switch (p_state) { 688 case P_START: 689 switch (*port) { 690 case '^': /* only SW ring */ 691 nr_flags = NR_REG_SW; 692 p_state = P_RNGSFXOK; 693 break; 694 case '*': /* NIC and SW */ 695 nr_flags = NR_REG_NIC_SW; 696 p_state = P_RNGSFXOK; 697 break; 698 case '-': /* one NIC ring pair */ 699 nr_flags = NR_REG_ONE_NIC; 700 p_state = P_GETNUM; 701 break; 702 case '{': /* pipe (master endpoint) */ 703 nr_flags = NR_REG_PIPE_MASTER; 704 p_state = P_GETNUM; 705 break; 706 case '}': /* pipe (slave endoint) */ 707 nr_flags = NR_REG_PIPE_SLAVE; 708 p_state = P_GETNUM; 709 break; 710 case '/': /* start of flags */ 711 p_state = P_FLAGS; 712 break; 713 case '@': /* start of memid */ 714 p_state = P_MEMID; 715 break; 716 default: 717 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 718 goto fail; 719 } 720 port++; 721 break; 722 case P_RNGSFXOK: 723 switch (*port) { 724 case '/': 725 p_state = P_FLAGS; 726 break; 727 case '@': 728 p_state = P_MEMID; 729 break; 730 default: 731 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 732 goto fail; 733 } 734 port++; 735 break; 736 case P_GETNUM: 737 num = strtol(port, (char **)&port, 10); 738 if (num < 0 || num >= NETMAP_RING_MASK) { 739 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 740 num, NETMAP_RING_MASK); 741 goto fail; 742 } 743 nr_ringid = num & NETMAP_RING_MASK; 744 p_state = P_RNGSFXOK; 745 break; 746 case P_FLAGS: 747 case P_FLAGSOK: 748 if (*port == '@') { 749 port++; 750 p_state = P_MEMID; 751 break; 752 } 753 switch (*port) { 754 case 'x': 755 nr_flags |= NR_EXCLUSIVE; 756 break; 757 case 'z': 758 nr_flags |= NR_ZCOPY_MON; 759 break; 760 case 't': 761 nr_flags |= NR_MONITOR_TX; 762 break; 763 case 'r': 764 nr_flags |= NR_MONITOR_RX; 765 break; 766 case 'R': 767 nr_flags |= NR_RX_RINGS_ONLY; 768 break; 769 case 'T': 770 nr_flags |= NR_TX_RINGS_ONLY; 771 break; 772 default: 773 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 774 goto fail; 775 } 776 port++; 777 p_state = P_FLAGSOK; 778 break; 779 case P_MEMID: 780 if (nr_arg2 != 0) { 781 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 782 goto fail; 783 } 784 num = strtol(port, (char **)&port, 10); 785 if (num <= 0) { 786 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 787 goto fail; 788 } 789 nr_arg2 = num; 790 p_state = P_RNGSFXOK; 791 break; 792 } 793 } 794 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 795 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 796 goto fail; 797 } 798 ND("flags: %s %s %s %s", 799 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 800 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 801 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 802 (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); 803 d = (struct nm_desc *)calloc(1, sizeof(*d)); 804 if (d == NULL) { 805 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 806 errno = ENOMEM; 807 return NULL; 808 } 809 d->self = d; /* set this early so nm_close() works */ 810 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 811 if (d->fd < 0) { 812 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 813 goto fail; 814 } 815 816 if (req) 817 d->req = *req; 818 d->req.nr_version = NETMAP_API; 819 d->req.nr_ringid &= ~NETMAP_RING_MASK; 820 821 /* these fields are overridden by ifname and flags processing */ 822 d->req.nr_ringid |= nr_ringid; 823 d->req.nr_flags |= nr_flags; 824 if (nr_arg2) 825 d->req.nr_arg2 = nr_arg2; 826 memcpy(d->req.nr_name, ifname, namelen); 827 d->req.nr_name[namelen] = '\0'; 828 /* optionally import info from parent */ 829 if (IS_NETMAP_DESC(parent) && new_flags) { 830 if (new_flags & NM_OPEN_ARG1) 831 D("overriding ARG1 %d", parent->req.nr_arg1); 832 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 833 parent->req.nr_arg1 : 4; 834 if (new_flags & NM_OPEN_ARG2) 835 D("overriding ARG2 %d", parent->req.nr_arg2); 836 d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? 837 parent->req.nr_arg2 : 0; 838 if (new_flags & NM_OPEN_ARG3) 839 D("overriding ARG3 %d", parent->req.nr_arg3); 840 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 841 parent->req.nr_arg3 : 0; 842 if (new_flags & NM_OPEN_RING_CFG) { 843 D("overriding RING_CFG"); 844 d->req.nr_tx_slots = parent->req.nr_tx_slots; 845 d->req.nr_rx_slots = parent->req.nr_rx_slots; 846 d->req.nr_tx_rings = parent->req.nr_tx_rings; 847 d->req.nr_rx_rings = parent->req.nr_rx_rings; 848 } 849 if (new_flags & NM_OPEN_IFNAME) { 850 D("overriding ifname %s ringid 0x%x flags 0x%x", 851 parent->req.nr_name, parent->req.nr_ringid, 852 parent->req.nr_flags); 853 memcpy(d->req.nr_name, parent->req.nr_name, 854 sizeof(d->req.nr_name)); 855 d->req.nr_ringid = parent->req.nr_ringid; 856 d->req.nr_flags = parent->req.nr_flags; 857 } 858 } 859 /* add the *XPOLL flags */ 860 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 861 862 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 863 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 864 goto fail; 865 } 866 867 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 868 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 869 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 870 goto fail; 871 } 872 873 nr_reg = d->req.nr_flags & NR_REG_MASK; 874 875 if (nr_reg == NR_REG_SW) { /* host stack */ 876 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 877 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 878 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 879 d->first_tx_ring = 0; 880 d->first_rx_ring = 0; 881 d->last_tx_ring = d->req.nr_tx_rings - 1; 882 d->last_rx_ring = d->req.nr_rx_rings - 1; 883 } else if (nr_reg == NR_REG_NIC_SW) { 884 d->first_tx_ring = 0; 885 d->first_rx_ring = 0; 886 d->last_tx_ring = d->req.nr_tx_rings; 887 d->last_rx_ring = d->req.nr_rx_rings; 888 } else if (nr_reg == NR_REG_ONE_NIC) { 889 /* XXX check validity */ 890 d->first_tx_ring = d->last_tx_ring = 891 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 892 } else { /* pipes */ 893 d->first_tx_ring = d->last_tx_ring = 0; 894 d->first_rx_ring = d->last_rx_ring = 0; 895 } 896 897 #ifdef DEBUG_NETMAP_USER 898 { /* debugging code */ 899 int i; 900 901 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 902 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 903 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 904 for (i = 0; i <= d->req.nr_tx_rings; i++) { 905 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 906 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 907 } 908 for (i = 0; i <= d->req.nr_rx_rings; i++) { 909 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 910 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 911 } 912 } 913 #endif /* debugging */ 914 915 d->cur_tx_ring = d->first_tx_ring; 916 d->cur_rx_ring = d->first_rx_ring; 917 return d; 918 919 fail: 920 nm_close(d); 921 if (errmsg[0]) 922 D("%s %s", errmsg, ifname); 923 if (errno == 0) 924 errno = EINVAL; 925 return NULL; 926 } 927 928 929 static int 930 nm_close(struct nm_desc *d) 931 { 932 /* 933 * ugly trick to avoid unused warnings 934 */ 935 static void *__xxzt[] __attribute__ ((unused)) = 936 { (void *)nm_open, (void *)nm_inject, 937 (void *)nm_dispatch, (void *)nm_nextpkt } ; 938 939 if (d == NULL || d->self != d) 940 return EINVAL; 941 if (d->done_mmap && d->mem) 942 munmap(d->mem, d->memsize); 943 if (d->fd != -1) { 944 close(d->fd); 945 } 946 947 bzero(d, sizeof(*d)); 948 free(d); 949 return 0; 950 } 951 952 953 static int 954 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 955 { 956 //XXX TODO: check if mmap is already done 957 958 if (IS_NETMAP_DESC(parent) && parent->mem && 959 parent->req.nr_arg2 == d->req.nr_arg2) { 960 /* do not mmap, inherit from parent */ 961 D("do not mmap, inherit from parent"); 962 d->memsize = parent->memsize; 963 d->mem = parent->mem; 964 } else { 965 /* XXX TODO: check if memsize is too large (or there is overflow) */ 966 d->memsize = d->req.nr_memsize; 967 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 968 d->fd, 0); 969 if (d->mem == MAP_FAILED) { 970 goto fail; 971 } 972 d->done_mmap = 1; 973 } 974 { 975 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 976 struct netmap_ring *r = NETMAP_RXRING(nifp, ); 977 978 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 979 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 980 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 981 *(void **)(uintptr_t)&d->buf_end = 982 (char *)d->mem + d->memsize; 983 } 984 985 return 0; 986 987 fail: 988 return EINVAL; 989 } 990 991 /* 992 * Same prototype as pcap_inject(), only need to cast. 993 */ 994 static int 995 nm_inject(struct nm_desc *d, const void *buf, size_t size) 996 { 997 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; 998 999 for (c = 0; c < n ; c++) { 1000 /* compute current ring to use */ 1001 struct netmap_ring *ring; 1002 uint32_t i, idx; 1003 uint32_t ri = d->cur_tx_ring + c; 1004 1005 if (ri > d->last_tx_ring) 1006 ri = d->first_tx_ring; 1007 ring = NETMAP_TXRING(d->nifp, ri); 1008 if (nm_ring_empty(ring)) { 1009 continue; 1010 } 1011 i = ring->cur; 1012 idx = ring->slot[i].buf_idx; 1013 ring->slot[i].len = size; 1014 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 1015 d->cur_tx_ring = ri; 1016 ring->head = ring->cur = nm_ring_next(ring, i); 1017 return size; 1018 } 1019 return 0; /* fail */ 1020 } 1021 1022 1023 /* 1024 * Same prototype as pcap_dispatch(), only need to cast. 1025 */ 1026 static int 1027 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1028 { 1029 int n = d->last_rx_ring - d->first_rx_ring + 1; 1030 int c, got = 0, ri = d->cur_rx_ring; 1031 d->hdr.buf = NULL; 1032 d->hdr.flags = NM_MORE_PKTS; 1033 d->hdr.d = d; 1034 1035 if (cnt == 0) 1036 cnt = -1; 1037 /* cnt == -1 means infinite, but rings have a finite amount 1038 * of buffers and the int is large enough that we never wrap, 1039 * so we can omit checking for -1 1040 */ 1041 for (c=0; c < n && cnt != got; c++) { 1042 /* compute current ring to use */ 1043 struct netmap_ring *ring; 1044 1045 ri = d->cur_rx_ring + c; 1046 if (ri > d->last_rx_ring) 1047 ri = d->first_rx_ring; 1048 ring = NETMAP_RXRING(d->nifp, ri); 1049 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1050 u_int idx, i; 1051 if (d->hdr.buf) { /* from previous round */ 1052 cb(arg, &d->hdr, d->hdr.buf); 1053 } 1054 i = ring->cur; 1055 idx = ring->slot[i].buf_idx; 1056 d->hdr.slot = &ring->slot[i]; 1057 d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1058 // __builtin_prefetch(buf); 1059 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 1060 d->hdr.ts = ring->ts; 1061 ring->head = ring->cur = nm_ring_next(ring, i); 1062 } 1063 } 1064 if (d->hdr.buf) { /* from previous round */ 1065 d->hdr.flags = 0; 1066 cb(arg, &d->hdr, d->hdr.buf); 1067 } 1068 d->cur_rx_ring = ri; 1069 return got; 1070 } 1071 1072 static u_char * 1073 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1074 { 1075 int ri = d->cur_rx_ring; 1076 1077 do { 1078 /* compute current ring to use */ 1079 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1080 if (!nm_ring_empty(ring)) { 1081 u_int i = ring->cur; 1082 u_int idx = ring->slot[i].buf_idx; 1083 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1084 1085 // __builtin_prefetch(buf); 1086 hdr->ts = ring->ts; 1087 hdr->len = hdr->caplen = ring->slot[i].len; 1088 ring->cur = nm_ring_next(ring, i); 1089 /* we could postpone advancing head if we want 1090 * to hold the buffer. This can be supported in 1091 * the future. 1092 */ 1093 ring->head = ring->cur; 1094 d->cur_rx_ring = ri; 1095 return buf; 1096 } 1097 ri++; 1098 if (ri > d->last_rx_ring) 1099 ri = d->first_rx_ring; 1100 } while (ri != d->cur_rx_ring); 1101 return NULL; /* nothing found */ 1102 } 1103 1104 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1105 1106 #endif /* NETMAP_WITH_LIBS */ 1107 1108 #endif /* _NET_NETMAP_USER_H_ */ 1109