1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * 32 * Functions and macros to manipulate netmap structures and packets 33 * in userspace. See netmap(4) for more information. 34 * 35 * The address of the struct netmap_if, say nifp, is computed from the 36 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 37 * ioctl(fd, NIOCREG, &req); 38 * mem = mmap(0, ... 
); 39 * nifp = NETMAP_IF(mem, req.nr_nifp); 40 * (so simple, we could just do it manually) 41 * 42 * From there: 43 * struct netmap_ring *NETMAP_TXRING(nifp, index) 44 * struct netmap_ring *NETMAP_RXRING(nifp, index) 45 * we can access ring->cur, ring->head, ring->tail, etc. 46 * 47 * ring->slot[i] gives us the i-th slot (we can access 48 * directly len, flags, buf_idx) 49 * 50 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 51 * the buffer numbered x 52 * 53 * All ring indexes (head, cur, tail) should always move forward. 54 * To compute the next index in a circular ring you can use 55 * i = nm_ring_next(ring, i); 56 * 57 * To ease porting apps from pcap to netmap we supply a few functions 58 * that can be called to open, close, read and write on netmap in a way 59 * similar to libpcap. Note that the read/write function depend on 60 * an ioctl()/select()/poll() being issued to refill rings or push 61 * packets out. 62 * 63 * In order to use these, include #define NETMAP_WITH_LIBS 64 * in the source file that invokes these functions. 
65 */ 66 67 #ifndef _NET_NETMAP_USER_H_ 68 #define _NET_NETMAP_USER_H_ 69 70 #define NETMAP_DEVICE_NAME "/dev/netmap" 71 72 #ifdef __CYGWIN__ 73 /* 74 * we can compile userspace apps with either cygwin or msvc, 75 * and we use _WIN32 to identify windows specific code 76 */ 77 #ifndef _WIN32 78 #define _WIN32 79 #endif /* _WIN32 */ 80 81 #endif /* __CYGWIN__ */ 82 83 #ifdef _WIN32 84 #undef NETMAP_DEVICE_NAME 85 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 86 #include <windows.h> 87 #include <WinDef.h> 88 #include <sys/cygwin.h> 89 #endif /* _WIN32 */ 90 91 #include <stdint.h> 92 #include <sys/socket.h> /* apple needs sockaddr */ 93 #include <net/if.h> /* IFNAMSIZ */ 94 #include <ctype.h> 95 #include <string.h> /* memset */ 96 #include <sys/time.h> /* gettimeofday */ 97 98 #ifndef likely 99 #define likely(x) __builtin_expect(!!(x), 1) 100 #define unlikely(x) __builtin_expect(!!(x), 0) 101 #endif /* likely and unlikely */ 102 103 #include <net/netmap.h> 104 105 /* helper macro */ 106 #define _NETMAP_OFFSET(type, ptr, offset) \ 107 ((type)(void *)((char *)(ptr) + (offset))) 108 109 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 110 111 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index] ) 113 114 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 115 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + \ 116 (nifp)->ni_host_tx_rings] ) 117 118 #define NETMAP_BUF(ring, index) \ 119 ((char *)(ring) + (ring)->buf_ofs + ((size_t)(index)*(ring)->nr_buf_size)) 120 121 #define NETMAP_BUF_IDX(ring, buf) \ 122 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 123 (ring)->nr_buf_size ) 124 125 /* read the offset field in a ring's slot */ 126 #define NETMAP_ROFFSET(ring, slot) \ 127 ((slot)->ptr & (ring)->offset_mask) 128 129 /* update the offset field in a ring's slot */ 130 #define NETMAP_WOFFSET(ring, slot, offset) \ 131 do { (slot)->ptr 
= ((slot)->ptr & ~(ring)->offset_mask) | \ 132 ((offset) & (ring)->offset_mask); } while (0) 133 134 /* obtain the start of the buffer pointed to by a ring's slot, taking the 135 * offset field into account 136 */ 137 #define NETMAP_BUF_OFFSET(ring, slot) \ 138 (NETMAP_BUF(ring, (slot)->buf_idx) + NETMAP_ROFFSET(ring, slot)) 139 140 141 static inline uint32_t 142 nm_ring_next(struct netmap_ring *r, uint32_t i) 143 { 144 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 145 } 146 147 148 /* 149 * Return 1 if we have pending transmissions in the tx ring. 150 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 151 */ 152 static inline int 153 nm_tx_pending(struct netmap_ring *r) 154 { 155 return nm_ring_next(r, r->tail) != r->head; 156 } 157 158 /* Compute the number of slots available in the netmap ring. We use 159 * ring->head as explained in the comment above nm_ring_empty(). */ 160 static inline uint32_t 161 nm_ring_space(struct netmap_ring *ring) 162 { 163 int ret = ring->tail - ring->head; 164 if (ret < 0) 165 ret += ring->num_slots; 166 return ret; 167 } 168 169 #ifndef ND /* debug macros */ 170 /* debug support */ 171 #define ND(_fmt, ...) do {} while(0) 172 #define D(_fmt, ...) \ 173 do { \ 174 struct timeval _t0; \ 175 gettimeofday(&_t0, NULL); \ 176 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 177 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 178 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 179 } while (0) 180 181 /* Rate limited version of "D", lps indicates how many per second */ 182 #define RD(lps, format, ...) 
\ 183 do { \ 184 static int __t0, __cnt; \ 185 struct timeval __xxts; \ 186 gettimeofday(&__xxts, NULL); \ 187 if (__t0 != __xxts.tv_sec) { \ 188 __t0 = __xxts.tv_sec; \ 189 __cnt = 0; \ 190 } \ 191 if (__cnt++ < lps) { \ 192 D(format, ##__VA_ARGS__); \ 193 } \ 194 } while (0) 195 #endif 196 197 /* 198 * this is a slightly optimized copy routine which rounds 199 * to multiple of 64 bytes and is often faster than dealing 200 * with other odd sizes. We assume there is enough room 201 * in the source and destination buffers. 202 */ 203 static inline void 204 nm_pkt_copy(const void *_src, void *_dst, int l) 205 { 206 const uint64_t *src = (const uint64_t *)_src; 207 uint64_t *dst = (uint64_t *)_dst; 208 209 if (unlikely(l >= 1024 || l % 64)) { 210 memcpy(dst, src, l); 211 return; 212 } 213 for (; likely(l > 0); l-=64) { 214 *dst++ = *src++; 215 *dst++ = *src++; 216 *dst++ = *src++; 217 *dst++ = *src++; 218 *dst++ = *src++; 219 *dst++ = *src++; 220 *dst++ = *src++; 221 *dst++ = *src++; 222 } 223 } 224 225 #ifdef NETMAP_WITH_LIBS 226 /* 227 * Support for simple I/O libraries. 228 * Include other system headers required for compiling this. 229 */ 230 231 #ifndef HAVE_NETMAP_WITH_LIBS 232 #define HAVE_NETMAP_WITH_LIBS 233 234 #include <stdio.h> 235 #include <sys/time.h> 236 #include <sys/mman.h> 237 #include <sys/ioctl.h> 238 #include <sys/errno.h> /* EINVAL */ 239 #include <fcntl.h> /* O_RDWR */ 240 #include <unistd.h> /* close() */ 241 #include <signal.h> 242 #include <stdlib.h> 243 244 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 245 struct timeval ts; 246 uint32_t caplen; 247 uint32_t len; 248 249 uint64_t flags; /* NM_MORE_PKTS etc */ 250 #define NM_MORE_PKTS 1 251 struct nm_desc *d; 252 struct netmap_slot *slot; 253 uint8_t *buf; 254 }; 255 256 struct nm_stat { /* same as pcap_stat */ 257 u_int ps_recv; 258 u_int ps_drop; 259 u_int ps_ifdrop; 260 #ifdef WIN32 /* XXX or _WIN32 ? 
*/ 261 u_int bs_capt; 262 #endif /* WIN32 */ 263 }; 264 265 #define NM_ERRBUF_SIZE 512 266 267 struct nm_desc { 268 struct nm_desc *self; /* point to self if netmap. */ 269 int fd; 270 void *mem; 271 size_t memsize; 272 int done_mmap; /* set if mem is the result of mmap */ 273 struct netmap_if * const nifp; 274 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 275 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 276 struct nmreq req; /* also contains the nr_name = ifname */ 277 struct nm_pkthdr hdr; 278 279 /* 280 * The memory contains netmap_if, rings and then buffers. 281 * Given a pointer (e.g. to nm_inject) we can compare with 282 * mem/buf_start/buf_end to tell if it is a buffer or 283 * some other descriptor in our region. 284 * We also store a pointer to some ring as it helps in the 285 * translation from buffer indexes to addresses. 286 */ 287 struct netmap_ring * const some_ring; 288 void * const buf_start; 289 void * const buf_end; 290 /* parameters from pcap_open_live */ 291 int snaplen; 292 int promisc; 293 int to_ms; 294 char *errbuf; 295 296 /* save flags so we can restore them on close */ 297 uint32_t if_flags; 298 uint32_t if_reqcap; 299 uint32_t if_curcap; 300 301 struct nm_stat st; 302 char msg[NM_ERRBUF_SIZE]; 303 }; 304 305 /* 306 * when the descriptor is open correctly, d->self == d 307 * Eventually we should also use some magic number. 308 */ 309 #define P2NMD(p) ((const struct nm_desc *)(p)) 310 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 311 #define NETMAP_FD(d) (P2NMD(d)->fd) 312 313 /* 314 * The callback, invoked on each received packet. Same as libpcap 315 */ 316 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 317 318 /* 319 *--- the pcap-like API --- 320 * 321 * nm_open() opens a file descriptor, binds to a port and maps memory. 
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 *		a suffix can indicate the following:
 *		^		bind the host (sw) ring pair
 *		*		bind host and NIC ring pairs
 *		-NN		bind individual NIC ring pair
 *		{NN		bind master side of pipe NN
 *		}NN		bind slave side of pipe NN
 *		@NN		use memory id NN (must be > 0); the value
 *				is stored in nr_arg2
 *		a suffix starting with / and the following flags,
 *		in any order:
 *		x		exclusive access
 *		z		zero copy monitor (both tx and rx)
 *		t		monitor tx side (copy monitor)
 *		r		monitor rx side (copy monitor)
 *		R		bind only RX ring(s)
 *		T		bind only TX ring(s)
 *
 * req		provides the initial values of nmreq before parsing ifname.
 *		Remember that the ifname parsing merges (ORs) the ring
 *		number into nr_ringid and the mode/flag bits into nr_flags,
 *		and overwrites nr_arg2;
 * flags	special functions, normally 0
 *		indicates which fields of *arg are significant
 * arg		special functions, normally NULL
 *		if passed a netmap_desc with mem != NULL,
 *		use that memory instead of mmap.
 *
 * Returns the new descriptor, or NULL on failure (see the comment on
 * the definition of nm_open() for the errno conventions).
 */

static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
	uint64_t flags, const struct nm_desc *arg);

/*
 * nm_open can import some fields from the parent descriptor.
 * These flags control which ones.
 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
 * which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	0x040000, /* reuse mmap from parent */
	NM_OPEN_IFNAME =	0x080000, /* nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		0x100000, /* import req.nr_arg1 from parent */
	NM_OPEN_ARG2 =		0x200000, /* import req.nr_arg2 from parent */
	NM_OPEN_ARG3 =		0x400000, /* import req.nr_arg3 from parent */
	NM_OPEN_RING_CFG =	0x800000, /* tx|rx rings|slots */
};


/*
 * nm_close() closes and restores the port to its previous state.
 * Returns 0 on success, EINVAL if the argument is not an open descriptor.
 */

static int nm_close(struct nm_desc *);

/*
 * nm_mmap()    do mmap or inherit from parent if the nr_arg2
 *              (memory block) matches.
 *              Returns 0 on success, EINVAL if mmap fails.
 */

static int nm_mmap(struct nm_desc *, const struct nm_desc *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc *, const void *, size_t);
static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);

#ifdef _WIN32

intptr_t _get_osfhandle(int); /* defined in io.h in windows */

/*
 * In windows we do not have yet native poll support, so we keep track
 * of file descriptors associated to netmap ports to emulate poll on
 * them and fall back on regular poll on other file descriptors.
401 */ 402 struct win_netmap_fd_list { 403 struct win_netmap_fd_list *next; 404 int win_netmap_fd; 405 HANDLE win_netmap_handle; 406 }; 407 408 /* 409 * list head containing all the netmap opened fd and their 410 * windows HANDLE counterparts 411 */ 412 static struct win_netmap_fd_list *win_netmap_fd_list_head; 413 414 static void 415 win_insert_fd_record(int fd) 416 { 417 struct win_netmap_fd_list *curr; 418 419 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 420 if (fd == curr->win_netmap_fd) { 421 return; 422 } 423 } 424 curr = calloc(1, sizeof(*curr)); 425 curr->next = win_netmap_fd_list_head; 426 curr->win_netmap_fd = fd; 427 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 428 win_netmap_fd_list_head = curr; 429 } 430 431 void 432 win_remove_fd_record(int fd) 433 { 434 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 435 struct win_netmap_fd_list *prev = NULL; 436 for (; curr ; prev = curr, curr = curr->next) { 437 if (fd != curr->win_netmap_fd) 438 continue; 439 /* found the entry */ 440 if (prev == NULL) { /* we are freeing the first entry */ 441 win_netmap_fd_list_head = curr->next; 442 } else { 443 prev->next = curr->next; 444 } 445 free(curr); 446 break; 447 } 448 } 449 450 451 HANDLE 452 win_get_netmap_handle(int fd) 453 { 454 struct win_netmap_fd_list *curr; 455 456 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 457 if (fd == curr->win_netmap_fd) { 458 return curr->win_netmap_handle; 459 } 460 } 461 return NULL; 462 } 463 464 /* 465 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 466 */ 467 468 /* 469 * use this function only from netmap_user.h internal functions 470 * same as ioctl, returns 0 on success and -1 on error 471 */ 472 static int 473 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 474 { 475 DWORD bReturn = 0, szIn, szOut; 476 BOOL ioctlReturnStatus; 477 void *inParam = arg, *outParam = arg; 478 479 switch (ctlCode) { 480 case NETMAP_POLL: 481 szIn = 
sizeof(POLL_REQUEST_DATA); 482 szOut = sizeof(POLL_REQUEST_DATA); 483 break; 484 case NETMAP_MMAP: 485 szIn = 0; 486 szOut = sizeof(void*); 487 inParam = NULL; /* nothing on input */ 488 break; 489 case NIOCTXSYNC: 490 case NIOCRXSYNC: 491 szIn = 0; 492 szOut = 0; 493 break; 494 case NIOCREGIF: 495 szIn = sizeof(struct nmreq); 496 szOut = sizeof(struct nmreq); 497 break; 498 case NIOCCONFIG: 499 D("unsupported NIOCCONFIG!"); 500 return -1; 501 502 default: /* a regular ioctl */ 503 D("invalid ioctl %x on netmap fd", ctlCode); 504 return -1; 505 } 506 507 ioctlReturnStatus = DeviceIoControl(h, 508 ctlCode, inParam, szIn, 509 outParam, szOut, 510 &bReturn, NULL); 511 // XXX note windows returns 0 on error or async call, 1 on success 512 // we could call GetLastError() to figure out what happened 513 return ioctlReturnStatus ? 0 : -1; 514 } 515 516 /* 517 * this function is what must be called from user-space programs 518 * same as ioctl, returns 0 on success and -1 on error 519 */ 520 static int 521 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 522 { 523 HANDLE h = win_get_netmap_handle(fd); 524 525 if (h == NULL) { 526 return ioctl(fd, ctlCode, arg); 527 } else { 528 return win_nm_ioctl_internal(h, ctlCode, arg); 529 } 530 } 531 532 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 533 534 /* 535 * We cannot use the native mmap on windows 536 * The only parameter used is "fd", the other ones are just declared to 537 * make this signature comparable to the FreeBSD/Linux one 538 */ 539 static void * 540 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 541 { 542 HANDLE h = win_get_netmap_handle(fd); 543 544 if (h == NULL) { 545 return mmap(addr, length, prot, flags, fd, offset); 546 } else { 547 MEMORY_ENTRY ret; 548 549 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
550 NULL : ret.pUsermodeVirtualAddress; 551 } 552 } 553 554 #define mmap win32_mmap_emulated 555 556 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 557 558 static int 559 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 560 { 561 HANDLE h; 562 563 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 564 return poll(fds, nfds, timeout); 565 } else { 566 POLL_REQUEST_DATA prd; 567 568 prd.timeout = timeout; 569 prd.events = fds->events; 570 571 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 572 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 573 return -1; 574 } 575 return 1; 576 } 577 } 578 579 #define poll win_nm_poll 580 581 static int 582 win_nm_open(char* pathname, int flags) 583 { 584 585 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 586 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 587 if (fd < 0) { 588 return -1; 589 } 590 591 win_insert_fd_record(fd); 592 return fd; 593 } else { 594 return open(pathname, flags); 595 } 596 } 597 598 #define open win_nm_open 599 600 static int 601 win_nm_close(int fd) 602 { 603 if (fd != -1) { 604 close(fd); 605 if (win_get_netmap_handle(fd) != NULL) { 606 win_remove_fd_record(fd); 607 } 608 } 609 return 0; 610 } 611 612 #define close win_nm_close 613 614 #endif /* _WIN32 */ 615 616 static int 617 nm_is_identifier(const char *s, const char *e) 618 { 619 for (; s != e; s++) { 620 if (!isalnum(*s) && *s != '_') { 621 return 0; 622 } 623 } 624 625 return 1; 626 } 627 628 #define MAXERRMSG 80 629 static int 630 nm_parse(const char *ifname, struct nm_desc *d, char *err) 631 { 632 int is_vale; 633 const char *port = NULL; 634 const char *vpname = NULL; 635 u_int namelen; 636 uint32_t nr_ringid = 0, nr_flags; 637 char errmsg[MAXERRMSG] = "", *tmp; 638 long num; 639 uint16_t nr_arg2 = 0; 640 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 641 642 errno = 0; 643 644 is_vale = (ifname[0] == 'v'); 645 if (is_vale) { 646 port = 
index(ifname, ':'); 647 if (port == NULL) { 648 snprintf(errmsg, MAXERRMSG, 649 "missing ':' in vale name"); 650 goto fail; 651 } 652 653 if (!nm_is_identifier(ifname + 4, port)) { 654 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 655 goto fail; 656 } 657 658 vpname = ++port; 659 } else { 660 ifname += 7; 661 port = ifname; 662 } 663 664 /* scan for a separator */ 665 for (; *port && !index("-*^{}/@", *port); port++) 666 ; 667 668 if (is_vale && !nm_is_identifier(vpname, port)) { 669 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 670 goto fail; 671 } 672 673 namelen = port - ifname; 674 if (namelen >= sizeof(d->req.nr_name)) { 675 snprintf(errmsg, MAXERRMSG, "name too long"); 676 goto fail; 677 } 678 memcpy(d->req.nr_name, ifname, namelen); 679 d->req.nr_name[namelen] = '\0'; 680 681 p_state = P_START; 682 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 683 while (*port) { 684 switch (p_state) { 685 case P_START: 686 switch (*port) { 687 case '^': /* only SW ring */ 688 nr_flags = NR_REG_SW; 689 p_state = P_RNGSFXOK; 690 break; 691 case '*': /* NIC and SW */ 692 nr_flags = NR_REG_NIC_SW; 693 p_state = P_RNGSFXOK; 694 break; 695 case '-': /* one NIC ring pair */ 696 nr_flags = NR_REG_ONE_NIC; 697 p_state = P_GETNUM; 698 break; 699 case '{': /* pipe (master endpoint) */ 700 nr_flags = NR_REG_PIPE_MASTER; 701 p_state = P_GETNUM; 702 break; 703 case '}': /* pipe (slave endpoint) */ 704 nr_flags = NR_REG_PIPE_SLAVE; 705 p_state = P_GETNUM; 706 break; 707 case '/': /* start of flags */ 708 p_state = P_FLAGS; 709 break; 710 case '@': /* start of memid */ 711 p_state = P_MEMID; 712 break; 713 default: 714 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 715 goto fail; 716 } 717 port++; 718 break; 719 case P_RNGSFXOK: 720 switch (*port) { 721 case '/': 722 p_state = P_FLAGS; 723 break; 724 case '@': 725 p_state = P_MEMID; 726 break; 727 default: 728 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 729 goto fail; 730 } 731 
port++; 732 break; 733 case P_GETNUM: 734 num = strtol(port, &tmp, 10); 735 if (num < 0 || num >= NETMAP_RING_MASK) { 736 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 737 num, NETMAP_RING_MASK); 738 goto fail; 739 } 740 port = tmp; 741 nr_ringid = num & NETMAP_RING_MASK; 742 p_state = P_RNGSFXOK; 743 break; 744 case P_FLAGS: 745 case P_FLAGSOK: 746 if (*port == '@') { 747 port++; 748 p_state = P_MEMID; 749 break; 750 } 751 switch (*port) { 752 case 'x': 753 nr_flags |= NR_EXCLUSIVE; 754 break; 755 case 'z': 756 nr_flags |= NR_ZCOPY_MON; 757 break; 758 case 't': 759 nr_flags |= NR_MONITOR_TX; 760 break; 761 case 'r': 762 nr_flags |= NR_MONITOR_RX; 763 break; 764 case 'R': 765 nr_flags |= NR_RX_RINGS_ONLY; 766 break; 767 case 'T': 768 nr_flags |= NR_TX_RINGS_ONLY; 769 break; 770 default: 771 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 772 goto fail; 773 } 774 port++; 775 p_state = P_FLAGSOK; 776 break; 777 case P_MEMID: 778 if (nr_arg2 != 0) { 779 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 780 goto fail; 781 } 782 num = strtol(port, &tmp, 10); 783 if (num <= 0) { 784 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 785 goto fail; 786 } 787 port = tmp; 788 nr_arg2 = num; 789 p_state = P_RNGSFXOK; 790 break; 791 } 792 } 793 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 794 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 795 goto fail; 796 } 797 ND("flags: %s %s %s %s", 798 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 799 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 800 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 801 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 802 803 d->req.nr_flags |= nr_flags; 804 d->req.nr_ringid |= nr_ringid; 805 d->req.nr_arg2 = nr_arg2; 806 807 d->self = d; 808 809 return 0; 810 fail: 811 if (!errno) 812 errno = EINVAL; 813 if (err) 814 strncpy(err, errmsg, MAXERRMSG); 815 return -1; 816 } 817 818 /* 819 * Try to open, return descriptor if successful, NULL otherwise. 820 * An invalid netmap name will return errno = 0; 821 * You can pass a pointer to a pre-filled nm_desc to add special 822 * parameters. Flags is used as follows 823 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 824 * if the nr_arg2 (memory block) matches. 825 * NM_OPEN_ARG1 use req.nr_arg1 from arg 826 * NM_OPEN_ARG2 use req.nr_arg2 from arg 827 * NM_OPEN_RING_CFG user ring config from arg 828 */ 829 static struct nm_desc * 830 nm_open(const char *ifname, const struct nmreq *req, 831 uint64_t new_flags, const struct nm_desc *arg) 832 { 833 struct nm_desc *d = NULL; 834 const struct nm_desc *parent = arg; 835 char errmsg[MAXERRMSG] = ""; 836 uint32_t nr_reg; 837 838 if (strncmp(ifname, "netmap:", 7) && 839 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 840 errno = 0; /* name not recognised, not an error */ 841 return NULL; 842 } 843 844 d = (struct nm_desc *)calloc(1, sizeof(*d)); 845 if (d == NULL) { 846 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 847 errno = ENOMEM; 848 return NULL; 849 } 850 d->self = d; /* set this early so nm_close() works */ 851 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 852 if (d->fd < 0) { 853 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 854 goto fail; 855 } 856 857 if (req) 858 d->req = *req; 859 860 if (!(new_flags & NM_OPEN_IFNAME)) { 861 if (nm_parse(ifname, d, errmsg) < 0) 862 goto fail; 863 } 864 865 d->req.nr_version = NETMAP_API; 866 d->req.nr_ringid &= NETMAP_RING_MASK; 867 868 /* optionally import info from parent */ 869 if (IS_NETMAP_DESC(parent) && new_flags) { 870 if (new_flags & NM_OPEN_ARG1) 871 D("overriding 
ARG1 %d", parent->req.nr_arg1); 872 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 873 parent->req.nr_arg1 : 4; 874 if (new_flags & NM_OPEN_ARG2) { 875 D("overriding ARG2 %d", parent->req.nr_arg2); 876 d->req.nr_arg2 = parent->req.nr_arg2; 877 } 878 if (new_flags & NM_OPEN_ARG3) 879 D("overriding ARG3 %d", parent->req.nr_arg3); 880 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 881 parent->req.nr_arg3 : 0; 882 if (new_flags & NM_OPEN_RING_CFG) { 883 D("overriding RING_CFG"); 884 d->req.nr_tx_slots = parent->req.nr_tx_slots; 885 d->req.nr_rx_slots = parent->req.nr_rx_slots; 886 d->req.nr_tx_rings = parent->req.nr_tx_rings; 887 d->req.nr_rx_rings = parent->req.nr_rx_rings; 888 } 889 if (new_flags & NM_OPEN_IFNAME) { 890 D("overriding ifname %s ringid 0x%x flags 0x%x", 891 parent->req.nr_name, parent->req.nr_ringid, 892 parent->req.nr_flags); 893 memcpy(d->req.nr_name, parent->req.nr_name, 894 sizeof(d->req.nr_name)); 895 d->req.nr_ringid = parent->req.nr_ringid; 896 d->req.nr_flags = parent->req.nr_flags; 897 } 898 } 899 /* add the *XPOLL flags */ 900 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 901 902 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 903 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 904 goto fail; 905 } 906 907 nr_reg = d->req.nr_flags & NR_REG_MASK; 908 909 if (nr_reg == NR_REG_SW) { /* host stack */ 910 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 911 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 912 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 913 d->first_tx_ring = 0; 914 d->first_rx_ring = 0; 915 d->last_tx_ring = d->req.nr_tx_rings - 1; 916 d->last_rx_ring = d->req.nr_rx_rings - 1; 917 } else if (nr_reg == NR_REG_NIC_SW) { 918 d->first_tx_ring = 0; 919 d->first_rx_ring = 0; 920 d->last_tx_ring = d->req.nr_tx_rings; 921 d->last_rx_ring = d->req.nr_rx_rings; 922 } else if (nr_reg == NR_REG_ONE_NIC) { 923 /* XXX check validity */ 924 d->first_tx_ring = d->last_tx_ring = 925 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 926 } else { /* pipes */ 927 d->first_tx_ring = d->last_tx_ring = 0; 928 d->first_rx_ring = d->last_rx_ring = 0; 929 } 930 931 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 932 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 933 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 934 goto fail; 935 } 936 937 938 #ifdef DEBUG_NETMAP_USER 939 { /* debugging code */ 940 int i; 941 942 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 943 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 944 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 945 for (i = 0; i <= d->req.nr_tx_rings; i++) { 946 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 947 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 948 } 949 for (i = 0; i <= d->req.nr_rx_rings; i++) { 950 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 951 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 952 } 953 } 954 #endif /* debugging */ 955 956 d->cur_tx_ring = d->first_tx_ring; 957 d->cur_rx_ring = d->first_rx_ring; 958 return d; 959 960 fail: 961 nm_close(d); 962 if (errmsg[0]) 963 D("%s %s", errmsg, ifname); 964 if (errno == 0) 965 errno = EINVAL; 966 return NULL; 967 } 968 969 970 static int 971 nm_close(struct nm_desc *d) 972 { 973 /* 974 * ugly trick to avoid unused warnings 975 */ 976 static void *__xxzt[] __attribute__ ((unused)) = 977 { (void *)nm_open, (void *)nm_inject, 978 (void *)nm_dispatch, (void *)nm_nextpkt } ; 979 980 if (d == NULL || d->self != d) 981 return EINVAL; 982 if (d->done_mmap && d->mem) 983 munmap(d->mem, d->memsize); 984 if (d->fd != -1) { 985 close(d->fd); 986 } 987 988 bzero((char *)d, sizeof(*d)); 989 free(d); 990 return 0; 991 } 992 993 994 static int 995 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 996 { 997 if (d->done_mmap) 998 return 0; 999 1000 if (IS_NETMAP_DESC(parent) && parent->mem && 1001 
parent->req.nr_arg2 == d->req.nr_arg2) { 1002 /* do not mmap, inherit from parent */ 1003 D("do not mmap, inherit from parent"); 1004 d->memsize = parent->memsize; 1005 d->mem = parent->mem; 1006 } else { 1007 /* XXX TODO: check if memsize is too large (or there is overflow) */ 1008 d->memsize = d->req.nr_memsize; 1009 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 1010 d->fd, 0); 1011 if (d->mem == MAP_FAILED) { 1012 goto fail; 1013 } 1014 d->done_mmap = 1; 1015 } 1016 { 1017 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 1018 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1019 if ((void *)r == (void *)nifp) { 1020 /* the descriptor is open for TX only */ 1021 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1022 } 1023 1024 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1025 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1026 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1027 *(void **)(uintptr_t)&d->buf_end = 1028 (char *)d->mem + d->memsize; 1029 } 1030 1031 return 0; 1032 1033 fail: 1034 return EINVAL; 1035 } 1036 1037 /* 1038 * Same prototype as pcap_inject(), only need to cast. 
1039 */ 1040 static int 1041 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1042 { 1043 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1044 ri = d->cur_tx_ring; 1045 1046 for (c = 0; c < n ; c++, ri++) { 1047 /* compute current ring to use */ 1048 struct netmap_ring *ring; 1049 uint32_t i, j, idx; 1050 size_t rem; 1051 1052 if (ri > d->last_tx_ring) 1053 ri = d->first_tx_ring; 1054 ring = NETMAP_TXRING(d->nifp, ri); 1055 rem = size; 1056 j = ring->cur; 1057 while (rem > ring->nr_buf_size && j != ring->tail) { 1058 rem -= ring->nr_buf_size; 1059 j = nm_ring_next(ring, j); 1060 } 1061 if (j == ring->tail && rem > 0) 1062 continue; 1063 i = ring->cur; 1064 while (i != j) { 1065 idx = ring->slot[i].buf_idx; 1066 ring->slot[i].len = ring->nr_buf_size; 1067 ring->slot[i].flags = NS_MOREFRAG; 1068 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1069 i = nm_ring_next(ring, i); 1070 buf = (const char *)buf + ring->nr_buf_size; 1071 } 1072 idx = ring->slot[i].buf_idx; 1073 ring->slot[i].len = rem; 1074 ring->slot[i].flags = 0; 1075 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1076 ring->head = ring->cur = nm_ring_next(ring, i); 1077 d->cur_tx_ring = ri; 1078 return size; 1079 } 1080 return 0; /* fail */ 1081 } 1082 1083 1084 /* 1085 * Same prototype as pcap_dispatch(), only need to cast. 
 *
 * Scan the bound RX rings, starting from d->cur_rx_ring, and invoke cb
 * once per received packet until cnt packets have been consumed or all
 * the rings have been visited (cnt == 0 or -1 means no limit).
 * The packet descriptor passed to cb is d->hdr. Each callback is deferred
 * until the next packet has been found, so that all callbacks but the
 * last one see NM_MORE_PKTS in hdr.flags and the last one sees flags == 0.
 * Returns the number of packets consumed.
 */
static int
nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_rx_ring - d->first_rx_ring + 1;
	int c, got = 0, ri = d->cur_rx_ring;
	/* hdr.buf == NULL means "no packet waiting for its callback" */
	d->hdr.buf = NULL;
	d->hdr.flags = NM_MORE_PKTS;
	d->hdr.d = d;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c=0; c < n && cnt != got; c++, ri++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		if (ri > d->last_rx_ring)
			ri = d->first_rx_ring;
		ring = NETMAP_RXRING(d->nifp, ri);
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int idx, i;
			u_char *oldbuf;
			struct netmap_slot *slot;
			/* deliver the previous packet before d->hdr is
			 * overwritten with the new one */
			if (d->hdr.buf) { /* from previous round */
				cb(arg, &d->hdr, d->hdr.buf);
			}
			i = ring->cur;
			slot = &ring->slot[i];
			idx = slot->buf_idx;
			/* d->cur_rx_ring doesn't change inside this loop, but
			 * set it here, so it reflects d->hdr.buf's ring */
			d->cur_rx_ring = ri;
			d->hdr.slot = slot;
			oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
			// __builtin_prefetch(buf);
			d->hdr.len = d->hdr.caplen = slot->len;
			/* multi-slot packet: len counts every fragment, caplen
			 * only grows while each fragment is a full buffer
			 * immediately followed in memory by the next one, i.e.
			 * while the data starting at hdr.buf is contiguous */
			while (slot->flags & NS_MOREFRAG) {
				u_char *nbuf;
				u_int oldlen = slot->len;
				i = nm_ring_next(ring, i);
				slot = &ring->slot[i];
				d->hdr.len += slot->len;
				nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
				if (oldbuf != NULL && (uint32_t)(nbuf - oldbuf) == ring->nr_buf_size &&
						oldlen == ring->nr_buf_size) {
					d->hdr.caplen += slot->len;
					oldbuf = nbuf;
				} else {
					/* contiguity broken: stop extending caplen */
					oldbuf = NULL;
				}
			}
			d->hdr.ts = ring->ts;
			/* release every slot of this packet */
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	/* deliver the last packet, flagged as the final one of this call */
	if (d->hdr.buf) { /* from previous round */
		d->hdr.flags = 0;
		cb(arg, &d->hdr, d->hdr.buf);
	}
	return got;
}

1152 static u_char * 1153 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1154 { 1155 int ri = d->cur_rx_ring; 1156 1157 do { 1158 /* compute current ring to use */ 1159 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1160 if (!nm_ring_empty(ring)) { 1161 u_int i = ring->cur; 1162 u_int idx = ring->slot[i].buf_idx; 1163 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1164 1165 // __builtin_prefetch(buf); 1166 hdr->ts = ring->ts; 1167 hdr->len = hdr->caplen = ring->slot[i].len; 1168 ring->cur = nm_ring_next(ring, i); 1169 /* we could postpone advancing head if we want 1170 * to hold the buffer. This can be supported in 1171 * the future. 1172 */ 1173 ring->head = ring->cur; 1174 d->cur_rx_ring = ri; 1175 return buf; 1176 } 1177 ri++; 1178 if (ri > d->last_rx_ring) 1179 ri = d->first_rx_ring; 1180 } while (ri != d->cur_rx_ring); 1181 return NULL; /* nothing found */ 1182 } 1183 1184 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1185 1186 #endif /* NETMAP_WITH_LIBS */ 1187 1188 #endif /* _NET_NETMAP_USER_H_ */ 1189