1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2011-2016 Universita` di Pisa 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * $FreeBSD$ 32 * 33 * Functions and macros to manipulate netmap structures and packets 34 * in userspace. See netmap(4) for more information. 35 * 36 * The address of the struct netmap_if, say nifp, is computed from the 37 * value returned from ioctl(.., NIOCREG, ...) and the mmap region: 38 * ioctl(fd, NIOCREG, &req); 39 * mem = mmap(0, ... 
); 40 * nifp = NETMAP_IF(mem, req.nr_offset); 41 * (so simple, we could just do it manually) 42 * 43 * From there: 44 * struct netmap_ring *NETMAP_TXRING(nifp, index) 45 * struct netmap_ring *NETMAP_RXRING(nifp, index) 46 * we can access ring->cur, ring->head, ring->tail, etc. 47 * 48 * ring->slot[i] gives us the i-th slot (we can access 49 * directly len, flags, buf_idx) 50 * 51 * char *buf = NETMAP_BUF(ring, x) returns a pointer to 52 * the buffer numbered x 53 * 54 * All ring indexes (head, cur, tail) should always move forward. 55 * To compute the next index in a circular ring you can use 56 * i = nm_ring_next(ring, i); 57 * 58 * To ease porting apps from pcap to netmap we supply a few functions 59 * that can be called to open, close, read and write on netmap in a way 60 * similar to libpcap. Note that the read/write functions depend on 61 * an ioctl()/select()/poll() being issued to refill rings or push 62 * packets out. 63 * 64 * In order to use these, include #define NETMAP_WITH_LIBS 65 * in the source file that invokes these functions. 
66 */ 67 68 #ifndef _NET_NETMAP_USER_H_ 69 #define _NET_NETMAP_USER_H_ 70 71 #define NETMAP_DEVICE_NAME "/dev/netmap" 72 73 #ifdef __CYGWIN__ 74 /* 75 * we can compile userspace apps with either cygwin or msvc, 76 * and we use _WIN32 to identify windows specific code 77 */ 78 #ifndef _WIN32 79 #define _WIN32 80 #endif /* _WIN32 */ 81 82 #endif /* __CYGWIN__ */ 83 84 #ifdef _WIN32 85 #undef NETMAP_DEVICE_NAME 86 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 87 #include <windows.h> 88 #include <WinDef.h> 89 #include <sys/cygwin.h> 90 #endif /* _WIN32 */ 91 92 #include <stdint.h> 93 #include <sys/socket.h> /* apple needs sockaddr */ 94 #include <net/if.h> /* IFNAMSIZ */ 95 #include <ctype.h> 96 #include <string.h> /* memset */ 97 #include <sys/time.h> /* gettimeofday */ 98 99 #ifndef likely 100 #define likely(x) __builtin_expect(!!(x), 1) 101 #define unlikely(x) __builtin_expect(!!(x), 0) 102 #endif /* likely and unlikely */ 103 104 #include <net/netmap.h> 105 106 /* helper macro */ 107 #define _NETMAP_OFFSET(type, ptr, offset) \ 108 ((type)(void *)((char *)(ptr) + (offset))) 109 110 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 111 112 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 113 nifp, (nifp)->ring_ofs[index] ) 114 115 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 116 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + \ 117 (nifp)->ni_host_tx_rings] ) 118 119 #define NETMAP_BUF(ring, index) \ 120 ((char *)(ring) + (ring)->buf_ofs + ((size_t)(index)*(ring)->nr_buf_size)) 121 122 #define NETMAP_BUF_IDX(ring, buf) \ 123 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 124 (ring)->nr_buf_size ) 125 126 /* read the offset field in a ring's slot */ 127 #define NETMAP_ROFFSET(ring, slot) \ 128 ((slot)->ptr & (ring)->offset_mask) 129 130 /* update the offset field in a ring's slot */ 131 #define NETMAP_WOFFSET(ring, slot, offset) \ 132 do { (slot)->ptr 
= ((slot)->ptr & ~(ring)->offset_mask) | \ 133 ((offset) & (ring)->offset_mask); } while (0) 134 135 /* obtain the start of the buffer pointed to by a ring's slot, taking the 136 * offset field into account 137 */ 138 #define NETMAP_BUF_OFFSET(ring, slot) \ 139 (NETMAP_BUF(ring, (slot)->buf_idx) + NETMAP_ROFFSET(ring, slot)) 140 141 142 static inline uint32_t 143 nm_ring_next(struct netmap_ring *r, uint32_t i) 144 { 145 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 146 } 147 148 149 /* 150 * Return 1 if we have pending transmissions in the tx ring. 151 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 152 */ 153 static inline int 154 nm_tx_pending(struct netmap_ring *r) 155 { 156 return nm_ring_next(r, r->tail) != r->head; 157 } 158 159 /* Compute the number of slots available in the netmap ring. We use 160 * ring->head as explained in the comment above nm_ring_empty(). */ 161 static inline uint32_t 162 nm_ring_space(struct netmap_ring *ring) 163 { 164 int ret = ring->tail - ring->head; 165 if (ret < 0) 166 ret += ring->num_slots; 167 return ret; 168 } 169 170 #ifndef ND /* debug macros */ 171 /* debug support */ 172 #define ND(_fmt, ...) do {} while(0) 173 #define D(_fmt, ...) \ 174 do { \ 175 struct timeval _t0; \ 176 gettimeofday(&_t0, NULL); \ 177 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 178 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 179 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 180 } while (0) 181 182 /* Rate limited version of "D", lps indicates how many per second */ 183 #define RD(lps, format, ...) 
\ 184 do { \ 185 static int __t0, __cnt; \ 186 struct timeval __xxts; \ 187 gettimeofday(&__xxts, NULL); \ 188 if (__t0 != __xxts.tv_sec) { \ 189 __t0 = __xxts.tv_sec; \ 190 __cnt = 0; \ 191 } \ 192 if (__cnt++ < lps) { \ 193 D(format, ##__VA_ARGS__); \ 194 } \ 195 } while (0) 196 #endif 197 198 /* 199 * this is a slightly optimized copy routine which rounds 200 * to multiple of 64 bytes and is often faster than dealing 201 * with other odd sizes. We assume there is enough room 202 * in the source and destination buffers. 203 */ 204 static inline void 205 nm_pkt_copy(const void *_src, void *_dst, int l) 206 { 207 const uint64_t *src = (const uint64_t *)_src; 208 uint64_t *dst = (uint64_t *)_dst; 209 210 if (unlikely(l >= 1024 || l % 64)) { 211 memcpy(dst, src, l); 212 return; 213 } 214 for (; likely(l > 0); l-=64) { 215 *dst++ = *src++; 216 *dst++ = *src++; 217 *dst++ = *src++; 218 *dst++ = *src++; 219 *dst++ = *src++; 220 *dst++ = *src++; 221 *dst++ = *src++; 222 *dst++ = *src++; 223 } 224 } 225 226 #ifdef NETMAP_WITH_LIBS 227 /* 228 * Support for simple I/O libraries. 229 * Include other system headers required for compiling this. 230 */ 231 232 #ifndef HAVE_NETMAP_WITH_LIBS 233 #define HAVE_NETMAP_WITH_LIBS 234 235 #include <stdio.h> 236 #include <sys/time.h> 237 #include <sys/mman.h> 238 #include <sys/ioctl.h> 239 #include <sys/errno.h> /* EINVAL */ 240 #include <fcntl.h> /* O_RDWR */ 241 #include <unistd.h> /* close() */ 242 #include <signal.h> 243 #include <stdlib.h> 244 245 struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ 246 struct timeval ts; 247 uint32_t caplen; 248 uint32_t len; 249 250 uint64_t flags; /* NM_MORE_PKTS etc */ 251 #define NM_MORE_PKTS 1 252 struct nm_desc *d; 253 struct netmap_slot *slot; 254 uint8_t *buf; 255 }; 256 257 struct nm_stat { /* same as pcap_stat */ 258 u_int ps_recv; 259 u_int ps_drop; 260 u_int ps_ifdrop; 261 #ifdef WIN32 /* XXX or _WIN32 ? 
*/ 262 u_int bs_capt; 263 #endif /* WIN32 */ 264 }; 265 266 #define NM_ERRBUF_SIZE 512 267 268 struct nm_desc { 269 struct nm_desc *self; /* point to self if netmap. */ 270 int fd; 271 void *mem; 272 size_t memsize; 273 int done_mmap; /* set if mem is the result of mmap */ 274 struct netmap_if * const nifp; 275 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 276 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 277 struct nmreq req; /* also contains the nr_name = ifname */ 278 struct nm_pkthdr hdr; 279 280 /* 281 * The memory contains netmap_if, rings and then buffers. 282 * Given a pointer (e.g. to nm_inject) we can compare with 283 * mem/buf_start/buf_end to tell if it is a buffer or 284 * some other descriptor in our region. 285 * We also store a pointer to some ring as it helps in the 286 * translation from buffer indexes to addresses. 287 */ 288 struct netmap_ring * const some_ring; 289 void * const buf_start; 290 void * const buf_end; 291 /* parameters from pcap_open_live */ 292 int snaplen; 293 int promisc; 294 int to_ms; 295 char *errbuf; 296 297 /* save flags so we can restore them on close */ 298 uint32_t if_flags; 299 uint32_t if_reqcap; 300 uint32_t if_curcap; 301 302 struct nm_stat st; 303 char msg[NM_ERRBUF_SIZE]; 304 }; 305 306 /* 307 * when the descriptor is open correctly, d->self == d 308 * Eventually we should also use some magic number. 309 */ 310 #define P2NMD(p) ((const struct nm_desc *)(p)) 311 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 312 #define NETMAP_FD(d) (P2NMD(d)->fd) 313 314 /* 315 * The callback, invoked on each received packet. Same as libpcap 316 */ 317 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 318 319 /* 320 *--- the pcap-like API --- 321 * 322 * nm_open() opens a file descriptor, binds to a port and maps memory. 
323 * 324 * ifname (netmap:foo or vale:foo) is the port name 325 * a suffix can indicate the following: 326 * ^ bind the host (sw) ring pair 327 * * bind host and NIC ring pairs 328 * -NN bind individual NIC ring pair 329 * {NN bind master side of pipe NN 330 * }NN bind slave side of pipe NN 331 * a suffix starting with / and the following flags, 332 * in any order: 333 * x exclusive access 334 * z zero copy monitor (both tx and rx) 335 * t monitor tx side (copy monitor) 336 * r monitor rx side (copy monitor) 337 * R bind only RX ring(s) 338 * T bind only TX ring(s) 339 * 340 * req provides the initial values of nmreq before parsing ifname. 341 * Remember that the ifname parsing will override the ring 342 * number in nm_ringid, and part of nm_flags; 343 * flags special functions, normally 0 344 * indicates which fields of *arg are significant 345 * arg special functions, normally NULL 346 * if passed a netmap_desc with mem != NULL, 347 * use that memory instead of mmap. 348 */ 349 350 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 351 uint64_t flags, const struct nm_desc *arg); 352 353 /* 354 * nm_open can import some fields from the parent descriptor. 355 * These flags control which ones. 356 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 357 * which set the initial value for these flags. 358 * Note that the 16 low bits of the flags are reserved for data 359 * that may go into the nmreq. 
360 */ 361 enum { 362 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 363 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 364 NM_OPEN_ARG1 = 0x100000, 365 NM_OPEN_ARG2 = 0x200000, 366 NM_OPEN_ARG3 = 0x400000, 367 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 368 }; 369 370 371 /* 372 * nm_close() closes and restores the port to its previous state 373 */ 374 375 static int nm_close(struct nm_desc *); 376 377 /* 378 * nm_mmap() do mmap or inherit from parent if the nr_arg2 379 * (memory block) matches. 380 */ 381 382 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 383 384 /* 385 * nm_inject() is the same as pcap_inject() 386 * nm_dispatch() is the same as pcap_dispatch() 387 * nm_nextpkt() is the same as pcap_next() 388 */ 389 390 static int nm_inject(struct nm_desc *, const void *, size_t); 391 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 392 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 393 394 #ifdef _WIN32 395 396 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 397 398 /* 399 * In windows we do not have yet native poll support, so we keep track 400 * of file descriptors associated to netmap ports to emulate poll on 401 * them and fall back on regular poll on other file descriptors. 
402 */ 403 struct win_netmap_fd_list { 404 struct win_netmap_fd_list *next; 405 int win_netmap_fd; 406 HANDLE win_netmap_handle; 407 }; 408 409 /* 410 * list head containing all the netmap opened fd and their 411 * windows HANDLE counterparts 412 */ 413 static struct win_netmap_fd_list *win_netmap_fd_list_head; 414 415 static void 416 win_insert_fd_record(int fd) 417 { 418 struct win_netmap_fd_list *curr; 419 420 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 421 if (fd == curr->win_netmap_fd) { 422 return; 423 } 424 } 425 curr = calloc(1, sizeof(*curr)); 426 curr->next = win_netmap_fd_list_head; 427 curr->win_netmap_fd = fd; 428 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 429 win_netmap_fd_list_head = curr; 430 } 431 432 void 433 win_remove_fd_record(int fd) 434 { 435 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 436 struct win_netmap_fd_list *prev = NULL; 437 for (; curr ; prev = curr, curr = curr->next) { 438 if (fd != curr->win_netmap_fd) 439 continue; 440 /* found the entry */ 441 if (prev == NULL) { /* we are freeing the first entry */ 442 win_netmap_fd_list_head = curr->next; 443 } else { 444 prev->next = curr->next; 445 } 446 free(curr); 447 break; 448 } 449 } 450 451 452 HANDLE 453 win_get_netmap_handle(int fd) 454 { 455 struct win_netmap_fd_list *curr; 456 457 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 458 if (fd == curr->win_netmap_fd) { 459 return curr->win_netmap_handle; 460 } 461 } 462 return NULL; 463 } 464 465 /* 466 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 467 */ 468 469 /* 470 * use this function only from netmap_user.h internal functions 471 * same as ioctl, returns 0 on success and -1 on error 472 */ 473 static int 474 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 475 { 476 DWORD bReturn = 0, szIn, szOut; 477 BOOL ioctlReturnStatus; 478 void *inParam = arg, *outParam = arg; 479 480 switch (ctlCode) { 481 case NETMAP_POLL: 482 szIn = 
sizeof(POLL_REQUEST_DATA); 483 szOut = sizeof(POLL_REQUEST_DATA); 484 break; 485 case NETMAP_MMAP: 486 szIn = 0; 487 szOut = sizeof(void*); 488 inParam = NULL; /* nothing on input */ 489 break; 490 case NIOCTXSYNC: 491 case NIOCRXSYNC: 492 szIn = 0; 493 szOut = 0; 494 break; 495 case NIOCREGIF: 496 szIn = sizeof(struct nmreq); 497 szOut = sizeof(struct nmreq); 498 break; 499 case NIOCCONFIG: 500 D("unsupported NIOCCONFIG!"); 501 return -1; 502 503 default: /* a regular ioctl */ 504 D("invalid ioctl %x on netmap fd", ctlCode); 505 return -1; 506 } 507 508 ioctlReturnStatus = DeviceIoControl(h, 509 ctlCode, inParam, szIn, 510 outParam, szOut, 511 &bReturn, NULL); 512 // XXX note windows returns 0 on error or async call, 1 on success 513 // we could call GetLastError() to figure out what happened 514 return ioctlReturnStatus ? 0 : -1; 515 } 516 517 /* 518 * this function is what must be called from user-space programs 519 * same as ioctl, returns 0 on success and -1 on error 520 */ 521 static int 522 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 523 { 524 HANDLE h = win_get_netmap_handle(fd); 525 526 if (h == NULL) { 527 return ioctl(fd, ctlCode, arg); 528 } else { 529 return win_nm_ioctl_internal(h, ctlCode, arg); 530 } 531 } 532 533 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 534 535 /* 536 * We cannot use the native mmap on windows 537 * The only parameter used is "fd", the other ones are just declared to 538 * make this signature comparable to the FreeBSD/Linux one 539 */ 540 static void * 541 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 542 { 543 HANDLE h = win_get_netmap_handle(fd); 544 545 if (h == NULL) { 546 return mmap(addr, length, prot, flags, fd, offset); 547 } else { 548 MEMORY_ENTRY ret; 549 550 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
551 NULL : ret.pUsermodeVirtualAddress; 552 } 553 } 554 555 #define mmap win32_mmap_emulated 556 557 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 558 559 static int 560 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 561 { 562 HANDLE h; 563 564 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 565 return poll(fds, nfds, timeout); 566 } else { 567 POLL_REQUEST_DATA prd; 568 569 prd.timeout = timeout; 570 prd.events = fds->events; 571 572 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 573 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 574 return -1; 575 } 576 return 1; 577 } 578 } 579 580 #define poll win_nm_poll 581 582 static int 583 win_nm_open(char* pathname, int flags) 584 { 585 586 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { 587 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 588 if (fd < 0) { 589 return -1; 590 } 591 592 win_insert_fd_record(fd); 593 return fd; 594 } else { 595 return open(pathname, flags); 596 } 597 } 598 599 #define open win_nm_open 600 601 static int 602 win_nm_close(int fd) 603 { 604 if (fd != -1) { 605 close(fd); 606 if (win_get_netmap_handle(fd) != NULL) { 607 win_remove_fd_record(fd); 608 } 609 } 610 return 0; 611 } 612 613 #define close win_nm_close 614 615 #endif /* _WIN32 */ 616 617 static int 618 nm_is_identifier(const char *s, const char *e) 619 { 620 for (; s != e; s++) { 621 if (!isalnum(*s) && *s != '_') { 622 return 0; 623 } 624 } 625 626 return 1; 627 } 628 629 #define MAXERRMSG 80 630 static int 631 nm_parse(const char *ifname, struct nm_desc *d, char *err) 632 { 633 int is_vale; 634 const char *port = NULL; 635 const char *vpname = NULL; 636 u_int namelen; 637 uint32_t nr_ringid = 0, nr_flags; 638 char errmsg[MAXERRMSG] = "", *tmp; 639 long num; 640 uint16_t nr_arg2 = 0; 641 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; 642 643 errno = 0; 644 645 is_vale = (ifname[0] == 'v'); 646 if (is_vale) { 647 port = 
index(ifname, ':'); 648 if (port == NULL) { 649 snprintf(errmsg, MAXERRMSG, 650 "missing ':' in vale name"); 651 goto fail; 652 } 653 654 if (!nm_is_identifier(ifname + 4, port)) { 655 snprintf(errmsg, MAXERRMSG, "invalid bridge name"); 656 goto fail; 657 } 658 659 vpname = ++port; 660 } else { 661 ifname += 7; 662 port = ifname; 663 } 664 665 /* scan for a separator */ 666 for (; *port && !index("-*^{}/@", *port); port++) 667 ; 668 669 if (is_vale && !nm_is_identifier(vpname, port)) { 670 snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); 671 goto fail; 672 } 673 674 namelen = port - ifname; 675 if (namelen >= sizeof(d->req.nr_name)) { 676 snprintf(errmsg, MAXERRMSG, "name too long"); 677 goto fail; 678 } 679 memcpy(d->req.nr_name, ifname, namelen); 680 d->req.nr_name[namelen] = '\0'; 681 682 p_state = P_START; 683 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 684 while (*port) { 685 switch (p_state) { 686 case P_START: 687 switch (*port) { 688 case '^': /* only SW ring */ 689 nr_flags = NR_REG_SW; 690 p_state = P_RNGSFXOK; 691 break; 692 case '*': /* NIC and SW */ 693 nr_flags = NR_REG_NIC_SW; 694 p_state = P_RNGSFXOK; 695 break; 696 case '-': /* one NIC ring pair */ 697 nr_flags = NR_REG_ONE_NIC; 698 p_state = P_GETNUM; 699 break; 700 case '{': /* pipe (master endpoint) */ 701 nr_flags = NR_REG_PIPE_MASTER; 702 p_state = P_GETNUM; 703 break; 704 case '}': /* pipe (slave endpoint) */ 705 nr_flags = NR_REG_PIPE_SLAVE; 706 p_state = P_GETNUM; 707 break; 708 case '/': /* start of flags */ 709 p_state = P_FLAGS; 710 break; 711 case '@': /* start of memid */ 712 p_state = P_MEMID; 713 break; 714 default: 715 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 716 goto fail; 717 } 718 port++; 719 break; 720 case P_RNGSFXOK: 721 switch (*port) { 722 case '/': 723 p_state = P_FLAGS; 724 break; 725 case '@': 726 p_state = P_MEMID; 727 break; 728 default: 729 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 730 goto fail; 731 } 732 
port++; 733 break; 734 case P_GETNUM: 735 num = strtol(port, &tmp, 10); 736 if (num < 0 || num >= NETMAP_RING_MASK) { 737 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 738 num, NETMAP_RING_MASK); 739 goto fail; 740 } 741 port = tmp; 742 nr_ringid = num & NETMAP_RING_MASK; 743 p_state = P_RNGSFXOK; 744 break; 745 case P_FLAGS: 746 case P_FLAGSOK: 747 if (*port == '@') { 748 port++; 749 p_state = P_MEMID; 750 break; 751 } 752 switch (*port) { 753 case 'x': 754 nr_flags |= NR_EXCLUSIVE; 755 break; 756 case 'z': 757 nr_flags |= NR_ZCOPY_MON; 758 break; 759 case 't': 760 nr_flags |= NR_MONITOR_TX; 761 break; 762 case 'r': 763 nr_flags |= NR_MONITOR_RX; 764 break; 765 case 'R': 766 nr_flags |= NR_RX_RINGS_ONLY; 767 break; 768 case 'T': 769 nr_flags |= NR_TX_RINGS_ONLY; 770 break; 771 default: 772 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 773 goto fail; 774 } 775 port++; 776 p_state = P_FLAGSOK; 777 break; 778 case P_MEMID: 779 if (nr_arg2 != 0) { 780 snprintf(errmsg, MAXERRMSG, "double setting of memid"); 781 goto fail; 782 } 783 num = strtol(port, &tmp, 10); 784 if (num <= 0) { 785 snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); 786 goto fail; 787 } 788 port = tmp; 789 nr_arg2 = num; 790 p_state = P_RNGSFXOK; 791 break; 792 } 793 } 794 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 795 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 796 goto fail; 797 } 798 ND("flags: %s %s %s %s", 799 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 800 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 801 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 802 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 803 804 d->req.nr_flags |= nr_flags; 805 d->req.nr_ringid |= nr_ringid; 806 d->req.nr_arg2 = nr_arg2; 807 808 d->self = d; 809 810 return 0; 811 fail: 812 if (!errno) 813 errno = EINVAL; 814 if (err) 815 strncpy(err, errmsg, MAXERRMSG); 816 return -1; 817 } 818 819 /* 820 * Try to open, return descriptor if successful, NULL otherwise. 821 * An invalid netmap name will return errno = 0; 822 * You can pass a pointer to a pre-filled nm_desc to add special 823 * parameters. Flags is used as follows 824 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 825 * if the nr_arg2 (memory block) matches. 826 * NM_OPEN_ARG1 use req.nr_arg1 from arg 827 * NM_OPEN_ARG2 use req.nr_arg2 from arg 828 * NM_OPEN_RING_CFG user ring config from arg 829 */ 830 static struct nm_desc * 831 nm_open(const char *ifname, const struct nmreq *req, 832 uint64_t new_flags, const struct nm_desc *arg) 833 { 834 struct nm_desc *d = NULL; 835 const struct nm_desc *parent = arg; 836 char errmsg[MAXERRMSG] = ""; 837 uint32_t nr_reg; 838 839 if (strncmp(ifname, "netmap:", 7) && 840 strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { 841 errno = 0; /* name not recognised, not an error */ 842 return NULL; 843 } 844 845 d = (struct nm_desc *)calloc(1, sizeof(*d)); 846 if (d == NULL) { 847 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 848 errno = ENOMEM; 849 return NULL; 850 } 851 d->self = d; /* set this early so nm_close() works */ 852 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 853 if (d->fd < 0) { 854 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 855 goto fail; 856 } 857 858 if (req) 859 d->req = *req; 860 861 if (!(new_flags & NM_OPEN_IFNAME)) { 862 if (nm_parse(ifname, d, errmsg) < 0) 863 goto fail; 864 } 865 866 d->req.nr_version = NETMAP_API; 867 d->req.nr_ringid &= NETMAP_RING_MASK; 868 869 /* optionally import info from parent */ 870 if (IS_NETMAP_DESC(parent) && new_flags) { 871 if (new_flags & NM_OPEN_ARG1) 872 D("overriding 
ARG1 %d", parent->req.nr_arg1); 873 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 874 parent->req.nr_arg1 : 4; 875 if (new_flags & NM_OPEN_ARG2) { 876 D("overriding ARG2 %d", parent->req.nr_arg2); 877 d->req.nr_arg2 = parent->req.nr_arg2; 878 } 879 if (new_flags & NM_OPEN_ARG3) 880 D("overriding ARG3 %d", parent->req.nr_arg3); 881 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 882 parent->req.nr_arg3 : 0; 883 if (new_flags & NM_OPEN_RING_CFG) { 884 D("overriding RING_CFG"); 885 d->req.nr_tx_slots = parent->req.nr_tx_slots; 886 d->req.nr_rx_slots = parent->req.nr_rx_slots; 887 d->req.nr_tx_rings = parent->req.nr_tx_rings; 888 d->req.nr_rx_rings = parent->req.nr_rx_rings; 889 } 890 if (new_flags & NM_OPEN_IFNAME) { 891 D("overriding ifname %s ringid 0x%x flags 0x%x", 892 parent->req.nr_name, parent->req.nr_ringid, 893 parent->req.nr_flags); 894 memcpy(d->req.nr_name, parent->req.nr_name, 895 sizeof(d->req.nr_name)); 896 d->req.nr_ringid = parent->req.nr_ringid; 897 d->req.nr_flags = parent->req.nr_flags; 898 } 899 } 900 /* add the *XPOLL flags */ 901 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 902 903 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 904 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 905 goto fail; 906 } 907 908 nr_reg = d->req.nr_flags & NR_REG_MASK; 909 910 if (nr_reg == NR_REG_SW) { /* host stack */ 911 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 912 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 913 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 914 d->first_tx_ring = 0; 915 d->first_rx_ring = 0; 916 d->last_tx_ring = d->req.nr_tx_rings - 1; 917 d->last_rx_ring = d->req.nr_rx_rings - 1; 918 } else if (nr_reg == NR_REG_NIC_SW) { 919 d->first_tx_ring = 0; 920 d->first_rx_ring = 0; 921 d->last_tx_ring = d->req.nr_tx_rings; 922 d->last_rx_ring = d->req.nr_rx_rings; 923 } else if (nr_reg == NR_REG_ONE_NIC) { 924 /* XXX check validity */ 925 d->first_tx_ring = d->last_tx_ring = 926 
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 927 } else { /* pipes */ 928 d->first_tx_ring = d->last_tx_ring = 0; 929 d->first_rx_ring = d->last_rx_ring = 0; 930 } 931 932 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 933 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 934 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 935 goto fail; 936 } 937 938 939 #ifdef DEBUG_NETMAP_USER 940 { /* debugging code */ 941 int i; 942 943 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 944 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 945 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 946 for (i = 0; i <= d->req.nr_tx_rings; i++) { 947 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 948 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 949 } 950 for (i = 0; i <= d->req.nr_rx_rings; i++) { 951 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 952 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 953 } 954 } 955 #endif /* debugging */ 956 957 d->cur_tx_ring = d->first_tx_ring; 958 d->cur_rx_ring = d->first_rx_ring; 959 return d; 960 961 fail: 962 nm_close(d); 963 if (errmsg[0]) 964 D("%s %s", errmsg, ifname); 965 if (errno == 0) 966 errno = EINVAL; 967 return NULL; 968 } 969 970 971 static int 972 nm_close(struct nm_desc *d) 973 { 974 /* 975 * ugly trick to avoid unused warnings 976 */ 977 static void *__xxzt[] __attribute__ ((unused)) = 978 { (void *)nm_open, (void *)nm_inject, 979 (void *)nm_dispatch, (void *)nm_nextpkt } ; 980 981 if (d == NULL || d->self != d) 982 return EINVAL; 983 if (d->done_mmap && d->mem) 984 munmap(d->mem, d->memsize); 985 if (d->fd != -1) { 986 close(d->fd); 987 } 988 989 bzero((char *)d, sizeof(*d)); 990 free(d); 991 return 0; 992 } 993 994 995 static int 996 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 997 { 998 if (d->done_mmap) 999 return 0; 1000 1001 if (IS_NETMAP_DESC(parent) && parent->mem && 1002 
parent->req.nr_arg2 == d->req.nr_arg2) { 1003 /* do not mmap, inherit from parent */ 1004 D("do not mmap, inherit from parent"); 1005 d->memsize = parent->memsize; 1006 d->mem = parent->mem; 1007 } else { 1008 /* XXX TODO: check if memsize is too large (or there is overflow) */ 1009 d->memsize = d->req.nr_memsize; 1010 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 1011 d->fd, 0); 1012 if (d->mem == MAP_FAILED) { 1013 goto fail; 1014 } 1015 d->done_mmap = 1; 1016 } 1017 { 1018 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 1019 struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); 1020 if ((void *)r == (void *)nifp) { 1021 /* the descriptor is open for TX only */ 1022 r = NETMAP_TXRING(nifp, d->first_tx_ring); 1023 } 1024 1025 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 1026 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 1027 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 1028 *(void **)(uintptr_t)&d->buf_end = 1029 (char *)d->mem + d->memsize; 1030 } 1031 1032 return 0; 1033 1034 fail: 1035 return EINVAL; 1036 } 1037 1038 /* 1039 * Same prototype as pcap_inject(), only need to cast. 
1040 */ 1041 static int 1042 nm_inject(struct nm_desc *d, const void *buf, size_t size) 1043 { 1044 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1, 1045 ri = d->cur_tx_ring; 1046 1047 for (c = 0; c < n ; c++, ri++) { 1048 /* compute current ring to use */ 1049 struct netmap_ring *ring; 1050 uint32_t i, j, idx; 1051 size_t rem; 1052 1053 if (ri > d->last_tx_ring) 1054 ri = d->first_tx_ring; 1055 ring = NETMAP_TXRING(d->nifp, ri); 1056 rem = size; 1057 j = ring->cur; 1058 while (rem > ring->nr_buf_size && j != ring->tail) { 1059 rem -= ring->nr_buf_size; 1060 j = nm_ring_next(ring, j); 1061 } 1062 if (j == ring->tail && rem > 0) 1063 continue; 1064 i = ring->cur; 1065 while (i != j) { 1066 idx = ring->slot[i].buf_idx; 1067 ring->slot[i].len = ring->nr_buf_size; 1068 ring->slot[i].flags = NS_MOREFRAG; 1069 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); 1070 i = nm_ring_next(ring, i); 1071 buf = (const char *)buf + ring->nr_buf_size; 1072 } 1073 idx = ring->slot[i].buf_idx; 1074 ring->slot[i].len = rem; 1075 ring->slot[i].flags = 0; 1076 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); 1077 ring->head = ring->cur = nm_ring_next(ring, i); 1078 d->cur_tx_ring = ri; 1079 return size; 1080 } 1081 return 0; /* fail */ 1082 } 1083 1084 1085 /* 1086 * Same prototype as pcap_dispatch(), only need to cast. 
1087 */ 1088 static int 1089 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 1090 { 1091 int n = d->last_rx_ring - d->first_rx_ring + 1; 1092 int c, got = 0, ri = d->cur_rx_ring; 1093 d->hdr.buf = NULL; 1094 d->hdr.flags = NM_MORE_PKTS; 1095 d->hdr.d = d; 1096 1097 if (cnt == 0) 1098 cnt = -1; 1099 /* cnt == -1 means infinite, but rings have a finite amount 1100 * of buffers and the int is large enough that we never wrap, 1101 * so we can omit checking for -1 1102 */ 1103 for (c=0; c < n && cnt != got; c++, ri++) { 1104 /* compute current ring to use */ 1105 struct netmap_ring *ring; 1106 1107 if (ri > d->last_rx_ring) 1108 ri = d->first_rx_ring; 1109 ring = NETMAP_RXRING(d->nifp, ri); 1110 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 1111 u_int idx, i; 1112 u_char *oldbuf; 1113 struct netmap_slot *slot; 1114 if (d->hdr.buf) { /* from previous round */ 1115 cb(arg, &d->hdr, d->hdr.buf); 1116 } 1117 i = ring->cur; 1118 slot = &ring->slot[i]; 1119 idx = slot->buf_idx; 1120 /* d->cur_rx_ring doesn't change inside this loop, but 1121 * set it here, so it reflects d->hdr.buf's ring */ 1122 d->cur_rx_ring = ri; 1123 d->hdr.slot = slot; 1124 oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); 1125 // __builtin_prefetch(buf); 1126 d->hdr.len = d->hdr.caplen = slot->len; 1127 while (slot->flags & NS_MOREFRAG) { 1128 u_char *nbuf; 1129 u_int oldlen = slot->len; 1130 i = nm_ring_next(ring, i); 1131 slot = &ring->slot[i]; 1132 d->hdr.len += slot->len; 1133 nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); 1134 if (oldbuf != NULL && (uint32_t)(nbuf - oldbuf) == ring->nr_buf_size && 1135 oldlen == ring->nr_buf_size) { 1136 d->hdr.caplen += slot->len; 1137 oldbuf = nbuf; 1138 } else { 1139 oldbuf = NULL; 1140 } 1141 } 1142 d->hdr.ts = ring->ts; 1143 ring->head = ring->cur = nm_ring_next(ring, i); 1144 } 1145 } 1146 if (d->hdr.buf) { /* from previous round */ 1147 d->hdr.flags = 0; 1148 cb(arg, &d->hdr, d->hdr.buf); 1149 } 1150 return got; 1151 } 1152 
1153 static u_char * 1154 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 1155 { 1156 int ri = d->cur_rx_ring; 1157 1158 do { 1159 /* compute current ring to use */ 1160 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 1161 if (!nm_ring_empty(ring)) { 1162 u_int i = ring->cur; 1163 u_int idx = ring->slot[i].buf_idx; 1164 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 1165 1166 // __builtin_prefetch(buf); 1167 hdr->ts = ring->ts; 1168 hdr->len = hdr->caplen = ring->slot[i].len; 1169 ring->cur = nm_ring_next(ring, i); 1170 /* we could postpone advancing head if we want 1171 * to hold the buffer. This can be supported in 1172 * the future. 1173 */ 1174 ring->head = ring->cur; 1175 d->cur_rx_ring = ri; 1176 return buf; 1177 } 1178 ri++; 1179 if (ri > d->last_rx_ring) 1180 ri = d->first_rx_ring; 1181 } while (ri != d->cur_rx_ring); 1182 return NULL; /* nothing found */ 1183 } 1184 1185 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1186 1187 #endif /* NETMAP_WITH_LIBS */ 1188 1189 #endif /* _NET_NETMAP_USER_H_ */ 1190