1 /* 2 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 /* $FreeBSD$ */ 27 28 #include <sys/types.h> 29 #include <sys/module.h> 30 #include <sys/errno.h> 31 #include <sys/param.h> /* defines used in kernel.h */ 32 #include <sys/poll.h> /* POLLIN, POLLOUT */ 33 #include <sys/kernel.h> /* types used in module initialization */ 34 #include <sys/conf.h> /* DEV_MODULE */ 35 #include <sys/endian.h> 36 37 #include <sys/rwlock.h> 38 39 #include <vm/vm.h> /* vtophys */ 40 #include <vm/pmap.h> /* vtophys */ 41 #include <vm/vm_param.h> 42 #include <vm/vm_object.h> 43 #include <vm/vm_page.h> 44 #include <vm/vm_pager.h> 45 #include <vm/uma.h> 46 47 48 #include <sys/malloc.h> 49 #include <sys/socket.h> /* sockaddrs */ 50 #include <sys/selinfo.h> 51 #include <net/if.h> 52 #include <net/if_var.h> 53 #include <net/if_types.h> /* IFT_ETHER */ 54 #include <net/ethernet.h> /* ether_ifdetach */ 55 #include <net/if_dl.h> /* LLADDR */ 56 #include <machine/bus.h> /* bus_dmamap_* */ 57 #include <netinet/in.h> /* in6_cksum_pseudo() */ 58 #include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */ 59 60 #include <net/netmap.h> 61 #include <dev/netmap/netmap_kern.h> 62 #include <dev/netmap/netmap_mem2.h> 63 64 65 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ 66 67 rawsum_t 68 nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) 69 { 70 /* TODO XXX please use the FreeBSD implementation for this. */ 71 uint16_t *words = (uint16_t *)data; 72 int nw = len / 2; 73 int i; 74 75 for (i = 0; i < nw; i++) 76 cur_sum += be16toh(words[i]); 77 78 if (len & 1) 79 cur_sum += (data[len-1] << 8); 80 81 return cur_sum; 82 } 83 84 /* Fold a raw checksum: 'cur_sum' is in host byte order, while the 85 * return value is in network byte order. 86 */ 87 uint16_t 88 nm_csum_fold(rawsum_t cur_sum) 89 { 90 /* TODO XXX please use the FreeBSD implementation for this. */ 91 while (cur_sum >> 16) 92 cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); 93 94 return htobe16((~cur_sum) & 0xFFFF); 95 } 96 97 uint16_t nm_csum_ipv4(struct nm_iphdr *iph) 98 { 99 #if 0 100 return in_cksum_hdr((void *)iph); 101 #else 102 return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); 103 #endif 104 } 105 106 void 107 nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, 108 size_t datalen, uint16_t *check) 109 { 110 #ifdef INET 111 uint16_t pseudolen = datalen + iph->protocol; 112 113 /* Compute and insert the pseudo-header cheksum. */ 114 *check = in_pseudo(iph->saddr, iph->daddr, 115 htobe16(pseudolen)); 116 /* Compute the checksum on TCP/UDP header + payload 117 * (includes the pseudo-header). 118 */ 119 *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 120 #else 121 static int notsupported = 0; 122 if (!notsupported) { 123 notsupported = 1; 124 D("inet4 segmentation not supported"); 125 } 126 #endif 127 } 128 129 void 130 nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, 131 size_t datalen, uint16_t *check) 132 { 133 #ifdef INET6 134 *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); 135 *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 136 #else 137 static int notsupported = 0; 138 if (!notsupported) { 139 notsupported = 1; 140 D("inet6 segmentation not supported"); 141 } 142 #endif 143 } 144 145 146 /* 147 * Intercept the rx routine in the standard device driver. 148 * Second argument is non-zero to intercept, 0 to restore 149 */ 150 int 151 netmap_catch_rx(struct netmap_adapter *na, int intercept) 152 { 153 struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 154 struct ifnet *ifp = na->ifp; 155 156 if (intercept) { 157 if (gna->save_if_input) { 158 D("cannot intercept again"); 159 return EINVAL; /* already set */ 160 } 161 gna->save_if_input = ifp->if_input; 162 ifp->if_input = generic_rx_handler; 163 } else { 164 if (!gna->save_if_input){ 165 D("cannot restore"); 166 return EINVAL; /* not saved */ 167 } 168 ifp->if_input = gna->save_if_input; 169 gna->save_if_input = NULL; 170 } 171 172 return 0; 173 } 174 175 176 /* 177 * Intercept the packet steering routine in the tx path, 178 * so that we can decide which queue is used for an mbuf. 179 * Second argument is non-zero to intercept, 0 to restore. 180 * On freebsd we just intercept if_transmit. 181 */ 182 void 183 netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) 184 { 185 struct netmap_adapter *na = &gna->up.up; 186 struct ifnet *ifp = na->ifp; 187 188 if (enable) { 189 na->if_transmit = ifp->if_transmit; 190 ifp->if_transmit = netmap_transmit; 191 } else { 192 ifp->if_transmit = na->if_transmit; 193 } 194 } 195 196 197 /* 198 * Transmit routine used by generic_netmap_txsync(). Returns 0 on success 199 * and non-zero on error (which may be packet drops or other errors). 200 * addr and len identify the netmap buffer, m is the (preallocated) 201 * mbuf to use for transmissions. 202 * 203 * We should add a reference to the mbuf so the m_freem() at the end 204 * of the transmission does not consume resources. 205 * 206 * On FreeBSD, and on multiqueue cards, we can force the queue using 207 * if ((m->m_flags & M_FLOWID) != 0) 208 * i = m->m_pkthdr.flowid % adapter->num_queues; 209 * else 210 * i = curcpu % adapter->num_queues; 211 * 212 */ 213 int 214 generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, 215 void *addr, u_int len, u_int ring_nr) 216 { 217 int ret; 218 219 /* 220 * The mbuf should be a cluster from our special pool, 221 * so we do not need to do an m_copyback but just copy 222 * (and eventually, just reference the netmap buffer) 223 */ 224 225 if (GET_MBUF_REFCNT(m) != 1) { 226 D("invalid refcnt %d for %p", 227 GET_MBUF_REFCNT(m), m); 228 panic("in generic_xmit_frame"); 229 } 230 // XXX the ext_size check is unnecessary if we link the netmap buf 231 if (m->m_ext.ext_size < len) { 232 RD(5, "size %d < len %d", m->m_ext.ext_size, len); 233 len = m->m_ext.ext_size; 234 } 235 if (0) { /* XXX seems to have negligible benefits */ 236 m->m_ext.ext_buf = m->m_data = addr; 237 } else { 238 bcopy(addr, m->m_data, len); 239 } 240 m->m_len = m->m_pkthdr.len = len; 241 // inc refcount. All ours, we could skip the atomic 242 atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1); 243 m->m_flags |= M_FLOWID; 244 m->m_pkthdr.flowid = ring_nr; 245 m->m_pkthdr.rcvif = ifp; /* used for tx notification */ 246 ret = NA(ifp)->if_transmit(ifp, m); 247 return ret; 248 } 249 250 251 #if __FreeBSD_version >= 1100005 252 struct netmap_adapter * 253 netmap_getna(if_t ifp) 254 { 255 return (NA((struct ifnet *)ifp)); 256 } 257 #endif /* __FreeBSD_version >= 1100005 */ 258 259 /* 260 * The following two functions are empty until we have a generic 261 * way to extract the info from the ifp 262 */ 263 int 264 generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) 265 { 266 D("called, in tx %d rx %d", *tx, *rx); 267 return 0; 268 } 269 270 271 void 272 generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) 273 { 274 D("called, in txq %d rxq %d", *txq, *rxq); 275 *txq = netmap_generic_rings; 276 *rxq = netmap_generic_rings; 277 } 278 279 280 void 281 netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) 282 { 283 ND("called"); 284 mit->mit_pending = 0; 285 mit->mit_ring_idx = idx; 286 mit->mit_na = na; 287 } 288 289 290 void 291 netmap_mitigation_start(struct nm_generic_mit *mit) 292 { 293 ND("called"); 294 } 295 296 297 void 298 netmap_mitigation_restart(struct nm_generic_mit *mit) 299 { 300 ND("called"); 301 } 302 303 304 int 305 netmap_mitigation_active(struct nm_generic_mit *mit) 306 { 307 ND("called"); 308 return 0; 309 } 310 311 312 void 313 netmap_mitigation_cleanup(struct nm_generic_mit *mit) 314 { 315 ND("called"); 316 } 317 318 static int 319 nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) 320 { 321 return EINVAL; 322 } 323 324 static void 325 nm_vi_start(struct ifnet *ifp) 326 { 327 panic("nm_vi_start() must not be called"); 328 } 329 330 /* 331 * Index manager of persistent virtual interfaces. 332 * It is used to decide the lowest byte of the MAC address. 333 * We use the same algorithm with management of bridge port index. 334 */ 335 #define NM_VI_MAX 255 336 static struct { 337 uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */ 338 uint8_t active; 339 struct mtx lock; 340 } nm_vi_indices; 341 342 void 343 nm_vi_init_index(void) 344 { 345 int i; 346 for (i = 0; i < NM_VI_MAX; i++) 347 nm_vi_indices.index[i] = i; 348 nm_vi_indices.active = 0; 349 mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF); 350 } 351 352 /* return -1 if no index available */ 353 static int 354 nm_vi_get_index(void) 355 { 356 int ret; 357 358 mtx_lock(&nm_vi_indices.lock); 359 ret = nm_vi_indices.active == NM_VI_MAX ? -1 : 360 nm_vi_indices.index[nm_vi_indices.active++]; 361 mtx_unlock(&nm_vi_indices.lock); 362 return ret; 363 } 364 365 static void 366 nm_vi_free_index(uint8_t val) 367 { 368 int i, lim; 369 370 mtx_lock(&nm_vi_indices.lock); 371 lim = nm_vi_indices.active; 372 for (i = 0; i < lim; i++) { 373 if (nm_vi_indices.index[i] == val) { 374 /* swap index[lim-1] and j */ 375 int tmp = nm_vi_indices.index[lim-1]; 376 nm_vi_indices.index[lim-1] = val; 377 nm_vi_indices.index[i] = tmp; 378 nm_vi_indices.active--; 379 break; 380 } 381 } 382 if (lim == nm_vi_indices.active) 383 D("funny, index %u didn't found", val); 384 mtx_unlock(&nm_vi_indices.lock); 385 } 386 #undef NM_VI_MAX 387 388 /* 389 * Implementation of a netmap-capable virtual interface that 390 * registered to the system. 391 * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9. 392 * 393 * Note: Linux sets refcount to 0 on allocation of net_device, 394 * then increments it on registration to the system. 395 * FreeBSD sets refcount to 1 on if_alloc(), and does not 396 * increment this refcount on if_attach(). 397 */ 398 int 399 nm_vi_persist(const char *name, struct ifnet **ret) 400 { 401 struct ifnet *ifp; 402 u_short macaddr_hi; 403 uint32_t macaddr_mid; 404 u_char eaddr[6]; 405 int unit = nm_vi_get_index(); /* just to decide MAC address */ 406 407 if (unit < 0) 408 return EBUSY; 409 /* 410 * We use the same MAC address generation method with tap 411 * except for the highest octet is 00:be instead of 00:bd 412 */ 413 macaddr_hi = htons(0x00be); /* XXX tap + 1 */ 414 macaddr_mid = (uint32_t) ticks; 415 bcopy(&macaddr_hi, eaddr, sizeof(short)); 416 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 417 eaddr[5] = (uint8_t)unit; 418 419 ifp = if_alloc(IFT_ETHER); 420 if (ifp == NULL) { 421 D("if_alloc failed"); 422 return ENOMEM; 423 } 424 if_initname(ifp, name, IF_DUNIT_NONE); 425 ifp->if_mtu = 65536; 426 ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; 427 ifp->if_init = (void *)nm_vi_dummy; 428 ifp->if_ioctl = nm_vi_dummy; 429 ifp->if_start = nm_vi_start; 430 ifp->if_mtu = ETHERMTU; 431 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 432 ifp->if_capabilities |= IFCAP_LINKSTATE; 433 ifp->if_capenable |= IFCAP_LINKSTATE; 434 435 ether_ifattach(ifp, eaddr); 436 *ret = ifp; 437 return 0; 438 } 439 /* unregister from the system and drop the final refcount */ 440 void 441 nm_vi_detach(struct ifnet *ifp) 442 { 443 nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); 444 ether_ifdetach(ifp); 445 if_free(ifp); 446 } 447 448 /* 449 * In order to track whether pages are still mapped, we hook into 450 * the standard cdev_pager and intercept the constructor and 451 * destructor. 452 */ 453 454 struct netmap_vm_handle_t { 455 struct cdev *dev; 456 struct netmap_priv_d *priv; 457 }; 458 459 460 static int 461 netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 462 vm_ooffset_t foff, struct ucred *cred, u_short *color) 463 { 464 struct netmap_vm_handle_t *vmh = handle; 465 466 if (netmap_verbose) 467 D("handle %p size %jd prot %d foff %jd", 468 handle, (intmax_t)size, prot, (intmax_t)foff); 469 dev_ref(vmh->dev); 470 return 0; 471 } 472 473 474 static void 475 netmap_dev_pager_dtor(void *handle) 476 { 477 struct netmap_vm_handle_t *vmh = handle; 478 struct cdev *dev = vmh->dev; 479 struct netmap_priv_d *priv = vmh->priv; 480 481 if (netmap_verbose) 482 D("handle %p", handle); 483 netmap_dtor(priv); 484 free(vmh, M_DEVBUF); 485 dev_rel(dev); 486 } 487 488 489 static int 490 netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, 491 int prot, vm_page_t *mres) 492 { 493 struct netmap_vm_handle_t *vmh = object->handle; 494 struct netmap_priv_d *priv = vmh->priv; 495 vm_paddr_t paddr; 496 vm_page_t page; 497 vm_memattr_t memattr; 498 vm_pindex_t pidx; 499 500 ND("object %p offset %jd prot %d mres %p", 501 object, (intmax_t)offset, prot, mres); 502 memattr = object->memattr; 503 pidx = OFF_TO_IDX(offset); 504 paddr = netmap_mem_ofstophys(priv->np_mref, offset); 505 if (paddr == 0) 506 return VM_PAGER_FAIL; 507 508 if (((*mres)->flags & PG_FICTITIOUS) != 0) { 509 /* 510 * If the passed in result page is a fake page, update it with 511 * the new physical address. 512 */ 513 page = *mres; 514 vm_page_updatefake(page, paddr, memattr); 515 } else { 516 /* 517 * Replace the passed in reqpage page with our own fake page and 518 * free up the all of the original pages. 519 */ 520 #ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ 521 #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK 522 #define VM_OBJECT_WLOCK VM_OBJECT_LOCK 523 #endif /* VM_OBJECT_WUNLOCK */ 524 525 VM_OBJECT_WUNLOCK(object); 526 page = vm_page_getfake(paddr, memattr); 527 VM_OBJECT_WLOCK(object); 528 vm_page_lock(*mres); 529 vm_page_free(*mres); 530 vm_page_unlock(*mres); 531 *mres = page; 532 vm_page_insert(page, object, pidx); 533 } 534 page->valid = VM_PAGE_BITS_ALL; 535 return (VM_PAGER_OK); 536 } 537 538 539 static struct cdev_pager_ops netmap_cdev_pager_ops = { 540 .cdev_pg_ctor = netmap_dev_pager_ctor, 541 .cdev_pg_dtor = netmap_dev_pager_dtor, 542 .cdev_pg_fault = netmap_dev_pager_fault, 543 }; 544 545 546 static int 547 netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, 548 vm_size_t objsize, vm_object_t *objp, int prot) 549 { 550 int error; 551 struct netmap_vm_handle_t *vmh; 552 struct netmap_priv_d *priv; 553 vm_object_t obj; 554 555 if (netmap_verbose) 556 D("cdev %p foff %jd size %jd objp %p prot %d", cdev, 557 (intmax_t )*foff, (intmax_t )objsize, objp, prot); 558 559 vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, 560 M_NOWAIT | M_ZERO); 561 if (vmh == NULL) 562 return ENOMEM; 563 vmh->dev = cdev; 564 565 NMG_LOCK(); 566 error = devfs_get_cdevpriv((void**)&priv); 567 if (error) 568 goto err_unlock; 569 vmh->priv = priv; 570 priv->np_refcount++; 571 NMG_UNLOCK(); 572 573 error = netmap_get_memory(priv); 574 if (error) 575 goto err_deref; 576 577 obj = cdev_pager_allocate(vmh, OBJT_DEVICE, 578 &netmap_cdev_pager_ops, objsize, prot, 579 *foff, NULL); 580 if (obj == NULL) { 581 D("cdev_pager_allocate failed"); 582 error = EINVAL; 583 goto err_deref; 584 } 585 586 *objp = obj; 587 return 0; 588 589 err_deref: 590 NMG_LOCK(); 591 priv->np_refcount--; 592 err_unlock: 593 NMG_UNLOCK(); 594 // err: 595 free(vmh, M_DEVBUF); 596 return error; 597 } 598 599 600 // XXX can we remove this ? 601 static int 602 netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 603 { 604 if (netmap_verbose) 605 D("dev %p fflag 0x%x devtype %d td %p", 606 dev, fflag, devtype, td); 607 return 0; 608 } 609 610 611 static int 612 netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 613 { 614 struct netmap_priv_d *priv; 615 int error; 616 617 (void)dev; 618 (void)oflags; 619 (void)devtype; 620 (void)td; 621 622 // XXX wait or nowait ? 623 priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 624 M_NOWAIT | M_ZERO); 625 if (priv == NULL) 626 return ENOMEM; 627 628 error = devfs_set_cdevpriv(priv, netmap_dtor); 629 if (error) 630 return error; 631 632 priv->np_refcount = 1; 633 634 return 0; 635 } 636 637 /******************** kqueue support ****************/ 638 639 /* 640 * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. 641 * We use a non-zero argument to distinguish the call from the one 642 * in kevent_scan() which instead also needs to run netmap_poll(). 643 * The knote uses a global mutex for the time being. We might 644 * try to reuse the one in the si, but it is not allocated 645 * permanently so it might be a bit tricky. 646 * 647 * The *kqfilter function registers one or another f_event 648 * depending on read or write mode. 649 * In the call to f_event() td_fpop is NULL so any child function 650 * calling devfs_get_cdevpriv() would fail - and we need it in 651 * netmap_poll(). As a workaround we store priv into kn->kn_hook 652 * and pass it as first argument to netmap_poll(), which then 653 * uses the failure to tell that we are called from f_event() 654 * and do not need the selrecord(). 655 */ 656 657 void freebsd_selwakeup(struct selinfo *si, int pri); 658 659 void 660 freebsd_selwakeup(struct selinfo *si, int pri) 661 { 662 if (netmap_verbose) 663 D("on knote %p", &si->si_note); 664 selwakeuppri(si, pri); 665 /* use a non-zero hint to tell the notification from the 666 * call done in kqueue_scan() which uses 0 667 */ 668 KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */); 669 } 670 671 static void 672 netmap_knrdetach(struct knote *kn) 673 { 674 struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 675 struct selinfo *si = priv->np_rxsi; 676 677 D("remove selinfo %p", si); 678 knlist_remove(&si->si_note, kn, 0); 679 } 680 681 static void 682 netmap_knwdetach(struct knote *kn) 683 { 684 struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 685 struct selinfo *si = priv->np_txsi; 686 687 D("remove selinfo %p", si); 688 knlist_remove(&si->si_note, kn, 0); 689 } 690 691 /* 692 * callback from notifies (generated externally) and our 693 * calls to kevent(). The former we just return 1 (ready) 694 * since we do not know better. 695 * In the latter we call netmap_poll and return 0/1 accordingly. 696 */ 697 static int 698 netmap_knrw(struct knote *kn, long hint, int events) 699 { 700 struct netmap_priv_d *priv; 701 int revents; 702 703 if (hint != 0) { 704 ND(5, "call from notify"); 705 return 1; /* assume we are ready */ 706 } 707 priv = kn->kn_hook; 708 /* the notification may come from an external thread, 709 * in which case we do not want to run the netmap_poll 710 * This should be filtered above, but check just in case. 711 */ 712 if (curthread != priv->np_td) { /* should not happen */ 713 RD(5, "curthread changed %p %p", curthread, priv->np_td); 714 return 1; 715 } else { 716 revents = netmap_poll((void *)priv, events, curthread); 717 return (events & revents) ? 1 : 0; 718 } 719 } 720 721 static int 722 netmap_knread(struct knote *kn, long hint) 723 { 724 return netmap_knrw(kn, hint, POLLIN); 725 } 726 727 static int 728 netmap_knwrite(struct knote *kn, long hint) 729 { 730 return netmap_knrw(kn, hint, POLLOUT); 731 } 732 733 static struct filterops netmap_rfiltops = { 734 .f_isfd = 1, 735 .f_detach = netmap_knrdetach, 736 .f_event = netmap_knread, 737 }; 738 739 static struct filterops netmap_wfiltops = { 740 .f_isfd = 1, 741 .f_detach = netmap_knwdetach, 742 .f_event = netmap_knwrite, 743 }; 744 745 746 /* 747 * This is called when a thread invokes kevent() to record 748 * a change in the configuration of the kqueue(). 749 * The 'priv' should be the same as in the netmap device. 750 */ 751 static int 752 netmap_kqfilter(struct cdev *dev, struct knote *kn) 753 { 754 struct netmap_priv_d *priv; 755 int error; 756 struct netmap_adapter *na; 757 struct selinfo *si; 758 int ev = kn->kn_filter; 759 760 if (ev != EVFILT_READ && ev != EVFILT_WRITE) { 761 D("bad filter request %d", ev); 762 return 1; 763 } 764 error = devfs_get_cdevpriv((void**)&priv); 765 if (error) { 766 D("device not yet setup"); 767 return 1; 768 } 769 na = priv->np_na; 770 if (na == NULL) { 771 D("no netmap adapter for this file descriptor"); 772 return 1; 773 } 774 /* the si is indicated in the priv */ 775 si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; 776 // XXX lock(priv) ? 777 kn->kn_fop = (ev == EVFILT_WRITE) ? 778 &netmap_wfiltops : &netmap_rfiltops; 779 kn->kn_hook = priv; 780 knlist_add(&si->si_note, kn, 1); 781 // XXX unlock(priv) 782 ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s", 783 na, na->ifp->if_xname, curthread, priv, kn, 784 priv->np_nifp, 785 kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH"); 786 return 0; 787 } 788 789 struct cdevsw netmap_cdevsw = { 790 .d_version = D_VERSION, 791 .d_name = "netmap", 792 .d_open = netmap_open, 793 .d_mmap_single = netmap_mmap_single, 794 .d_ioctl = netmap_ioctl, 795 .d_poll = netmap_poll, 796 .d_kqfilter = netmap_kqfilter, 797 .d_close = netmap_close, 798 }; 799 /*--- end of kqueue support ----*/ 800 801 /* 802 * Kernel entry point. 803 * 804 * Initialize/finalize the module and return. 805 * 806 * Return 0 on success, errno on failure. 807 */ 808 static int 809 netmap_loader(__unused struct module *module, int event, __unused void *arg) 810 { 811 int error = 0; 812 813 switch (event) { 814 case MOD_LOAD: 815 error = netmap_init(); 816 break; 817 818 case MOD_UNLOAD: 819 netmap_fini(); 820 break; 821 822 default: 823 error = EOPNOTSUPP; 824 break; 825 } 826 827 return (error); 828 } 829 830 831 DEV_MODULE(netmap, netmap_loader, NULL); 832