/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
24 */ 25 26 /* $FreeBSD$ */ 27 28 #include <sys/types.h> 29 #include <sys/module.h> 30 #include <sys/errno.h> 31 #include <sys/param.h> /* defines used in kernel.h */ 32 #include <sys/poll.h> /* POLLIN, POLLOUT */ 33 #include <sys/kernel.h> /* types used in module initialization */ 34 #include <sys/conf.h> /* DEV_MODULE */ 35 #include <sys/endian.h> 36 37 #include <sys/rwlock.h> 38 39 #include <vm/vm.h> /* vtophys */ 40 #include <vm/pmap.h> /* vtophys */ 41 #include <vm/vm_param.h> 42 #include <vm/vm_object.h> 43 #include <vm/vm_page.h> 44 #include <vm/vm_pager.h> 45 #include <vm/uma.h> 46 47 48 #include <sys/malloc.h> 49 #include <sys/socket.h> /* sockaddrs */ 50 #include <sys/selinfo.h> 51 #include <net/if.h> 52 #include <net/if_var.h> 53 #include <net/if_types.h> /* IFT_ETHER */ 54 #include <net/ethernet.h> /* ether_ifdetach */ 55 #include <net/if_dl.h> /* LLADDR */ 56 #include <machine/bus.h> /* bus_dmamap_* */ 57 #include <netinet/in.h> /* in6_cksum_pseudo() */ 58 #include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */ 59 60 #include <net/netmap.h> 61 #include <dev/netmap/netmap_kern.h> 62 #include <dev/netmap/netmap_mem2.h> 63 64 65 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ 66 67 rawsum_t 68 nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) 69 { 70 /* TODO XXX please use the FreeBSD implementation for this. */ 71 uint16_t *words = (uint16_t *)data; 72 int nw = len / 2; 73 int i; 74 75 for (i = 0; i < nw; i++) 76 cur_sum += be16toh(words[i]); 77 78 if (len & 1) 79 cur_sum += (data[len-1] << 8); 80 81 return cur_sum; 82 } 83 84 /* Fold a raw checksum: 'cur_sum' is in host byte order, while the 85 * return value is in network byte order. 86 */ 87 uint16_t 88 nm_csum_fold(rawsum_t cur_sum) 89 { 90 /* TODO XXX please use the FreeBSD implementation for this. 
*/ 91 while (cur_sum >> 16) 92 cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); 93 94 return htobe16((~cur_sum) & 0xFFFF); 95 } 96 97 uint16_t nm_csum_ipv4(struct nm_iphdr *iph) 98 { 99 #if 0 100 return in_cksum_hdr((void *)iph); 101 #else 102 return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); 103 #endif 104 } 105 106 void 107 nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, 108 size_t datalen, uint16_t *check) 109 { 110 #ifdef INET 111 uint16_t pseudolen = datalen + iph->protocol; 112 113 /* Compute and insert the pseudo-header cheksum. */ 114 *check = in_pseudo(iph->saddr, iph->daddr, 115 htobe16(pseudolen)); 116 /* Compute the checksum on TCP/UDP header + payload 117 * (includes the pseudo-header). 118 */ 119 *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 120 #else 121 static int notsupported = 0; 122 if (!notsupported) { 123 notsupported = 1; 124 D("inet4 segmentation not supported"); 125 } 126 #endif 127 } 128 129 void 130 nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, 131 size_t datalen, uint16_t *check) 132 { 133 #ifdef INET6 134 *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); 135 *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); 136 #else 137 static int notsupported = 0; 138 if (!notsupported) { 139 notsupported = 1; 140 D("inet6 segmentation not supported"); 141 } 142 #endif 143 } 144 145 146 /* 147 * Intercept the rx routine in the standard device driver. 
148 * Second argument is non-zero to intercept, 0 to restore 149 */ 150 int 151 netmap_catch_rx(struct netmap_adapter *na, int intercept) 152 { 153 struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; 154 struct ifnet *ifp = na->ifp; 155 156 if (intercept) { 157 if (gna->save_if_input) { 158 D("cannot intercept again"); 159 return EINVAL; /* already set */ 160 } 161 gna->save_if_input = ifp->if_input; 162 ifp->if_input = generic_rx_handler; 163 } else { 164 if (!gna->save_if_input){ 165 D("cannot restore"); 166 return EINVAL; /* not saved */ 167 } 168 ifp->if_input = gna->save_if_input; 169 gna->save_if_input = NULL; 170 } 171 172 return 0; 173 } 174 175 176 /* 177 * Intercept the packet steering routine in the tx path, 178 * so that we can decide which queue is used for an mbuf. 179 * Second argument is non-zero to intercept, 0 to restore. 180 * On freebsd we just intercept if_transmit. 181 */ 182 void 183 netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) 184 { 185 struct netmap_adapter *na = &gna->up.up; 186 struct ifnet *ifp = na->ifp; 187 188 if (enable) { 189 na->if_transmit = ifp->if_transmit; 190 ifp->if_transmit = netmap_transmit; 191 } else { 192 ifp->if_transmit = na->if_transmit; 193 } 194 } 195 196 197 /* 198 * Transmit routine used by generic_netmap_txsync(). Returns 0 on success 199 * and non-zero on error (which may be packet drops or other errors). 200 * addr and len identify the netmap buffer, m is the (preallocated) 201 * mbuf to use for transmissions. 202 * 203 * We should add a reference to the mbuf so the m_freem() at the end 204 * of the transmission does not consume resources. 
205 * 206 * On FreeBSD, and on multiqueue cards, we can force the queue using 207 * if ((m->m_flags & M_FLOWID) != 0) 208 * i = m->m_pkthdr.flowid % adapter->num_queues; 209 * else 210 * i = curcpu % adapter->num_queues; 211 * 212 */ 213 int 214 generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, 215 void *addr, u_int len, u_int ring_nr) 216 { 217 int ret; 218 219 /* 220 * The mbuf should be a cluster from our special pool, 221 * so we do not need to do an m_copyback but just copy 222 * (and eventually, just reference the netmap buffer) 223 */ 224 225 if (GET_MBUF_REFCNT(m) != 1) { 226 D("invalid refcnt %d for %p", 227 GET_MBUF_REFCNT(m), m); 228 panic("in generic_xmit_frame"); 229 } 230 // XXX the ext_size check is unnecessary if we link the netmap buf 231 if (m->m_ext.ext_size < len) { 232 RD(5, "size %d < len %d", m->m_ext.ext_size, len); 233 len = m->m_ext.ext_size; 234 } 235 if (0) { /* XXX seems to have negligible benefits */ 236 m->m_ext.ext_buf = m->m_data = addr; 237 } else { 238 bcopy(addr, m->m_data, len); 239 } 240 m->m_len = m->m_pkthdr.len = len; 241 // inc refcount. 
All ours, we could skip the atomic 242 atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1); 243 m->m_flags |= M_FLOWID; 244 m->m_pkthdr.flowid = ring_nr; 245 m->m_pkthdr.rcvif = ifp; /* used for tx notification */ 246 ret = NA(ifp)->if_transmit(ifp, m); 247 return ret; 248 } 249 250 251 #if __FreeBSD_version >= 1100005 252 struct netmap_adapter * 253 netmap_getna(if_t ifp) 254 { 255 return (NA((struct ifnet *)ifp)); 256 } 257 #endif /* __FreeBSD_version >= 1100005 */ 258 259 /* 260 * The following two functions are empty until we have a generic 261 * way to extract the info from the ifp 262 */ 263 int 264 generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) 265 { 266 D("called, in tx %d rx %d", *tx, *rx); 267 return 0; 268 } 269 270 271 void 272 generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) 273 { 274 D("called, in txq %d rxq %d", *txq, *rxq); 275 *txq = netmap_generic_rings; 276 *rxq = netmap_generic_rings; 277 } 278 279 280 void 281 netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) 282 { 283 ND("called"); 284 mit->mit_pending = 0; 285 mit->mit_ring_idx = idx; 286 mit->mit_na = na; 287 } 288 289 290 void 291 netmap_mitigation_start(struct nm_generic_mit *mit) 292 { 293 ND("called"); 294 } 295 296 297 void 298 netmap_mitigation_restart(struct nm_generic_mit *mit) 299 { 300 ND("called"); 301 } 302 303 304 int 305 netmap_mitigation_active(struct nm_generic_mit *mit) 306 { 307 ND("called"); 308 return 0; 309 } 310 311 312 void 313 netmap_mitigation_cleanup(struct nm_generic_mit *mit) 314 { 315 ND("called"); 316 } 317 318 static int 319 nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) 320 { 321 return EINVAL; 322 } 323 324 static void 325 nm_vi_start(struct ifnet *ifp) 326 { 327 panic("nm_vi_start() must not be called"); 328 } 329 330 /* 331 * Index manager of persistent virtual interfaces. 332 * It is used to decide the lowest byte of the MAC address. 
333 * We use the same algorithm with management of bridge port index. 334 */ 335 #define NM_VI_MAX 255 336 static struct { 337 uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */ 338 uint8_t active; 339 struct mtx lock; 340 } nm_vi_indices; 341 342 void 343 nm_vi_init_index(void) 344 { 345 int i; 346 for (i = 0; i < NM_VI_MAX; i++) 347 nm_vi_indices.index[i] = i; 348 nm_vi_indices.active = 0; 349 mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF); 350 } 351 352 /* return -1 if no index available */ 353 static int 354 nm_vi_get_index(void) 355 { 356 int ret; 357 358 mtx_lock(&nm_vi_indices.lock); 359 ret = nm_vi_indices.active == NM_VI_MAX ? -1 : 360 nm_vi_indices.index[nm_vi_indices.active++]; 361 mtx_unlock(&nm_vi_indices.lock); 362 return ret; 363 } 364 365 static void 366 nm_vi_free_index(uint8_t val) 367 { 368 int i, lim; 369 370 mtx_lock(&nm_vi_indices.lock); 371 lim = nm_vi_indices.active; 372 for (i = 0; i < lim; i++) { 373 if (nm_vi_indices.index[i] == val) { 374 /* swap index[lim-1] and j */ 375 int tmp = nm_vi_indices.index[lim-1]; 376 nm_vi_indices.index[lim-1] = val; 377 nm_vi_indices.index[i] = tmp; 378 nm_vi_indices.active--; 379 break; 380 } 381 } 382 if (lim == nm_vi_indices.active) 383 D("funny, index %u didn't found", val); 384 mtx_unlock(&nm_vi_indices.lock); 385 } 386 #undef NM_VI_MAX 387 388 /* 389 * Implementation of a netmap-capable virtual interface that 390 * registered to the system. 391 * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9. 392 * 393 * Note: Linux sets refcount to 0 on allocation of net_device, 394 * then increments it on registration to the system. 395 * FreeBSD sets refcount to 1 on if_alloc(), and does not 396 * increment this refcount on if_attach(). 
397 */ 398 int 399 nm_vi_persist(const char *name, struct ifnet **ret) 400 { 401 struct ifnet *ifp; 402 u_short macaddr_hi; 403 uint32_t macaddr_mid; 404 u_char eaddr[6]; 405 int unit = nm_vi_get_index(); /* just to decide MAC address */ 406 407 if (unit < 0) 408 return EBUSY; 409 /* 410 * We use the same MAC address generation method with tap 411 * except for the highest octet is 00:be instead of 00:bd 412 */ 413 macaddr_hi = htons(0x00be); /* XXX tap + 1 */ 414 macaddr_mid = (uint32_t) ticks; 415 bcopy(&macaddr_hi, eaddr, sizeof(short)); 416 bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); 417 eaddr[5] = (uint8_t)unit; 418 419 ifp = if_alloc(IFT_ETHER); 420 if (ifp == NULL) { 421 D("if_alloc failed"); 422 return ENOMEM; 423 } 424 if_initname(ifp, name, IF_DUNIT_NONE); 425 ifp->if_mtu = 65536; 426 ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; 427 ifp->if_init = (void *)nm_vi_dummy; 428 ifp->if_ioctl = nm_vi_dummy; 429 ifp->if_start = nm_vi_start; 430 ifp->if_mtu = ETHERMTU; 431 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 432 ifp->if_capabilities |= IFCAP_LINKSTATE; 433 ifp->if_capenable |= IFCAP_LINKSTATE; 434 435 ether_ifattach(ifp, eaddr); 436 *ret = ifp; 437 return 0; 438 } 439 /* unregister from the system and drop the final refcount */ 440 void 441 nm_vi_detach(struct ifnet *ifp) 442 { 443 nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); 444 ether_ifdetach(ifp); 445 if_free(ifp); 446 } 447 448 /* 449 * In order to track whether pages are still mapped, we hook into 450 * the standard cdev_pager and intercept the constructor and 451 * destructor. 
452 */ 453 454 struct netmap_vm_handle_t { 455 struct cdev *dev; 456 struct netmap_priv_d *priv; 457 }; 458 459 460 static int 461 netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 462 vm_ooffset_t foff, struct ucred *cred, u_short *color) 463 { 464 struct netmap_vm_handle_t *vmh = handle; 465 466 if (netmap_verbose) 467 D("handle %p size %jd prot %d foff %jd", 468 handle, (intmax_t)size, prot, (intmax_t)foff); 469 if (color) 470 *color = 0; 471 dev_ref(vmh->dev); 472 return 0; 473 } 474 475 476 static void 477 netmap_dev_pager_dtor(void *handle) 478 { 479 struct netmap_vm_handle_t *vmh = handle; 480 struct cdev *dev = vmh->dev; 481 struct netmap_priv_d *priv = vmh->priv; 482 483 if (netmap_verbose) 484 D("handle %p", handle); 485 netmap_dtor(priv); 486 free(vmh, M_DEVBUF); 487 dev_rel(dev); 488 } 489 490 491 static int 492 netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, 493 int prot, vm_page_t *mres) 494 { 495 struct netmap_vm_handle_t *vmh = object->handle; 496 struct netmap_priv_d *priv = vmh->priv; 497 vm_paddr_t paddr; 498 vm_page_t page; 499 vm_memattr_t memattr; 500 vm_pindex_t pidx; 501 502 ND("object %p offset %jd prot %d mres %p", 503 object, (intmax_t)offset, prot, mres); 504 memattr = object->memattr; 505 pidx = OFF_TO_IDX(offset); 506 paddr = netmap_mem_ofstophys(priv->np_mref, offset); 507 if (paddr == 0) 508 return VM_PAGER_FAIL; 509 510 if (((*mres)->flags & PG_FICTITIOUS) != 0) { 511 /* 512 * If the passed in result page is a fake page, update it with 513 * the new physical address. 514 */ 515 page = *mres; 516 vm_page_updatefake(page, paddr, memattr); 517 } else { 518 /* 519 * Replace the passed in reqpage page with our own fake page and 520 * free up the all of the original pages. 
521 */ 522 #ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ 523 #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK 524 #define VM_OBJECT_WLOCK VM_OBJECT_LOCK 525 #endif /* VM_OBJECT_WUNLOCK */ 526 527 VM_OBJECT_WUNLOCK(object); 528 page = vm_page_getfake(paddr, memattr); 529 VM_OBJECT_WLOCK(object); 530 vm_page_lock(*mres); 531 vm_page_free(*mres); 532 vm_page_unlock(*mres); 533 *mres = page; 534 vm_page_insert(page, object, pidx); 535 } 536 page->valid = VM_PAGE_BITS_ALL; 537 return (VM_PAGER_OK); 538 } 539 540 541 static struct cdev_pager_ops netmap_cdev_pager_ops = { 542 .cdev_pg_ctor = netmap_dev_pager_ctor, 543 .cdev_pg_dtor = netmap_dev_pager_dtor, 544 .cdev_pg_fault = netmap_dev_pager_fault, 545 }; 546 547 548 static int 549 netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, 550 vm_size_t objsize, vm_object_t *objp, int prot) 551 { 552 int error; 553 struct netmap_vm_handle_t *vmh; 554 struct netmap_priv_d *priv; 555 vm_object_t obj; 556 557 if (netmap_verbose) 558 D("cdev %p foff %jd size %jd objp %p prot %d", cdev, 559 (intmax_t )*foff, (intmax_t )objsize, objp, prot); 560 561 vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, 562 M_NOWAIT | M_ZERO); 563 if (vmh == NULL) 564 return ENOMEM; 565 vmh->dev = cdev; 566 567 NMG_LOCK(); 568 error = devfs_get_cdevpriv((void**)&priv); 569 if (error) 570 goto err_unlock; 571 vmh->priv = priv; 572 priv->np_refcount++; 573 NMG_UNLOCK(); 574 575 error = netmap_get_memory(priv); 576 if (error) 577 goto err_deref; 578 579 obj = cdev_pager_allocate(vmh, OBJT_DEVICE, 580 &netmap_cdev_pager_ops, objsize, prot, 581 *foff, NULL); 582 if (obj == NULL) { 583 D("cdev_pager_allocate failed"); 584 error = EINVAL; 585 goto err_deref; 586 } 587 588 *objp = obj; 589 return 0; 590 591 err_deref: 592 NMG_LOCK(); 593 priv->np_refcount--; 594 err_unlock: 595 NMG_UNLOCK(); 596 // err: 597 free(vmh, M_DEVBUF); 598 return error; 599 } 600 601 602 // XXX can we remove this ? 
603 static int 604 netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 605 { 606 if (netmap_verbose) 607 D("dev %p fflag 0x%x devtype %d td %p", 608 dev, fflag, devtype, td); 609 return 0; 610 } 611 612 613 static int 614 netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 615 { 616 struct netmap_priv_d *priv; 617 int error; 618 619 (void)dev; 620 (void)oflags; 621 (void)devtype; 622 (void)td; 623 624 // XXX wait or nowait ? 625 priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, 626 M_NOWAIT | M_ZERO); 627 if (priv == NULL) 628 return ENOMEM; 629 630 error = devfs_set_cdevpriv(priv, netmap_dtor); 631 if (error) 632 return error; 633 634 priv->np_refcount = 1; 635 636 return 0; 637 } 638 639 /******************** kqueue support ****************/ 640 641 /* 642 * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. 643 * We use a non-zero argument to distinguish the call from the one 644 * in kevent_scan() which instead also needs to run netmap_poll(). 645 * The knote uses a global mutex for the time being. We might 646 * try to reuse the one in the si, but it is not allocated 647 * permanently so it might be a bit tricky. 648 * 649 * The *kqfilter function registers one or another f_event 650 * depending on read or write mode. 651 * In the call to f_event() td_fpop is NULL so any child function 652 * calling devfs_get_cdevpriv() would fail - and we need it in 653 * netmap_poll(). As a workaround we store priv into kn->kn_hook 654 * and pass it as first argument to netmap_poll(), which then 655 * uses the failure to tell that we are called from f_event() 656 * and do not need the selrecord(). 
657 */ 658 659 void freebsd_selwakeup(struct selinfo *si, int pri); 660 661 void 662 freebsd_selwakeup(struct selinfo *si, int pri) 663 { 664 if (netmap_verbose) 665 D("on knote %p", &si->si_note); 666 selwakeuppri(si, pri); 667 /* use a non-zero hint to tell the notification from the 668 * call done in kqueue_scan() which uses 0 669 */ 670 KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */); 671 } 672 673 static void 674 netmap_knrdetach(struct knote *kn) 675 { 676 struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 677 struct selinfo *si = priv->np_rxsi; 678 679 D("remove selinfo %p", si); 680 knlist_remove(&si->si_note, kn, 0); 681 } 682 683 static void 684 netmap_knwdetach(struct knote *kn) 685 { 686 struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; 687 struct selinfo *si = priv->np_txsi; 688 689 D("remove selinfo %p", si); 690 knlist_remove(&si->si_note, kn, 0); 691 } 692 693 /* 694 * callback from notifies (generated externally) and our 695 * calls to kevent(). The former we just return 1 (ready) 696 * since we do not know better. 697 * In the latter we call netmap_poll and return 0/1 accordingly. 698 */ 699 static int 700 netmap_knrw(struct knote *kn, long hint, int events) 701 { 702 struct netmap_priv_d *priv; 703 int revents; 704 705 if (hint != 0) { 706 ND(5, "call from notify"); 707 return 1; /* assume we are ready */ 708 } 709 priv = kn->kn_hook; 710 /* the notification may come from an external thread, 711 * in which case we do not want to run the netmap_poll 712 * This should be filtered above, but check just in case. 713 */ 714 if (curthread != priv->np_td) { /* should not happen */ 715 RD(5, "curthread changed %p %p", curthread, priv->np_td); 716 return 1; 717 } else { 718 revents = netmap_poll((void *)priv, events, curthread); 719 return (events & revents) ? 
1 : 0; 720 } 721 } 722 723 static int 724 netmap_knread(struct knote *kn, long hint) 725 { 726 return netmap_knrw(kn, hint, POLLIN); 727 } 728 729 static int 730 netmap_knwrite(struct knote *kn, long hint) 731 { 732 return netmap_knrw(kn, hint, POLLOUT); 733 } 734 735 static struct filterops netmap_rfiltops = { 736 .f_isfd = 1, 737 .f_detach = netmap_knrdetach, 738 .f_event = netmap_knread, 739 }; 740 741 static struct filterops netmap_wfiltops = { 742 .f_isfd = 1, 743 .f_detach = netmap_knwdetach, 744 .f_event = netmap_knwrite, 745 }; 746 747 748 /* 749 * This is called when a thread invokes kevent() to record 750 * a change in the configuration of the kqueue(). 751 * The 'priv' should be the same as in the netmap device. 752 */ 753 static int 754 netmap_kqfilter(struct cdev *dev, struct knote *kn) 755 { 756 struct netmap_priv_d *priv; 757 int error; 758 struct netmap_adapter *na; 759 struct selinfo *si; 760 int ev = kn->kn_filter; 761 762 if (ev != EVFILT_READ && ev != EVFILT_WRITE) { 763 D("bad filter request %d", ev); 764 return 1; 765 } 766 error = devfs_get_cdevpriv((void**)&priv); 767 if (error) { 768 D("device not yet setup"); 769 return 1; 770 } 771 na = priv->np_na; 772 if (na == NULL) { 773 D("no netmap adapter for this file descriptor"); 774 return 1; 775 } 776 /* the si is indicated in the priv */ 777 si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; 778 // XXX lock(priv) ? 779 kn->kn_fop = (ev == EVFILT_WRITE) ? 780 &netmap_wfiltops : &netmap_rfiltops; 781 kn->kn_hook = priv; 782 knlist_add(&si->si_note, kn, 1); 783 // XXX unlock(priv) 784 ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s", 785 na, na->ifp->if_xname, curthread, priv, kn, 786 priv->np_nifp, 787 kn->kn_fp == curthread->td_fpop ? 
"match" : "MISMATCH"); 788 return 0; 789 } 790 791 struct cdevsw netmap_cdevsw = { 792 .d_version = D_VERSION, 793 .d_name = "netmap", 794 .d_open = netmap_open, 795 .d_mmap_single = netmap_mmap_single, 796 .d_ioctl = netmap_ioctl, 797 .d_poll = netmap_poll, 798 .d_kqfilter = netmap_kqfilter, 799 .d_close = netmap_close, 800 }; 801 /*--- end of kqueue support ----*/ 802 803 /* 804 * Kernel entry point. 805 * 806 * Initialize/finalize the module and return. 807 * 808 * Return 0 on success, errno on failure. 809 */ 810 static int 811 netmap_loader(__unused struct module *module, int event, __unused void *arg) 812 { 813 int error = 0; 814 815 switch (event) { 816 case MOD_LOAD: 817 error = netmap_init(); 818 break; 819 820 case MOD_UNLOAD: 821 netmap_fini(); 822 break; 823 824 default: 825 error = EOPNOTSUPP; 826 break; 827 } 828 829 return (error); 830 } 831 832 833 DEV_MODULE(netmap, netmap_loader, NULL); 834