17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 55f684e24Ssp92102 * Common Development and Distribution License (the "License"). 65f684e24Ssp92102 * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22a85084caSmeem * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26cd1c8b85SMatthew Ahrens /* 27cd1c8b85SMatthew Ahrens * Copyright (c) 2012 by Delphix. All rights reserved. 28*a5eb7107SBryan Cantrill * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
 */

#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/stat.h>
#include <sys/poll_impl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/devpoll.h>
#include <sys/rctl.h>
#include <sys/resource.h>
#include <sys/schedctl.h>
#include <sys/epoll.h>

/* Sentinel stored in devpolltbl[] while a minor is being set up in dpopen() */
#define	RESERVED	1

/* local data struct */
static	dp_entry_t	**devpolltbl;	/* dev poll entries */
static	size_t	dptblsize;		/* current capacity of devpolltbl */

static	kmutex_t	devpoll_lock;	/* lock protecting dev tbl */
int	devpoll_init;			/* is /dev/poll initialized already */

/* device local functions */

static int	dpopen(dev_t *devp, int flag, int otyp, cred_t *credp);
static int	dpwrite(dev_t dev, struct uio *uiop, cred_t *credp);
static int	dpioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int	dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);
static int	dpclose(dev_t dev, int flag, int otyp, cred_t *credp);
static dev_info_t *dpdevi;	/* our dev_info node, set in dpattach() */


static struct cb_ops dp_cb_ops = {
	dpopen,			/* open */
	dpclose,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	dpwrite,		/* write */
	dpioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	dppoll,			/* poll */
	ddi_prop_op,		/* prop_op */
	(struct streamtab *)0,	/* streamtab */
	D_MP,			/* flags */
	CB_REV,			/* cb_ops revision */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int dpattach(dev_info_t *, ddi_attach_cmd_t);
static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

static struct dev_ops dp_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dpinfo,			/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dpattach,		/* attach */
	dpdetach,		/* detach */
	nodev,			/* reset */
	&dp_cb_ops,		/* driver operations */
	(struct bus_ops *)NULL,	/* bus operations */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};


static struct modldrv modldrv = {
	&mod_driverops,		/* type of module - a driver */
	"/dev/poll driver",
	&dp_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Locking Design
 *
 * The /dev/poll driver shares most of its code with poll sys call whose
 * code is in common/syscall/poll.c. In poll(2) design, the pollcache
 * structure is per lwp. An implicit assumption is made there that some
 * portion of pollcache will never be touched by other lwps. E.g., in
 * poll(2) design, no lwp will ever need to grow bitmap of other lwp.
 * This assumption is not true for /dev/poll; hence the need for extra
 * locking.
 *
 * To allow more parallelism, each /dev/poll file descriptor (indexed by
 * minor number) has its own lock.
Since read (dpioctl) is a much more 1397c478bd9Sstevel@tonic-gate * frequent operation than write, we want to allow multiple reads on same 1407c478bd9Sstevel@tonic-gate * /dev/poll fd. However, we prevent writes from being starved by giving 1417c478bd9Sstevel@tonic-gate * priority to write operation. Theoretically writes can starve reads as 142da6c28aaSamw * well. But in practical sense this is not important because (1) writes 1437c478bd9Sstevel@tonic-gate * happens less often than reads, and (2) write operation defines the 1447c478bd9Sstevel@tonic-gate * content of poll fd a cache set. If writes happens so often that they 1457c478bd9Sstevel@tonic-gate * can starve reads, that means the cached set is very unstable. It may 1467c478bd9Sstevel@tonic-gate * not make sense to read an unstable cache set anyway. Therefore, the 1477c478bd9Sstevel@tonic-gate * writers starving readers case is not handled in this design. 1487c478bd9Sstevel@tonic-gate */ 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate int 1517c478bd9Sstevel@tonic-gate _init() 1527c478bd9Sstevel@tonic-gate { 1537c478bd9Sstevel@tonic-gate int error; 1547c478bd9Sstevel@tonic-gate 1557c478bd9Sstevel@tonic-gate dptblsize = DEVPOLLSIZE; 1567c478bd9Sstevel@tonic-gate devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 1577c478bd9Sstevel@tonic-gate mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL); 1587c478bd9Sstevel@tonic-gate devpoll_init = 1; 1597c478bd9Sstevel@tonic-gate if ((error = mod_install(&modlinkage)) != 0) { 1607c478bd9Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1617c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1627c478bd9Sstevel@tonic-gate devpoll_init = 0; 1637c478bd9Sstevel@tonic-gate } 1647c478bd9Sstevel@tonic-gate return (error); 1657c478bd9Sstevel@tonic-gate } 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate int 1687c478bd9Sstevel@tonic-gate _fini() 1697c478bd9Sstevel@tonic-gate { 1707c478bd9Sstevel@tonic-gate int error; 
1717c478bd9Sstevel@tonic-gate 1727c478bd9Sstevel@tonic-gate if ((error = mod_remove(&modlinkage)) != 0) { 1737c478bd9Sstevel@tonic-gate return (error); 1747c478bd9Sstevel@tonic-gate } 1757c478bd9Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1767c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1777c478bd9Sstevel@tonic-gate return (0); 1787c478bd9Sstevel@tonic-gate } 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate int 1817c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 1827c478bd9Sstevel@tonic-gate { 1837c478bd9Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 1847c478bd9Sstevel@tonic-gate } 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1877c478bd9Sstevel@tonic-gate static int 1887c478bd9Sstevel@tonic-gate dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1897c478bd9Sstevel@tonic-gate { 1907c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL) 1917c478bd9Sstevel@tonic-gate == DDI_FAILURE) { 1927c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 1937c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 1947c478bd9Sstevel@tonic-gate } 1957c478bd9Sstevel@tonic-gate dpdevi = devi; 1967c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 1977c478bd9Sstevel@tonic-gate } 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate static int 2007c478bd9Sstevel@tonic-gate dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd) 2017c478bd9Sstevel@tonic-gate { 2027c478bd9Sstevel@tonic-gate if (cmd != DDI_DETACH) 2037c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 2047c478bd9Sstevel@tonic-gate 2057c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 2067c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 2077c478bd9Sstevel@tonic-gate } 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2107c478bd9Sstevel@tonic-gate static int 2117c478bd9Sstevel@tonic-gate dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 
2127c478bd9Sstevel@tonic-gate { 2137c478bd9Sstevel@tonic-gate int error; 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate switch (infocmd) { 2167c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 2177c478bd9Sstevel@tonic-gate *result = (void *)dpdevi; 2187c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2197c478bd9Sstevel@tonic-gate break; 2207c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 2217c478bd9Sstevel@tonic-gate *result = (void *)0; 2227c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2237c478bd9Sstevel@tonic-gate break; 2247c478bd9Sstevel@tonic-gate default: 2257c478bd9Sstevel@tonic-gate error = DDI_FAILURE; 2267c478bd9Sstevel@tonic-gate } 2277c478bd9Sstevel@tonic-gate return (error); 2287c478bd9Sstevel@tonic-gate } 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate /* 2317c478bd9Sstevel@tonic-gate * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major 2327c478bd9Sstevel@tonic-gate * differences are: (1) /dev/poll requires scanning the bitmap starting at 2337c478bd9Sstevel@tonic-gate * where it was stopped last time, instead of always starting from 0, 2347c478bd9Sstevel@tonic-gate * (2) since user may not have cleaned up the cached fds when they are 2357c478bd9Sstevel@tonic-gate * closed, some polldats in cache may refer to closed or reused fds. We 2367c478bd9Sstevel@tonic-gate * need to check for those cases. 2377c478bd9Sstevel@tonic-gate * 2387c478bd9Sstevel@tonic-gate * NOTE: Upon closing an fd, automatic poll cache cleanup is done for 2397c478bd9Sstevel@tonic-gate * poll(2) caches but NOT for /dev/poll caches. So expect some 2407c478bd9Sstevel@tonic-gate * stale entries! 
 */
static int
dp_pcache_poll(dp_entry_t *dpep, void *dpbuf,
    pollcache_t *pcp, nfds_t nfds, int *fdcntp)
{
	int		start, ostart, end;
	int		fdcnt, fd;
	boolean_t	done;
	file_t		*fp;
	short		revent;
	boolean_t	no_wrap;
	pollhead_t	*php;
	polldat_t	*pdp;
	pollfd_t	*pfdp;	/* output array in the plain /dev/poll case */
	epoll_event_t	*epoll;	/* output array in the epoll-compat case */
	int		error = 0;
	/* event bits whose poll and epoll encodings differ */
	short		mask = POLLRDHUP | POLLWRBAND;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	if (pcp->pc_bitmap == NULL) {
		/*
		 * No Need to search because no poll fd
		 * has been cached.
		 */
		return (error);
	}

	/* Exactly one of pfdp/epoll is non-NULL, selecting output format. */
	if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
		pfdp = NULL;
		epoll = (epoll_event_t *)dpbuf;
	} else {
		pfdp = (pollfd_t *)dpbuf;
		epoll = NULL;
	}
retry:
	start = ostart = pcp->pc_mapstart;
	end = pcp->pc_mapend;
	php = NULL;

	if (start == 0) {
		/*
		 * started from every begining, no need to wrap around.
		 */
		no_wrap = B_TRUE;
	} else {
		no_wrap = B_FALSE;
	}
	done = B_FALSE;
	fdcnt = 0;
	while ((fdcnt < nfds) && !done) {
		php = NULL;
		revent = 0;
		/*
		 * Examine the bit map in a circular fashion
		 * to avoid starvation. Always resume from
		 * last stop. Scan till end of the map. Then
		 * wrap around.
		 */
		fd = bt_getlowbit(pcp->pc_bitmap, start, end);
		ASSERT(fd <= end);
		if (fd >= 0) {
			if (fd == end) {
				if (no_wrap) {
					done = B_TRUE;
				} else {
					/* wrap to scan [0, ostart-1] next */
					start = 0;
					end = ostart - 1;
					no_wrap = B_TRUE;
				}
			} else {
				start = fd + 1;
			}
			pdp = pcache_lookup_fd(pcp, fd);
repoll:
			ASSERT(pdp != NULL);
			ASSERT(pdp->pd_fd == fd);
			if (pdp->pd_fp == NULL) {
				/*
				 * The fd is POLLREMOVed. This fd is
				 * logically no longer cached. So move
				 * on to the next one.
				 */
				continue;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd has been closed, but user has not
				 * done a POLLREMOVE on this fd yet. Instead
				 * of cleaning it here implicitly, we return
				 * POLLNVAL. This is consistent with poll(2)
				 * polling a closed fd. Hope this will remind
				 * user to do a POLLREMOVE.
				 */
				if (pfdp != NULL) {
					pfdp[fdcnt].fd = fd;
					pfdp[fdcnt].revents = POLLNVAL;
					fdcnt++;
					continue;
				}

				/*
				 * In the epoll compatibility case, we actually
				 * perform the implicit removal to remain
				 * closer to the epoll semantics.
				 */
				ASSERT(epoll != NULL);

				pdp->pd_fp = NULL;
				pdp->pd_events = 0;

				if (php != NULL) {
					pollhead_delete(php, pdp);
					pdp->pd_php = NULL;
				}

				BT_CLEAR(pcp->pc_bitmap, fd);
				continue;
			}

			if (fp != pdp->pd_fp) {
				/*
				 * user is polling on a cached fd which was
				 * closed and then reused. Unfortunately
				 * there is no good way to inform user.
				 * If the file struct is also reused, we
				 * may not be able to detect the fd reuse
				 * at all. As long as this does not
				 * cause system failure and/or memory leak,
				 * we will play along. Man page states if
				 * user does not clean up closed fds, polling
				 * results will be indeterministic.
				 *
				 * XXX - perhaps log the detection of fd
				 * reuse?
				 */
				pdp->pd_fp = fp;
			}
			/*
			 * XXX - pollrelock() logic needs to know which
			 * which pollcache lock to grab. It'd be a
			 * cleaner solution if we could pass pcp as
			 * an arguement in VOP_POLL interface instead
			 * of implicitly passing it using thread_t
			 * struct. On the other hand, changing VOP_POLL
			 * interface will require all driver/file system
			 * poll routine to change. May want to revisit
			 * the tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
			    &revent, &php, NULL);
			curthread->t_pollcache = NULL;
			releasef(fd);
			if (error != 0) {
				break;
			}
			/*
			 * layered devices (e.g. console driver)
			 * may change the vnode and thus the pollhead
			 * pointer out from underneath us.
			 */
			if (php != NULL && pdp->pd_php != NULL &&
			    php != pdp->pd_php) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = php;
				pollhead_insert(php, pdp);
				/*
				 * The bit should still be set.
				 */
				ASSERT(BT_TEST(pcp->pc_bitmap, fd));
				goto retry;
			}

			if (revent != 0) {
				if (pfdp != NULL) {
					pfdp[fdcnt].fd = fd;
					pfdp[fdcnt].events = pdp->pd_events;
					pfdp[fdcnt].revents = revent;
				} else {
					epoll_event_t *ep = &epoll[fdcnt];

					ASSERT(epoll != NULL);
					ep->data.u64 = pdp->pd_epolldata;

					/*
					 * If any of the event bits are set for
					 * which poll and epoll representations
					 * differ, swizzle in the native epoll
					 * values.
					 */
					if (revent & mask) {
						ep->events = (revent & ~mask) |
						    ((revent & POLLRDHUP) ?
						    EPOLLRDHUP : 0) |
						    ((revent & POLLWRBAND) ?
						    EPOLLWRBAND : 0);
					} else {
						ep->events = revent;
					}

					/*
					 * We define POLLWRNORM to be POLLOUT,
					 * but epoll has separate definitions
					 * for them; if POLLOUT is set and the
					 * user has asked for EPOLLWRNORM, set
					 * that as well.
					 */
					if ((revent & POLLOUT) &&
					    (pdp->pd_events & EPOLLWRNORM)) {
						ep->events |= EPOLLWRNORM;
					}
				}

				/*
				 * If POLLET is set, clear the bit in the
				 * bitmap -- which effectively latches the
				 * edge on a pollwakeup() from the driver.
				 */
				if (pdp->pd_events & POLLET)
					BT_CLEAR(pcp->pc_bitmap, fd);

				/*
				 * If POLLONESHOT is set, perform the implicit
				 * POLLREMOVE.
				 */
				if (pdp->pd_events & POLLONESHOT) {
					pdp->pd_fp = NULL;
					pdp->pd_events = 0;

					if (php != NULL) {
						pollhead_delete(php, pdp);
						pdp->pd_php = NULL;
					}

					BT_CLEAR(pcp->pc_bitmap, fd);
				}

				fdcnt++;
			} else if (php != NULL) {
				/*
				 * We clear a bit or cache a poll fd if
				 * the driver returns a poll head ptr,
				 * which is expected in the case of 0
				 * revents. Some buggy driver may return
				 * NULL php pointer with 0 revents. In
				 * this case, we just treat the driver as
				 * "noncachable" and not clearing the bit
				 * in bitmap.
				 */
				if ((pdp->pd_php != NULL) &&
				    ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
					BT_CLEAR(pcp->pc_bitmap, fd);
				}
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
					/*
					 * An event of interest may have
					 * arrived between the VOP_POLL() and
					 * the pollhead_insert(); check again.
					 */
					goto repoll;
				}
			}
		} else {
			/*
			 * No bit set in the range. Check for wrap around.
			 */
			if (!no_wrap) {
				start = 0;
				end = ostart - 1;
				no_wrap = B_TRUE;
			} else {
				done = B_TRUE;
			}
		}
	}

	/* Remember where to resume the circular scan next time. */
	if (!done) {
		pcp->pc_mapstart = start;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	return (error);
}

/*
 * open(9E) for the cloning /dev/poll device: reserve (growing the table
 * if needed) a free minor number, allocate a dp_entry_t plus a pollcache
 * skeleton for it, and return the cloned dev_t to the caller.
 */
/*ARGSUSED*/
static int
dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minordev;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;

	ASSERT(devpoll_init);
	ASSERT(dptblsize <= MAXMIN);
	mutex_enter(&devpoll_lock);
	/* Find the first free slot and reserve it while we drop the lock. */
	for (minordev = 0; minordev < dptblsize; minordev++) {
		if (devpolltbl[minordev] == NULL) {
			devpolltbl[minordev] = (dp_entry_t *)RESERVED;
			break;
		}
	}
	if (minordev == dptblsize) {
		dp_entry_t	**newtbl;
		size_t		oldsize;

		/*
		 * Used up every entry in the existing devpoll table.
		 * Grow the table by DEVPOLLSIZE.
		 */
		if ((oldsize = dptblsize) >= MAXMIN) {
			mutex_exit(&devpoll_lock);
			return (ENXIO);
		}
		dptblsize += DEVPOLLSIZE;
		if (dptblsize > MAXMIN) {
			dptblsize = MAXMIN;
		}
		newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
		bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize);
		kmem_free(devpolltbl, sizeof (caddr_t) * oldsize);
		devpolltbl = newtbl;
		devpolltbl[minordev] = (dp_entry_t *)RESERVED;
	}
	mutex_exit(&devpoll_lock);

	dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
	/*
	 * allocate a pollcache skeleton here. Delay allocating bitmap
	 * structures until dpwrite() time, since we don't know the
	 * optimal size yet. We also delay setting the pid until either
	 * dpwrite() or attempt to poll on the instance, allowing parents
	 * to create instances of /dev/poll for their children. (In the
	 * epoll compatibility case, this check isn't performed to maintain
	 * semantic compatibility.)
	 */
	pcp = pcache_alloc();
	dpep->dpe_pcache = pcp;
	pcp->pc_pid = -1;	/* no owner yet; claimed on first use */
	*devp = makedevice(getmajor(*devp), minordev);  /* clone the driver */
	mutex_enter(&devpoll_lock);
	ASSERT(minordev < dptblsize);
	ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED);
	devpolltbl[minordev] = dpep;
	mutex_exit(&devpoll_lock);
	return (0);
}

/*
 * Write to dev/poll add/remove fd's to/from a cached poll fd set,
 * or change poll events for a watched fd.
 */
/*ARGSUSED*/
static int
dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	pollfd_t	*pollfdp, *pfdp;
	dvpoll_epollfd_t *epfdp;
	uintptr_t	limit;
	int		error, size;
	ssize_t		uiosize;
	nfds_t		pollfdnum;
	struct pollhead	*php = NULL;
	polldat_t	*pdp;
	int		fd;
	file_t		*fp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
6167c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 6177c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 6187c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 6197c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 620*a5eb7107SBryan Cantrill 621*a5eb7107SBryan Cantrill if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && 622*a5eb7107SBryan Cantrill curproc->p_pid != pcp->pc_pid) { 623*a5eb7107SBryan Cantrill if (pcp->pc_pid != -1) 6247c478bd9Sstevel@tonic-gate return (EACCES); 625*a5eb7107SBryan Cantrill 626*a5eb7107SBryan Cantrill pcp->pc_pid = curproc->p_pid; 6277c478bd9Sstevel@tonic-gate } 628*a5eb7107SBryan Cantrill 629*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 630*a5eb7107SBryan Cantrill size = sizeof (dvpoll_epollfd_t); 631*a5eb7107SBryan Cantrill } else { 632*a5eb7107SBryan Cantrill size = sizeof (pollfd_t); 633*a5eb7107SBryan Cantrill } 634*a5eb7107SBryan Cantrill 6357c478bd9Sstevel@tonic-gate uiosize = uiop->uio_resid; 636*a5eb7107SBryan Cantrill pollfdnum = uiosize / size; 6377c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 6387c478bd9Sstevel@tonic-gate if (pollfdnum > (uint_t)rctl_enforced_value( 6397c478bd9Sstevel@tonic-gate rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) { 6407c478bd9Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 6417c478bd9Sstevel@tonic-gate curproc->p_rctls, curproc, RCA_SAFE); 6427c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 6437c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 6447c478bd9Sstevel@tonic-gate } 6457c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 6467c478bd9Sstevel@tonic-gate /* 6477c478bd9Sstevel@tonic-gate * Copy in the pollfd array. Walk through the array and add 6487c478bd9Sstevel@tonic-gate * each polled fd to the cached set. 
6497c478bd9Sstevel@tonic-gate */ 6507c478bd9Sstevel@tonic-gate pollfdp = kmem_alloc(uiosize, KM_SLEEP); 651*a5eb7107SBryan Cantrill limit = (uintptr_t)pollfdp + (pollfdnum * size); 6527c478bd9Sstevel@tonic-gate 6537c478bd9Sstevel@tonic-gate /* 6547c478bd9Sstevel@tonic-gate * Although /dev/poll uses the write(2) interface to cache fds, it's 6557c478bd9Sstevel@tonic-gate * not supposed to function as a seekable device. To prevent offset 6567c478bd9Sstevel@tonic-gate * from growing and eventually exceed the maximum, reset the offset 6577c478bd9Sstevel@tonic-gate * here for every call. 6587c478bd9Sstevel@tonic-gate */ 6597c478bd9Sstevel@tonic-gate uiop->uio_loffset = 0; 6607c478bd9Sstevel@tonic-gate if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop)) 6617c478bd9Sstevel@tonic-gate != 0) { 6627c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 6637c478bd9Sstevel@tonic-gate return (error); 6647c478bd9Sstevel@tonic-gate } 6657c478bd9Sstevel@tonic-gate /* 6667c478bd9Sstevel@tonic-gate * We are about to enter the core portion of dpwrite(). Make sure this 6677c478bd9Sstevel@tonic-gate * write has exclusive access in this portion of the code, i.e., no 6687c478bd9Sstevel@tonic-gate * other writers in this code and no other readers in dpioctl. 6697c478bd9Sstevel@tonic-gate */ 6707c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 6717c478bd9Sstevel@tonic-gate dpep->dpe_writerwait++; 6727c478bd9Sstevel@tonic-gate while (dpep->dpe_refcnt != 0) { 673*a5eb7107SBryan Cantrill /* 674*a5eb7107SBryan Cantrill * We need to do a bit of a dance here: we need to drop 675*a5eb7107SBryan Cantrill * our dpe_lock and grab the pc_lock to broadcast the pc_cv to 676*a5eb7107SBryan Cantrill * kick any DP_POLL/DP_PPOLL sleepers. 
677*a5eb7107SBryan Cantrill */ 678*a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 679*a5eb7107SBryan Cantrill mutex_enter(&pcp->pc_lock); 680*a5eb7107SBryan Cantrill pcp->pc_flag |= PC_WRITEWANTED; 681*a5eb7107SBryan Cantrill cv_broadcast(&pcp->pc_cv); 682*a5eb7107SBryan Cantrill mutex_exit(&pcp->pc_lock); 683*a5eb7107SBryan Cantrill mutex_enter(&dpep->dpe_lock); 684*a5eb7107SBryan Cantrill 685*a5eb7107SBryan Cantrill if (dpep->dpe_refcnt == 0) 686*a5eb7107SBryan Cantrill break; 687*a5eb7107SBryan Cantrill 6887c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 6897c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 6907c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 691*a5eb7107SBryan Cantrill mutex_enter(&pcp->pc_lock); 692*a5eb7107SBryan Cantrill pcp->pc_flag &= ~PC_WRITEWANTED; 693*a5eb7107SBryan Cantrill mutex_exit(&pcp->pc_lock); 6947c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 6957c478bd9Sstevel@tonic-gate return (set_errno(EINTR)); 6967c478bd9Sstevel@tonic-gate } 6977c478bd9Sstevel@tonic-gate } 6987c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 6997c478bd9Sstevel@tonic-gate dpep->dpe_flag |= DP_WRITER_PRESENT; 7007c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 701*a5eb7107SBryan Cantrill 7027c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7037c478bd9Sstevel@tonic-gate 7047c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 705*a5eb7107SBryan Cantrill pcp->pc_flag &= ~PC_WRITEWANTED; 706*a5eb7107SBryan Cantrill 7077c478bd9Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 7087c478bd9Sstevel@tonic-gate pcache_create(pcp, pollfdnum); 7097c478bd9Sstevel@tonic-gate } 710*a5eb7107SBryan Cantrill for (pfdp = pollfdp; (uintptr_t)pfdp < limit; 711*a5eb7107SBryan Cantrill pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) { 7127c478bd9Sstevel@tonic-gate fd = pfdp->fd; 713*a5eb7107SBryan Cantrill if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) { 714*a5eb7107SBryan Cantrill /* 715*a5eb7107SBryan Cantrill * epoll 
semantics demand that we return EBADF if our 716*a5eb7107SBryan Cantrill * specified fd is invalid. 717*a5eb7107SBryan Cantrill */ 718*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 719*a5eb7107SBryan Cantrill error = EBADF; 720*a5eb7107SBryan Cantrill break; 721*a5eb7107SBryan Cantrill } 722*a5eb7107SBryan Cantrill 7237c478bd9Sstevel@tonic-gate continue; 724*a5eb7107SBryan Cantrill } 725*a5eb7107SBryan Cantrill 7267c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 7277c478bd9Sstevel@tonic-gate if (pfdp->events != POLLREMOVE) { 728*a5eb7107SBryan Cantrill 729*a5eb7107SBryan Cantrill fp = NULL; 730*a5eb7107SBryan Cantrill 7317c478bd9Sstevel@tonic-gate if (pdp == NULL) { 732*a5eb7107SBryan Cantrill /* 733*a5eb7107SBryan Cantrill * If we're in epoll compatibility mode, check 734*a5eb7107SBryan Cantrill * that the fd is valid before allocating 735*a5eb7107SBryan Cantrill * anything for it; epoll semantics demand that 736*a5eb7107SBryan Cantrill * we return EBADF if our specified fd is 737*a5eb7107SBryan Cantrill * invalid. 
738*a5eb7107SBryan Cantrill */ 739*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 740*a5eb7107SBryan Cantrill if ((fp = getf(fd)) == NULL) { 741*a5eb7107SBryan Cantrill error = EBADF; 742*a5eb7107SBryan Cantrill break; 743*a5eb7107SBryan Cantrill } 744*a5eb7107SBryan Cantrill } 745*a5eb7107SBryan Cantrill 7467c478bd9Sstevel@tonic-gate pdp = pcache_alloc_fd(0); 7477c478bd9Sstevel@tonic-gate pdp->pd_fd = fd; 7487c478bd9Sstevel@tonic-gate pdp->pd_pcache = pcp; 7497c478bd9Sstevel@tonic-gate pcache_insert_fd(pcp, pdp, pollfdnum); 750*a5eb7107SBryan Cantrill } else { 751*a5eb7107SBryan Cantrill /* 752*a5eb7107SBryan Cantrill * epoll semantics demand that we error out if 753*a5eb7107SBryan Cantrill * a file descriptor is added twice, which we 754*a5eb7107SBryan Cantrill * check (imperfectly) by checking if we both 755*a5eb7107SBryan Cantrill * have the file descriptor cached and the 756*a5eb7107SBryan Cantrill * file pointer that correponds to the file 757*a5eb7107SBryan Cantrill * descriptor matches our cached value. If 758*a5eb7107SBryan Cantrill * there is a pointer mismatch, the file 759*a5eb7107SBryan Cantrill * descriptor was closed without being removed. 760*a5eb7107SBryan Cantrill * The converse is clearly not true, however, 761*a5eb7107SBryan Cantrill * so to narrow the window by which a spurious 762*a5eb7107SBryan Cantrill * EEXIST may be returned, we also check if 763*a5eb7107SBryan Cantrill * this fp has been added to an epoll control 764*a5eb7107SBryan Cantrill * descriptor in the past; if it hasn't, we 765*a5eb7107SBryan Cantrill * know that this is due to fp reuse -- it's 766*a5eb7107SBryan Cantrill * not a true EEXIST case. 
(By performing this 767*a5eb7107SBryan Cantrill * additional check, we limit the window of 768*a5eb7107SBryan Cantrill * spurious EEXIST to situations where a single 769*a5eb7107SBryan Cantrill * file descriptor is being used across two or 770*a5eb7107SBryan Cantrill * more epoll control descriptors -- and even 771*a5eb7107SBryan Cantrill * then, the file descriptor must be closed and 772*a5eb7107SBryan Cantrill * reused in a relatively tight time span.) 773*a5eb7107SBryan Cantrill */ 774*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 775*a5eb7107SBryan Cantrill if (pdp->pd_fp != NULL && 776*a5eb7107SBryan Cantrill (fp = getf(fd)) != NULL && 777*a5eb7107SBryan Cantrill fp == pdp->pd_fp && 778*a5eb7107SBryan Cantrill (fp->f_flag2 & FEPOLLED)) { 779*a5eb7107SBryan Cantrill error = EEXIST; 780*a5eb7107SBryan Cantrill releasef(fd); 781*a5eb7107SBryan Cantrill break; 7827c478bd9Sstevel@tonic-gate } 783*a5eb7107SBryan Cantrill 784*a5eb7107SBryan Cantrill /* 785*a5eb7107SBryan Cantrill * We have decided that the cached 786*a5eb7107SBryan Cantrill * information was stale: it either 787*a5eb7107SBryan Cantrill * didn't match, or the fp had never 788*a5eb7107SBryan Cantrill * actually been epoll()'d on before. 789*a5eb7107SBryan Cantrill * We need to now clear our pd_events 790*a5eb7107SBryan Cantrill * to assure that we don't mistakenly 791*a5eb7107SBryan Cantrill * operate on cached event disposition. 
792*a5eb7107SBryan Cantrill */ 793*a5eb7107SBryan Cantrill pdp->pd_events = 0; 794*a5eb7107SBryan Cantrill } 795*a5eb7107SBryan Cantrill } 796*a5eb7107SBryan Cantrill 797*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 798*a5eb7107SBryan Cantrill epfdp = (dvpoll_epollfd_t *)pfdp; 799*a5eb7107SBryan Cantrill pdp->pd_epolldata = epfdp->dpep_data; 800*a5eb7107SBryan Cantrill } 801*a5eb7107SBryan Cantrill 8027c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 8037c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_pcache == pcp); 8047c478bd9Sstevel@tonic-gate if (fd >= pcp->pc_mapsize) { 8057c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8067c478bd9Sstevel@tonic-gate pcache_grow_map(pcp, fd); 8077c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8087c478bd9Sstevel@tonic-gate } 8097c478bd9Sstevel@tonic-gate if (fd > pcp->pc_mapend) { 8107c478bd9Sstevel@tonic-gate pcp->pc_mapend = fd; 8117c478bd9Sstevel@tonic-gate } 812*a5eb7107SBryan Cantrill if (fp == NULL && (fp = getf(fd)) == NULL) { 8137c478bd9Sstevel@tonic-gate /* 8147c478bd9Sstevel@tonic-gate * The fd is not valid. Since we can't pass 8157c478bd9Sstevel@tonic-gate * this error back in the write() call, set 8167c478bd9Sstevel@tonic-gate * the bit in bitmap to force DP_POLL ioctl 8177c478bd9Sstevel@tonic-gate * to examine it. 8187c478bd9Sstevel@tonic-gate */ 8197c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 8207c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 8217c478bd9Sstevel@tonic-gate continue; 8227c478bd9Sstevel@tonic-gate } 823*a5eb7107SBryan Cantrill 824*a5eb7107SBryan Cantrill /* 825*a5eb7107SBryan Cantrill * To (greatly) reduce EEXIST false positives, we 826*a5eb7107SBryan Cantrill * denote that this fp has been epoll()'d. We do this 827*a5eb7107SBryan Cantrill * regardless of epoll compatibility mode, as the flag 828*a5eb7107SBryan Cantrill * is harmless if not in epoll compatibility mode. 
829*a5eb7107SBryan Cantrill */ 830*a5eb7107SBryan Cantrill fp->f_flag2 |= FEPOLLED; 831*a5eb7107SBryan Cantrill 8327c478bd9Sstevel@tonic-gate /* 8337c478bd9Sstevel@tonic-gate * Don't do VOP_POLL for an already cached fd with 8347c478bd9Sstevel@tonic-gate * same poll events. 8357c478bd9Sstevel@tonic-gate */ 8367c478bd9Sstevel@tonic-gate if ((pdp->pd_events == pfdp->events) && 837*a5eb7107SBryan Cantrill (pdp->pd_fp == fp)) { 8387c478bd9Sstevel@tonic-gate /* 8397c478bd9Sstevel@tonic-gate * the events are already cached 8407c478bd9Sstevel@tonic-gate */ 8417c478bd9Sstevel@tonic-gate releasef(fd); 8427c478bd9Sstevel@tonic-gate continue; 8437c478bd9Sstevel@tonic-gate } 8447c478bd9Sstevel@tonic-gate 8457c478bd9Sstevel@tonic-gate /* 8467c478bd9Sstevel@tonic-gate * do VOP_POLL and cache this poll fd. 8477c478bd9Sstevel@tonic-gate */ 8487c478bd9Sstevel@tonic-gate /* 8497c478bd9Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 8507c478bd9Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 8517c478bd9Sstevel@tonic-gate * cleaner solution if we could pass pcp as 8527c478bd9Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 8537c478bd9Sstevel@tonic-gate * of implicitly passing it using thread_t 8547c478bd9Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 8557c478bd9Sstevel@tonic-gate * interface will require all driver/file system 8567c478bd9Sstevel@tonic-gate * poll routine to change. May want to revisit 8577c478bd9Sstevel@tonic-gate * the tradeoff later. 8587c478bd9Sstevel@tonic-gate */ 8597c478bd9Sstevel@tonic-gate curthread->t_pollcache = pcp; 8607c478bd9Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pfdp->events, 0, 861da6c28aaSamw &pfdp->revents, &php, NULL); 8627c478bd9Sstevel@tonic-gate curthread->t_pollcache = NULL; 8637c478bd9Sstevel@tonic-gate /* 864a85084caSmeem * We always set the bit when this fd is cached; 865a85084caSmeem * this forces the first DP_POLL to poll this fd. 
8667c478bd9Sstevel@tonic-gate * Real performance gain comes from subsequent 867a85084caSmeem * DP_POLL. We also attempt a pollhead_insert(); 868a85084caSmeem * if it's not possible, we'll do it in dpioctl(). 8697c478bd9Sstevel@tonic-gate */ 8707c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 8717c478bd9Sstevel@tonic-gate if (error != 0) { 8727c478bd9Sstevel@tonic-gate releasef(fd); 8737c478bd9Sstevel@tonic-gate break; 8747c478bd9Sstevel@tonic-gate } 8757c478bd9Sstevel@tonic-gate pdp->pd_fp = fp; 8767c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 8777c478bd9Sstevel@tonic-gate if (php != NULL) { 8787c478bd9Sstevel@tonic-gate if (pdp->pd_php == NULL) { 8797c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 8807c478bd9Sstevel@tonic-gate pdp->pd_php = php; 8817c478bd9Sstevel@tonic-gate } else { 8827c478bd9Sstevel@tonic-gate if (pdp->pd_php != php) { 8837c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, 8847c478bd9Sstevel@tonic-gate pdp); 8857c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 8867c478bd9Sstevel@tonic-gate pdp->pd_php = php; 8877c478bd9Sstevel@tonic-gate } 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate } 8917c478bd9Sstevel@tonic-gate releasef(fd); 8927c478bd9Sstevel@tonic-gate } else { 893*a5eb7107SBryan Cantrill if (pdp == NULL || pdp->pd_fp == NULL) { 894*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 895*a5eb7107SBryan Cantrill /* 896*a5eb7107SBryan Cantrill * As with the add case (above), epoll 897*a5eb7107SBryan Cantrill * semantics demand that we error out 898*a5eb7107SBryan Cantrill * in this case. 
899*a5eb7107SBryan Cantrill */ 900*a5eb7107SBryan Cantrill error = ENOENT; 901*a5eb7107SBryan Cantrill break; 902*a5eb7107SBryan Cantrill } 903*a5eb7107SBryan Cantrill 9047c478bd9Sstevel@tonic-gate continue; 9057c478bd9Sstevel@tonic-gate } 9067c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 9077c478bd9Sstevel@tonic-gate pdp->pd_fp = NULL; 9087c478bd9Sstevel@tonic-gate pdp->pd_events = 0; 9097c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_thread == NULL); 9107c478bd9Sstevel@tonic-gate if (pdp->pd_php != NULL) { 9117c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 9127c478bd9Sstevel@tonic-gate pdp->pd_php = NULL; 9137c478bd9Sstevel@tonic-gate } 9147c478bd9Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 9157c478bd9Sstevel@tonic-gate } 9167c478bd9Sstevel@tonic-gate } 9177c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 9187c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 9197c478bd9Sstevel@tonic-gate dpep->dpe_flag &= ~DP_WRITER_PRESENT; 9207c478bd9Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 1); 9217c478bd9Sstevel@tonic-gate dpep->dpe_refcnt--; 9227c478bd9Sstevel@tonic-gate cv_broadcast(&dpep->dpe_cv); 9237c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 9247c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 9257c478bd9Sstevel@tonic-gate return (error); 9267c478bd9Sstevel@tonic-gate } 9277c478bd9Sstevel@tonic-gate 928*a5eb7107SBryan Cantrill #define DP_SIGMASK_RESTORE(ksetp) { \ 929*a5eb7107SBryan Cantrill if (ksetp != NULL) { \ 930*a5eb7107SBryan Cantrill mutex_enter(&p->p_lock); \ 931*a5eb7107SBryan Cantrill if (lwp->lwp_cursig == 0) { \ 932*a5eb7107SBryan Cantrill t->t_hold = lwp->lwp_sigoldmask; \ 933*a5eb7107SBryan Cantrill t->t_flag &= ~T_TOMASK; \ 934*a5eb7107SBryan Cantrill } \ 935*a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); \ 936*a5eb7107SBryan Cantrill } \ 937*a5eb7107SBryan Cantrill } 938*a5eb7107SBryan Cantrill 9397c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 9407c478bd9Sstevel@tonic-gate static int 
9417c478bd9Sstevel@tonic-gate dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 9427c478bd9Sstevel@tonic-gate { 9437c478bd9Sstevel@tonic-gate minor_t minor; 9447c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 9457c478bd9Sstevel@tonic-gate pollcache_t *pcp; 946cd1c8b85SMatthew Ahrens hrtime_t now; 9477c478bd9Sstevel@tonic-gate int error = 0; 9487c478bd9Sstevel@tonic-gate STRUCT_DECL(dvpoll, dvpoll); 9497c478bd9Sstevel@tonic-gate 950*a5eb7107SBryan Cantrill if (cmd == DP_POLL || cmd == DP_PPOLL) { 951cd1c8b85SMatthew Ahrens /* do this now, before we sleep on DP_WRITER_PRESENT */ 952cd1c8b85SMatthew Ahrens now = gethrtime(); 953cd1c8b85SMatthew Ahrens } 954cd1c8b85SMatthew Ahrens 9557c478bd9Sstevel@tonic-gate minor = getminor(dev); 9567c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 9577c478bd9Sstevel@tonic-gate ASSERT(minor < dptblsize); 9587c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 9597c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 9607c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 9617c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 9627c478bd9Sstevel@tonic-gate 9637c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 964*a5eb7107SBryan Cantrill 965*a5eb7107SBryan Cantrill if (cmd == DP_EPOLLCOMPAT) { 966*a5eb7107SBryan Cantrill if (dpep->dpe_refcnt != 0) { 967*a5eb7107SBryan Cantrill /* 968*a5eb7107SBryan Cantrill * We can't turn on epoll compatibility while there 969*a5eb7107SBryan Cantrill * are outstanding operations. 970*a5eb7107SBryan Cantrill */ 971*a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 972*a5eb7107SBryan Cantrill return (EBUSY); 973*a5eb7107SBryan Cantrill } 974*a5eb7107SBryan Cantrill 975*a5eb7107SBryan Cantrill /* 976*a5eb7107SBryan Cantrill * epoll compatibility is a one-way street: there's no way 977*a5eb7107SBryan Cantrill * to turn it off for a particular open. 
978*a5eb7107SBryan Cantrill */ 979*a5eb7107SBryan Cantrill dpep->dpe_flag |= DP_ISEPOLLCOMPAT; 980*a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 981*a5eb7107SBryan Cantrill 982*a5eb7107SBryan Cantrill return (0); 983*a5eb7107SBryan Cantrill } 984*a5eb7107SBryan Cantrill 985*a5eb7107SBryan Cantrill if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && 986*a5eb7107SBryan Cantrill curproc->p_pid != pcp->pc_pid) { 987*a5eb7107SBryan Cantrill if (pcp->pc_pid != -1) { 988*a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 989*a5eb7107SBryan Cantrill return (EACCES); 990*a5eb7107SBryan Cantrill } 991*a5eb7107SBryan Cantrill 992*a5eb7107SBryan Cantrill pcp->pc_pid = curproc->p_pid; 993*a5eb7107SBryan Cantrill } 994*a5eb7107SBryan Cantrill 9957c478bd9Sstevel@tonic-gate while ((dpep->dpe_flag & DP_WRITER_PRESENT) || 9967c478bd9Sstevel@tonic-gate (dpep->dpe_writerwait != 0)) { 9977c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 9987c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 9997c478bd9Sstevel@tonic-gate return (EINTR); 10007c478bd9Sstevel@tonic-gate } 10017c478bd9Sstevel@tonic-gate } 10027c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 10037c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 10047c478bd9Sstevel@tonic-gate 10057c478bd9Sstevel@tonic-gate switch (cmd) { 10067c478bd9Sstevel@tonic-gate case DP_POLL: 1007*a5eb7107SBryan Cantrill case DP_PPOLL: 10087c478bd9Sstevel@tonic-gate { 10097c478bd9Sstevel@tonic-gate pollstate_t *ps; 10107c478bd9Sstevel@tonic-gate nfds_t nfds; 10117c478bd9Sstevel@tonic-gate int fdcnt = 0; 1012*a5eb7107SBryan Cantrill size_t size, fdsize, dpsize; 1013cd1c8b85SMatthew Ahrens hrtime_t deadline = 0; 1014*a5eb7107SBryan Cantrill k_sigset_t *ksetp = NULL; 1015*a5eb7107SBryan Cantrill k_sigset_t kset; 1016*a5eb7107SBryan Cantrill sigset_t set; 1017*a5eb7107SBryan Cantrill kthread_t *t = curthread; 1018*a5eb7107SBryan Cantrill klwp_t *lwp = ttolwp(t); 1019*a5eb7107SBryan Cantrill struct proc *p = 
ttoproc(curthread); 10207c478bd9Sstevel@tonic-gate 10217c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 1022*a5eb7107SBryan Cantrill 1023*a5eb7107SBryan Cantrill /* 1024*a5eb7107SBryan Cantrill * The dp_setp member is only required/consumed for DP_PPOLL, 1025*a5eb7107SBryan Cantrill * which otherwise uses the same structure as DP_POLL. 1026*a5eb7107SBryan Cantrill */ 1027*a5eb7107SBryan Cantrill if (cmd == DP_POLL) { 1028*a5eb7107SBryan Cantrill dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) - 1029*a5eb7107SBryan Cantrill (uintptr_t)STRUCT_FADDR(dvpoll, dp_fds); 1030*a5eb7107SBryan Cantrill } else { 1031*a5eb7107SBryan Cantrill ASSERT(cmd == DP_PPOLL); 1032*a5eb7107SBryan Cantrill dpsize = STRUCT_SIZE(dvpoll); 1033*a5eb7107SBryan Cantrill } 1034*a5eb7107SBryan Cantrill 1035*a5eb7107SBryan Cantrill if ((mode & FKIOCTL) != 0) { 1036*a5eb7107SBryan Cantrill /* Kernel-internal ioctl call */ 1037*a5eb7107SBryan Cantrill bcopy((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize); 1038*a5eb7107SBryan Cantrill error = 0; 1039*a5eb7107SBryan Cantrill } else { 10407c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), 1041*a5eb7107SBryan Cantrill dpsize); 1042*a5eb7107SBryan Cantrill } 1043*a5eb7107SBryan Cantrill 10447c478bd9Sstevel@tonic-gate if (error) { 10457c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 10467c478bd9Sstevel@tonic-gate return (EFAULT); 10477c478bd9Sstevel@tonic-gate } 10487c478bd9Sstevel@tonic-gate 1049cd1c8b85SMatthew Ahrens deadline = STRUCT_FGET(dvpoll, dp_timeout); 1050cd1c8b85SMatthew Ahrens if (deadline > 0) { 10517c478bd9Sstevel@tonic-gate /* 1052cd1c8b85SMatthew Ahrens * Convert the deadline from relative milliseconds 1053cd1c8b85SMatthew Ahrens * to absolute nanoseconds. They must wait for at 1054cd1c8b85SMatthew Ahrens * least a tick. 
10557c478bd9Sstevel@tonic-gate */ 105619449258SJosef 'Jeff' Sipek deadline = MSEC2NSEC(deadline); 1057cd1c8b85SMatthew Ahrens deadline = MAX(deadline, nsec_per_tick); 1058cd1c8b85SMatthew Ahrens deadline += now; 10597c478bd9Sstevel@tonic-gate } 10607c478bd9Sstevel@tonic-gate 1061*a5eb7107SBryan Cantrill if (cmd == DP_PPOLL) { 1062*a5eb7107SBryan Cantrill void *setp = STRUCT_FGETP(dvpoll, dp_setp); 1063*a5eb7107SBryan Cantrill 1064*a5eb7107SBryan Cantrill if (setp != NULL) { 1065*a5eb7107SBryan Cantrill if (copyin(setp, &set, sizeof (set))) { 1066*a5eb7107SBryan Cantrill DP_REFRELE(dpep); 1067*a5eb7107SBryan Cantrill return (EFAULT); 1068*a5eb7107SBryan Cantrill } 1069*a5eb7107SBryan Cantrill 1070*a5eb7107SBryan Cantrill sigutok(&set, &kset); 1071*a5eb7107SBryan Cantrill ksetp = &kset; 1072*a5eb7107SBryan Cantrill 1073*a5eb7107SBryan Cantrill mutex_enter(&p->p_lock); 1074*a5eb7107SBryan Cantrill schedctl_finish_sigblock(t); 1075*a5eb7107SBryan Cantrill lwp->lwp_sigoldmask = t->t_hold; 1076*a5eb7107SBryan Cantrill t->t_hold = *ksetp; 1077*a5eb7107SBryan Cantrill t->t_flag |= T_TOMASK; 1078*a5eb7107SBryan Cantrill 1079*a5eb7107SBryan Cantrill /* 1080*a5eb7107SBryan Cantrill * Like ppoll() with a non-NULL sigset, we'll 1081*a5eb7107SBryan Cantrill * call cv_reltimedwait_sig() just to check for 1082*a5eb7107SBryan Cantrill * signals. This call will return immediately 1083*a5eb7107SBryan Cantrill * with either 0 (signalled) or -1 (no signal). 1084*a5eb7107SBryan Cantrill * There are some conditions whereby we can 1085*a5eb7107SBryan Cantrill * get 0 from cv_reltimedwait_sig() without 1086*a5eb7107SBryan Cantrill * a true signal (e.g., a directed stop), so 1087*a5eb7107SBryan Cantrill * we restore our signal mask in the unlikely 1088*a5eb7107SBryan Cantrill * event that lwp_cursig is 0. 
1089*a5eb7107SBryan Cantrill */ 1090*a5eb7107SBryan Cantrill if (!cv_reltimedwait_sig(&t->t_delay_cv, 1091*a5eb7107SBryan Cantrill &p->p_lock, 0, TR_CLOCK_TICK)) { 1092*a5eb7107SBryan Cantrill if (lwp->lwp_cursig == 0) { 1093*a5eb7107SBryan Cantrill t->t_hold = lwp->lwp_sigoldmask; 1094*a5eb7107SBryan Cantrill t->t_flag &= ~T_TOMASK; 1095*a5eb7107SBryan Cantrill } 1096*a5eb7107SBryan Cantrill 1097*a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); 1098*a5eb7107SBryan Cantrill 1099*a5eb7107SBryan Cantrill DP_REFRELE(dpep); 1100*a5eb7107SBryan Cantrill return (EINTR); 1101*a5eb7107SBryan Cantrill } 1102*a5eb7107SBryan Cantrill 1103*a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); 1104*a5eb7107SBryan Cantrill } 1105*a5eb7107SBryan Cantrill } 1106*a5eb7107SBryan Cantrill 11077c478bd9Sstevel@tonic-gate if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) { 11087c478bd9Sstevel@tonic-gate /* 11097c478bd9Sstevel@tonic-gate * We are just using DP_POLL to sleep, so 11107c478bd9Sstevel@tonic-gate * we don't any of the devpoll apparatus. 11117c478bd9Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 11127c478bd9Sstevel@tonic-gate */ 11137c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 1114*a5eb7107SBryan Cantrill if (deadline == 0) { 1115*a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 11167c478bd9Sstevel@tonic-gate return (0); 1117*a5eb7107SBryan Cantrill } 1118*a5eb7107SBryan Cantrill 11197c478bd9Sstevel@tonic-gate mutex_enter(&curthread->t_delay_lock); 1120cd1c8b85SMatthew Ahrens while ((error = 1121cd1c8b85SMatthew Ahrens cv_timedwait_sig_hrtime(&curthread->t_delay_cv, 1122cd1c8b85SMatthew Ahrens &curthread->t_delay_lock, deadline)) > 0) 11237c478bd9Sstevel@tonic-gate continue; 11247c478bd9Sstevel@tonic-gate mutex_exit(&curthread->t_delay_lock); 1125*a5eb7107SBryan Cantrill 1126*a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 1127*a5eb7107SBryan Cantrill 1128cd1c8b85SMatthew Ahrens return (error == 0 ? 
EINTR : 0); 11297c478bd9Sstevel@tonic-gate } 11307c478bd9Sstevel@tonic-gate 1131*a5eb7107SBryan Cantrill if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { 1132*a5eb7107SBryan Cantrill size = nfds * (fdsize = sizeof (epoll_event_t)); 1133*a5eb7107SBryan Cantrill } else { 1134*a5eb7107SBryan Cantrill size = nfds * (fdsize = sizeof (pollfd_t)); 1135*a5eb7107SBryan Cantrill } 1136*a5eb7107SBryan Cantrill 11377c478bd9Sstevel@tonic-gate /* 1138fe234e7cSMatt Amdur * XXX It would be nice not to have to alloc each time, but it 1139fe234e7cSMatt Amdur * requires another per thread structure hook. This can be 1140fe234e7cSMatt Amdur * implemented later if data suggests that it's necessary. 11417c478bd9Sstevel@tonic-gate */ 11427c478bd9Sstevel@tonic-gate if ((ps = curthread->t_pollstate) == NULL) { 11437c478bd9Sstevel@tonic-gate curthread->t_pollstate = pollstate_create(); 11447c478bd9Sstevel@tonic-gate ps = curthread->t_pollstate; 11457c478bd9Sstevel@tonic-gate } 1146*a5eb7107SBryan Cantrill 1147*a5eb7107SBryan Cantrill if (ps->ps_dpbufsize < size) { 11487c478bd9Sstevel@tonic-gate /* 1149*a5eb7107SBryan Cantrill * If nfds is larger than twice the current maximum 1150*a5eb7107SBryan Cantrill * open file count, we'll silently clamp it. This 1151*a5eb7107SBryan Cantrill * only limits our exposure to allocating an 1152*a5eb7107SBryan Cantrill * inordinate amount of kernel memory; it doesn't 1153*a5eb7107SBryan Cantrill * otherwise affect the semantics. (We have this 1154*a5eb7107SBryan Cantrill * check at twice the maximum instead of merely the 1155*a5eb7107SBryan Cantrill * maximum because some applications pass an nfds that 1156*a5eb7107SBryan Cantrill * is only slightly larger than their limit.) 
11577c478bd9Sstevel@tonic-gate */ 11587c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 1159*a5eb7107SBryan Cantrill if ((nfds >> 1) > p->p_fno_ctl) { 1160*a5eb7107SBryan Cantrill nfds = p->p_fno_ctl; 1161*a5eb7107SBryan Cantrill size = nfds * fdsize; 11627c478bd9Sstevel@tonic-gate } 11637c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 1164*a5eb7107SBryan Cantrill 1165*a5eb7107SBryan Cantrill if (ps->ps_dpbufsize < size) { 1166*a5eb7107SBryan Cantrill kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize); 1167*a5eb7107SBryan Cantrill ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP); 1168*a5eb7107SBryan Cantrill ps->ps_dpbufsize = size; 1169*a5eb7107SBryan Cantrill } 11707c478bd9Sstevel@tonic-gate } 11717c478bd9Sstevel@tonic-gate 11727c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 11737c478bd9Sstevel@tonic-gate for (;;) { 1174*a5eb7107SBryan Cantrill pcp->pc_flag &= ~PC_POLLWAKE; 1175*a5eb7107SBryan Cantrill 1176*a5eb7107SBryan Cantrill error = dp_pcache_poll(dpep, ps->ps_dpbuf, 1177*a5eb7107SBryan Cantrill pcp, nfds, &fdcnt); 11787c478bd9Sstevel@tonic-gate if (fdcnt > 0 || error != 0) 11797c478bd9Sstevel@tonic-gate break; 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* 11827c478bd9Sstevel@tonic-gate * A pollwake has happened since we polled cache. 11837c478bd9Sstevel@tonic-gate */ 1184*a5eb7107SBryan Cantrill if (pcp->pc_flag & PC_POLLWAKE) 11857c478bd9Sstevel@tonic-gate continue; 11867c478bd9Sstevel@tonic-gate 11877c478bd9Sstevel@tonic-gate /* 1188da6c28aaSamw * Sleep until we are notified, signaled, or timed out. 
11897c478bd9Sstevel@tonic-gate */ 1190cd1c8b85SMatthew Ahrens if (deadline == 0) { 1191cd1c8b85SMatthew Ahrens /* immediate timeout; do not check signals */ 11927c478bd9Sstevel@tonic-gate break; 1193cd1c8b85SMatthew Ahrens } 1194*a5eb7107SBryan Cantrill 1195*a5eb7107SBryan Cantrill if (!(pcp->pc_flag & PC_WRITEWANTED)) { 1196cd1c8b85SMatthew Ahrens error = cv_timedwait_sig_hrtime(&pcp->pc_cv, 1197cd1c8b85SMatthew Ahrens &pcp->pc_lock, deadline); 1198*a5eb7107SBryan Cantrill } else { 1199*a5eb7107SBryan Cantrill error = 1; 1200*a5eb7107SBryan Cantrill } 1201*a5eb7107SBryan Cantrill 1202*a5eb7107SBryan Cantrill if (error > 0 && (pcp->pc_flag & PC_WRITEWANTED)) { 1203*a5eb7107SBryan Cantrill /* 1204*a5eb7107SBryan Cantrill * We've been kicked off of our cv because a 1205*a5eb7107SBryan Cantrill * writer wants in. We're going to drop our 1206*a5eb7107SBryan Cantrill * reference count and then wait until the 1207*a5eb7107SBryan Cantrill * writer is gone -- at which point we'll 1208*a5eb7107SBryan Cantrill * reacquire the pc_lock and call into 1209*a5eb7107SBryan Cantrill * dp_pcache_poll() to get the updated state. 
1210*a5eb7107SBryan Cantrill */ 1211*a5eb7107SBryan Cantrill mutex_exit(&pcp->pc_lock); 1212*a5eb7107SBryan Cantrill 1213*a5eb7107SBryan Cantrill mutex_enter(&dpep->dpe_lock); 1214*a5eb7107SBryan Cantrill dpep->dpe_refcnt--; 1215*a5eb7107SBryan Cantrill cv_broadcast(&dpep->dpe_cv); 1216*a5eb7107SBryan Cantrill 1217*a5eb7107SBryan Cantrill while ((dpep->dpe_flag & DP_WRITER_PRESENT) || 1218*a5eb7107SBryan Cantrill (dpep->dpe_writerwait != 0)) { 1219*a5eb7107SBryan Cantrill error = cv_wait_sig_swap(&dpep->dpe_cv, 1220*a5eb7107SBryan Cantrill &dpep->dpe_lock); 1221*a5eb7107SBryan Cantrill } 1222*a5eb7107SBryan Cantrill 1223*a5eb7107SBryan Cantrill dpep->dpe_refcnt++; 1224*a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 1225*a5eb7107SBryan Cantrill mutex_enter(&pcp->pc_lock); 1226*a5eb7107SBryan Cantrill } 1227*a5eb7107SBryan Cantrill 12287c478bd9Sstevel@tonic-gate /* 12297c478bd9Sstevel@tonic-gate * If we were awakened by a signal or timeout 12307c478bd9Sstevel@tonic-gate * then break the loop, else poll again. 12317c478bd9Sstevel@tonic-gate */ 1232cd1c8b85SMatthew Ahrens if (error <= 0) { 1233cd1c8b85SMatthew Ahrens error = (error == 0) ? 
EINTR : 0; 12347c478bd9Sstevel@tonic-gate break; 1235cd1c8b85SMatthew Ahrens } else { 1236cd1c8b85SMatthew Ahrens error = 0; 12377c478bd9Sstevel@tonic-gate } 12387c478bd9Sstevel@tonic-gate } 12397c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 12407c478bd9Sstevel@tonic-gate 1241*a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 1242*a5eb7107SBryan Cantrill 12437c478bd9Sstevel@tonic-gate if (error == 0 && fdcnt > 0) { 1244*a5eb7107SBryan Cantrill if (copyout(ps->ps_dpbuf, 1245*a5eb7107SBryan Cantrill STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) { 12467c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 12477c478bd9Sstevel@tonic-gate return (EFAULT); 12487c478bd9Sstevel@tonic-gate } 12497c478bd9Sstevel@tonic-gate *rvalp = fdcnt; 12507c478bd9Sstevel@tonic-gate } 12517c478bd9Sstevel@tonic-gate break; 12527c478bd9Sstevel@tonic-gate } 12537c478bd9Sstevel@tonic-gate 12547c478bd9Sstevel@tonic-gate case DP_ISPOLLED: 12557c478bd9Sstevel@tonic-gate { 12567c478bd9Sstevel@tonic-gate pollfd_t pollfd; 12577c478bd9Sstevel@tonic-gate polldat_t *pdp; 12587c478bd9Sstevel@tonic-gate 12597c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 12607c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t)); 12617c478bd9Sstevel@tonic-gate if (error) { 12627c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 12637c478bd9Sstevel@tonic-gate return (EFAULT); 12647c478bd9Sstevel@tonic-gate } 12657c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 12667c478bd9Sstevel@tonic-gate if (pcp->pc_hash == NULL) { 12677c478bd9Sstevel@tonic-gate /* 12687c478bd9Sstevel@tonic-gate * No Need to search because no poll fd 12697c478bd9Sstevel@tonic-gate * has been cached. 
			 */
			mutex_exit(&pcp->pc_lock);
			DP_REFRELE(dpep);
			return (0);
		}
		if (pollfd.fd < 0) {
			/* Negative fds are never cached; report nothing. */
			mutex_exit(&pcp->pc_lock);
			break;
		}
		pdp = pcache_lookup_fd(pcp, pollfd.fd);
		if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
		    (pdp->pd_fp != NULL)) {
			/*
			 * The fd is cached and still associated with an open
			 * file.  NOTE(review): revents is filled from the
			 * cached interest mask (pd_events), not from a fresh
			 * poll of the fd -- confirm this is the intended
			 * DP_ISPOLLED contract.
			 */
			pollfd.revents = pdp->pd_events;
			if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) {
				mutex_exit(&pcp->pc_lock);
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			/* One fd reported as being polled. */
			*rvalp = 1;
		}
		mutex_exit(&pcp->pc_lock);
		break;
	}

	default:
		DP_REFRELE(dpep);
		return (EINVAL);
	}
	/* Drop the hold taken on entry to the ioctl. */
	DP_REFRELE(dpep);
	return (error);
}

/*
 * Poll entry point for the /dev/poll driver itself (i.e., the caller is
 * polling a /dev/poll fd).  This is not fully supported: in epoll
 * compatibility mode no events are reported (no error), otherwise POLLERR
 * is reported.
 */
/*ARGSUSED*/
static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	minor_t		minor;
	dp_entry_t	*dpep;

	minor = getminor(dev);

	/* Look up this minor's state under devpoll_lock. */
	mutex_enter(&devpoll_lock);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);

	/*
	 * Polling on a /dev/poll fd is not fully supported yet.
	 */
	if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
		/* no error in epoll compat. mode */
		*reventsp = 0;
	} else {
		*reventsp = POLLERR;
	}
	return (0);
}

/*
 * devpoll close should do enough clean up before the pollcache is deleted,
 * i.e., it should ensure no one still references the pollcache later.
 * There is no "permission" check in here. Any process having the last
 * reference of this /dev/poll fd can close.
 */
/*ARGSUSED*/
static int
dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		i;
	polldat_t	**hashtbl;
	polldat_t	*pdp;

	minor = getminor(dev);

	/*
	 * Remove the entry from the minor table first, so no new reference
	 * to this dp_entry_t can be obtained through the device.
	 */
	mutex_enter(&devpoll_lock);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	devpolltbl[minor] = NULL;
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	ASSERT(pcp != NULL);
	/*
	 * At this point, no other lwp can access this pollcache via the
	 * /dev/poll fd. This pollcache is going away, so do the clean
	 * up without the pc_lock.
	 */
	/*
	 * Walk every hash chain and detach each cached polldat from its
	 * pollhead, so drivers stop delivering wakeups to this cache.
	 */
	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
				pdp->pd_fp = NULL;
			}
		}
	}
	/*
	 * pollwakeup() may still interact with this pollcache. Wait until
	 * it is done.
	 */
	mutex_enter(&pcp->pc_no_exit);
	ASSERT(pcp->pc_busy >= 0);
	while (pcp->pc_busy > 0)
		cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
	mutex_exit(&pcp->pc_no_exit);
	/* Safe to tear down: no wakeups in flight, no fd references left. */
	pcache_destroy(pcp);
	ASSERT(dpep->dpe_refcnt == 0);
	kmem_free(dpep, sizeof (dp_entry_t));
	return (0);
}